[FEAT] Support Include/Exclude Filters for Grep (#3058)

Fixes `TestSearchRepo` failing occasionally.

Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/3058
Reviewed-by: Gusted <gusted@noreply.codeberg.org>
Co-authored-by: Shiny Nematoda <snematoda.751k2@aleeas.com>
Co-committed-by: Shiny Nematoda <snematoda.751k2@aleeas.com>
(cherry picked from commit baac15f316)
This commit is contained in:
Shiny Nematoda 2024-04-06 13:25:39 +00:00 committed by Gusted
parent 52d217ec50
commit 328f694a33
3 changed files with 67 additions and 16 deletions

View file

@ -13,6 +13,8 @@ import (
"os"
"strconv"
"strings"
"code.gitea.io/gitea/modules/setting"
)
type GrepResult struct {
@ -58,7 +60,15 @@ func GrepSearch(ctx context.Context, repo *Repository, search string, opts GrepO
} else {
cmd.AddOptionValues("-e", strings.TrimLeft(search, "-"))
}
cmd.AddDynamicArguments(cmp.Or(opts.RefName, "HEAD"))
// pathspec
files := make([]string, 0, len(setting.Indexer.IncludePatterns)+len(setting.Indexer.ExcludePatterns))
for _, expr := range setting.Indexer.IncludePatterns {
files = append(files, expr.Pattern())
}
for _, expr := range setting.Indexer.ExcludePatterns {
files = append(files, ":^"+expr.Pattern())
}
cmd.AddDynamicArguments(cmp.Or(opts.RefName, "HEAD")).AddDashesAndList(files...)
opts.MaxResultLimit = cmp.Or(opts.MaxResultLimit, 50)
stderr := bytes.Buffer{}
err = cmd.Run(&RunOpts{

View file

@ -30,8 +30,8 @@ var Indexer = struct {
RepoConnStr string
RepoIndexerName string
MaxIndexerFileSize int64
IncludePatterns []glob.Glob
ExcludePatterns []glob.Glob
IncludePatterns []Glob
ExcludePatterns []Glob
ExcludeVendored bool
}{
IssueType: "bleve",
@ -50,6 +50,19 @@ var Indexer = struct {
ExcludeVendored: true,
}
// Glob bundles a compiled glob.Glob with a pattern string, so that the
// textual pattern stays available (via Pattern) next to the matcher
// (via Match).
type Glob struct {
glob glob.Glob // compiled matcher that Match delegates to
pattern string // pattern text returned by Pattern
}
// Match reports whether s matches the glob by delegating to the wrapped
// glob.Glob's Match method.
func (g *Glob) Match(s string) bool {
return g.glob.Match(s)
}
// Pattern returns the pattern string stored in the Glob.
func (g *Glob) Pattern() string {
return g.pattern
}
func loadIndexerFrom(rootCfg ConfigProvider) {
sec := rootCfg.Section("indexer")
Indexer.IssueType = sec.Key("ISSUE_INDEXER_TYPE").MustString("bleve")
@ -90,15 +103,15 @@ func loadIndexerFrom(rootCfg ConfigProvider) {
}
// IndexerGlobFromString parses a comma-separated list of patterns and returns a Glob slice suited for repo indexing.
func IndexerGlobFromString(globstr string) []glob.Glob {
extarr := make([]glob.Glob, 0, 10)
func IndexerGlobFromString(globstr string) []Glob {
extarr := make([]Glob, 0, 10)
for _, expr := range strings.Split(strings.ToLower(globstr), ",") {
expr = strings.TrimSpace(expr)
if expr != "" {
if g, err := glob.Compile(expr, '.', '/'); err != nil {
log.Info("Invalid glob expression '%s' (skipped): %v", expr, err)
} else {
extarr = append(extarr, g)
extarr = append(extarr, Glob{glob: g, pattern: expr})
}
}
}

View file

@ -11,6 +11,7 @@ import (
repo_model "code.gitea.io/gitea/models/repo"
code_indexer "code.gitea.io/gitea/modules/indexer/code"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/test"
"code.gitea.io/gitea/tests"
"github.com/PuerkitoBio/goquery"
@ -26,30 +27,57 @@ func resultFilenames(t testing.TB, doc *HTMLDoc) []string {
return result
}
func TestSearchRepo(t *testing.T) {
// TestSearchRepoIndexer runs testSearchRepo with the indexer flag set to true.
func TestSearchRepoIndexer(t *testing.T) {
testSearchRepo(t, true)
}
// TestSearchRepoNoIndexer runs testSearchRepo with the indexer flag set to false.
func TestSearchRepoNoIndexer(t *testing.T) {
testSearchRepo(t, false)
}
// testSearchRepo exercises the repository search pages of the user2/repo1 and
// user2/glob fixtures through testSearch.
//
// The indexer argument is mocked into setting.Indexer.RepoIndexerEnabled via
// test.MockVariableValue; the function it returns is deferred (presumably
// restoring the previous value when the test finishes). The include and
// exclude patterns are set to "**.txt" and "**/y/**" before the glob
// repository is searched, and the expected results for the fuzzy queries
// differ between the indexer and non-indexer branches below.
func testSearchRepo(t *testing.T, indexer bool) {
defer tests.PrepareTestEnv(t)()
defer test.MockVariableValue(&setting.Indexer.RepoIndexerEnabled, indexer)()
repo, err := repo_model.GetRepositoryByOwnerAndName(db.DefaultContext, "user2", "repo1")
assert.NoError(t, err)
code_indexer.UpdateRepoIndexer(repo)
if indexer {
code_indexer.UpdateRepoIndexer(repo)
}
testSearch(t, "/user2/repo1/search?q=Description&page=1", []string{"README.md"})
setting.Indexer.IncludePatterns = setting.IndexerGlobFromString("**.txt")
setting.Indexer.ExcludePatterns = setting.IndexerGlobFromString("**/y/**")
defer test.MockVariableValue(&setting.Indexer.IncludePatterns, setting.IndexerGlobFromString("**.txt"))()
defer test.MockVariableValue(&setting.Indexer.ExcludePatterns, setting.IndexerGlobFromString("**/y/**"))()
repo, err = repo_model.GetRepositoryByOwnerAndName(db.DefaultContext, "user2", "glob")
assert.NoError(t, err)
code_indexer.UpdateRepoIndexer(repo)
if indexer {
code_indexer.UpdateRepoIndexer(repo)
}
testSearch(t, "/user2/glob/search?q=loren&page=1", []string{"a.txt"})
testSearch(t, "/user2/glob/search?q=loren&page=1&t=match", []string{"a.txt"})
testSearch(t, "/user2/glob/search?q=file3&page=1", []string{"x/b.txt", "a.txt"})
testSearch(t, "/user2/glob/search?q=file3&page=1&t=match", []string{"x/b.txt", "a.txt"})
testSearch(t, "/user2/glob/search?q=file4&page=1&t=match", []string{"x/b.txt", "a.txt"})
testSearch(t, "/user2/glob/search?q=file5&page=1&t=match", []string{"x/b.txt", "a.txt"})
testSearch(t, "/user2/glob/search?q=loren&page=1&fuzzy=false", []string{"a.txt"})
if indexer {
// fuzzy search: matches both file3 (x/b.txt) and file1 (a.txt)
// when indexer is enabled
testSearch(t, "/user2/glob/search?q=file3&page=1", []string{"x/b.txt", "a.txt"})
testSearch(t, "/user2/glob/search?q=file4&page=1", []string{"x/b.txt", "a.txt"})
testSearch(t, "/user2/glob/search?q=file5&page=1", []string{"x/b.txt", "a.txt"})
} else {
// fuzzy search: OR of all the keywords
// when indexer is disabled
testSearch(t, "/user2/glob/search?q=file3+file1&page=1", []string{"a.txt", "x/b.txt"})
testSearch(t, "/user2/glob/search?q=file4&page=1", []string{})
testSearch(t, "/user2/glob/search?q=file5&page=1", []string{})
}
testSearch(t, "/user2/glob/search?q=file3&page=1&fuzzy=false", []string{"x/b.txt"})
testSearch(t, "/user2/glob/search?q=file4&page=1&fuzzy=false", []string{})
testSearch(t, "/user2/glob/search?q=file5&page=1&fuzzy=false", []string{})
}
func testSearch(t *testing.T, url string, expected []string) {