From 966975a3e0e3c5d821920b458651250671652f1f Mon Sep 17 00:00:00 2001 From: Shiny Nematoda Date: Fri, 26 Apr 2024 08:08:47 +0000 Subject: [PATCH] [FIX] Set max fuzziness to 2 for bleve (#3444) closes #3443 regression from ab5f0b7558 Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/3444 Reviewed-by: Otto Co-authored-by: Shiny Nematoda Co-committed-by: Shiny Nematoda (cherry picked from commit a641ebf2213b8b69698f898ad76ccb4a52dd6cf6) --- modules/indexer/code/bleve/bleve.go | 4 +++- modules/indexer/code/indexer_test.go | 6 ++++++ modules/indexer/issues/bleve/bleve.go | 4 +++- modules/indexer/issues/internal/tests/tests.go | 14 ++++++++++++++ 4 files changed, 26 insertions(+), 2 deletions(-) diff --git a/modules/indexer/code/bleve/bleve.go b/modules/indexer/code/bleve/bleve.go index c607d780ef..ff0e37ca29 100644 --- a/modules/indexer/code/bleve/bleve.go +++ b/modules/indexer/code/bleve/bleve.go @@ -41,6 +41,8 @@ const ( maxBatchSize = 16 // fuzzyDenominator determines the levenshtein distance per each character of a keyword fuzzyDenominator = 4 + // see https://github.com/blevesearch/bleve/issues/1563#issuecomment-786822311 + maxFuzziness = 2 ) func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error { @@ -246,7 +248,7 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int phraseQuery.Analyzer = repoIndexerAnalyzer keywordQuery = phraseQuery if opts.IsKeywordFuzzy { - phraseQuery.Fuzziness = len(opts.Keyword) / fuzzyDenominator + phraseQuery.Fuzziness = min(maxFuzziness, len(opts.Keyword)/fuzzyDenominator) } if len(opts.RepoIDs) > 0 { diff --git a/modules/indexer/code/indexer_test.go b/modules/indexer/code/indexer_test.go index 8975c5ce40..2d013e08ed 100644 --- a/modules/indexer/code/indexer_test.go +++ b/modules/indexer/code/indexer_test.go @@ -49,6 +49,12 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) { IDs: []int64{}, Langs: 0, }, + { + RepoIDs: nil, + Keyword: "Description for", + IDs: []int64{repoID}, + Langs: 1, + }, { RepoIDs: nil, Keyword: "repo1", diff --git a/modules/indexer/issues/bleve/bleve.go b/modules/indexer/issues/bleve/bleve.go index 1f54be721b..e3193dbc02 100644 --- a/modules/indexer/issues/bleve/bleve.go +++ b/modules/indexer/issues/bleve/bleve.go @@ -39,6 +39,8 @@ const ( maxBatchSize = 16 // fuzzyDenominator determines the levenshtein distance per each character of a keyword fuzzyDenominator = 4 + // see https://github.com/blevesearch/bleve/issues/1563#issuecomment-786822311 + maxFuzziness = 2 ) // IndexerData an update to the issue indexer @@ -162,7 +164,7 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) ( if options.Keyword != "" { fuzziness := 0 if options.IsFuzzyKeyword { - fuzziness = len(options.Keyword) / fuzzyDenominator + fuzziness = min(maxFuzziness, len(options.Keyword)/fuzzyDenominator) } queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{ diff --git a/modules/indexer/issues/internal/tests/tests.go b/modules/indexer/issues/internal/tests/tests.go index 7f32876d80..7144174087 100644 --- a/modules/indexer/issues/internal/tests/tests.go +++ b/modules/indexer/issues/internal/tests/tests.go @@ -130,6 +130,20 @@ var cases = []*testIndexerCase{ ExpectedIDs: []int64{1002, 1001, 1000}, ExpectedTotal: 3, }, + { + Name: "Keyword Fuzzy", + ExtraData: []*internal.IndexerData{ + {ID: 1000, Title: "hi hello world"}, + {ID: 1001, Content: "hi hello world"}, + {ID: 1002, Comments: []string{"hi", "hello world"}}, + }, + SearchOptions: &internal.SearchOptions{ + Keyword: "hello wrold", + IsFuzzyKeyword: true, + }, + ExpectedIDs: []int64{1002, 1001, 1000}, + ExpectedTotal: 3, + }, { Name: "RepoIDs", ExtraData: []*internal.IndexerData{