mirror of
https://github.com/go-gitea/gitea.git
synced 2025-04-27 13:16:28 +00:00
Make SearchMode have default value and add comments (#33863)
* Make `SearchMode` have a default value if it is empty
* Add some comments for the "match" queries
* Fix a copy-paste mistake in `buildMatchQuery` (`db.go`)
* Add the missing `q.Analyzer = repoIndexerAnalyzer`; it was in the old code, although I do not see a real difference
This commit is contained in:
parent
45c4139134
commit
a0e0a30d23
@ -25,6 +25,7 @@ import (
|
|||||||
"code.gitea.io/gitea/modules/setting"
|
"code.gitea.io/gitea/modules/setting"
|
||||||
"code.gitea.io/gitea/modules/timeutil"
|
"code.gitea.io/gitea/modules/timeutil"
|
||||||
"code.gitea.io/gitea/modules/typesniffer"
|
"code.gitea.io/gitea/modules/typesniffer"
|
||||||
|
"code.gitea.io/gitea/modules/util"
|
||||||
|
|
||||||
"github.com/blevesearch/bleve/v2"
|
"github.com/blevesearch/bleve/v2"
|
||||||
analyzer_custom "github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
|
analyzer_custom "github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
|
||||||
@ -272,14 +273,18 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
|
|||||||
pathQuery.FieldVal = "Filename"
|
pathQuery.FieldVal = "Filename"
|
||||||
pathQuery.SetBoost(10)
|
pathQuery.SetBoost(10)
|
||||||
|
|
||||||
if opts.SearchMode == indexer.SearchModeExact {
|
searchMode := util.IfZero(opts.SearchMode, b.SupportedSearchModes()[0].ModeValue)
|
||||||
|
if searchMode == indexer.SearchModeExact {
|
||||||
|
// 1.21 used NewPrefixQuery, but it seems not working well, and later releases changed to NewMatchPhraseQuery
|
||||||
q := bleve.NewMatchPhraseQuery(opts.Keyword)
|
q := bleve.NewMatchPhraseQuery(opts.Keyword)
|
||||||
|
q.Analyzer = repoIndexerAnalyzer
|
||||||
q.FieldVal = "Content"
|
q.FieldVal = "Content"
|
||||||
contentQuery = q
|
contentQuery = q
|
||||||
} else /* words */ {
|
} else /* words */ {
|
||||||
q := bleve.NewMatchQuery(opts.Keyword)
|
q := bleve.NewMatchQuery(opts.Keyword)
|
||||||
q.FieldVal = "Content"
|
q.FieldVal = "Content"
|
||||||
if opts.SearchMode == indexer.SearchModeFuzzy {
|
q.Analyzer = repoIndexerAnalyzer
|
||||||
|
if searchMode == indexer.SearchModeFuzzy {
|
||||||
// this logic doesn't seem right, it is only used to pass the test-case `Keyword: "dESCRIPTION"`, which doesn't seem to be a real-life use-case.
|
// this logic doesn't seem right, it is only used to pass the test-case `Keyword: "dESCRIPTION"`, which doesn't seem to be a real-life use-case.
|
||||||
q.Fuzziness = inner_bleve.GuessFuzzinessByKeyword(opts.Keyword)
|
q.Fuzziness = inner_bleve.GuessFuzzinessByKeyword(opts.Keyword)
|
||||||
} else {
|
} else {
|
||||||
|
@ -25,6 +25,7 @@ import (
|
|||||||
"code.gitea.io/gitea/modules/setting"
|
"code.gitea.io/gitea/modules/setting"
|
||||||
"code.gitea.io/gitea/modules/timeutil"
|
"code.gitea.io/gitea/modules/timeutil"
|
||||||
"code.gitea.io/gitea/modules/typesniffer"
|
"code.gitea.io/gitea/modules/typesniffer"
|
||||||
|
"code.gitea.io/gitea/modules/util"
|
||||||
|
|
||||||
"github.com/go-enry/go-enry/v2"
|
"github.com/go-enry/go-enry/v2"
|
||||||
"github.com/olivere/elastic/v7"
|
"github.com/olivere/elastic/v7"
|
||||||
@ -365,7 +366,9 @@ func extractAggs(searchResult *elastic.SearchResult) []*internal.SearchResultLan
|
|||||||
// Search searches for codes and language stats by given conditions.
|
// Search searches for codes and language stats by given conditions.
|
||||||
func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
|
func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
|
||||||
var contentQuery elastic.Query
|
var contentQuery elastic.Query
|
||||||
if opts.SearchMode == indexer.SearchModeExact {
|
searchMode := util.IfZero(opts.SearchMode, b.SupportedSearchModes()[0].ModeValue)
|
||||||
|
if searchMode == indexer.SearchModeExact {
|
||||||
|
// 1.21 used NewMultiMatchQuery().Type(esMultiMatchTypePhrasePrefix), but later releases changed to NewMatchPhraseQuery
|
||||||
contentQuery = elastic.NewMatchPhraseQuery("content", opts.Keyword)
|
contentQuery = elastic.NewMatchPhraseQuery("content", opts.Keyword)
|
||||||
} else /* words */ {
|
} else /* words */ {
|
||||||
contentQuery = elastic.NewMultiMatchQuery("content", opts.Keyword).Type(esMultiMatchTypeBestFields).Operator("and")
|
contentQuery = elastic.NewMultiMatchQuery("content", opts.Keyword).Type(esMultiMatchTypeBestFields).Operator("and")
|
||||||
|
@ -17,6 +17,7 @@ import (
|
|||||||
"code.gitea.io/gitea/modules/indexer/code/internal"
|
"code.gitea.io/gitea/modules/indexer/code/internal"
|
||||||
"code.gitea.io/gitea/modules/setting"
|
"code.gitea.io/gitea/modules/setting"
|
||||||
"code.gitea.io/gitea/modules/test"
|
"code.gitea.io/gitea/modules/test"
|
||||||
|
"code.gitea.io/gitea/modules/util"
|
||||||
|
|
||||||
_ "code.gitea.io/gitea/models"
|
_ "code.gitea.io/gitea/models"
|
||||||
_ "code.gitea.io/gitea/models/actions"
|
_ "code.gitea.io/gitea/models/actions"
|
||||||
@ -240,7 +241,7 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) {
|
|||||||
total, res, langs, err := indexer.Search(t.Context(), &internal.SearchOptions{
|
total, res, langs, err := indexer.Search(t.Context(), &internal.SearchOptions{
|
||||||
RepoIDs: kw.RepoIDs,
|
RepoIDs: kw.RepoIDs,
|
||||||
Keyword: kw.Keyword,
|
Keyword: kw.Keyword,
|
||||||
SearchMode: kw.SearchMode,
|
SearchMode: util.IfZero(kw.SearchMode, indexer_module.SearchModeWords),
|
||||||
Paginator: &db.ListOptions{
|
Paginator: &db.ListOptions{
|
||||||
Page: 1,
|
Page: 1,
|
||||||
PageSize: 10,
|
PageSize: 10,
|
||||||
|
@ -10,6 +10,7 @@ import (
|
|||||||
indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
|
indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
|
||||||
inner_bleve "code.gitea.io/gitea/modules/indexer/internal/bleve"
|
inner_bleve "code.gitea.io/gitea/modules/indexer/internal/bleve"
|
||||||
"code.gitea.io/gitea/modules/indexer/issues/internal"
|
"code.gitea.io/gitea/modules/indexer/issues/internal"
|
||||||
|
"code.gitea.io/gitea/modules/util"
|
||||||
|
|
||||||
"github.com/blevesearch/bleve/v2"
|
"github.com/blevesearch/bleve/v2"
|
||||||
"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
|
"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
|
||||||
@ -162,9 +163,10 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
|
|||||||
var queries []query.Query
|
var queries []query.Query
|
||||||
|
|
||||||
if options.Keyword != "" {
|
if options.Keyword != "" {
|
||||||
if options.SearchMode == indexer.SearchModeWords || options.SearchMode == indexer.SearchModeFuzzy {
|
searchMode := util.IfZero(options.SearchMode, b.SupportedSearchModes()[0].ModeValue)
|
||||||
|
if searchMode == indexer.SearchModeWords || searchMode == indexer.SearchModeFuzzy {
|
||||||
fuzziness := 0
|
fuzziness := 0
|
||||||
if options.SearchMode == indexer.SearchModeFuzzy {
|
if searchMode == indexer.SearchModeFuzzy {
|
||||||
fuzziness = inner_bleve.GuessFuzzinessByKeyword(options.Keyword)
|
fuzziness = inner_bleve.GuessFuzzinessByKeyword(options.Keyword)
|
||||||
}
|
}
|
||||||
queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
|
queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
|
||||||
|
@ -13,6 +13,7 @@ import (
|
|||||||
indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
|
indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
|
||||||
inner_db "code.gitea.io/gitea/modules/indexer/internal/db"
|
inner_db "code.gitea.io/gitea/modules/indexer/internal/db"
|
||||||
"code.gitea.io/gitea/modules/indexer/issues/internal"
|
"code.gitea.io/gitea/modules/indexer/issues/internal"
|
||||||
|
"code.gitea.io/gitea/modules/util"
|
||||||
|
|
||||||
"xorm.io/builder"
|
"xorm.io/builder"
|
||||||
)
|
)
|
||||||
@ -46,7 +47,7 @@ func (i *Indexer) Delete(_ context.Context, _ ...int64) error {
|
|||||||
|
|
||||||
func buildMatchQuery(mode indexer.SearchModeType, colName, keyword string) builder.Cond {
|
func buildMatchQuery(mode indexer.SearchModeType, colName, keyword string) builder.Cond {
|
||||||
if mode == indexer.SearchModeExact {
|
if mode == indexer.SearchModeExact {
|
||||||
return db.BuildCaseInsensitiveLike("issue.name", keyword)
|
return db.BuildCaseInsensitiveLike(colName, keyword)
|
||||||
}
|
}
|
||||||
|
|
||||||
// match words
|
// match words
|
||||||
@ -84,16 +85,16 @@ func (i *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
|
|||||||
repoCond = builder.Eq{"repo_id": options.RepoIDs[0]}
|
repoCond = builder.Eq{"repo_id": options.RepoIDs[0]}
|
||||||
}
|
}
|
||||||
subQuery := builder.Select("id").From("issue").Where(repoCond)
|
subQuery := builder.Select("id").From("issue").Where(repoCond)
|
||||||
|
searchMode := util.IfZero(options.SearchMode, i.SupportedSearchModes()[0].ModeValue)
|
||||||
cond = builder.Or(
|
cond = builder.Or(
|
||||||
buildMatchQuery(options.SearchMode, "issue.name", options.Keyword),
|
buildMatchQuery(searchMode, "issue.name", options.Keyword),
|
||||||
buildMatchQuery(options.SearchMode, "issue.content", options.Keyword),
|
buildMatchQuery(searchMode, "issue.content", options.Keyword),
|
||||||
builder.In("issue.id", builder.Select("issue_id").
|
builder.In("issue.id", builder.Select("issue_id").
|
||||||
From("comment").
|
From("comment").
|
||||||
Where(builder.And(
|
Where(builder.And(
|
||||||
builder.Eq{"type": issue_model.CommentTypeComment},
|
builder.Eq{"type": issue_model.CommentTypeComment},
|
||||||
builder.In("issue_id", subQuery),
|
builder.In("issue_id", subQuery),
|
||||||
buildMatchQuery(options.SearchMode, "content", options.Keyword),
|
buildMatchQuery(searchMode, "content", options.Keyword),
|
||||||
)),
|
)),
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
@ -14,6 +14,7 @@ import (
|
|||||||
indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
|
indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
|
||||||
inner_elasticsearch "code.gitea.io/gitea/modules/indexer/internal/elasticsearch"
|
inner_elasticsearch "code.gitea.io/gitea/modules/indexer/internal/elasticsearch"
|
||||||
"code.gitea.io/gitea/modules/indexer/issues/internal"
|
"code.gitea.io/gitea/modules/indexer/issues/internal"
|
||||||
|
"code.gitea.io/gitea/modules/util"
|
||||||
|
|
||||||
"github.com/olivere/elastic/v7"
|
"github.com/olivere/elastic/v7"
|
||||||
)
|
)
|
||||||
@ -152,7 +153,8 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
|
|||||||
query := elastic.NewBoolQuery()
|
query := elastic.NewBoolQuery()
|
||||||
|
|
||||||
if options.Keyword != "" {
|
if options.Keyword != "" {
|
||||||
if options.SearchMode == indexer.SearchModeExact {
|
searchMode := util.IfZero(options.SearchMode, b.SupportedSearchModes()[0].ModeValue)
|
||||||
|
if searchMode == indexer.SearchModeExact {
|
||||||
query.Must(elastic.NewMultiMatchQuery(options.Keyword, "title", "content", "comments").Type(esMultiMatchTypePhrasePrefix))
|
query.Must(elastic.NewMultiMatchQuery(options.Keyword, "title", "content", "comments").Type(esMultiMatchTypePhrasePrefix))
|
||||||
} else /* words */ {
|
} else /* words */ {
|
||||||
query.Must(elastic.NewMultiMatchQuery(options.Keyword, "title", "content", "comments").Type(esMultiMatchTypeBestFields).Operator("and"))
|
query.Must(elastic.NewMultiMatchQuery(options.Keyword, "title", "content", "comments").Type(esMultiMatchTypeBestFields).Operator("and"))
|
||||||
|
@ -282,7 +282,7 @@ const (
|
|||||||
|
|
||||||
// SearchIssues search issues by options.
|
// SearchIssues search issues by options.
|
||||||
func SearchIssues(ctx context.Context, opts *SearchOptions) ([]int64, int64, error) {
|
func SearchIssues(ctx context.Context, opts *SearchOptions) ([]int64, int64, error) {
|
||||||
indexer := *globalIndexer.Load()
|
ix := *globalIndexer.Load()
|
||||||
|
|
||||||
if opts.Keyword == "" || opts.IsKeywordNumeric() {
|
if opts.Keyword == "" || opts.IsKeywordNumeric() {
|
||||||
// This is a conservative shortcut.
|
// This is a conservative shortcut.
|
||||||
@ -291,10 +291,9 @@ func SearchIssues(ctx context.Context, opts *SearchOptions) ([]int64, int64, err
|
|||||||
// So if the user creates an issue and list issues immediately, the issue may not be listed because the indexer needs time to index the issue.
|
// So if the user creates an issue and list issues immediately, the issue may not be listed because the indexer needs time to index the issue.
|
||||||
// Even worse, the external indexer like elastic search may not be available for a while,
|
// Even worse, the external indexer like elastic search may not be available for a while,
|
||||||
// and the user may not be able to list issues completely until it is available again.
|
// and the user may not be able to list issues completely until it is available again.
|
||||||
indexer = db.NewIndexer()
|
ix = db.NewIndexer()
|
||||||
}
|
}
|
||||||
|
result, err := ix.Search(ctx, opts)
|
||||||
result, err := indexer.Search(ctx, opts)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, 0, err
|
return nil, 0, err
|
||||||
}
|
}
|
||||||
|
@ -82,9 +82,11 @@ func searchIssueWithKeyword(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for _, test := range tests {
|
for _, test := range tests {
|
||||||
|
t.Run(test.opts.Keyword, func(t *testing.T) {
|
||||||
issueIDs, _, err := SearchIssues(t.Context(), &test.opts)
|
issueIDs, _, err := SearchIssues(t.Context(), &test.opts)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
assert.Equal(t, test.expectedIDs, issueIDs)
|
assert.Equal(t, test.expectedIDs, issueIDs)
|
||||||
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user