mirror of https://github.com/go-gitea/gitea.git
Refactor code_indexer to use a SearchOptions struct for PerformSearch (#29724)
This is similar to how it is already done for the issue_indexer. --- *Sponsored by Kithara Software GmbH*
This commit is contained in:
parent
e0ea3811c4
commit
1262ff6734
|
@ -142,7 +142,7 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if size, err = strconv.ParseInt(strings.TrimSpace(stdout), 10, 64); err != nil {
|
if size, err = strconv.ParseInt(strings.TrimSpace(stdout), 10, 64); err != nil {
|
||||||
return fmt.Errorf("Misformatted git cat-file output: %w", err)
|
return fmt.Errorf("misformatted git cat-file output: %w", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -233,26 +233,26 @@ func (b *Indexer) Delete(_ context.Context, repoID int64) error {
|
||||||
|
|
||||||
// Search searches for files in the specified repo.
|
// Search searches for files in the specified repo.
|
||||||
// Returns the matching file-paths
|
// Returns the matching file-paths
|
||||||
func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
|
func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
|
||||||
var (
|
var (
|
||||||
indexerQuery query.Query
|
indexerQuery query.Query
|
||||||
keywordQuery query.Query
|
keywordQuery query.Query
|
||||||
)
|
)
|
||||||
|
|
||||||
if isFuzzy {
|
if opts.IsKeywordFuzzy {
|
||||||
phraseQuery := bleve.NewMatchPhraseQuery(keyword)
|
phraseQuery := bleve.NewMatchPhraseQuery(opts.Keyword)
|
||||||
phraseQuery.FieldVal = "Content"
|
phraseQuery.FieldVal = "Content"
|
||||||
phraseQuery.Analyzer = repoIndexerAnalyzer
|
phraseQuery.Analyzer = repoIndexerAnalyzer
|
||||||
keywordQuery = phraseQuery
|
keywordQuery = phraseQuery
|
||||||
} else {
|
} else {
|
||||||
prefixQuery := bleve.NewPrefixQuery(keyword)
|
prefixQuery := bleve.NewPrefixQuery(opts.Keyword)
|
||||||
prefixQuery.FieldVal = "Content"
|
prefixQuery.FieldVal = "Content"
|
||||||
keywordQuery = prefixQuery
|
keywordQuery = prefixQuery
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(repoIDs) > 0 {
|
if len(opts.RepoIDs) > 0 {
|
||||||
repoQueries := make([]query.Query, 0, len(repoIDs))
|
repoQueries := make([]query.Query, 0, len(opts.RepoIDs))
|
||||||
for _, repoID := range repoIDs {
|
for _, repoID := range opts.RepoIDs {
|
||||||
repoQueries = append(repoQueries, inner_bleve.NumericEqualityQuery(repoID, "RepoID"))
|
repoQueries = append(repoQueries, inner_bleve.NumericEqualityQuery(repoID, "RepoID"))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -266,8 +266,8 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword
|
||||||
|
|
||||||
// Save for reuse without language filter
|
// Save for reuse without language filter
|
||||||
facetQuery := indexerQuery
|
facetQuery := indexerQuery
|
||||||
if len(language) > 0 {
|
if len(opts.Language) > 0 {
|
||||||
languageQuery := bleve.NewMatchQuery(language)
|
languageQuery := bleve.NewMatchQuery(opts.Language)
|
||||||
languageQuery.FieldVal = "Language"
|
languageQuery.FieldVal = "Language"
|
||||||
languageQuery.Analyzer = analyzer_keyword.Name
|
languageQuery.Analyzer = analyzer_keyword.Name
|
||||||
|
|
||||||
|
@ -277,12 +277,12 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
from := (page - 1) * pageSize
|
from, pageSize := opts.GetSkipTake()
|
||||||
searchRequest := bleve.NewSearchRequestOptions(indexerQuery, pageSize, from, false)
|
searchRequest := bleve.NewSearchRequestOptions(indexerQuery, pageSize, from, false)
|
||||||
searchRequest.Fields = []string{"Content", "RepoID", "Language", "CommitID", "UpdatedAt"}
|
searchRequest.Fields = []string{"Content", "RepoID", "Language", "CommitID", "UpdatedAt"}
|
||||||
searchRequest.IncludeLocations = true
|
searchRequest.IncludeLocations = true
|
||||||
|
|
||||||
if len(language) == 0 {
|
if len(opts.Language) == 0 {
|
||||||
searchRequest.AddFacet("languages", bleve.NewFacetRequest("Language", 10))
|
searchRequest.AddFacet("languages", bleve.NewFacetRequest("Language", 10))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -326,7 +326,7 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword
|
||||||
}
|
}
|
||||||
|
|
||||||
searchResultLanguages := make([]*internal.SearchResultLanguages, 0, 10)
|
searchResultLanguages := make([]*internal.SearchResultLanguages, 0, 10)
|
||||||
if len(language) > 0 {
|
if len(opts.Language) > 0 {
|
||||||
// Use separate query to go get all language counts
|
// Use separate query to go get all language counts
|
||||||
facetRequest := bleve.NewSearchRequestOptions(facetQuery, 1, 0, false)
|
facetRequest := bleve.NewSearchRequestOptions(facetQuery, 1, 0, false)
|
||||||
facetRequest.Fields = []string{"Content", "RepoID", "Language", "CommitID", "UpdatedAt"}
|
facetRequest.Fields = []string{"Content", "RepoID", "Language", "CommitID", "UpdatedAt"}
|
||||||
|
|
|
@ -281,18 +281,18 @@ func extractAggs(searchResult *elastic.SearchResult) []*internal.SearchResultLan
|
||||||
}
|
}
|
||||||
|
|
||||||
// Search searches for codes and language stats by given conditions.
|
// Search searches for codes and language stats by given conditions.
|
||||||
func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
|
func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
|
||||||
searchType := esMultiMatchTypePhrasePrefix
|
searchType := esMultiMatchTypePhrasePrefix
|
||||||
if isFuzzy {
|
if opts.IsKeywordFuzzy {
|
||||||
searchType = esMultiMatchTypeBestFields
|
searchType = esMultiMatchTypeBestFields
|
||||||
}
|
}
|
||||||
|
|
||||||
kwQuery := elastic.NewMultiMatchQuery(keyword, "content").Type(searchType)
|
kwQuery := elastic.NewMultiMatchQuery(opts.Keyword, "content").Type(searchType)
|
||||||
query := elastic.NewBoolQuery()
|
query := elastic.NewBoolQuery()
|
||||||
query = query.Must(kwQuery)
|
query = query.Must(kwQuery)
|
||||||
if len(repoIDs) > 0 {
|
if len(opts.RepoIDs) > 0 {
|
||||||
repoStrs := make([]any, 0, len(repoIDs))
|
repoStrs := make([]any, 0, len(opts.RepoIDs))
|
||||||
for _, repoID := range repoIDs {
|
for _, repoID := range opts.RepoIDs {
|
||||||
repoStrs = append(repoStrs, repoID)
|
repoStrs = append(repoStrs, repoID)
|
||||||
}
|
}
|
||||||
repoQuery := elastic.NewTermsQuery("repo_id", repoStrs...)
|
repoQuery := elastic.NewTermsQuery("repo_id", repoStrs...)
|
||||||
|
@ -300,16 +300,12 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword
|
||||||
}
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
start int
|
start, pageSize = opts.GetSkipTake()
|
||||||
kw = "<em>" + keyword + "</em>"
|
kw = "<em>" + opts.Keyword + "</em>"
|
||||||
aggregation = elastic.NewTermsAggregation().Field("language").Size(10).OrderByCountDesc()
|
aggregation = elastic.NewTermsAggregation().Field("language").Size(10).OrderByCountDesc()
|
||||||
)
|
)
|
||||||
|
|
||||||
if page > 0 {
|
if len(opts.Language) == 0 {
|
||||||
start = (page - 1) * pageSize
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(language) == 0 {
|
|
||||||
searchResult, err := b.inner.Client.Search().
|
searchResult, err := b.inner.Client.Search().
|
||||||
Index(b.inner.VersionedIndexName()).
|
Index(b.inner.VersionedIndexName()).
|
||||||
Aggregation("language", aggregation).
|
Aggregation("language", aggregation).
|
||||||
|
@ -330,7 +326,7 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword
|
||||||
return convertResult(searchResult, kw, pageSize)
|
return convertResult(searchResult, kw, pageSize)
|
||||||
}
|
}
|
||||||
|
|
||||||
langQuery := elastic.NewMatchQuery("language", language)
|
langQuery := elastic.NewMatchQuery("language", opts.Language)
|
||||||
countResult, err := b.inner.Client.Search().
|
countResult, err := b.inner.Client.Search().
|
||||||
Index(b.inner.VersionedIndexName()).
|
Index(b.inner.VersionedIndexName()).
|
||||||
Aggregation("language", aggregation).
|
Aggregation("language", aggregation).
|
||||||
|
|
|
@ -32,7 +32,7 @@ func getRepoChanges(ctx context.Context, repo *repo_model.Repository, revision s
|
||||||
|
|
||||||
needGenesis := len(status.CommitSha) == 0
|
needGenesis := len(status.CommitSha) == 0
|
||||||
if !needGenesis {
|
if !needGenesis {
|
||||||
hasAncestorCmd := git.NewCommand(ctx, "merge-base").AddDynamicArguments(repo.CodeIndexerStatus.CommitSha, revision)
|
hasAncestorCmd := git.NewCommand(ctx, "merge-base").AddDynamicArguments(status.CommitSha, revision)
|
||||||
stdout, _, _ := hasAncestorCmd.RunStdString(&git.RunOpts{Dir: repo.RepoPath()})
|
stdout, _, _ := hasAncestorCmd.RunStdString(&git.RunOpts{Dir: repo.RepoPath()})
|
||||||
needGenesis = len(stdout) == 0
|
needGenesis = len(stdout) == 0
|
||||||
}
|
}
|
||||||
|
|
|
@ -8,6 +8,7 @@ import (
|
||||||
"os"
|
"os"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
"code.gitea.io/gitea/models/db"
|
||||||
"code.gitea.io/gitea/models/unittest"
|
"code.gitea.io/gitea/models/unittest"
|
||||||
"code.gitea.io/gitea/modules/git"
|
"code.gitea.io/gitea/modules/git"
|
||||||
"code.gitea.io/gitea/modules/indexer/code/bleve"
|
"code.gitea.io/gitea/modules/indexer/code/bleve"
|
||||||
|
@ -70,7 +71,15 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) {
|
||||||
|
|
||||||
for _, kw := range keywords {
|
for _, kw := range keywords {
|
||||||
t.Run(kw.Keyword, func(t *testing.T) {
|
t.Run(kw.Keyword, func(t *testing.T) {
|
||||||
total, res, langs, err := indexer.Search(context.TODO(), kw.RepoIDs, "", kw.Keyword, 1, 10, true)
|
total, res, langs, err := indexer.Search(context.TODO(), &internal.SearchOptions{
|
||||||
|
RepoIDs: kw.RepoIDs,
|
||||||
|
Keyword: kw.Keyword,
|
||||||
|
Paginator: &db.ListOptions{
|
||||||
|
Page: 1,
|
||||||
|
PageSize: 10,
|
||||||
|
},
|
||||||
|
IsKeywordFuzzy: true,
|
||||||
|
})
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
assert.Len(t, kw.IDs, int(total))
|
assert.Len(t, kw.IDs, int(total))
|
||||||
assert.Len(t, langs, kw.Langs)
|
assert.Len(t, langs, kw.Langs)
|
||||||
|
|
|
@ -7,6 +7,7 @@ import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
|
"code.gitea.io/gitea/models/db"
|
||||||
repo_model "code.gitea.io/gitea/models/repo"
|
repo_model "code.gitea.io/gitea/models/repo"
|
||||||
"code.gitea.io/gitea/modules/indexer/internal"
|
"code.gitea.io/gitea/modules/indexer/internal"
|
||||||
)
|
)
|
||||||
|
@ -16,7 +17,17 @@ type Indexer interface {
|
||||||
internal.Indexer
|
internal.Indexer
|
||||||
Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *RepoChanges) error
|
Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *RepoChanges) error
|
||||||
Delete(ctx context.Context, repoID int64) error
|
Delete(ctx context.Context, repoID int64) error
|
||||||
Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*SearchResult, []*SearchResultLanguages, error)
|
Search(ctx context.Context, opts *SearchOptions) (int64, []*SearchResult, []*SearchResultLanguages, error)
|
||||||
|
}
|
||||||
|
|
||||||
|
type SearchOptions struct {
|
||||||
|
RepoIDs []int64
|
||||||
|
Keyword string
|
||||||
|
Language string
|
||||||
|
|
||||||
|
IsKeywordFuzzy bool
|
||||||
|
|
||||||
|
db.Paginator
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewDummyIndexer returns a dummy indexer
|
// NewDummyIndexer returns a dummy indexer
|
||||||
|
@ -38,6 +49,6 @@ func (d *dummyIndexer) Delete(ctx context.Context, repoID int64) error {
|
||||||
return fmt.Errorf("indexer is not ready")
|
return fmt.Errorf("indexer is not ready")
|
||||||
}
|
}
|
||||||
|
|
||||||
func (d *dummyIndexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*SearchResult, []*SearchResultLanguages, error) {
|
func (d *dummyIndexer) Search(ctx context.Context, opts *SearchOptions) (int64, []*SearchResult, []*SearchResultLanguages, error) {
|
||||||
return 0, nil, nil, fmt.Errorf("indexer is not ready")
|
return 0, nil, nil, fmt.Errorf("indexer is not ready")
|
||||||
}
|
}
|
||||||
|
|
|
@ -32,6 +32,8 @@ type ResultLine struct {
|
||||||
|
|
||||||
type SearchResultLanguages = internal.SearchResultLanguages
|
type SearchResultLanguages = internal.SearchResultLanguages
|
||||||
|
|
||||||
|
type SearchOptions = internal.SearchOptions
|
||||||
|
|
||||||
func indices(content string, selectionStartIndex, selectionEndIndex int) (int, int) {
|
func indices(content string, selectionStartIndex, selectionEndIndex int) (int, int) {
|
||||||
startIndex := selectionStartIndex
|
startIndex := selectionStartIndex
|
||||||
numLinesBefore := 0
|
numLinesBefore := 0
|
||||||
|
@ -125,12 +127,12 @@ func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Res
|
||||||
|
|
||||||
// PerformSearch perform a search on a repository
|
// PerformSearch perform a search on a repository
|
||||||
// if isFuzzy is true set the Damerau-Levenshtein distance from 0 to 2
|
// if isFuzzy is true set the Damerau-Levenshtein distance from 0 to 2
|
||||||
func PerformSearch(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int, []*Result, []*internal.SearchResultLanguages, error) {
|
func PerformSearch(ctx context.Context, opts *SearchOptions) (int, []*Result, []*SearchResultLanguages, error) {
|
||||||
if len(keyword) == 0 {
|
if opts == nil || len(opts.Keyword) == 0 {
|
||||||
return 0, nil, nil, nil
|
return 0, nil, nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
total, results, resultLanguages, err := (*globalIndexer.Load()).Search(ctx, repoIDs, language, keyword, page, pageSize, isFuzzy)
|
total, results, resultLanguages, err := (*globalIndexer.Load()).Search(ctx, opts)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, nil, nil, err
|
return 0, nil, nil, err
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,6 +6,7 @@ package explore
|
||||||
import (
|
import (
|
||||||
"net/http"
|
"net/http"
|
||||||
|
|
||||||
|
"code.gitea.io/gitea/models/db"
|
||||||
repo_model "code.gitea.io/gitea/models/repo"
|
repo_model "code.gitea.io/gitea/models/repo"
|
||||||
"code.gitea.io/gitea/modules/base"
|
"code.gitea.io/gitea/modules/base"
|
||||||
code_indexer "code.gitea.io/gitea/modules/indexer/code"
|
code_indexer "code.gitea.io/gitea/modules/indexer/code"
|
||||||
|
@ -76,7 +77,16 @@ func Code(ctx *context.Context) {
|
||||||
)
|
)
|
||||||
|
|
||||||
if (len(repoIDs) > 0) || isAdmin {
|
if (len(repoIDs) > 0) || isAdmin {
|
||||||
total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, repoIDs, language, keyword, page, setting.UI.RepoSearchPagingNum, isFuzzy)
|
total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, &code_indexer.SearchOptions{
|
||||||
|
RepoIDs: repoIDs,
|
||||||
|
Keyword: keyword,
|
||||||
|
IsKeywordFuzzy: isFuzzy,
|
||||||
|
Language: language,
|
||||||
|
Paginator: &db.ListOptions{
|
||||||
|
Page: page,
|
||||||
|
PageSize: setting.UI.RepoSearchPagingNum,
|
||||||
|
},
|
||||||
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if code_indexer.IsAvailable(ctx) {
|
if code_indexer.IsAvailable(ctx) {
|
||||||
ctx.ServerError("SearchResults", err)
|
ctx.ServerError("SearchResults", err)
|
||||||
|
|
|
@ -6,6 +6,7 @@ package repo
|
||||||
import (
|
import (
|
||||||
"net/http"
|
"net/http"
|
||||||
|
|
||||||
|
"code.gitea.io/gitea/models/db"
|
||||||
"code.gitea.io/gitea/modules/base"
|
"code.gitea.io/gitea/modules/base"
|
||||||
code_indexer "code.gitea.io/gitea/modules/indexer/code"
|
code_indexer "code.gitea.io/gitea/modules/indexer/code"
|
||||||
"code.gitea.io/gitea/modules/setting"
|
"code.gitea.io/gitea/modules/setting"
|
||||||
|
@ -41,8 +42,16 @@ func Search(ctx *context.Context) {
|
||||||
page = 1
|
page = 1
|
||||||
}
|
}
|
||||||
|
|
||||||
total, searchResults, searchResultLanguages, err := code_indexer.PerformSearch(ctx, []int64{ctx.Repo.Repository.ID},
|
total, searchResults, searchResultLanguages, err := code_indexer.PerformSearch(ctx, &code_indexer.SearchOptions{
|
||||||
language, keyword, page, setting.UI.RepoSearchPagingNum, isFuzzy)
|
RepoIDs: []int64{ctx.Repo.Repository.ID},
|
||||||
|
Keyword: keyword,
|
||||||
|
IsKeywordFuzzy: isFuzzy,
|
||||||
|
Language: language,
|
||||||
|
Paginator: &db.ListOptions{
|
||||||
|
Page: page,
|
||||||
|
PageSize: setting.UI.RepoSearchPagingNum,
|
||||||
|
},
|
||||||
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if code_indexer.IsAvailable(ctx) {
|
if code_indexer.IsAvailable(ctx) {
|
||||||
ctx.ServerError("SearchResults", err)
|
ctx.ServerError("SearchResults", err)
|
||||||
|
|
|
@ -6,6 +6,7 @@ package user
|
||||||
import (
|
import (
|
||||||
"net/http"
|
"net/http"
|
||||||
|
|
||||||
|
"code.gitea.io/gitea/models/db"
|
||||||
repo_model "code.gitea.io/gitea/models/repo"
|
repo_model "code.gitea.io/gitea/models/repo"
|
||||||
"code.gitea.io/gitea/modules/base"
|
"code.gitea.io/gitea/modules/base"
|
||||||
code_indexer "code.gitea.io/gitea/modules/indexer/code"
|
code_indexer "code.gitea.io/gitea/modules/indexer/code"
|
||||||
|
@ -74,7 +75,16 @@ func CodeSearch(ctx *context.Context) {
|
||||||
)
|
)
|
||||||
|
|
||||||
if len(repoIDs) > 0 {
|
if len(repoIDs) > 0 {
|
||||||
total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, repoIDs, language, keyword, page, setting.UI.RepoSearchPagingNum, isFuzzy)
|
total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, &code_indexer.SearchOptions{
|
||||||
|
RepoIDs: repoIDs,
|
||||||
|
Keyword: keyword,
|
||||||
|
IsKeywordFuzzy: isFuzzy,
|
||||||
|
Language: language,
|
||||||
|
Paginator: &db.ListOptions{
|
||||||
|
Page: page,
|
||||||
|
PageSize: setting.UI.RepoSearchPagingNum,
|
||||||
|
},
|
||||||
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if code_indexer.IsAvailable(ctx) {
|
if code_indexer.IsAvailable(ctx) {
|
||||||
ctx.ServerError("SearchResults", err)
|
ctx.ServerError("SearchResults", err)
|
||||||
|
|
Loading…
Reference in New Issue