mirror of https://github.com/go-gitea/gitea.git
Fix issues indexer document mapping (#25619)
Fix a regression from #5363 (introduced long ago). The old code defined a document mapping for `issueIndexerDocType` and assigned it to `BleveIndexerData` as its type. (`BleveIndexerData` was renamed to `IndexerData` in #25174, but nothing else changed.) However, the old code never used `BleveIndexerData`; it wrote the index with an anonymous struct type. Nonetheless, bleve would use the default auto-mapping for any struct it didn't know, so the indexer still worked. This means the custom document mapping was always dead code. The custom document mapping is not useless: it can reduce index storage. This PR brings it back and disables the default mapping to prevent this from happening again. Since `IndexerData` (`BleveIndexerData`) has JSON tags, and bleve uses them first, we should use `repo_id` as the field name instead of `RepoID`. I did a test to compare the storage size before and after this change, with about 3k real comments that were migrated from some public repos. Before: ```text [ 160] . ├── [ 42] index_meta.json ├── [ 13] rupture_meta.json └── [ 128] store ├── [6.9M] 00000000005d.zap └── [256K] root.bolt ``` After: ```text [ 160] . ├── [ 42] index_meta.json ├── [ 13] rupture_meta.json └── [ 128] store ├── [3.5M] 000000000065.zap └── [256K] root.bolt ``` It saves about half the storage space. --------- Co-authored-by: Giteabot <teabot@gitea.io>
This commit is contained in:
parent
dae022ab2a
commit
9958642502
|
@ -23,7 +23,7 @@ import (
|
||||||
const (
|
const (
|
||||||
issueIndexerAnalyzer = "issueIndexer"
|
issueIndexerAnalyzer = "issueIndexer"
|
||||||
issueIndexerDocType = "issueIndexerDocType"
|
issueIndexerDocType = "issueIndexerDocType"
|
||||||
issueIndexerLatestVersion = 2
|
issueIndexerLatestVersion = 3
|
||||||
)
|
)
|
||||||
|
|
||||||
// numericEqualityQuery a numeric equality query for the given value and field
|
// numericEqualityQuery a numeric equality query for the given value and field
|
||||||
|
@ -67,15 +67,16 @@ func generateIssueIndexMapping() (mapping.IndexMapping, error) {
|
||||||
docMapping := bleve.NewDocumentMapping()
|
docMapping := bleve.NewDocumentMapping()
|
||||||
|
|
||||||
numericFieldMapping := bleve.NewNumericFieldMapping()
|
numericFieldMapping := bleve.NewNumericFieldMapping()
|
||||||
|
numericFieldMapping.Store = false
|
||||||
numericFieldMapping.IncludeInAll = false
|
numericFieldMapping.IncludeInAll = false
|
||||||
docMapping.AddFieldMappingsAt("RepoID", numericFieldMapping)
|
docMapping.AddFieldMappingsAt("repo_id", numericFieldMapping)
|
||||||
|
|
||||||
textFieldMapping := bleve.NewTextFieldMapping()
|
textFieldMapping := bleve.NewTextFieldMapping()
|
||||||
textFieldMapping.Store = false
|
textFieldMapping.Store = false
|
||||||
textFieldMapping.IncludeInAll = false
|
textFieldMapping.IncludeInAll = false
|
||||||
docMapping.AddFieldMappingsAt("Title", textFieldMapping)
|
docMapping.AddFieldMappingsAt("title", textFieldMapping)
|
||||||
docMapping.AddFieldMappingsAt("Content", textFieldMapping)
|
docMapping.AddFieldMappingsAt("content", textFieldMapping)
|
||||||
docMapping.AddFieldMappingsAt("Comments", textFieldMapping)
|
docMapping.AddFieldMappingsAt("comments", textFieldMapping)
|
||||||
|
|
||||||
if err := addUnicodeNormalizeTokenFilter(mapping); err != nil {
|
if err := addUnicodeNormalizeTokenFilter(mapping); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
@ -91,6 +92,7 @@ func generateIssueIndexMapping() (mapping.IndexMapping, error) {
|
||||||
mapping.DefaultAnalyzer = issueIndexerAnalyzer
|
mapping.DefaultAnalyzer = issueIndexerAnalyzer
|
||||||
mapping.AddDocumentMapping(issueIndexerDocType, docMapping)
|
mapping.AddDocumentMapping(issueIndexerDocType, docMapping)
|
||||||
mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping())
|
mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping())
|
||||||
|
mapping.DefaultMapping = bleve.NewDocumentDisabledMapping() // disable default mapping, avoid indexing unexpected structs
|
||||||
|
|
||||||
return mapping, nil
|
return mapping, nil
|
||||||
}
|
}
|
||||||
|
@ -116,17 +118,7 @@ func NewIndexer(indexDir string) *Indexer {
|
||||||
func (b *Indexer) Index(_ context.Context, issues []*internal.IndexerData) error {
|
func (b *Indexer) Index(_ context.Context, issues []*internal.IndexerData) error {
|
||||||
batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize)
|
batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize)
|
||||||
for _, issue := range issues {
|
for _, issue := range issues {
|
||||||
if err := batch.Index(indexer_internal.Base36(issue.ID), struct {
|
if err := batch.Index(indexer_internal.Base36(issue.ID), (*IndexerData)(issue)); err != nil {
|
||||||
RepoID int64
|
|
||||||
Title string
|
|
||||||
Content string
|
|
||||||
Comments []string
|
|
||||||
}{
|
|
||||||
RepoID: issue.RepoID,
|
|
||||||
Title: issue.Title,
|
|
||||||
Content: issue.Content,
|
|
||||||
Comments: issue.Comments,
|
|
||||||
}); err != nil {
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -149,7 +141,7 @@ func (b *Indexer) Delete(_ context.Context, ids ...int64) error {
|
||||||
func (b *Indexer) Search(ctx context.Context, keyword string, repoIDs []int64, limit, start int) (*internal.SearchResult, error) {
|
func (b *Indexer) Search(ctx context.Context, keyword string, repoIDs []int64, limit, start int) (*internal.SearchResult, error) {
|
||||||
var repoQueriesP []*query.NumericRangeQuery
|
var repoQueriesP []*query.NumericRangeQuery
|
||||||
for _, repoID := range repoIDs {
|
for _, repoID := range repoIDs {
|
||||||
repoQueriesP = append(repoQueriesP, numericEqualityQuery(repoID, "RepoID"))
|
repoQueriesP = append(repoQueriesP, numericEqualityQuery(repoID, "repo_id"))
|
||||||
}
|
}
|
||||||
repoQueries := make([]query.Query, len(repoQueriesP))
|
repoQueries := make([]query.Query, len(repoQueriesP))
|
||||||
for i, v := range repoQueriesP {
|
for i, v := range repoQueriesP {
|
||||||
|
@ -159,9 +151,9 @@ func (b *Indexer) Search(ctx context.Context, keyword string, repoIDs []int64, l
|
||||||
indexerQuery := bleve.NewConjunctionQuery(
|
indexerQuery := bleve.NewConjunctionQuery(
|
||||||
bleve.NewDisjunctionQuery(repoQueries...),
|
bleve.NewDisjunctionQuery(repoQueries...),
|
||||||
bleve.NewDisjunctionQuery(
|
bleve.NewDisjunctionQuery(
|
||||||
newMatchPhraseQuery(keyword, "Title", issueIndexerAnalyzer),
|
newMatchPhraseQuery(keyword, "title", issueIndexerAnalyzer),
|
||||||
newMatchPhraseQuery(keyword, "Content", issueIndexerAnalyzer),
|
newMatchPhraseQuery(keyword, "content", issueIndexerAnalyzer),
|
||||||
newMatchPhraseQuery(keyword, "Comments", issueIndexerAnalyzer),
|
newMatchPhraseQuery(keyword, "comments", issueIndexerAnalyzer),
|
||||||
))
|
))
|
||||||
search := bleve.NewSearchRequestOptions(indexerQuery, limit, start, false)
|
search := bleve.NewSearchRequestOptions(indexerQuery, limit, start, false)
|
||||||
search.SortBy([]string{"-_score"})
|
search.SortBy([]string{"-_score"})
|
||||||
|
|
Loading…
Reference in New Issue