mirror of https://github.com/go-gitea/gitea.git
186 lines
5.1 KiB
Go
186 lines
5.1 KiB
Go
// Copyright (c) 2017 Couchbase, Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package scorch
|
|
|
|
import (
|
|
"bytes"
|
|
"fmt"
|
|
"reflect"
|
|
"sync/atomic"
|
|
|
|
"github.com/blevesearch/bleve/index"
|
|
"github.com/blevesearch/bleve/index/scorch/segment"
|
|
"github.com/blevesearch/bleve/size"
|
|
)
|
|
|
|
var reflectStaticSizeIndexSnapshotTermFieldReader int
|
|
|
|
func init() {
|
|
var istfr IndexSnapshotTermFieldReader
|
|
reflectStaticSizeIndexSnapshotTermFieldReader = int(reflect.TypeOf(istfr).Size())
|
|
}
|
|
|
|
type IndexSnapshotTermFieldReader struct {
|
|
term []byte
|
|
field string
|
|
snapshot *IndexSnapshot
|
|
dicts []segment.TermDictionary
|
|
postings []segment.PostingsList
|
|
iterators []segment.PostingsIterator
|
|
segmentOffset int
|
|
includeFreq bool
|
|
includeNorm bool
|
|
includeTermVectors bool
|
|
currPosting segment.Posting
|
|
currID index.IndexInternalID
|
|
}
|
|
|
|
func (i *IndexSnapshotTermFieldReader) Size() int {
|
|
sizeInBytes := reflectStaticSizeIndexSnapshotTermFieldReader + size.SizeOfPtr +
|
|
len(i.term) +
|
|
len(i.field) +
|
|
len(i.currID)
|
|
|
|
for _, entry := range i.postings {
|
|
sizeInBytes += entry.Size()
|
|
}
|
|
|
|
for _, entry := range i.iterators {
|
|
sizeInBytes += entry.Size()
|
|
}
|
|
|
|
if i.currPosting != nil {
|
|
sizeInBytes += i.currPosting.Size()
|
|
}
|
|
|
|
return sizeInBytes
|
|
}
|
|
|
|
func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) {
|
|
rv := preAlloced
|
|
if rv == nil {
|
|
rv = &index.TermFieldDoc{}
|
|
}
|
|
// find the next hit
|
|
for i.segmentOffset < len(i.iterators) {
|
|
next, err := i.iterators[i.segmentOffset].Next()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if next != nil {
|
|
// make segment number into global number by adding offset
|
|
globalOffset := i.snapshot.offsets[i.segmentOffset]
|
|
nnum := next.Number()
|
|
rv.ID = docNumberToBytes(rv.ID, nnum+globalOffset)
|
|
i.postingToTermFieldDoc(next, rv)
|
|
|
|
i.currID = rv.ID
|
|
i.currPosting = next
|
|
return rv, nil
|
|
}
|
|
i.segmentOffset++
|
|
}
|
|
return nil, nil
|
|
}
|
|
|
|
func (i *IndexSnapshotTermFieldReader) postingToTermFieldDoc(next segment.Posting, rv *index.TermFieldDoc) {
|
|
if i.includeFreq {
|
|
rv.Freq = next.Frequency()
|
|
}
|
|
if i.includeNorm {
|
|
rv.Norm = next.Norm()
|
|
}
|
|
if i.includeTermVectors {
|
|
locs := next.Locations()
|
|
if cap(rv.Vectors) < len(locs) {
|
|
rv.Vectors = make([]*index.TermFieldVector, len(locs))
|
|
backing := make([]index.TermFieldVector, len(locs))
|
|
for i := range backing {
|
|
rv.Vectors[i] = &backing[i]
|
|
}
|
|
}
|
|
rv.Vectors = rv.Vectors[:len(locs)]
|
|
for i, loc := range locs {
|
|
*rv.Vectors[i] = index.TermFieldVector{
|
|
Start: loc.Start(),
|
|
End: loc.End(),
|
|
Pos: loc.Pos(),
|
|
ArrayPositions: loc.ArrayPositions(),
|
|
Field: loc.Field(),
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
func (i *IndexSnapshotTermFieldReader) Advance(ID index.IndexInternalID, preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) {
|
|
// FIXME do something better
|
|
// for now, if we need to seek backwards, then restart from the beginning
|
|
if i.currPosting != nil && bytes.Compare(i.currID, ID) >= 0 {
|
|
i2, err := i.snapshot.TermFieldReader(i.term, i.field,
|
|
i.includeFreq, i.includeNorm, i.includeTermVectors)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
*i = *(i2.(*IndexSnapshotTermFieldReader))
|
|
}
|
|
num, err := docInternalToNumber(ID)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("error converting to doc number % x - %v", ID, err)
|
|
}
|
|
segIndex, ldocNum := i.snapshot.segmentIndexAndLocalDocNumFromGlobal(num)
|
|
if segIndex >= len(i.snapshot.segment) {
|
|
return nil, fmt.Errorf("computed segment index %d out of bounds %d",
|
|
segIndex, len(i.snapshot.segment))
|
|
}
|
|
// skip directly to the target segment
|
|
i.segmentOffset = segIndex
|
|
next, err := i.iterators[i.segmentOffset].Advance(ldocNum)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if next == nil {
|
|
// we jumped directly to the segment that should have contained it
|
|
// but it wasn't there, so reuse Next() which should correctly
|
|
// get the next hit after it (we moved i.segmentOffset)
|
|
return i.Next(preAlloced)
|
|
}
|
|
|
|
if preAlloced == nil {
|
|
preAlloced = &index.TermFieldDoc{}
|
|
}
|
|
preAlloced.ID = docNumberToBytes(preAlloced.ID, next.Number()+
|
|
i.snapshot.offsets[segIndex])
|
|
i.postingToTermFieldDoc(next, preAlloced)
|
|
i.currID = preAlloced.ID
|
|
i.currPosting = next
|
|
return preAlloced, nil
|
|
}
|
|
|
|
func (i *IndexSnapshotTermFieldReader) Count() uint64 {
|
|
var rv uint64
|
|
for _, posting := range i.postings {
|
|
rv += posting.Count()
|
|
}
|
|
return rv
|
|
}
|
|
|
|
func (i *IndexSnapshotTermFieldReader) Close() error {
|
|
if i.snapshot != nil {
|
|
atomic.AddUint64(&i.snapshot.parent.stats.TotTermSearchersFinished, uint64(1))
|
|
i.snapshot.recycleTermFieldReader(i)
|
|
}
|
|
return nil
|
|
}
|