Skip to content

Commit 93f7b0c

Browse files
authored
matchtree: capture Stats before pruning (#607)
We now call updateStats upto twice per shard search. The intention of this is to capture statistics before pruning the matchtree. Previously we would of done work in creating a matchtree but would then prune those items away and would then never capture those statistics. In practice that work was reading just one or two varint (the size of a posting list) so likely had minimal impact on the reported statistics. However, in the next commit we want to introduce a statistic which is recorded even if we generate a noMatchTree. The main technical part of this commit is ensuring all existing updateStats functions can be called twice without overcounting. Test Plan: go test
1 parent b9e6d94 commit 93f7b0c

4 files changed

Lines changed: 31 additions & 13 deletions

File tree

eval.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,9 @@ func (d *indexData) Search(ctx context.Context, q query.Q, opts *SearchOptions)
181181
return nil, err
182182
}
183183

184+
// Capture the costs of construction before pruning
185+
updateMatchTreeStats(mt, &res.Stats)
186+
184187
mt, err = pruneMatchTree(mt)
185188
if err != nil {
186189
return nil, err
@@ -380,11 +383,8 @@ nextFileMatch:
380383
}
381384
}
382385

383-
visitMatchTree(mt, func(mt matchTree) {
384-
if atom, ok := mt.(interface{ updateStats(*Stats) }); ok {
385-
atom.updateStats(&res.Stats)
386-
}
387-
})
386+
// Update stats based on work done during document search.
387+
updateMatchTreeStats(mt, &res.Stats)
388388

389389
// If document ranking is enabled, then we can rank and truncate the files to save memory.
390390
if limit := opts.MaxDocDisplayCount; opts.UseDocumentRanks && limit > 0 && limit < len(res.Files) {

hititer.go

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -183,18 +183,19 @@ func (i *inMemoryIterator) next(limit uint32) {
183183
// compressedPostingIterator goes over a delta varint encoded posting
184184
// list.
185185
type compressedPostingIterator struct {
186-
blob, orig []byte
187-
_first uint32
188-
what ngram
186+
blob []byte
187+
indexBytesLoaded int
188+
_first uint32
189+
what ngram
189190
}
190191

191192
func newCompressedPostingIterator(b []byte, w ngram) *compressedPostingIterator {
192193
d, sz := binary.Uvarint(b)
193194
return &compressedPostingIterator{
194-
_first: uint32(d),
195-
blob: b[sz:],
196-
orig: b,
197-
what: w,
195+
_first: uint32(d),
196+
blob: b[sz:],
197+
indexBytesLoaded: sz,
198+
what: w,
198199
}
199200
}
200201

@@ -216,6 +217,7 @@ func (i *compressedPostingIterator) next(limit uint32) {
216217
for i._first <= limit && len(i.blob) > 0 {
217218
delta, sz := binary.Uvarint(i.blob)
218219
i._first += uint32(delta)
220+
i.indexBytesLoaded += sz
219221
i.blob = i.blob[sz:]
220222
}
221223

@@ -225,7 +227,8 @@ func (i *compressedPostingIterator) next(limit uint32) {
225227
}
226228

227229
func (i *compressedPostingIterator) updateStats(s *Stats) {
228-
s.IndexBytesLoaded += int64(len(i.orig) - len(i.blob))
230+
s.IndexBytesLoaded += int64(i.indexBytesLoaded)
231+
i.indexBytesLoaded = 0
229232
}
230233

231234
// mergingIterator forms the merge of a set of hitIterators, to

matchiter.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,10 @@ type matchIterator interface {
6363
docIterator
6464

6565
candidates() []*candidateMatch
66+
67+
// updateStats is called twice. After matchtree construction and after
68+
// searching is done. Implementations must take care to not report
69+
// statistics twice.
6670
updateStats(*Stats)
6771
}
6872

@@ -150,6 +154,7 @@ func (i *ngramDocIterator) prepare(nextDoc uint32) {
150154
func (i *ngramDocIterator) updateStats(s *Stats) {
151155
i.iter.updateStats(s)
152156
s.NgramMatches += i.matchCount
157+
i.matchCount = 0
153158
}
154159

155160
func (i *ngramDocIterator) candidates() []*candidateMatch {

matchtree.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -520,6 +520,16 @@ func visitMatchTree(t matchTree, f func(matchTree)) {
520520
}
521521
}
522522

523+
// updateMatchTreeStats calls updateStats on all atoms in mt which have that
524+
// function defined.
525+
func updateMatchTreeStats(mt matchTree, stats *Stats) {
526+
visitMatchTree(mt, func(mt matchTree) {
527+
if atom, ok := mt.(interface{ updateStats(*Stats) }); ok {
528+
atom.updateStats(stats)
529+
}
530+
})
531+
}
532+
523533
// visitMatches visits all atoms which can contribute matches. Note: This
524534
// skips noVisitMatchTree.
525535
func visitMatches(t matchTree, known map[matchTree]bool, f func(matchTree)) {

0 commit comments

Comments
 (0)