@@ -320,26 +320,9 @@ func (d *indexData) memoryUse() int {
320320 return sz
321321}
322322
323- const maxUInt32 = 0xffffffff
324-
325- func min2Index (xs []uint32 ) (idx0 , idx1 int ) {
326- min0 , min1 := uint32 (maxUInt32 ), uint32 (maxUInt32 )
327- for i , x := range xs {
328- if x <= min0 {
329- idx0 , idx1 = i , idx0
330- min0 , min1 = x , min0
331- } else if x <= min1 {
332- idx1 = i
333- min1 = x
334- }
335- }
336- return
337- }
338-
339323// findSelectiveNgrams returns two ngrams to pass to the distance iterator, chosen to
340- // produce a small file intersection. It finds the two lowest frequency ngrams, making
341- // sure to maximize the distance between them in case of ties. It avoids overlapping
342- // trigrams to keep their intersection as small as possible.
324+ // produce a small file intersection. It finds the two lowest frequency ngrams, but avoids
325+ // overlapping trigrams to keep their intersection as small as possible.
343326//
344327// Invariant: first will always have a smaller index than last.
345328func findSelectiveNgrams (ngramOffs []runeNgramOff , indexMap []int , frequencies []uint32 ) (first , last runeNgramOff ) {
@@ -361,27 +344,24 @@ func findSelectiveNgrams(ngramOffs []runeNgramOff, indexMap []int, frequencies [
361344 return
362345}
363346
347+ const maxUInt32 = 0xffffffff
348+
364349func minFrequencyNgramOffsets (ngramOffs []runeNgramOff , frequencies []uint32 ) (first , last runeNgramOff ) {
365- firstI , lastI := min2Index (frequencies )
366- // If the frequencies are equal lets maximise distance in the query
367- // string. This optimization normally triggers for long repeated trigrams
368- // in a string, eg a query like "AAAAA..."
369- if frequencies [firstI ] == frequencies [lastI ] {
370- for i , freq := range frequencies {
371- if freq != frequencies [firstI ] {
372- continue
373- }
374- if ngramOffs [i ].index < ngramOffs [firstI ].index {
375- firstI = i
376- }
377- if ngramOffs [i ].index > ngramOffs [lastI ].index {
378- lastI = i
379- }
350+ // Find the two lowest frequency ngrams.
351+ idx0 , idx1 := 0 , 0
352+ min0 , min1 := uint32 (maxUInt32 ), uint32 (maxUInt32 )
353+ for i , x := range frequencies {
354+ if x <= min0 {
355+ idx0 , idx1 = i , idx0
356+ min0 , min1 = x , min0
357+ } else if x <= min1 {
358+ idx1 = i
359+ min1 = x
380360 }
381361 }
382362
383- first = ngramOffs [firstI ]
384- last = ngramOffs [lastI ]
363+ first = ngramOffs [idx0 ]
364+ last = ngramOffs [idx1 ]
385365
386366 // Ensure first appears before last as a helpful invariant.
387367 if first .index > last .index {
0 commit comments