3535import org .apache .lucene .index .DocValues ;
3636import org .apache .lucene .index .IndexReader ;
3737import org .apache .lucene .index .LeafReaderContext ;
38+ import org .apache .lucene .index .NumericDocValues ;
3839import org .apache .lucene .index .SortedDocValues ;
3940import org .apache .lucene .index .SortedSetDocValues ;
4041import org .apache .lucene .index .Terms ;
41- import org .apache .lucene .search . MatchAllDocsQuery ;
42+ import org .apache .lucene .index . TermsEnum ;
4243import org .apache .lucene .search .Weight ;
4344import org .apache .lucene .util .ArrayUtil ;
4445import org .apache .lucene .util .BytesRef ;
4950import org .opensearch .common .util .LongHash ;
5051import org .opensearch .core .common .io .stream .StreamOutput ;
5152import org .opensearch .core .xcontent .XContentBuilder ;
53+ import org .opensearch .index .mapper .DocCountFieldMapper ;
5254import org .opensearch .search .DocValueFormat ;
5355import org .opensearch .search .aggregations .AggregationExecutionException ;
5456import org .opensearch .search .aggregations .Aggregator ;
6466import org .opensearch .search .aggregations .bucket .terms .SignificanceLookup .BackgroundFrequencyForBytes ;
6567import org .opensearch .search .aggregations .bucket .terms .heuristic .SignificanceHeuristic ;
6668import org .opensearch .search .aggregations .support .ValuesSource ;
67- import org .opensearch .search .aggregations .support .ValuesSource .Bytes .WithOrdinals ;
6869import org .opensearch .search .internal .SearchContext ;
6970
7071import java .io .IOException ;
7879
7980import static org .opensearch .search .aggregations .InternalOrder .isKeyOrder ;
8081import static org .apache .lucene .index .SortedSetDocValues .NO_MORE_ORDS ;
82+ import static org .apache .lucene .search .DocIdSetIterator .NO_MORE_DOCS ;
8183
8284/**
8385 * An aggregator of string values that relies on global ordinals in order to build buckets.
@@ -94,6 +96,8 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
9496 private final LongPredicate acceptedGlobalOrdinals ;
9597 private final long valueCount ;
9698
99+ private final String fieldName ;
100+
97101 private Weight weight ;
98102 private final GlobalOrdLookupFunction lookupGlobalOrd ;
99103 protected final CollectionStrategy collectionStrategy ;
@@ -146,6 +150,7 @@ public GlobalOrdinalsStringTermsAggregator(
146150 return new DenseGlobalOrds ();
147151 });
148152 }
153+ this .fieldName = ((ValuesSource .Bytes .WithOrdinals .FieldData ) valuesSource ).indexFieldData .getFieldName ();
149154 }
150155
151156 String descriptCollectionStrategy () {
@@ -156,22 +161,80 @@ public void setWeight(Weight weight) {
156161 this .weight = weight ;
157162 }
158163
159- @ Override
160- public LeafBucketCollector getLeafCollector (LeafReaderContext ctx , LeafBucketCollector sub ) throws IOException {
161- if (weight != null && weight .getQuery () instanceof MatchAllDocsQuery ) {
162- if ((weight .count (ctx ) == 0 )
163- && Terms .getTerms (ctx .reader (), String .valueOf (((WithOrdinals .FieldData ) valuesSource ).indexFieldData .getFieldName ()))
164- .size () == 0 ) {
165- return LeafBucketCollector .NO_OP_COLLECTOR ;
166- // } else if (weight.count(ctx) == ctx.reader().maxDoc() && weight.getQuery() instanceof MatchAllDocsQuery) {
167- // no deleted documents & top level query matches everything
168- // iterate over the terms - doc frequency for each termsEnum directly
169- // return appropriate LeafCollector
164+ /**
165+ Collects term frequencies for a given field from a LeafReaderContext.
166+ @param ctx The LeafReaderContext to collect terms from
167+ @param ords The SortedSetDocValues for the field's ordinals
168+ @param ordCountConsumer A consumer to accept collected term frequencies
169+ @return A LeafBucketCollector implementation that throws an exception, since collection is complete
170+ @throws IOException If an I/O error occurs during reading */
171+ LeafBucketCollector termDocFreqCollector (LeafReaderContext ctx , SortedSetDocValues ords , BiConsumer <Long , Integer > ordCountConsumer )
172+ throws IOException {
173+ // long n0 = System.nanoTime(), n1, n2, n3, n4, n5 = 0;
174+ if (weight .count (ctx ) != ctx .reader ().maxDoc ()) {
175+ // Top-level query does not match all docs in this segment.
176+ return null ;
177+ }
178+ // n1 = System.nanoTime();
179+
180+ Terms aggTerms = ctx .reader ().terms (this .fieldName );
181+ if (aggTerms == null ) {
182+ // Field is not indexed.
183+ return null ;
184+ }
185+ // n2 = System.nanoTime();
186+ NumericDocValues docCountValues = DocValues .getNumeric (ctx .reader (), DocCountFieldMapper .NAME );
187+ if (docCountValues .nextDoc () != NO_MORE_DOCS ) {
188+ // This segment has at least one document with the _doc_count field.
189+ return null ;
190+ }
191+ // n3 = System.nanoTime();
192+ TermsEnum indexTermsEnum = aggTerms .iterator ();
193+ BytesRef indexTerm = indexTermsEnum .next ();
194+ TermsEnum ordinalTermsEnum = ords .termsEnum ();
195+ BytesRef ordinalTerm = ordinalTermsEnum .next ();
196+ // n4 = System.nanoTime();
197+ while (indexTerm != null && ordinalTerm != null ) {
198+ int compare = indexTerm .compareTo (ordinalTerm );
199+ if (compare == 0 ) {
200+ if (acceptedGlobalOrdinals .test (ordinalTermsEnum .ord ())) {
201+ ordCountConsumer .accept (ordinalTermsEnum .ord (), indexTermsEnum .docFreq ());
202+ }
203+ indexTerm = indexTermsEnum .next ();
204+ ordinalTerm = ordinalTermsEnum .next ();
205+ } else if (compare < 0 ) {
206+ indexTerm = indexTermsEnum .next ();
207+ } else {
208+ ordinalTerm = ordinalTermsEnum .next ();
170209 }
210+ // n5 = System.nanoTime();
171211 }
212+ // logger.info((n1 - n0) + " " + (n2 - n1) + " " + (n3 - n2) + " " + (n4 - n3) + " " + (n5 - n4));
213+ // return new LeafBucketCollector() {
214+ // @Override
215+ // public void collect(int doc, long owningBucketOrd) {
216+ // throw new CollectionTerminatedException();
217+ // }
218+ // };
219+ return LeafBucketCollector .NO_OP_COLLECTOR ;
220+ }
172221
222+ @ Override
223+ public LeafBucketCollector getLeafCollector (LeafReaderContext ctx , LeafBucketCollector sub ) throws IOException {
173224 SortedSetDocValues globalOrds = valuesSource .globalOrdinalsValues (ctx );
174225 collectionStrategy .globalOrdsReady (globalOrds );
226+
227+ if (collectionStrategy instanceof DenseGlobalOrds && sub == LeafBucketCollector .NO_OP_COLLECTOR ) {
228+ LeafBucketCollector termDocFreqCollector = termDocFreqCollector (
229+ ctx ,
230+ globalOrds ,
231+ (o , c ) -> incrementBucketDocCount (collectionStrategy .globalOrdToBucketOrd (0 , o ), c )
232+ );
233+ if (termDocFreqCollector != null ) {
234+ return termDocFreqCollector ;
235+ }
236+ }
237+
175238 SortedDocValues singleValues = DocValues .unwrapSingleton (globalOrds );
176239 if (singleValues != null ) {
177240 segmentsWithSingleValuedOrds ++;
@@ -369,6 +432,16 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCol
369432 final SortedSetDocValues segmentOrds = valuesSource .ordinalsValues (ctx );
370433 segmentDocCounts = context .bigArrays ().grow (segmentDocCounts , 1 + segmentOrds .getValueCount ());
371434 assert sub == LeafBucketCollector .NO_OP_COLLECTOR ;
435+
436+ LeafBucketCollector termDocFreqCollector = this .termDocFreqCollector (
437+ ctx ,
438+ segmentOrds ,
439+ (o , c ) -> segmentDocCounts .increment (o + 1 , c )
440+ );
441+ if (termDocFreqCollector != null ) {
442+ return termDocFreqCollector ;
443+ }
444+
372445 final SortedDocValues singleValues = DocValues .unwrapSingleton (segmentOrds );
373446 mapping = valuesSource .globalOrdinalsMapping (ctx );
374447 // Dense mode doesn't support include/exclude so we don't have to check it here.
0 commit comments