Skip to content

Commit 3a8d66e

Browse files
committed
take field density into consideration
Signed-off-by: panguixin <panguixin@bytedance.com>
1 parent 47f5073 commit 3a8d66e

6 files changed

Lines changed: 155 additions & 77 deletions

File tree

server/src/main/java/org/opensearch/search/SearchService.java

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@
134134
import org.opensearch.search.rescore.RescorerBuilder;
135135
import org.opensearch.search.searchafter.SearchAfterBuilder;
136136
import org.opensearch.search.sort.FieldSortBuilder;
137+
import org.opensearch.search.sort.FieldStats;
137138
import org.opensearch.search.sort.MinAndMax;
138139
import org.opensearch.search.sort.SortAndFormats;
139140
import org.opensearch.search.sort.SortBuilder;
@@ -1627,7 +1628,7 @@ private CanMatchResponse canMatch(ShardSearchRequest request, boolean checkRefre
16271628
final SortAndFormats primarySort = sortBuilder != null
16281629
? SortBuilder.buildSort(Collections.singletonList(sortBuilder), context).get()
16291630
: null;
1630-
MinAndMax<?> minMax = sortBuilder != null ? FieldSortBuilder.getMinMaxOrNull(context, sortBuilder) : null;
1631+
FieldStats stats = sortBuilder != null ? FieldSortBuilder.getFieldStatsForShard(context, sortBuilder) : FieldStats.UNKNOWN;
16311632
boolean canMatch;
16321633
if (canRewriteToMatchNone(request.source())) {
16331634
QueryBuilder queryBuilder = request.source().query();
@@ -1638,9 +1639,16 @@ private CanMatchResponse canMatch(ShardSearchRequest request, boolean checkRefre
16381639
}
16391640
final FieldDoc searchAfterFieldDoc = getSearchAfterFieldDoc(request, context);
16401641
final Integer trackTotalHitsUpto = request.source() == null ? null : request.source().trackTotalHitsUpTo();
1641-
canMatch = canMatch && canMatchSearchAfter(searchAfterFieldDoc, minMax, primarySort, trackTotalHitsUpto);
1642+
canMatch = canMatch
1643+
&& canMatchSearchAfter(
1644+
searchAfterFieldDoc,
1645+
stats.getMinAndMax(),
1646+
primarySort,
1647+
trackTotalHitsUpto,
1648+
stats.allDocsNonMissing()
1649+
);
16421650

1643-
return new CanMatchResponse(canMatch || hasRefreshPending, minMax);
1651+
return new CanMatchResponse(canMatch || hasRefreshPending, stats.getMinAndMax());
16441652
}
16451653
}
16461654
}
@@ -1649,7 +1657,8 @@ public static boolean canMatchSearchAfter(
16491657
FieldDoc searchAfter,
16501658
MinAndMax<?> minMax,
16511659
SortAndFormats primarySort,
1652-
Integer trackTotalHitsUpto
1660+
Integer trackTotalHitsUpto,
1661+
boolean allDocsNonMissing
16531662
) {
16541663
// Check for sort.missing == null: for sort queries that specify a missing value, even if the segment/shard's min/max
16551664
// is out of the search_after range, its docs should still be returned, hence we should not skip the segment/shard.
@@ -1665,12 +1674,12 @@ public static boolean canMatchSearchAfter(
16651674
if (primarySortField.getReverse()) {
16661675
if (minMax.compareMin(searchAfterPrimary) > 0) {
16671676
// In Desc order, if segment/shard minimum is gt search_after, the segment/shard won't be competitive
1668-
return canMatchMissingValue(primarySortField, searchAfterPrimary);
1677+
return allDocsNonMissing == false && canMatchMissingValue(primarySortField, searchAfterPrimary);
16691678
}
16701679
} else {
16711680
if (minMax.compareMax(searchAfterPrimary) < 0) {
16721681
// In ASC order, if segment/shard maximum is lt search_after, the segment/shard won't be competitive
1673-
return canMatchMissingValue(primarySortField, searchAfterPrimary);
1682+
return allDocsNonMissing == false && canMatchMissingValue(primarySortField, searchAfterPrimary);
16741683
}
16751684
}
16761685
}

server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@
7878
import org.opensearch.search.query.QueryPhase;
7979
import org.opensearch.search.query.QuerySearchResult;
8080
import org.opensearch.search.sort.FieldSortBuilder;
81-
import org.opensearch.search.sort.MinAndMax;
81+
import org.opensearch.search.sort.FieldStats;
8282

8383
import java.io.IOException;
8484
import java.util.ArrayList;
@@ -517,17 +517,19 @@ private boolean canMatchSearchAfter(LeafReaderContext ctx) throws IOException {
517517
// Only applied on primary sort field and primary search_after.
518518
FieldSortBuilder primarySortField = FieldSortBuilder.getPrimaryFieldSortOrNull(searchContext.request().source());
519519
if (primarySortField != null) {
520-
MinAndMax<?> minMax = FieldSortBuilder.getMinMaxOrNullForSegment(
520+
FieldStats stats = FieldSortBuilder.getFieldStatsForSegment(
521521
this.searchContext.getQueryShardContext(),
522522
ctx,
523523
primarySortField,
524524
searchContext.sort()
525525
);
526+
assert stats != null;
526527
return SearchService.canMatchSearchAfter(
527528
searchContext.searchAfter(),
528-
minMax,
529+
stats.getMinAndMax(),
529530
searchContext.sort(),
530-
searchContext.trackTotalHitsUpTo()
531+
searchContext.trackTotalHitsUpTo(),
532+
stats.allDocsNonMissing()
531533
);
532534
}
533535
}

server/src/main/java/org/opensearch/search/sort/FieldSortBuilder.java

Lines changed: 34 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -606,73 +606,77 @@ public static FieldSortBuilder getPrimaryFieldSortOrNull(SearchSourceBuilder sou
606606
}
607607

608608
/**
609-
* Return the {@link MinAndMax} indexed value for shard from the provided {@link FieldSortBuilder} or <code>null</code> if unknown.
609+
* Return the {@link FieldStats} of the indexed values for the shard from the provided {@link FieldSortBuilder}, or {@link FieldStats#UNKNOWN} if unknown.
610610
* The value can be extracted on non-nested indexed mapped fields of type keyword, numeric or date, other fields
611-
* and configurations return <code>null</code>.
611+
* and configurations return {@link FieldStats#UNKNOWN}.
612612
*/
613-
public static MinAndMax<?> getMinMaxOrNull(QueryShardContext context, FieldSortBuilder sortBuilder) throws IOException {
613+
public static FieldStats getFieldStatsForShard(QueryShardContext context, FieldSortBuilder sortBuilder) throws IOException {
614614
final SortAndFormats sort = SortBuilder.buildSort(Collections.singletonList(sortBuilder), context).get();
615-
return getMinMaxOrNullInternal(context.getIndexReader(), context, sortBuilder, sort);
615+
return getFieldStatsInternal(context.getIndexReader(), context, sortBuilder, sort);
616616
}
617617

618618
/**
619-
* Return the {@link MinAndMax} indexed value for segment from the provided {@link FieldSortBuilder} or <code>null</code> if unknown.
619+
* Return the {@link FieldStats} of the indexed values for the segment from the provided {@link FieldSortBuilder}, or {@link FieldStats#UNKNOWN} if unknown.
620620
* The value can be extracted on non-nested indexed mapped fields of type keyword, numeric or date, other fields
621-
* and configurations return <code>null</code>.
621+
* and configurations return {@link FieldStats#UNKNOWN}.
622622
*/
623-
public static MinAndMax<?> getMinMaxOrNullForSegment(
623+
public static FieldStats getFieldStatsForSegment(
624624
QueryShardContext context,
625625
LeafReaderContext ctx,
626626
FieldSortBuilder sortBuilder,
627627
SortAndFormats sort
628628
) throws IOException {
629-
return getMinMaxOrNullInternal(ctx.reader(), context, sortBuilder, sort);
629+
return getFieldStatsInternal(ctx.reader(), context, sortBuilder, sort);
630630
}
631631

632-
private static MinAndMax<?> getMinMaxOrNullInternal(
632+
private static FieldStats getFieldStatsInternal(
633633
IndexReader reader,
634634
QueryShardContext context,
635635
FieldSortBuilder sortBuilder,
636636
SortAndFormats sort
637637
) throws IOException {
638638
SortField sortField = sort.sort.getSort()[0];
639639
if (sortField.getField() == null) {
640-
return null;
640+
return FieldStats.UNKNOWN;
641641
}
642642
MappedFieldType fieldType = context.fieldMapper(sortField.getField());
643643
if (reader == null || (fieldType == null || fieldType.isSearchable() == false)) {
644-
return null;
644+
return FieldStats.UNKNOWN;
645645
}
646646
switch (IndexSortConfig.getSortFieldType(sortField)) {
647647
case LONG:
648648
case INT:
649649
case DOUBLE:
650650
case FLOAT:
651-
return extractNumericMinAndMax(reader, sortField, fieldType, sortBuilder);
651+
return extractNumericFieldStats(reader, sortField, fieldType, sortBuilder);
652652
case STRING:
653653
case STRING_VAL:
654654
if (fieldType instanceof KeywordFieldMapper.KeywordFieldType) {
655655
Terms terms = MultiTerms.getTerms(reader, fieldType.name());
656656
if (terms == null) {
657-
return null;
657+
return FieldStats.UNKNOWN;
658658
}
659-
return terms.getMin() != null ? new MinAndMax<>(terms.getMin(), terms.getMax()) : null;
659+
MinAndMax<?> minAndMax = terms.getMin() != null ? new MinAndMax<>(terms.getMin(), terms.getMax()) : null;
660+
return new FieldStats(minAndMax, terms.getDocCount() == reader.maxDoc());
660661
}
661662
break;
662663
}
663-
return null;
664+
return FieldStats.UNKNOWN;
664665
}
665666

666-
private static MinAndMax<?> extractNumericMinAndMax(
667+
private static FieldStats extractNumericFieldStats(
667668
IndexReader reader,
668669
SortField sortField,
669670
MappedFieldType fieldType,
670671
FieldSortBuilder sortBuilder
671672
) throws IOException {
672673
String fieldName = fieldType.name();
673-
if (PointValues.size(reader, fieldName) == 0) {
674-
return null;
674+
final int docCount = PointValues.getDocCount(reader, fieldName);
675+
if (docCount == 0) {
676+
return FieldStats.UNKNOWN;
675677
}
678+
final boolean allDocsNonMissing = docCount == reader.maxDoc();
679+
MinAndMax<?> minAndMax = null;
676680
if (fieldType instanceof NumberFieldType) {
677681
NumberFieldType numberFieldType = (NumberFieldType) fieldType;
678682
Number minPoint = numberFieldType.parsePoint(PointValues.getMinPackedValue(reader, fieldName));
@@ -681,27 +685,31 @@ private static MinAndMax<?> extractNumericMinAndMax(
681685
case LONG:
682686
if (numberFieldType.numericType() == NumericType.UNSIGNED_LONG) {
683687
// The min and max are expected to be BigInteger numbers
684-
return new MinAndMax<>((BigInteger) minPoint, (BigInteger) maxPoint);
688+
minAndMax = new MinAndMax<>((BigInteger) minPoint, (BigInteger) maxPoint);
685689
} else {
686-
return new MinAndMax<>(minPoint.longValue(), maxPoint.longValue());
690+
minAndMax = new MinAndMax<>(minPoint.longValue(), maxPoint.longValue());
687691
}
692+
break;
688693
case INT:
689-
return new MinAndMax<>(minPoint.intValue(), maxPoint.intValue());
694+
minAndMax = new MinAndMax<>(minPoint.intValue(), maxPoint.intValue());
695+
break;
690696
case DOUBLE:
691-
return new MinAndMax<>(minPoint.doubleValue(), maxPoint.doubleValue());
697+
minAndMax = new MinAndMax<>(minPoint.doubleValue(), maxPoint.doubleValue());
698+
break;
692699
case FLOAT:
693-
return new MinAndMax<>(minPoint.floatValue(), maxPoint.floatValue());
700+
minAndMax = new MinAndMax<>(minPoint.floatValue(), maxPoint.floatValue());
701+
break;
694702
default:
695-
return null;
703+
// unsupported sort type: leave minAndMax as null
696704
}
697705
} else if (fieldType instanceof DateFieldType) {
698706
DateFieldType dateFieldType = (DateFieldType) fieldType;
699707
Function<byte[], Long> dateConverter = createDateConverter(sortBuilder, dateFieldType);
700708
Long min = dateConverter.apply(PointValues.getMinPackedValue(reader, fieldName));
701709
Long max = dateConverter.apply(PointValues.getMaxPackedValue(reader, fieldName));
702-
return new MinAndMax<>(min, max);
710+
minAndMax = new MinAndMax<>(min, max);
703711
}
704-
return null;
712+
return new FieldStats(minAndMax, allDocsNonMissing);
705713
}
706714

707715
private static Function<byte[], Long> createDateConverter(FieldSortBuilder sortBuilder, DateFieldType dateFieldType) {
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.search.sort;
10+
11+
/**
12+
* Encapsulates statistics about a field: its minimum and maximum values and whether every doc has a value for it.
13+
*
14+
* @opensearch.internal
15+
*/
16+
public class FieldStats {
17+
public static final FieldStats UNKNOWN = new FieldStats(null, false);
18+
19+
private final MinAndMax<?> minAndMax;
20+
private final boolean allDocsNonMissing;
21+
22+
public FieldStats(MinAndMax<?> minAndMax, boolean allDocsNonMissing) {
23+
this.minAndMax = minAndMax;
24+
this.allDocsNonMissing = allDocsNonMissing;
25+
}
26+
27+
/**
28+
* Return the minimum and maximum value.
29+
*/
30+
public MinAndMax<?> getMinAndMax() {
31+
return minAndMax;
32+
}
33+
34+
/**
35+
* Indicates whether all docs have a value for the corresponding field.
36+
*/
37+
public boolean allDocsNonMissing() {
38+
return allDocsNonMissing;
39+
}
40+
}

0 commit comments

Comments
 (0)