Skip to content

Commit 5c32256

Browse files
authored
Adding back [Time series based workload desc order optimization through reverse segment read (#7244)] with fixes (#7967)
* Revert "Revert "Time series based workload desc order optimization through reverse segment read (#7244)" (#7892)" This reverts commit bb26536. Signed-off-by: gashutos <gashutos@amazon.com> * Enable time series optimization only if it is not IndexSorted index, also ASC order reverse should only consider in @timestamp field Signed-off-by: gashutos <gashutos@amazon.com> * Modifying CHANGELOG Signed-off-by: gashutos <gashutos@amazon.com> * Adding integ test for scroll API where sort by _doc is getting early termination Signed-off-by: gashutos <gashutos@amazon.com> --------- Signed-off-by: gashutos <gashutos@amazon.com>
1 parent 9b2b3c9 commit 5c32256

12 files changed

Lines changed: 288 additions & 6 deletions

File tree

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
9595
- Add task cancellation monitoring service ([#7642](https://github.com/opensearch-project/OpenSearch/pull/7642))
9696
- Add TokenManager Interface ([#7452](https://github.com/opensearch-project/OpenSearch/pull/7452))
9797
- Add Remote store as a segment replication source ([#7653](https://github.com/opensearch-project/OpenSearch/pull/7653))
98+
- Add descending order search optimization through reverse segment read. ([#7967](https://github.com/opensearch-project/OpenSearch/pull/7967))
99+
98100

99101
### Dependencies
100102
- Bump `jackson` from 2.15.1 to 2.15.2 ([#7897](https://github.com/opensearch-project/OpenSearch/pull/7897))
Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
---
2+
"Basic scroll on time series workload for reversed leaf sorter":
3+
- do:
4+
indices.create:
5+
index: test_scroll_time_series
6+
body:
7+
mappings:
8+
properties:
9+
name:
10+
type: keyword
11+
'@timestamp':
12+
type: date
13+
14+
- do:
15+
bulk:
16+
refresh: true
17+
index: test_scroll_time_series
18+
body:
19+
- '{"index": {}}'
20+
- '{"name": "1", "@timestamp": "2010-03-12T01:07:00"}'
21+
- '{"index": {}}'
22+
- '{"name": "2", "@timestamp": "2010-03-12T01:07:01"}'
23+
- '{"index": {}}'
24+
- '{"name": "3", "@timestamp": "2010-03-12T01:07:02"}'
25+
- '{"index": {}}'
26+
- '{"name": "4", "@timestamp": "2010-03-12T01:07:03"}'
27+
- '{"index": {}}'
28+
- '{"name": "5", "@timestamp": "2010-03-12T01:07:04"}'
29+
- '{"index": {}}'
30+
- '{"name": "6", "@timestamp": "2010-03-12T01:07:05"}'
31+
- '{"index": {}}'
32+
- '{"name": "7", "@timestamp": "2010-03-12T01:07:06"}'
33+
- '{"index": {}}'
34+
- '{"name": "8", "@timestamp": "2010-03-12T01:07:07"}'
35+
- '{"index": {}}'
36+
- '{"name": "9", "@timestamp": "2010-03-12T01:07:08"}'
37+
- '{"index": {}}'
38+
- '{"name": "10", "@timestamp": "2010-03-12T01:07:09"}'
39+
- do:
40+
indices.refresh: {}
41+
- do:
42+
bulk:
43+
refresh: true
44+
index: test_scroll_time_series
45+
body:
46+
- '{"index": {}}'
47+
- '{"name": "11", "@timestamp": "2010-03-12T01:07:10"}'
48+
- '{"index": {}}'
49+
- '{"name": "12", "@timestamp": "2010-03-12T01:07:11"}'
50+
- '{"index": {}}'
51+
- '{"name": "13", "@timestamp": "2010-03-12T01:07:12"}'
52+
- '{"index": {}}'
53+
- '{"name": "14", "@timestamp": "2010-03-12T01:07:13"}'
54+
- '{"index": {}}'
55+
- '{"name": "15", "@timestamp": "2010-03-12T01:07:14"}'
56+
- '{"index": {}}'
57+
- '{"name": "16", "@timestamp": "2010-03-12T01:07:15"}'
58+
- '{"index": {}}'
59+
- '{"name": "17", "@timestamp": "2010-03-12T01:07:16"}'
60+
- '{"index": {}}'
61+
- '{"name": "18", "@timestamp": "2010-03-12T01:07:17"}'
62+
- '{"index": {}}'
63+
- '{"name": "19", "@timestamp": "2010-03-12T01:07:18"}'
64+
- '{"index": {}}'
65+
- '{"name": "20", "@timestamp": "2010-03-12T01:07:19"}'
66+
- do:
67+
indices.refresh: { }
68+
- do:
69+
bulk:
70+
refresh: true
71+
index: test_scroll_time_series
72+
body:
73+
- '{"index": {}}'
74+
- '{"name": "21", "@timestamp": "2010-03-12T01:07:20"}'
75+
- '{"index": {}}'
76+
- '{"name": "22", "@timestamp": "2010-03-12T01:07:21"}'
77+
- '{"index": {}}'
78+
- '{"name": "23", "@timestamp": "2010-03-12T01:07:22"}'
79+
- '{"index": {}}'
80+
- '{"name": "24", "@timestamp": "2010-03-12T01:07:23"}'
81+
- '{"index": {}}'
82+
- '{"name": "25", "@timestamp": "2010-03-12T01:07:24"}'
83+
- '{"index": {}}'
84+
- '{"name": "26", "@timestamp": "2010-03-12T01:07:25"}'
85+
- '{"index": {}}'
86+
- '{"name": "27", "@timestamp": "2010-03-12T01:07:26"}'
87+
- '{"index": {}}'
88+
- '{"name": "28", "@timestamp": "2010-03-12T01:07:27"}'
89+
- '{"index": {}}'
90+
- '{"name": "29", "@timestamp": "2010-03-12T01:07:28"}'
91+
- '{"index": {}}'
92+
- '{"name": "30", "@timestamp": "2010-03-12T01:07:29"}'
93+
- do:
94+
indices.refresh: { }
95+
96+
- do:
97+
search:
98+
rest_total_hits_as_int: true
99+
index: test_scroll_time_series
100+
size: 5
101+
scroll: 1m
102+
sort: _doc
103+
body:
104+
query:
105+
match_all: {}
106+
107+
- set: {_scroll_id: scroll_id}
108+
- match: {hits.total: 30 }
109+
- length: {hits.hits: 5 }
110+
111+
- do:
112+
scroll:
113+
rest_total_hits_as_int: true
114+
body: { "scroll_id": "$scroll_id", "scroll": "1m"}
115+
116+
- match: {hits.total: 30 }
117+
- length: {hits.hits: 5 }
118+
119+
- do:
120+
scroll:
121+
rest_total_hits_as_int: true
122+
body: { "scroll_id": "$scroll_id", "scroll": "1m" }
123+
124+
- match: { hits.total: 30 }
125+
- length: { hits.hits: 5 }
126+
127+
- do:
128+
scroll:
129+
rest_total_hits_as_int: true
130+
body: { "scroll_id": "$scroll_id", "scroll": "1m" }
131+
132+
- match: { hits.total: 30 }
133+
- length: { hits.hits: 5 }
134+
135+
- do:
136+
scroll:
137+
rest_total_hits_as_int: true
138+
body: { "scroll_id": "$scroll_id", "scroll": "1m" }
139+
140+
- match: { hits.total: 30 }
141+
- length: { hits.hits: 5 }
142+
143+
- do:
144+
scroll:
145+
rest_total_hits_as_int: true
146+
body: { "scroll_id": "$scroll_id", "scroll": "1m" }
147+
148+
- match: { hits.total: 30 }
149+
- length: { hits.hits: 5 }
150+
151+
- do:
152+
scroll:
153+
rest_total_hits_as_int: true
154+
body: { "scroll_id": "$scroll_id", "scroll": "1m" }
155+
156+
- match: { hits.total: 30 }
157+
- length: { hits.hits: 0 }
158+
159+
- do:
160+
clear_scroll:
161+
scroll_id: $scroll_id

server/src/main/java/org/opensearch/cluster/metadata/DataStream.java

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@
3131

3232
package org.opensearch.cluster.metadata;
3333

34+
import org.apache.lucene.document.LongPoint;
35+
import org.apache.lucene.index.LeafReader;
36+
import org.apache.lucene.index.PointValues;
37+
import org.opensearch.OpenSearchException;
3438
import org.opensearch.cluster.AbstractDiffable;
3539
import org.opensearch.cluster.Diff;
3640
import org.opensearch.core.ParseField;
@@ -46,6 +50,7 @@
4650
import java.io.IOException;
4751
import java.util.ArrayList;
4852
import java.util.Collections;
53+
import java.util.Comparator;
4954
import java.util.List;
5055
import java.util.Locale;
5156
import java.util.Map;
@@ -59,6 +64,24 @@
5964
public final class DataStream extends AbstractDiffable<DataStream> implements ToXContentObject {
6065

6166
public static final String BACKING_INDEX_PREFIX = ".ds-";
67+
public static final String TIMESERIES_FIELDNAME = "@timestamp";
68+
public static final Comparator<LeafReader> TIMESERIES_LEAF_SORTER = Comparator.comparingLong((LeafReader r) -> {
69+
try {
70+
PointValues points = r.getPointValues(TIMESERIES_FIELDNAME);
71+
if (points != null) {
72+
// could be a multipoint (probably not) but get the maximum time value anyway
73+
byte[] sortValue = points.getMaxPackedValue();
74+
// decode the first dimension because this should not be a multi dimension field
75+
// it's a bug in the date field if it is
76+
return LongPoint.decodeDimension(sortValue, 0);
77+
} else {
78+
// segment does not have a timestamp field, just return the minimum value
79+
return Long.MIN_VALUE;
80+
}
81+
} catch (IOException e) {
82+
throw new OpenSearchException("Not a timeseries Index! Field [{}] not found!", TIMESERIES_FIELDNAME);
83+
}
84+
}).reversed();
6285

6386
private final String name;
6487
private final TimestampField timeStampField;

server/src/main/java/org/opensearch/index/IndexSettings.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -665,6 +665,7 @@ private void setRetentionLeaseMillis(final TimeValue retentionLease) {
665665
private volatile long mappingTotalFieldsLimit;
666666
private volatile long mappingDepthLimit;
667667
private volatile long mappingFieldNameLengthLimit;
668+
private volatile boolean searchSegmentOrderReversed;
668669

669670
/**
670671
* The maximum number of refresh listeners allows on this shard.
@@ -905,6 +906,10 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti
905906
scopedSettings.addSettingsUpdateConsumer(DEFAULT_SEARCH_PIPELINE, this::setDefaultSearchPipeline);
906907
}
907908

909+
private void setSearchSegmentOrderReversed(boolean reversed) {
910+
this.searchSegmentOrderReversed = reversed;
911+
}
912+
908913
private void setSearchIdleAfter(TimeValue searchIdleAfter) {
909914
this.searchIdleAfter = searchIdleAfter;
910915
}
@@ -1084,6 +1089,13 @@ public Settings getNodeSettings() {
10841089
return nodeSettings;
10851090
}
10861091

1092+
/**
1093+
* Returns true if index level setting for leaf reverse order search optimization is enabled
1094+
*/
1095+
public boolean getSearchSegmentOrderReversed() {
1096+
return this.searchSegmentOrderReversed;
1097+
}
1098+
10871099
/**
10881100
* Updates the settings and index metadata and notifies all registered settings consumers with the new settings iff at least one
10891101
* setting has changed.

server/src/main/java/org/opensearch/index/engine/EngineConfig.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333

3434
import org.apache.lucene.analysis.Analyzer;
3535
import org.apache.lucene.codecs.Codec;
36+
import org.apache.lucene.index.LeafReader;
3637
import org.apache.lucene.index.MergePolicy;
3738
import org.apache.lucene.search.QueryCache;
3839
import org.apache.lucene.search.QueryCachingPolicy;
@@ -59,6 +60,7 @@
5960
import org.opensearch.indices.breaker.CircuitBreakerService;
6061
import org.opensearch.threadpool.ThreadPool;
6162

63+
import java.util.Comparator;
6264
import java.util.List;
6365
import java.util.Objects;
6466
import java.util.function.BooleanSupplier;
@@ -102,6 +104,7 @@ public final class EngineConfig {
102104
private final Supplier<RetentionLeases> retentionLeasesSupplier;
103105
private final boolean isReadOnlyReplica;
104106
private final BooleanSupplier primaryModeSupplier;
107+
private final Comparator<LeafReader> leafSorter;
105108

106109
/**
107110
* A supplier of the outstanding retention leases. This is used during merged operations to determine which operations that have been
@@ -204,6 +207,7 @@ private EngineConfig(Builder builder) {
204207
this.isReadOnlyReplica = builder.isReadOnlyReplica;
205208
this.primaryModeSupplier = builder.primaryModeSupplier;
206209
this.translogFactory = builder.translogFactory;
210+
this.leafSorter = builder.leafSorter;
207211
}
208212

209213
/**
@@ -451,6 +455,15 @@ public TranslogDeletionPolicyFactory getCustomTranslogDeletionPolicyFactory() {
451455
return translogDeletionPolicyFactory;
452456
}
453457

458+
/**
459+
* Returns subReaderSorter for org.apache.lucene.index.BaseCompositeReader.
460+
* This gets used in lucene IndexReader and decides order of segment read.
461+
* @return comparator
462+
*/
463+
public Comparator<LeafReader> getLeafSorter() {
464+
return this.leafSorter;
465+
}
466+
454467
/**
455468
* Builder for EngineConfig class
456469
*
@@ -483,6 +496,7 @@ public static class Builder {
483496
private boolean isReadOnlyReplica;
484497
private BooleanSupplier primaryModeSupplier;
485498
private TranslogFactory translogFactory = new InternalTranslogFactory();
499+
Comparator<LeafReader> leafSorter;
486500

487501
public Builder shardId(ShardId shardId) {
488502
this.shardId = shardId;
@@ -614,6 +628,11 @@ public Builder translogFactory(TranslogFactory translogFactory) {
614628
return this;
615629
}
616630

631+
public Builder leafSorter(Comparator<LeafReader> leafSorter) {
632+
this.leafSorter = leafSorter;
633+
return this;
634+
}
635+
617636
public EngineConfig build() {
618637
return new EngineConfig(this);
619638
}

server/src/main/java/org/opensearch/index/engine/EngineConfigFactory.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
import org.apache.logging.log4j.Logger;
1212
import org.apache.lucene.analysis.Analyzer;
13+
import org.apache.lucene.index.LeafReader;
1314
import org.apache.lucene.index.MergePolicy;
1415
import org.apache.lucene.search.QueryCache;
1516
import org.apache.lucene.search.QueryCachingPolicy;
@@ -36,6 +37,7 @@
3637

3738
import java.util.Collection;
3839
import java.util.Collections;
40+
import java.util.Comparator;
3941
import java.util.List;
4042
import java.util.Optional;
4143
import java.util.function.BooleanSupplier;
@@ -151,7 +153,8 @@ public EngineConfig newEngineConfig(
151153
EngineConfig.TombstoneDocSupplier tombstoneDocSupplier,
152154
boolean isReadOnlyReplica,
153155
BooleanSupplier primaryModeSupplier,
154-
TranslogFactory translogFactory
156+
TranslogFactory translogFactory,
157+
Comparator<LeafReader> leafSorter
155158
) {
156159
CodecService codecServiceToUse = codecService;
157160
if (codecService == null && this.codecServiceFactory != null) {
@@ -184,6 +187,7 @@ public EngineConfig newEngineConfig(
184187
.readOnlyReplica(isReadOnlyReplica)
185188
.primaryModeSupplier(primaryModeSupplier)
186189
.translogFactory(translogFactory)
190+
.leafSorter(leafSorter)
187191
.build();
188192
}
189193

server/src/main/java/org/opensearch/index/engine/InternalEngine.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2322,6 +2322,9 @@ private IndexWriterConfig getIndexWriterConfig() {
23222322
if (config().getIndexSort() != null) {
23232323
iwc.setIndexSort(config().getIndexSort());
23242324
}
2325+
if (config().getLeafSorter() != null) {
2326+
iwc.setLeafSorter(config().getLeafSorter()); // The default segment search order
2327+
}
23252328
return iwc;
23262329
}
23272330

server/src/main/java/org/opensearch/index/mapper/MappingLookup.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
package org.opensearch.index.mapper;
3434

3535
import org.apache.lucene.analysis.Analyzer;
36+
import org.opensearch.cluster.metadata.DataStream;
3637
import org.opensearch.index.IndexSettings;
3738
import org.opensearch.index.analysis.FieldNameAnalyzer;
3839

@@ -261,6 +262,15 @@ public String getNestedScope(String path) {
261262
return null;
262263
}
263264

265+
/**
266+
* If this index contains @timestamp field with Date type, it will return true
267+
* @return true or false based on above condition
268+
*/
269+
public boolean containsTimeStampField() {
270+
MappedFieldType timeSeriesFieldType = this.fieldTypeLookup.get(DataStream.TIMESERIES_FIELDNAME);
271+
return timeSeriesFieldType != null && timeSeriesFieldType instanceof DateFieldMapper.DateFieldType; // has to be Date field type
272+
}
273+
264274
private static String parentObject(String field) {
265275
int lastDot = field.lastIndexOf('.');
266276
if (lastDot == -1) {

0 commit comments

Comments
 (0)