Skip to content

Commit 2f64c40

Browse files
authored
Remove unnecessary filter for DateHistogram aggregation (#4877)
* Remove unnecessary filter for DateHistogram aggregation Signed-off-by: Lantao Jin <ltjin@amazon.com> * revert changes of interval selection in DateHistogram Signed-off-by: Lantao Jin <ltjin@amazon.com> * typo Signed-off-by: Lantao Jin <ltjin@amazon.com> * revert one it Signed-off-by: Lantao Jin <ltjin@amazon.com> --------- Signed-off-by: Lantao Jin <ltjin@amazon.com>
1 parent 885230f commit 2f64c40

31 files changed

Lines changed: 185 additions & 497 deletions

integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -464,22 +464,22 @@ public void testStatsBySpanNonBucketNullable() throws IOException {
464464

465465
@Test
466466
public void testStatsByTimeSpan() throws IOException {
467-
String expected = loadExpectedPlan("explain_stats_by_timespan.json");
468-
assertJsonEqualsIgnoreId(
467+
String expected = loadExpectedPlan("explain_stats_by_timespan.yaml");
468+
assertYamlEqualsIgnoreId(
469469
expected,
470-
explainQueryToString(
470+
explainQueryYaml(
471471
String.format("source=%s | stats count() by span(birthdate,1m)", TEST_INDEX_BANK)));
472472

473-
expected = loadExpectedPlan("explain_stats_by_timespan2.json");
474-
assertJsonEqualsIgnoreId(
473+
expected = loadExpectedPlan("explain_stats_by_timespan2.yaml");
474+
assertYamlEqualsIgnoreId(
475475
expected,
476-
explainQueryToString(
476+
explainQueryYaml(
477477
String.format("source=%s | stats count() by span(birthdate,1M)", TEST_INDEX_BANK)));
478478

479479
// bucket_nullable has no effect when grouping by a time span, so the same plan is expected
480-
assertJsonEqualsIgnoreId(
480+
assertYamlEqualsIgnoreId(
481481
expected,
482-
explainQueryToString(
482+
explainQueryYaml(
483483
String.format(
484484
"source=%s | stats bucket_nullable=false count() by span(birthdate,1M)",
485485
TEST_INDEX_BANK)));

integ-test/src/test/java/org/opensearch/sql/ppl/PPLIntegTestCase.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@ protected String executeQueryToString(String query) throws IOException {
5959
return getResponseBody(response, true);
6060
}
6161

62+
/** @deprecated Use {@link #explainQueryYaml(String)} instead. */
63+
@Deprecated
6264
protected String explainQueryToString(String query) throws IOException {
6365
return explainQueryToString(query, false);
6466
}

integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -411,7 +411,11 @@ public static void assertJsonEquals(String expected, String actual) {
411411
JsonParser.parseString(eliminatePid(actual)));
412412
}
413413

414-
/** Compare two JSON string are equals with ignoring the RelNode id in the Calcite plan. */
414+
/**
415+
* Asserts that two JSON strings are equal, ignoring the RelNode id in the Calcite plan.
416+
* @deprecated Use {@link #assertYamlEqualsIgnoreId(String, String)} instead.
417+
*/
418+
@Deprecated
415419
public static void assertJsonEqualsIgnoreId(String expected, String actual) {
416420
assertJsonEquals(cleanUpId(expected), cleanUpId(actual));
417421
}
@@ -434,6 +438,7 @@ private static String eliminatePid(String s) {
434438
return s.replaceAll("pitId=[^,]+,", "pitId=*,");
435439
}
436440

441+
/** Asserts that two YAML strings are equal, ignoring the RelNode id in the Calcite plan. */
437442
public static void assertYamlEqualsIgnoreId(String expectedYaml, String actualYaml) {
438443
String cleanedYaml = cleanUpYaml(actualYaml);
439444
assertYamlEquals(expectedYaml, cleanedYaml);

integ-test/src/test/resources/big5/queries/composite_date_histogram_daily.ppl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,4 +31,5 @@
3131
*/
3232
source = big5
3333
| where `@timestamp` >= '2022-12-30 00:00:00' and `@timestamp` < '2023-01-07 12:00:00'
34-
| stats count() by span(`@timestamp`, 1d)
34+
| stats count() by span(`@timestamp`, 1d)
35+
| head 10

integ-test/src/test/resources/big5/queries/composite_terms.ppl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,4 +29,5 @@
2929
source = big5
3030
| where `@timestamp` >= '2023-01-02 00:00:00' and `@timestamp` < '2023-01-02 10:00:00'
3131
| stats count() by `process.name`, `cloud.region`
32-
| sort - `process.name`, + `cloud.region`
32+
| sort - `process.name`, + `cloud.region`
33+
| head 10

integ-test/src/test/resources/big5/queries/composite_terms_keyword.ppl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,4 +30,5 @@
3030
source = big5
3131
| where `@timestamp` >= '2023-01-02 00:00:00' and `@timestamp` < '2023-01-02 10:00:00'
3232
| stats count() by `process.name`, `cloud.region`, `aws.cloudwatch.log_stream`
33-
| sort - `process.name`, + `cloud.region`, + `aws.cloudwatch.log_stream`
33+
| sort - `process.name`, + `cloud.region`, + `aws.cloudwatch.log_stream`
34+
| head 10

integ-test/src/test/resources/big5/queries/optimized/composite_terms.ppl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,4 +29,5 @@
2929
source = big5
3030
| where `@timestamp` >= '2023-01-02 00:00:00' and `@timestamp` < '2023-01-02 10:00:00'
3131
| stats bucket_nullable = false count() by `process.name`, `cloud.region`
32-
| sort - `process.name`, + `cloud.region`
32+
| sort - `process.name`, + `cloud.region`
33+
| head 10

integ-test/src/test/resources/big5/queries/optimized/composite_terms_keyword.ppl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,4 +30,5 @@
3030
source = big5
3131
| where `@timestamp` >= '2023-01-02 00:00:00' and `@timestamp` < '2023-01-02 10:00:00'
3232
| stats bucket_nullable = false count() by `process.name`, `cloud.region`, `aws.cloudwatch.log_stream`
33-
| sort - `process.name`, + `cloud.region`, + `aws.cloudwatch.log_stream`
33+
| sort - `process.name`, + `cloud.region`, + `aws.cloudwatch.log_stream`
34+
| head 10
Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
calcite:
22
logical: |
33
LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
4-
LogicalProject(count()=[$1], span(`@timestamp`,1d)=[$0])
5-
LogicalAggregate(group=[{0}], count()=[COUNT()])
6-
LogicalProject(span(`@timestamp`,1d)=[SPAN($17, 1, 'd')])
7-
LogicalFilter(condition=[IS NOT NULL($17)])
8-
LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2022-12-30 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-07 12:00:00':VARCHAR)))])
9-
CalciteLogicalIndexScan(table=[[OpenSearch, big5]])
4+
LogicalSort(fetch=[10])
5+
LogicalProject(count()=[$1], span(`@timestamp`,1d)=[$0])
6+
LogicalAggregate(group=[{0}], count()=[COUNT()])
7+
LogicalProject(span(`@timestamp`,1d)=[SPAN($17, 1, 'd')])
8+
LogicalFilter(condition=[IS NOT NULL($17)])
9+
LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2022-12-30 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-07 12:00:00':VARCHAR)))])
10+
CalciteLogicalIndexScan(table=[[OpenSearch, big5]])
1011
physical: |
11-
CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[@timestamp], FILTER->SEARCH($0, Sarg[['2022-12-30 00:00:00':VARCHAR..'2023-01-07 12:00:00':VARCHAR); NULL AS FALSE]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1d)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"range":{"@timestamp":{"from":"2022-12-30T00:00:00.000Z","to":"2023-01-07T12:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},{"exists":{"field":"@timestamp","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"span(`@timestamp`,1d)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1d"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
12+
CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[@timestamp], FILTER->SEARCH($0, Sarg[['2022-12-30 00:00:00':VARCHAR..'2023-01-07 12:00:00':VARCHAR); NULL AS FALSE]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1d)], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2022-12-30T00:00:00.000Z","to":"2023-01-07T12:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"span(`@timestamp`,1d)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1d"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
calcite:
22
logical: |
33
LogicalSystemLimit(sort0=[$1], sort1=[$2], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT])
4-
LogicalSort(sort0=[$1], sort1=[$2], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first])
4+
LogicalSort(sort0=[$1], sort1=[$2], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first], fetch=[10])
55
LogicalProject(count()=[$2], process.name=[$0], cloud.region=[$1])
66
LogicalAggregate(group=[{0, 1}], count()=[COUNT()])
77
LogicalProject(process.name=[$7], cloud.region=[$14])
88
LogicalFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($14))])
99
LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-02 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-02 10:00:00':VARCHAR)))])
1010
CalciteLogicalIndexScan(table=[[OpenSearch, big5]])
1111
physical: |
12-
CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[process.name, cloud.region, @timestamp], FILTER->SEARCH($2, Sarg[['2023-01-02 00:00:00':VARCHAR..'2023-01-02 10:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), process.name, cloud.region], SORT->[1 DESC LAST, 2 ASC FIRST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-02T00:00:00.000Z","to":"2023-01-02T10:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["process.name","cloud.region","@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"process.name":{"terms":{"field":"process.name","missing_bucket":false,"order":"desc"}}},{"cloud.region":{"terms":{"field":"cloud.region","missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
12+
CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[process.name, cloud.region, @timestamp], FILTER->SEARCH($2, Sarg[['2023-01-02 00:00:00':VARCHAR..'2023-01-02 10:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), process.name, cloud.region], SORT->[1 DESC LAST, 2 ASC FIRST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-02T00:00:00.000Z","to":"2023-01-02T10:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["process.name","cloud.region","@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"process.name":{"terms":{"field":"process.name","missing_bucket":false,"order":"desc"}}},{"cloud.region":{"terms":{"field":"cloud.region","missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])

0 commit comments

Comments
 (0)