Skip to content

Commit fe6d01a

Browse files
committed
Remove unnecessary filter for DateHistogram aggregation
Signed-off-by: Lantao Jin <ltjin@amazon.com>
1 parent 885230f commit fe6d01a

39 files changed

+202
-523
lines changed

integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -464,22 +464,22 @@ public void testStatsBySpanNonBucketNullable() throws IOException {
464464

465465
@Test
466466
public void testStatsByTimeSpan() throws IOException {
467-
String expected = loadExpectedPlan("explain_stats_by_timespan.json");
468-
assertJsonEqualsIgnoreId(
467+
String expected = loadExpectedPlan("explain_stats_by_timespan.yaml");
468+
assertYamlEqualsIgnoreId(
469469
expected,
470-
explainQueryToString(
470+
explainQueryYaml(
471471
String.format("source=%s | stats count() by span(birthdate,1m)", TEST_INDEX_BANK)));
472472

473-
expected = loadExpectedPlan("explain_stats_by_timespan2.json");
474-
assertJsonEqualsIgnoreId(
473+
expected = loadExpectedPlan("explain_stats_by_timespan2.yaml");
474+
assertYamlEqualsIgnoreId(
475475
expected,
476-
explainQueryToString(
476+
explainQueryYaml(
477477
String.format("source=%s | stats count() by span(birthdate,1M)", TEST_INDEX_BANK)));
478478

479479
// bucket_nullable has no effect when grouping by a time span
480-
assertJsonEqualsIgnoreId(
480+
assertYamlEqualsIgnoreId(
481481
expected,
482-
explainQueryToString(
482+
explainQueryYaml(
483483
String.format(
484484
"source=%s | stats bucket_nullable=false count() by span(birthdate,1M)",
485485
TEST_INDEX_BANK)));

integ-test/src/test/java/org/opensearch/sql/ppl/PPLIntegTestCase.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@ protected String executeQueryToString(String query) throws IOException {
5959
return getResponseBody(response, true);
6060
}
6161

62+
/** @deprecated Use {@link #explainQueryYaml(String)} instead. */
63+
@Deprecated
6264
protected String explainQueryToString(String query) throws IOException {
6365
return explainQueryToString(query, false);
6466
}

integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -411,7 +411,11 @@ public static void assertJsonEquals(String expected, String actual) {
411411
JsonParser.parseString(eliminatePid(actual)));
412412
}
413413

414-
/** Compare two JSON string are equals with ignoring the RelNode id in the Calcite plan. */
414+
/**
415+
* Compare two JSON strings for equality, ignoring the RelNode id in the Calcite plan.
416+
* @deprecated Use {@link #assertYamlEqualsIgnoreId(String, String)} instead.
417+
*/
418+
@Deprecated // use assertYamlEqualsIgnoreId instead
415419
public static void assertJsonEqualsIgnoreId(String expected, String actual) {
416420
assertJsonEquals(cleanUpId(expected), cleanUpId(actual));
417421
}
@@ -434,6 +438,7 @@ private static String eliminatePid(String s) {
434438
return s.replaceAll("pitId=[^,]+,", "pitId=*,");
435439
}
436440

441+
/** Compare two YAML strings for equality, ignoring the RelNode id in the Calcite plan. */
437442
public static void assertYamlEqualsIgnoreId(String expectedYaml, String actualYaml) {
438443
String cleanedYaml = cleanUpYaml(actualYaml);
439444
assertYamlEquals(expectedYaml, cleanedYaml);

integ-test/src/test/resources/big5/queries/composite_date_histogram_daily.ppl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,4 +31,5 @@
3131
*/
3232
source = big5
3333
| where `@timestamp` >= '2022-12-30 00:00:00' and `@timestamp` < '2023-01-07 12:00:00'
34-
| stats count() by span(`@timestamp`, 1d)
34+
| stats count() by span(`@timestamp`, 1d)
35+
| head 10

integ-test/src/test/resources/big5/queries/composite_terms.ppl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,4 +29,5 @@
2929
source = big5
3030
| where `@timestamp` >= '2023-01-02 00:00:00' and `@timestamp` < '2023-01-02 10:00:00'
3131
| stats count() by `process.name`, `cloud.region`
32-
| sort - `process.name`, + `cloud.region`
32+
| sort - `process.name`, + `cloud.region`
33+
| head 10

integ-test/src/test/resources/big5/queries/composite_terms_keyword.ppl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,4 +30,5 @@
3030
source = big5
3131
| where `@timestamp` >= '2023-01-02 00:00:00' and `@timestamp` < '2023-01-02 10:00:00'
3232
| stats count() by `process.name`, `cloud.region`, `aws.cloudwatch.log_stream`
33-
| sort - `process.name`, + `cloud.region`, + `aws.cloudwatch.log_stream`
33+
| sort - `process.name`, + `cloud.region`, + `aws.cloudwatch.log_stream`
34+
| head 10

integ-test/src/test/resources/big5/queries/optimized/composite_terms.ppl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,4 +29,5 @@
2929
source = big5
3030
| where `@timestamp` >= '2023-01-02 00:00:00' and `@timestamp` < '2023-01-02 10:00:00'
3131
| stats bucket_nullable = false count() by `process.name`, `cloud.region`
32-
| sort - `process.name`, + `cloud.region`
32+
| sort - `process.name`, + `cloud.region`
33+
| head 10

integ-test/src/test/resources/big5/queries/optimized/composite_terms_keyword.ppl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,4 +30,5 @@
3030
source = big5
3131
| where `@timestamp` >= '2023-01-02 00:00:00' and `@timestamp` < '2023-01-02 10:00:00'
3232
| stats bucket_nullable = false count() by `process.name`, `cloud.region`, `aws.cloudwatch.log_stream`
33-
| sort - `process.name`, + `cloud.region`, + `aws.cloudwatch.log_stream`
33+
| sort - `process.name`, + `cloud.region`, + `aws.cloudwatch.log_stream`
34+
| head 10

integ-test/src/test/resources/expectedOutput/calcite/agg_composite_date_range_push.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,4 @@ calcite:
88
LogicalProject(@timestamp=[$0], category=[$1], value=[$2], timestamp=[$3], _id=[$4], _index=[$5], _score=[$6], _maxscore=[$7], _sort=[$8], _routing=[$9], value_range=[CASE(<($2, 7000), 'small':VARCHAR, 'large':VARCHAR)])
99
CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]])
1010
physical: |
11-
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[FILTER->IS NOT NULL($0), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 2},avg(value)=AVG($1)), PROJECT->[avg(value), span(@timestamp,1h), value_range], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"exists":{"field":"@timestamp","boost":1.0}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(@timestamp,1h)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1h"}}}]},"aggregations":{"value_range":{"range":{"field":"value","ranges":[{"key":"small","to":7000.0},{"key":"large","from":7000.0}],"keyed":true},"aggregations":{"avg(value)":{"avg":{"field":"value"}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
11+
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[FILTER->IS NOT NULL($0), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 2},avg(value)=AVG($1)), PROJECT->[avg(value), span(@timestamp,1h), value_range], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"exists":{"field":"@timestamp","boost":1.0}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(@timestamp,1h)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","calendar_interval":"1h"}}}]},"aggregations":{"value_range":{"range":{"field":"value","ranges":[{"key":"small","to":7000.0},{"key":"large","from":7000.0}],"keyed":true},"aggregations":{"avg(value)":{"avg":{"field":"value"}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
calcite:
22
logical: |
33
LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
4-
LogicalProject(count()=[$1], span(`@timestamp`,1d)=[$0])
5-
LogicalAggregate(group=[{0}], count()=[COUNT()])
6-
LogicalProject(span(`@timestamp`,1d)=[SPAN($17, 1, 'd')])
7-
LogicalFilter(condition=[IS NOT NULL($17)])
8-
LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2022-12-30 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-07 12:00:00':VARCHAR)))])
9-
CalciteLogicalIndexScan(table=[[OpenSearch, big5]])
4+
LogicalSort(fetch=[10])
5+
LogicalProject(count()=[$1], span(`@timestamp`,1d)=[$0])
6+
LogicalAggregate(group=[{0}], count()=[COUNT()])
7+
LogicalProject(span(`@timestamp`,1d)=[SPAN($17, 1, 'd')])
8+
LogicalFilter(condition=[IS NOT NULL($17)])
9+
LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2022-12-30 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-07 12:00:00':VARCHAR)))])
10+
CalciteLogicalIndexScan(table=[[OpenSearch, big5]])
1011
physical: |
11-
CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[@timestamp], FILTER->SEARCH($0, Sarg[['2022-12-30 00:00:00':VARCHAR..'2023-01-07 12:00:00':VARCHAR); NULL AS FALSE]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1d)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"range":{"@timestamp":{"from":"2022-12-30T00:00:00.000Z","to":"2023-01-07T12:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},{"exists":{"field":"@timestamp","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"span(`@timestamp`,1d)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1d"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
12+
CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[@timestamp], FILTER->SEARCH($0, Sarg[['2022-12-30 00:00:00':VARCHAR..'2023-01-07 12:00:00':VARCHAR); NULL AS FALSE]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1d)], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2022-12-30T00:00:00.000Z","to":"2023-01-07T12:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"span(`@timestamp`,1d)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","calendar_interval":"1d"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])

0 commit comments

Comments
 (0)