From 62df8d2a51ef33ce4519a44fcc626981d3b739d0 Mon Sep 17 00:00:00 2001 From: Songkan Tang Date: Wed, 4 Mar 2026 16:46:13 +0800 Subject: [PATCH 1/8] Fix PushDownContext shallow copy bug Signed-off-by: Songkan Tang --- .../sql/calcite/remote/CalciteExplainIT.java | 11 + .../chart_timestamp_span_and_category.yaml | 2 +- ...lain_agg_consecutive_sorts_issue_5125.yaml | 11 + .../calcite/explain_agg_paginating_join1.yaml | 2 +- .../calcite/explain_agg_paginating_join3.yaml | 2 +- .../calcite/explain_agg_paginating_join4.yaml | 4 +- .../calcite/explain_agg_sort_on_measure1.yaml | 2 +- .../calcite/explain_agg_sort_on_measure2.yaml | 2 +- .../calcite/explain_agg_sort_on_measure3.yaml | 2 +- .../calcite/explain_agg_sort_on_measure4.yaml | 2 +- .../explain_agg_sort_on_measure_complex1.yaml | 2 +- .../explain_agg_sort_on_measure_complex2.yaml | 2 +- ...plain_agg_sort_on_measure_multi_terms.yaml | 2 +- ...gg_sort_on_measure_multi_terms_script.yaml | 2 +- .../explain_agg_sort_on_measure_script.yaml | 2 +- .../rest-api-spec/test/issues/5125.yml | 66 +++ .../storage/scan/CalciteLogicalIndexScan.java | 2 +- .../scan/context/AggPushDownAction.java | 471 ++++++++++++++---- .../storage/scan/context/PushDownContext.java | 20 +- 19 files changed, 484 insertions(+), 125 deletions(-) create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_agg_consecutive_sorts_issue_5125.yaml create mode 100644 integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5125.yml diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 8e980d8973b..402c5e12f05 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -2766,4 +2766,15 @@ public void testNoMvWithEval() throws IOException { "Expected explain to contain both CONCAT and 
ARRAY_JOIN", result.toLowerCase().contains("concat") && result.toLowerCase().contains("array_join")); } + + @Test + public void testExplainConsecutiveSortsAfterAgg() throws IOException { + String expected = loadExpectedPlan("explain_agg_consecutive_sorts_issue_5125.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + String.format( + "source=%s | stats count() as c by gender | sort gender | sort - gender", + TEST_INDEX_BANK))); + } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/chart_timestamp_span_and_category.yaml b/integ-test/src/test/resources/expectedOutput/calcite/chart_timestamp_span_and_category.yaml index 9267e6faab1..610d0b3f11b 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/chart_timestamp_span_and_category.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/chart_timestamp_span_and_category.yaml @@ -28,4 +28,4 @@ calcite: EnumerableCalc(expr#0..2=[{inputs}], category=[$t0], $1=[$t2]) EnumerableWindow(window#0=[window(order by [1 DESC-nulls-last] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) EnumerableAggregate(group=[{0}], __grand_total__=[SUM($1)]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[FILTER->AND(IS NOT NULL($2), IS NOT NULL($1)), FILTER->IS NOT NULL($0), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 2},max(value)=MAX($1)), PROJECT->[category, max(value)]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"filter":[{"bool":{"must":[{"exists":{"field":"timestamp","boost":1.0}},{"exists":{"field":"value","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"category","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"category":{"terms":{"field":"category","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"timestamp0":{"date_histogram":{"field":"timestamp","missing_bucket":false,"order":"asc","calendar_interval":"1w"}}}]},"aggregations":{"max(value)":{"max":{"field":"value"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[FILTER->AND(IS NOT NULL($2), IS NOT NULL($1)), FILTER->IS NOT NULL($0), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 2},max(value)=MAX($1)), PROJECT->[category, max(value)]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"filter":[{"bool":{"must":[{"exists":{"field":"timestamp","boost":1.0}},{"exists":{"field":"value","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"category","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"category":{"terms":{"field":"category","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"timestamp0":{"date_histogram":{"field":"timestamp","missing_bucket":false,"order":"asc","calendar_interval":"1w"}}}]},"aggregations":{"max(value)":{"max":{"field":"value"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_consecutive_sorts_issue_5125.yaml 
b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_consecutive_sorts_issue_5125.yaml new file mode 100644 index 00000000000..349251ab4fc --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_consecutive_sorts_issue_5125.yaml @@ -0,0 +1,11 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$1], dir0=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$1], dir0=[DESC-nulls-last]) + LogicalSort(sort0=[$1], dir0=[ASC-nulls-first]) + LogicalProject(c=[$1], gender=[$0]) + LogicalAggregate(group=[{0}], c=[COUNT()]) + LogicalProject(gender=[$4]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), PROJECT->[c, gender], SORT->[1 DESC LAST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"last","order":"desc"}}}]}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join1.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join1.yaml index ea76cdee61e..e47f3210546 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join1.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join1.yaml @@ -16,4 +16,4 @@ calcite: EnumerableLimit(fetch=[10000]) EnumerableMergeJoin(condition=[=($1, $3)], joinType=[inner]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), 
PROJECT->[c, state], SORT->[1]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":2,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"last","order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), PROJECT->[c, state], LIMIT->50000, SORT->[1]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":2,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"last","order":"asc"}}}]}}}}, requestedTotalSize=50000, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), PROJECT->[c, state], LIMIT->50000, SORT->[1]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":2,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"last","order":"asc"}}}]}}}}, requestedTotalSize=50000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join3.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join3.yaml index 1326030ea7e..9bda406291c 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join3.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join3.yaml @@ -15,5 +15,5 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableHashJoin(condition=[=($1, $2)], joinType=[semi]) - 
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), PROJECT->[c, state], LIMIT->50000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":2,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"last","order":"asc"}}}]}}}}, requestedTotalSize=50000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), PROJECT->[c, state], LIMIT->50000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":2,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=50000, pageSize=null, startFrom=0)]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0}), SORT->[0]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":2,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"last","order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join4.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join4.yaml index 36bf1245a2d..cc79c0dc2f4 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join4.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join4.yaml 
@@ -17,5 +17,5 @@ calcite: EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..2=[{inputs}], c=[$t1], state=[$t2]) EnumerableHashJoin(condition=[=($0, $2)], joinType=[inner]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0}), LIMIT->10], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":2,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"last","order":"asc"}}}]}}}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), PROJECT->[c, state], LIMIT->50000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":2,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"last","order":"asc"}}}]}}}}, requestedTotalSize=50000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0}), LIMIT->10], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":2,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), PROJECT->[c, state], LIMIT->50000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":2,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=50000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure1.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure1.yaml index 75389120405..c08c533bc60 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure1.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure1.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), SORT_AGG_METRICS->[1 ASC FIRST], PROJECT->[count(), state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"state":{"terms":{"field":"state.keyword","size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), state], SORT_AGG_METRICS->[0 ASC FIRST], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"state":{"terms":{"field":"state.keyword","size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure2.yaml index be021c55e23..9c41efa9139 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure2.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure2.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},sum=SUM($0)), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[sum, state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"state":{"terms":{"field":"state.keyword","size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"sum":"desc"},{"_key":"asc"}]},"aggregations":{"sum":{"sum":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},sum=SUM($0)), PROJECT->[sum, state], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"state":{"terms":{"field":"state.keyword","size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"sum":"desc"},{"_key":"asc"}]},"aggregations":{"sum":{"sum":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure3.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure3.yaml index e60bbe90fdc..b8fcb2e28df 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure3.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure3.yaml @@ -9,4 +9,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | EnumerableLimit(fetch=[10000]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},cnt=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[cnt, span(birthdate,1d)]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"span(birthdate,1d)":{"date_histogram":{"field":"birthdate","fixed_interval":"1d","offset":0,"order":[{"_count":"desc"},{"_key":"asc"}],"keyed":false,"min_doc_count":0}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},cnt=COUNT()), PROJECT->[cnt, span(birthdate,1d)], SORT_AGG_METRICS->[0 DESC LAST]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"span(birthdate,1d)":{"date_histogram":{"field":"birthdate","fixed_interval":"1d","offset":0,"order":[{"_count":"desc"},{"_key":"asc"}],"keyed":false,"min_doc_count":0}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure4.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure4.yaml index 57132615c41..f02b46a0938 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure4.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure4.yaml @@ -9,4 +9,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | EnumerableLimit(fetch=[10000]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},sum(balance)=SUM($0)), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[sum(balance), span(age,5)]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"span(age,5)":{"histogram":{"field":"age","interval":5.0,"offset":0.0,"order":[{"sum(balance)":"desc"},{"_key":"asc"}],"keyed":false,"min_doc_count":0},"aggregations":{"sum(balance)":{"sum":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},sum(balance)=SUM($0)), PROJECT->[sum(balance), span(age,5)], SORT_AGG_METRICS->[0 DESC LAST]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"span(age,5)":{"histogram":{"field":"age","interval":5.0,"offset":0.0,"order":[{"sum(balance)":"desc"},{"_key":"asc"}],"keyed":false,"min_doc_count":0},"aggregations":{"sum(balance)":{"sum":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex1.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex1.yaml index 3215115297a..cd0355241fe 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex1.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex1.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={2},sum(balance)=SUM($0),c=COUNT(),dc(employer)=COUNT(DISTINCT $1)), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[sum(balance), c, dc(employer), state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"state":{"terms":{"field":"state.keyword","size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(balance)":{"sum":{"field":"balance"}},"dc(employer)":{"cardinality":{"field":"employer.keyword"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], 
PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={2},sum(balance)=SUM($0),c=COUNT(),dc(employer)=COUNT(DISTINCT $1)), PROJECT->[sum(balance), c, dc(employer), state], SORT_AGG_METRICS->[1 DESC LAST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"state":{"terms":{"field":"state.keyword","size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(balance)":{"sum":{"field":"balance"}},"dc(employer)":{"cardinality":{"field":"employer.keyword"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex2.yaml index 211aa979ce0..59cd137ca59 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex2.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex2.yaml @@ -9,4 +9,4 @@ calcite: LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], new_state=[LOWER($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},sum(balance)=SUM($2),count()=COUNT(),d=COUNT(DISTINCT $3)), SORT_AGG_METRICS->[4 DESC LAST], PROJECT->[sum(balance), count(), d, gender, new_state], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"gender|new_state":{"multi_terms":{"terms":[{"field":"gender.keyword"},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}}}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"d":"desc"},{"_key":"asc"}]},"aggregations":{"sum(balance)":{"sum":{"field":"balance"}},"d":{"cardinality":{"field":"employer.keyword"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},sum(balance)=SUM($2),count()=COUNT(),d=COUNT(DISTINCT $3)), PROJECT->[sum(balance), count(), d, gender, new_state], SORT_AGG_METRICS->[2 DESC LAST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"gender|new_state":{"multi_terms":{"terms":[{"field":"gender.keyword"},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 
0,"SOURCES":[0],"DIGESTS":["state.keyword"]}}}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"d":"desc"},{"_key":"asc"}]},"aggregations":{"sum(balance)":{"sum":{"field":"balance"}},"d":{"cardinality":{"field":"employer.keyword"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms.yaml index 4caf7759fc6..b584249d91a 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($7))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), SORT_AGG_METRICS->[2 ASC FIRST], PROJECT->[count(), gender, state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"gender|state":{"multi_terms":{"terms":[{"field":"gender.keyword"},{"field":"state.keyword"}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), gender, state], SORT_AGG_METRICS->[0 ASC FIRST], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"gender|state":{"multi_terms":{"terms":[{"field":"gender.keyword"},{"field":"state.keyword"}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms_script.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms_script.yaml index 13d8350c11f..44a51b2171d 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms_script.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms_script.yaml @@ -9,4 +9,4 @@ calcite: LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], new_gender=[LOWER($4)], new_state=[LOWER($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), SORT_AGG_METRICS->[2 ASC FIRST], PROJECT->[count(), new_gender, new_state], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"new_gender|new_state":{"multi_terms":{"terms":[{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}}},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}}}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), new_gender, new_state], SORT_AGG_METRICS->[0 ASC FIRST], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"new_gender|new_state":{"multi_terms":{"terms":[{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}}},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}}}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_script.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_script.yaml index 7e010cba2ad..e24043592fe 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_script.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_script.yaml @@ -9,4 +9,4 @@ calcite: LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], 
state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], new_state=[LOWER($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), SORT_AGG_METRICS->[1 ASC FIRST], PROJECT->[count(), new_state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), new_state], SORT_AGG_METRICS->[0 ASC FIRST], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5125.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5125.yml new file mode 100644 index 00000000000..dd0335f73d5 --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5125.yml @@ -0,0 +1,66 @@ +setup: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: true + - do: + indices.create: + index: issue5125 + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + gender: + type: keyword + - do: + bulk: + refresh: true + body: + - '{"index": {"_index": "issue5125", "_id": "1"}}' + - '{"gender": "F"}' + - '{"index": {"_index": "issue5125", "_id": "2"}}' + - '{"gender": "F"}' + - '{"index": {"_index": "issue5125", "_id": "3"}}' + - '{"gender": "F"}' + - '{"index": {"_index": "issue5125", "_id": "4"}}' + - '{"gender": "M"}' + - '{"index": {"_index": "issue5125", "_id": "5"}}' + - '{"gender": "M"}' + - '{"index": {"_index": "issue5125", "_id": "6"}}' + - '{"gender": "M"}' + - '{"index": {"_index": "issue5125", "_id": "7"}}' + - '{"gender": "M"}' + 
+--- +teardown: + - do: + indices.delete: + index: issue5125 + ignore_unavailable: true + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: false + +--- +"Issue 5125: consecutive sorts after agg should honor latest sort direction": + - skip: + features: + - headers + - allowed_warnings + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=issue5125 | stats count() as c by gender | sort gender | sort - gender + + - match: { total: 2 } + - match: { schema: [ { name: c, type: bigint }, { name: gender, type: string } ] } + - match: { datarows: [ [ 4, "M" ], [ 3, "F" ] ] } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java index dbe8306d4b2..248339e8bcf 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java @@ -433,7 +433,7 @@ public AbstractRelNode pushDownLimit(LogicalSort sort, Integer limit, Integer of // can update the aggregation builder boolean canUpdate = canReduceEstimatedRowsCount - || pushDownContext.getAggPushDownAction().pushDownLimitIntoBucketSize(totalSize); + || pushDownContext.getAggPushDownAction().canPushDownLimitIntoBucketSize(totalSize); if (!canUpdate && offset > 0) return null; CalciteLogicalIndexScan newScan = this.copyWithNewSchema(getRowType()); if (canUpdate) { diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java 
b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java index 7c15586d143..2acf79477ee 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java @@ -12,6 +12,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.function.Consumer; import java.util.stream.Collectors; import lombok.EqualsAndHashCode; import lombok.Getter; @@ -27,6 +28,7 @@ import org.opensearch.search.aggregations.bucket.composite.HistogramValuesSourceBuilder; import org.opensearch.search.aggregations.bucket.composite.TermsValuesSourceBuilder; import org.opensearch.search.aggregations.bucket.histogram.DateHistogramAggregationBuilder; +import org.opensearch.search.aggregations.bucket.histogram.DateHistogramInterval; import org.opensearch.search.aggregations.bucket.histogram.HistogramAggregationBuilder; import org.opensearch.search.aggregations.bucket.missing.MissingOrder; import org.opensearch.search.aggregations.bucket.nested.NestedAggregationBuilder; @@ -64,6 +66,127 @@ public AggPushDownAction( this.bucketNames = bucketNames; } + private static AggregatorFactories.Builder copySubAggregations(AggregationBuilder source) { + AggregatorFactories.Builder copiedFactories = new AggregatorFactories.Builder(); + source.getSubAggregations().forEach(copiedFactories::addAggregator); + source.getPipelineAggregations().forEach(copiedFactories::addPipelineAggregator); + return copiedFactories; + } + + private static Map copyMetadataOrNull(AggregationBuilder source) { + Map metadata = source.getMetadata(); + return metadata == null || metadata.isEmpty() ? 
null : metadata; + } + + private static final class CompositeAggregationBuilderCopy extends CompositeAggregationBuilder { + private CompositeAggregationBuilderCopy(CompositeAggregationBuilder source) { + super(source, copySubAggregations(source), copyMetadataOrNull(source)); + } + } + + private static final class TermsAggregationBuilderCopy extends TermsAggregationBuilder { + private TermsAggregationBuilderCopy(TermsAggregationBuilder source) { + super(source, copySubAggregations(source), copyMetadataOrNull(source)); + } + } + + private static final class MultiTermsAggregationBuilderCopy extends MultiTermsAggregationBuilder { + private MultiTermsAggregationBuilderCopy(MultiTermsAggregationBuilder source) { + super(source, copySubAggregations(source), copyMetadataOrNull(source)); + } + } + + private static final class TopHitsAggregationBuilderCopy extends TopHitsAggregationBuilder { + private TopHitsAggregationBuilderCopy(TopHitsAggregationBuilder source) { + super(source, copySubAggregations(source), copyMetadataOrNull(source)); + } + } + + private static final class NestedAggregationBuilderCopy extends NestedAggregationBuilder { + private NestedAggregationBuilderCopy(NestedAggregationBuilder source) { + super(source, copySubAggregations(source), copyMetadataOrNull(source)); + } + } + + private static CompositeAggregationBuilder copyCompositeAggregationBuilder( + CompositeAggregationBuilder source) { + return new CompositeAggregationBuilderCopy(source); + } + + private static TermsAggregationBuilder copyTermsAggregationBuilder( + TermsAggregationBuilder source) { + return new TermsAggregationBuilderCopy(source); + } + + private static MultiTermsAggregationBuilder copyMultiTermsAggregationBuilder( + MultiTermsAggregationBuilder source) { + return new MultiTermsAggregationBuilderCopy(source); + } + + private static TopHitsAggregationBuilder copyTopHitsAggregationBuilder( + TopHitsAggregationBuilder source) { + return new TopHitsAggregationBuilderCopy(source); + } + + 
private static NestedAggregationBuilder copyNestedAggregationBuilder( + NestedAggregationBuilder source) { + return new NestedAggregationBuilderCopy(source); + } + + private static AggregationBuilder copyAggregationBuilder(AggregationBuilder builder) { + if (builder instanceof CompositeAggregationBuilder composite) { + return copyCompositeAggregationBuilder(composite); + } + if (builder instanceof TermsAggregationBuilder terms) { + return copyTermsAggregationBuilder(terms); + } + if (builder instanceof MultiTermsAggregationBuilder multiTerms) { + return copyMultiTermsAggregationBuilder(multiTerms); + } + if (builder instanceof TopHitsAggregationBuilder topHits) { + return copyTopHitsAggregationBuilder(topHits); + } + if (builder instanceof NestedAggregationBuilder nested) { + return copyNestedAggregationBuilder(nested); + } + return builder; + } + + private static AggregationBuilder unwrapNestedBuilder(AggregationBuilder rootBuilder) { + if (rootBuilder instanceof NestedAggregationBuilder nested + && !nested.getSubAggregations().isEmpty()) { + return nested.getSubAggregations().iterator().next(); + } + return rootBuilder; + } + + private void replaceRootBuilder( + AggregationBuilder originalRoot, AggregationBuilder newInnerBuilder) { + AggregationBuilder finalBuilder = newInnerBuilder; + if (originalRoot instanceof NestedAggregationBuilder nested) { + finalBuilder = + AggregationBuilders.nested(nested.getName(), nested.path()) + .subAggregation(newInnerBuilder); + } + builderAndParser = + Pair.of(Collections.singletonList(finalBuilder), builderAndParser.getRight()); + } + + /** + * Create a deep copy of this action. New lists are created for builders and bucketNames so that + * mutations (sort/limit pushdown) on the copy do not affect the original. 
+ */ + public AggPushDownAction copy() { + List copiedBuilders = + builderAndParser.getLeft().stream() + .map(AggPushDownAction::copyAggregationBuilder) + .collect(Collectors.toCollection(ArrayList::new)); + return new AggPushDownAction( + Pair.of(copiedBuilders, builderAndParser.getRight()), + extendedTypeMapping, + new ArrayList<>(bucketNames)); + } + private static int getScriptCount(AggregationBuilder aggBuilder) { if (aggBuilder instanceof NestedAggregationBuilder) { aggBuilder = aggBuilder.getSubAggregations().iterator().next(); @@ -256,6 +379,36 @@ private TermsAggregationBuilder buildTermsAggregationBuilder( return termsBuilder; } + /** + * Build a new {@link TermsAggregationBuilder} by copying from an existing one with a new size. + * This keeps all existing terms options (e.g. include/exclude, collect mode, shard sizing). + */ + private static TermsAggregationBuilder buildTermsAggregationBuilder( + TermsAggregationBuilder source, int newSize) { + TermsAggregationBuilder termsBuilder = copyTermsAggregationBuilder(source); + termsBuilder.size(newSize); + return termsBuilder; + } + + private static void copyDateHistogramInterval( + DateHistogramValuesSourceBuilder source, + Consumer fixedIntervalSetter, + Consumer calendarIntervalSetter) { + try { + fixedIntervalSetter.accept(source.getIntervalAsFixed()); + return; + } catch (IllegalArgumentException | IllegalStateException ignored) { + // Fallback to calendar interval. 
+ } + try { + calendarIntervalSetter.accept(source.getIntervalAsCalendar()); + return; + } catch (IllegalArgumentException | IllegalStateException ignored) { + throw new OpenSearchRequestBuilder.PushDownUnSupportedException( + "Cannot copy interval for date histogram bucket " + source.name()); + } + } + /** Build a {@link DateHistogramAggregationBuilder} by {@link DateHistogramValuesSourceBuilder} */ private DateHistogramAggregationBuilder buildDateHistogramAggregationBuilder( DateHistogramValuesSourceBuilder dateHisto, BucketOrder bucketOrder) { @@ -267,11 +420,8 @@ private DateHistogramAggregationBuilder buildDateHistogramAggregationBuilder( if (dateHisto.script() != null) { dateHistoBuilder.script(dateHisto.script()); } - try { - dateHistoBuilder.fixedInterval(dateHisto.getIntervalAsFixed()); - } catch (IllegalArgumentException e) { - dateHistoBuilder.calendarInterval(dateHisto.getIntervalAsCalendar()); - } + copyDateHistogramInterval( + dateHisto, dateHistoBuilder::fixedInterval, dateHistoBuilder::calendarInterval); if (dateHisto.userValuetypeHint() != null) { dateHistoBuilder.userValueTypeHint(dateHisto.userValuetypeHint()); } @@ -352,83 +502,138 @@ private AggregationBuilder attachSubAggregations( return aggregationBuilder; } + /** + * Create a copy of a {@link CompositeValuesSourceBuilder} to avoid in-place mutation of shared + * bucket objects across different PushDownContext instances. 
+ */ + @SuppressWarnings("unchecked") + private static CompositeValuesSourceBuilder copyCompositeBucket( + CompositeValuesSourceBuilder bucket) { + CompositeValuesSourceBuilder copy; + if (bucket instanceof TermsValuesSourceBuilder terms) { + TermsValuesSourceBuilder termsCopy = new TermsValuesSourceBuilder(terms.name()); + if (terms.field() != null) termsCopy.field(terms.field()); + if (terms.script() != null) termsCopy.script(terms.script()); + if (terms.userValuetypeHint() != null) termsCopy.userValuetypeHint(terms.userValuetypeHint()); + if (terms.format() != null) termsCopy.format(terms.format()); + copy = termsCopy; + } else if (bucket instanceof DateHistogramValuesSourceBuilder dateHisto) { + DateHistogramValuesSourceBuilder dhCopy = + new DateHistogramValuesSourceBuilder(dateHisto.name()); + if (dateHisto.field() != null) dhCopy.field(dateHisto.field()); + if (dateHisto.script() != null) dhCopy.script(dateHisto.script()); + if (dateHisto.userValuetypeHint() != null) + dhCopy.userValuetypeHint(dateHisto.userValuetypeHint()); + copyDateHistogramInterval(dateHisto, dhCopy::fixedInterval, dhCopy::calendarInterval); + if (dateHisto.timeZone() != null) dhCopy.timeZone(dateHisto.timeZone()); + if (dateHisto.offset() != 0) dhCopy.offset(dateHisto.offset()); + if (dateHisto.format() != null) dhCopy.format(dateHisto.format()); + copy = dhCopy; + } else if (bucket instanceof HistogramValuesSourceBuilder histo) { + HistogramValuesSourceBuilder hCopy = new HistogramValuesSourceBuilder(histo.name()); + if (histo.field() != null) hCopy.field(histo.field()); + if (histo.script() != null) hCopy.script(histo.script()); + if (histo.userValuetypeHint() != null) hCopy.userValuetypeHint(histo.userValuetypeHint()); + hCopy.interval(histo.interval()); + if (histo.format() != null) hCopy.format(histo.format()); + copy = hCopy; + } else { + throw new OpenSearchRequestBuilder.PushDownUnSupportedException( + "Unsupported CompositeValuesSourceBuilder: " + bucket.getClass()); + } + 
copy.missingBucket(bucket.missingBucket()); + copy.missingOrder(bucket.missingOrder()); + copy.order(bucket.order()); + return copy; + } + public void pushDownSortIntoAggBucket( List collations, List fieldNames) { // aggregationBuilder.getLeft() could be empty when count agg optimization works if (builderAndParser.getLeft().isEmpty()) return; AggregationBuilder original = builderAndParser.getLeft().getFirst(); - AggregationBuilder builder; - if (original instanceof NestedAggregationBuilder) { - builder = original.getSubAggregations().iterator().next(); - } else { - builder = original; - } - List selected = new ArrayList<>(collations.size()); + AggregationBuilder builder = unwrapNestedBuilder(original); if (builder instanceof CompositeAggregationBuilder compositeAggBuilder) { - // It will always use a single CompositeAggregationBuilder for the aggregation with GroupBy - // See {@link AggregateAnalyzer} - List> buckets = compositeAggBuilder.sources(); - List> newBuckets = new ArrayList<>(buckets.size()); - List newBucketNames = new ArrayList<>(buckets.size()); - // Have to put the collation required buckets first, then the rest of buckets. - collations.forEach( - collation -> { - /* - Must find the bucket by field name because: - 1. The sequence of buckets may have changed after sort push-down. - 2. The schema of scan operator may be inconsistent with the sequence of buckets - after project push-down. - */ - String bucketName = fieldNames.get(collation.getFieldIndex()); - CompositeValuesSourceBuilder bucket = buckets.get(bucketNames.indexOf(bucketName)); - RelFieldCollation.Direction direction = collation.getDirection(); - RelFieldCollation.NullDirection nullDirection = collation.nullDirection; - SortOrder order = - RelFieldCollation.Direction.DESCENDING.equals(direction) - ? 
SortOrder.DESC - : SortOrder.ASC; - if (bucket.missingBucket()) { - MissingOrder missingOrder = - switch (nullDirection) { - case FIRST -> MissingOrder.FIRST; - case LAST -> MissingOrder.LAST; - default -> MissingOrder.DEFAULT; - }; - bucket.missingOrder(missingOrder); - } - newBuckets.add(bucket.order(order)); - newBucketNames.add(bucketName); - selected.add(bucketName); - }); - buckets.stream() - .map(CompositeValuesSourceBuilder::name) - .filter(name -> !selected.contains(name)) - .forEach( - name -> { - newBuckets.add(buckets.get(bucketNames.indexOf(name))); - newBucketNames.add(name); - }); - AggregatorFactories.Builder newAggBuilder = new AggregatorFactories.Builder(); - compositeAggBuilder.getSubAggregations().forEach(newAggBuilder::addAggregator); - AggregationBuilder finalBuilder = - AggregationBuilders.composite("composite_buckets", newBuckets) - .subAggregations(newAggBuilder) - .size(compositeAggBuilder.size()); - if (original instanceof NestedAggregationBuilder nested) { - finalBuilder = - AggregationBuilders.nested(nested.getName(), nested.path()) - .subAggregation(finalBuilder); - } - builderAndParser = - Pair.of(Collections.singletonList(finalBuilder), builderAndParser.getRight()); - bucketNames = newBucketNames; + pushDownSortIntoCompositeBucket(original, compositeAggBuilder, collations, fieldNames); + return; } if (builder instanceof TermsAggregationBuilder termsAggBuilder) { - termsAggBuilder.order(BucketOrder.key(!collations.getFirst().getDirection().isDescending())); + pushDownSortIntoTermsBucket(original, termsAggBuilder, collations); } // TODO for MultiTermsAggregationBuilder } + private void pushDownSortIntoCompositeBucket( + AggregationBuilder original, + CompositeAggregationBuilder compositeAggBuilder, + List collations, + List fieldNames) { + // It will always use a single CompositeAggregationBuilder for the aggregation with GroupBy. 
+ // See {@link AggregateAnalyzer} + List> buckets = compositeAggBuilder.sources(); + List> newBuckets = new ArrayList<>(buckets.size()); + List newBucketNames = new ArrayList<>(buckets.size()); + List selected = new ArrayList<>(collations.size()); + + // Have to put the collation required buckets first, then the rest of buckets. + collations.forEach( + collation -> { + /* + Must find the bucket by field name because: + 1. The sequence of buckets may have changed after sort push-down. + 2. The schema of scan operator may be inconsistent with the sequence of buckets + after project push-down. + */ + String bucketName = fieldNames.get(collation.getFieldIndex()); + CompositeValuesSourceBuilder bucket = buckets.get(bucketNames.indexOf(bucketName)); + RelFieldCollation.Direction direction = collation.getDirection(); + RelFieldCollation.NullDirection nullDirection = collation.nullDirection; + SortOrder order = + RelFieldCollation.Direction.DESCENDING.equals(direction) + ? SortOrder.DESC + : SortOrder.ASC; + CompositeValuesSourceBuilder newBucket = copyCompositeBucket(bucket); + if (newBucket.missingBucket()) { + MissingOrder missingOrder = + switch (nullDirection) { + case FIRST -> MissingOrder.FIRST; + case LAST -> MissingOrder.LAST; + default -> MissingOrder.DEFAULT; + }; + newBucket.missingOrder(missingOrder); + } + newBuckets.add(newBucket.order(order)); + newBucketNames.add(bucketName); + selected.add(bucketName); + }); + + buckets.stream() + .map(CompositeValuesSourceBuilder::name) + .filter(name -> !selected.contains(name)) + .forEach( + name -> { + newBuckets.add(buckets.get(bucketNames.indexOf(name))); + newBucketNames.add(name); + }); + + AggregationBuilder finalBuilder = + AggregationBuilders.composite(compositeAggBuilder.getName(), newBuckets) + .subAggregations(copySubAggregations(compositeAggBuilder)) + .size(compositeAggBuilder.size()); + replaceRootBuilder(original, finalBuilder); + bucketNames = newBucketNames; + } + + private void 
pushDownSortIntoTermsBucket( + AggregationBuilder original, + TermsAggregationBuilder termsAggBuilder, + List collations) { + TermsAggregationBuilder newTermsBuilder = + buildTermsAggregationBuilder(termsAggBuilder, termsAggBuilder.size()); + newTermsBuilder.order(BucketOrder.key(!collations.getFirst().getDirection().isDescending())); + replaceRootBuilder(original, newTermsBuilder); + } + public boolean isCompositeAggregation() { return builderAndParser.getLeft().stream() .anyMatch( @@ -439,56 +644,108 @@ public boolean isCompositeAggregation() { instanceof CompositeAggregationBuilder)); } - /** - * Check if the limit can be pushed down into aggregation bucket when the limit size is less than - * bucket number. - */ - public boolean pushDownLimitIntoBucketSize(Integer size) { - // aggregationBuilder.getLeft() could be empty when count agg optimization works - if (builderAndParser.getLeft().isEmpty()) return false; - AggregationBuilder builder = builderAndParser.getLeft().getFirst(); - if (builder instanceof NestedAggregationBuilder) { - builder = builder.getSubAggregations().iterator().next(); + private static CompositeAggregationBuilder copyAndResizeCompositeAggregationBuilder( + CompositeAggregationBuilder source, int size) { + CompositeAggregationBuilder copy = copyCompositeAggregationBuilder(source); + copy.size(size); + return copy; + } + + private static MultiTermsAggregationBuilder copyAndResizeMultiTermsAggregationBuilder( + MultiTermsAggregationBuilder source, int size) { + MultiTermsAggregationBuilder copy = copyMultiTermsAggregationBuilder(source); + copy.size(size); + return copy; + } + + private static TopHitsAggregationBuilder copyAndResizeTopHitsAggregationBuilder( + TopHitsAggregationBuilder source, int size) { + TopHitsAggregationBuilder copy = copyTopHitsAggregationBuilder(source); + copy.size(size); + return copy; + } + + private static Integer getBucketSize(AggregationBuilder builder) { + if (builder instanceof CompositeAggregationBuilder 
compositeAggBuilder) { + return compositeAggBuilder.size(); } + if (builder instanceof TermsAggregationBuilder termsAggBuilder) { + return termsAggBuilder.size(); + } + if (builder instanceof MultiTermsAggregationBuilder multiTermsAggBuilder) { + return multiTermsAggBuilder.size(); + } + if (builder instanceof TopHitsAggregationBuilder topHitsAggBuilder) { + return topHitsAggBuilder.size(); + } + return null; + } + + private static AggregationBuilder copyAndResizeBucketBuilder( + AggregationBuilder builder, int size) { if (builder instanceof CompositeAggregationBuilder compositeAggBuilder) { - if (size < compositeAggBuilder.size()) { - compositeAggBuilder.size(size); - return true; - } else { - return false; - } + return copyAndResizeCompositeAggregationBuilder(compositeAggBuilder, size); } if (builder instanceof TermsAggregationBuilder termsAggBuilder) { - if (size < termsAggBuilder.size()) { - termsAggBuilder.size(size); - return true; - } else { - return false; - } + return buildTermsAggregationBuilder(termsAggBuilder, size); } if (builder instanceof MultiTermsAggregationBuilder multiTermsAggBuilder) { - if (size < multiTermsAggBuilder.size()) { - multiTermsAggBuilder.size(size); - return true; - } else { - return false; - } + return copyAndResizeMultiTermsAggregationBuilder(multiTermsAggBuilder, size); } if (builder instanceof TopHitsAggregationBuilder topHitsAggBuilder) { - if (size < topHitsAggBuilder.size()) { - topHitsAggBuilder.size(size); - return true; - } else { - return false; - } + return copyAndResizeTopHitsAggregationBuilder(topHitsAggBuilder, size); + } + throw new IllegalStateException( + "Not a resizable bucket aggregation builder: " + builder.getClass().getSimpleName()); + } + + private AggregationBuilder resizeAggregationForLimit(AggregationBuilder builder, int size) { + Integer bucketSize = getBucketSize(builder); + if (bucketSize != null) { + return size < bucketSize ? 
copyAndResizeBucketBuilder(builder, size) : null; + } + if (builder instanceof ValuesSourceAggregationBuilder.LeafOnly) { + // all metric aggregations generate one row and are effectively already limited. + return builder; + } + throw new OpenSearchRequestBuilder.PushDownUnSupportedException( + "Unknown aggregation builder " + builder.getClass().getSimpleName()); + } + + /** + * Read-only probe: check if the limit can be pushed down into aggregation bucket without + * modifying any builder state. + */ + public boolean canPushDownLimitIntoBucketSize(Integer size) { + if (builderAndParser.getLeft().isEmpty()) return false; + AggregationBuilder builder = unwrapNestedBuilder(builderAndParser.getLeft().getFirst()); + Integer bucketSize = getBucketSize(builder); + if (bucketSize != null) { + return size < bucketSize; } - // now we only have Composite, Terms and MultiTerms bucket aggregations, - // add code here when we could support more in the future. if (builder instanceof ValuesSourceAggregationBuilder.LeafOnly) { - // Note: all metric aggregations will be treated as pushed since it generates only one row. return true; } throw new OpenSearchRequestBuilder.PushDownUnSupportedException( "Unknown aggregation builder " + builder.getClass().getSimpleName()); } + + /** + * Check if the limit can be pushed down into aggregation bucket when the limit size is less than + * bucket number. 
+ */ + public boolean pushDownLimitIntoBucketSize(Integer size) { + // aggregationBuilder.getLeft() could be empty when count agg optimization works + if (builderAndParser.getLeft().isEmpty()) return false; + AggregationBuilder original = builderAndParser.getLeft().getFirst(); + AggregationBuilder builder = unwrapNestedBuilder(original); + AggregationBuilder resizedBuilder = resizeAggregationForLimit(builder, size); + if (resizedBuilder == null) { + return false; + } + if (resizedBuilder != builder) { + replaceRootBuilder(original, resizedBuilder); + } + return true; + } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java index 2d236207c10..fb95a11414e 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java @@ -49,7 +49,9 @@ public PushDownContext(OpenSearchIndex osIndex) { @Override public PushDownContext clone() { PushDownContext newContext = new PushDownContext(osIndex); - newContext.addAll(this); + for (PushDownOperation operation : this) { + newContext.add(maybeCopyAggOperation(operation)); + } return newContext; } @@ -62,7 +64,7 @@ public PushDownContext cloneWithoutSort() { PushDownContext newContext = new PushDownContext(osIndex); for (PushDownOperation action : this) { if (action.type() != PushDownType.SORT && action.type() != PushDownType.SORT_EXPR) { - newContext.add(action); + newContext.add(maybeCopyAggOperation(action)); } } return newContext; @@ -108,11 +110,23 @@ public PushDownContext cloneForAggregate(Aggregate aggregate, @Nullable Project continue; } } - newContext.add(operation); + newContext.add(maybeCopyAggOperation(operation)); } return newContext; } + /** + * Deep-copy AGGREGATION operations so that the cloned context gets its own 
AggPushDownAction + * instance. Other operation types are immutable lambdas and can be shared safely. + */ + private PushDownOperation maybeCopyAggOperation(PushDownOperation operation) { + if (operation.type() == PushDownType.AGGREGATION + && operation.action() instanceof AggPushDownAction aggAction) { + return new PushDownOperation(operation.type(), operation.digest(), aggAction.copy()); + } + return operation; + } + @NotNull @Override public Iterator iterator() { From 21f894095a37e2a83fe4f521cc9f2a232dc72ab5 Mon Sep 17 00:00:00 2001 From: Songkan Tang Date: Fri, 6 Mar 2026 14:38:40 +0800 Subject: [PATCH 2/8] Fix test issues Signed-off-by: Songkan Tang --- .../sql/calcite/remote/CalciteExplainIT.java | 3 +- .../calcite/big5/keyword_terms.yaml | 2 +- .../big5/keyword_terms_low_cardinality.yaml | 2 +- .../calcite/big5/multi_terms_keyword.yaml | 2 +- .../calcite/clickbench/q10.yaml | 2 +- .../calcite/clickbench/q11.yaml | 2 +- .../calcite/clickbench/q12.yaml | 2 +- .../calcite/clickbench/q13.yaml | 2 +- .../calcite/clickbench/q14.yaml | 2 +- .../calcite/clickbench/q15.yaml | 2 +- .../calcite/clickbench/q16.yaml | 2 +- .../calcite/clickbench/q17.yaml | 2 +- .../calcite/clickbench/q19.yaml | 2 +- .../calcite/clickbench/q22.yaml | 2 +- .../calcite/clickbench/q23.yaml | 2 +- .../calcite/clickbench/q31.yaml | 2 +- .../calcite/clickbench/q32.yaml | 2 +- .../calcite/clickbench/q33.yaml | 2 +- .../calcite/clickbench/q34.yaml | 2 +- .../calcite/clickbench/q37.yaml | 2 +- .../calcite/clickbench/q38.yaml | 2 +- .../calcite/clickbench/q39.yaml | 2 +- .../calcite/clickbench/q40.yaml | 2 +- .../calcite/clickbench/q41.yaml | 2 +- .../calcite/clickbench/q42.yaml | 2 +- .../expectedOutput/calcite/clickbench/q8.yaml | 2 +- .../expectedOutput/calcite/clickbench/q9.yaml | 2 +- .../calcite/explain_agg_sort_on_measure3.yaml | 2 +- .../calcite/explain_agg_sort_on_measure4.yaml | 2 +- .../scan/context/AggPushDownAction.java | 96 +++++++++++++++---- 30 files changed, 105 insertions(+), 50 
deletions(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 402c5e12f05..be4535ce7b3 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -2768,7 +2768,8 @@ public void testNoMvWithEval() throws IOException { } @Test - public void testExplainConsecutiveSortsAfterAgg() throws IOException { + public void testExplainConsecutiveSortsAfterAggIssue5125() throws IOException { + enabledOnlyWhenPushdownIsEnabled(); String expected = loadExpectedPlan("explain_agg_consecutive_sorts_issue_5125.yaml"); assertYamlEqualsIgnoreId( expected, diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_terms.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_terms.yaml index 7718e89b02f..a73f4f508d2 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_terms.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_terms.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($34)]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},station=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[station, aws.cloudwatch.log_stream], LIMIT->500, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"aws.cloudwatch.log_stream":{"terms":{"field":"aws.cloudwatch.log_stream","size":500,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], 
PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},station=COUNT()), PROJECT->[station, aws.cloudwatch.log_stream], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->500, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"aws.cloudwatch.log_stream":{"terms":{"field":"aws.cloudwatch.log_stream","size":500,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_terms_low_cardinality.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_terms_low_cardinality.yaml index 2ce4f996236..b0808ca93f4 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_terms_low_cardinality.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_terms_low_cardinality.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($34)]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},country=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[country, aws.cloudwatch.log_stream], LIMIT->50, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"aws.cloudwatch.log_stream":{"terms":{"field":"aws.cloudwatch.log_stream","size":50,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},country=COUNT()), PROJECT->[country, aws.cloudwatch.log_stream], SORT_AGG_METRICS->[0 
DESC LAST], LIMIT->50, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"aws.cloudwatch.log_stream":{"terms":{"field":"aws.cloudwatch.log_stream","size":50,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/multi_terms_keyword.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/multi_terms_keyword.yaml index b0c896f61fb..c277bb44fb8 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/multi_terms_keyword.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/multi_terms_keyword.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-05 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-05 05:00:00':VARCHAR)))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[FILTER->AND(SEARCH($2, Sarg[['2023-01-05 00:00:00':VARCHAR..'2023-01-05 05:00:00':VARCHAR)]:VARCHAR), IS NOT NULL($0), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[count(), process.name, cloud.region], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"range":{"@timestamp":{"from":"2023-01-05T00:00:00.000Z","to":"2023-01-05T05:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},{"exists":{"field":"process.name","boost":1.0}},{"exists":{"field":"cloud.region","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"process.name|cloud.region":{"multi_terms":{"terms":[{"field":"process.name"},{"field":"cloud.region"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[FILTER->AND(SEARCH($2, Sarg[['2023-01-05 00:00:00':VARCHAR..'2023-01-05 05:00:00':VARCHAR)]:VARCHAR), IS NOT NULL($0), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), process.name, cloud.region], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"range":{"@timestamp":{"from":"2023-01-05T00:00:00.000Z","to":"2023-01-05T05:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},{"exists":{"field":"process.name","boost":1.0}},{"exists":{"field":"cloud.region","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"process.name|cloud.region":{"multi_terms":{"terms":[{"field":"process.name"},{"field":"cloud.region"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q10.yaml 
b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q10.yaml index ae4cade06e5..f900b2ccbec 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q10.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q10.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($68)]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},sum(AdvEngineID)=SUM($0),c=COUNT(),avg(ResolutionWidth)=AVG($2),dc(UserID)=COUNT(DISTINCT $3)), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[sum(AdvEngineID), c, avg(ResolutionWidth), dc(UserID), RegionID], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"RegionID":{"terms":{"field":"RegionID","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(AdvEngineID)":{"sum":{"field":"AdvEngineID"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"dc(UserID)":{"cardinality":{"field":"UserID"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},sum(AdvEngineID)=SUM($0),c=COUNT(),avg(ResolutionWidth)=AVG($2),dc(UserID)=COUNT(DISTINCT $3)), PROJECT->[sum(AdvEngineID), c, avg(ResolutionWidth), dc(UserID), RegionID], SORT_AGG_METRICS->[1 DESC LAST], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"RegionID":{"terms":{"field":"RegionID","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(AdvEngineID)":{"sum":{"field":"AdvEngineID"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"dc(UserID)":{"cardinality":{"field":"UserID"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q11.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q11.yaml index 7a8da847554..aa43e743192 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q11.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q11.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[<>($31, '')]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER-><>($0, ''), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},u=COUNT(DISTINCT $1)), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[u, MobilePhoneModel], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"MobilePhoneModel","boost":1.0}}],"must_not":[{"term":{"MobilePhoneModel":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"MobilePhoneModel":{"terms":{"field":"MobilePhoneModel","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"u":"desc"},{"_key":"asc"}]},"aggregations":{"u":{"cardinality":{"field":"UserID"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER-><>($0, ''), 
AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},u=COUNT(DISTINCT $1)), PROJECT->[u, MobilePhoneModel], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"MobilePhoneModel","boost":1.0}}],"must_not":[{"term":{"MobilePhoneModel":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"MobilePhoneModel":{"terms":{"field":"MobilePhoneModel","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"u":"desc"},{"_key":"asc"}]},"aggregations":{"u":{"cardinality":{"field":"UserID"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q12.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q12.yaml index be24923eeea..a4691fd7e38 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q12.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q12.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[<>($31, '')]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(<>($0, ''), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},u=COUNT(DISTINCT $2)), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[u, MobilePhone, MobilePhoneModel], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"MobilePhoneModel","boost":1.0}}],"must_not":[{"term":{"MobilePhoneModel":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"MobilePhone","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"MobilePhoneModel|MobilePhone":{"multi_terms":{"terms":[{"field":"MobilePhoneModel"},{"field":"MobilePhone"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"u":"desc"},{"_key":"asc"}]},"aggregations":{"u":{"cardinality":{"field":"UserID"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(<>($0, ''), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},u=COUNT(DISTINCT $2)), PROJECT->[u, MobilePhone, MobilePhoneModel], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"MobilePhoneModel","boost":1.0}}],"must_not":[{"term":{"MobilePhoneModel":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"MobilePhone","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"MobilePhoneModel|MobilePhone":{"multi_terms":{"terms":[{"field":"MobilePhoneModel"},{"field":"MobilePhone"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"u":"desc"},{"_key":"asc"}]},"aggregations":{"u":{"cardinality":{"field":"UserID"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q13.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q13.yaml index dd4f502bbde..0110be323ee 100644 --- 
a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q13.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q13.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[<>($63, '')]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER-><>($0, ''), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[c, SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"SearchPhrase":{"terms":{"field":"SearchPhrase","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER-><>($0, ''), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), PROJECT->[c, SearchPhrase], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"SearchPhrase":{"terms":{"field":"SearchPhrase","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q14.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q14.yaml 
index d5c34e6a7f2..7dbe85eb016 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q14.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q14.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[<>($63, '')]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER-><>($0, ''), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},u=COUNT(DISTINCT $1)), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[u, SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"SearchPhrase":{"terms":{"field":"SearchPhrase","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"u":"desc"},{"_key":"asc"}]},"aggregations":{"u":{"cardinality":{"field":"UserID"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER-><>($0, ''), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},u=COUNT(DISTINCT $1)), PROJECT->[u, SearchPhrase], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"SearchPhrase":{"terms":{"field":"SearchPhrase","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"u":"desc"},{"_key":"asc"}]},"aggregations":{"u":{"cardinality":{"field":"UserID"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) 
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q15.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q15.yaml index b13cc1a62ca..1c275e53363 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q15.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q15.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[<>($63, '')]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(<>($0, ''), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},c=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[c, SearchEngineID, SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"SearchEngineID","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"SearchPhrase|SearchEngineID":{"multi_terms":{"terms":[{"field":"SearchPhrase"},{"field":"SearchEngineID"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(<>($0, ''), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},c=COUNT()), PROJECT->[c, SearchEngineID, SearchPhrase], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"SearchEngineID","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"SearchPhrase|SearchEngineID":{"multi_terms":{"terms":[{"field":"SearchPhrase"},{"field":"SearchEngineID"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q16.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q16.yaml index 3f0fb7644a9..f18539393d7 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q16.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q16.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($84)]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[count(), UserID], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"UserID":{"terms":{"field":"UserID","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), UserID], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"UserID":{"terms":{"field":"UserID","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q17.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q17.yaml index d9efea667c2..54dfa746d4d 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q17.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q17.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[AND(IS NOT NULL($84), IS NOT NULL($63))]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[count(), UserID, SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"SearchPhrase|UserID":{"multi_terms":{"terms":[{"field":"SearchPhrase"},{"field":"UserID"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), UserID, SearchPhrase], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"SearchPhrase|UserID":{"multi_terms":{"terms":[{"field":"SearchPhrase"},{"field":"UserID"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q19.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q19.yaml index c4005b2ea4f..74f1ebab575 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q19.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q19.yaml @@ -9,4 +9,4 @@ calcite: LogicalProject(EventDate=[$0], URLRegionID=[$1], HasGCLID=[$2], Income=[$3], Interests=[$4], Robotness=[$5], BrowserLanguage=[$6], CounterClass=[$7], BrowserCountry=[$8], OriginalURL=[$9], ClientTimeZone=[$10], RefererHash=[$11], TraficSourceID=[$12], HitColor=[$13], RefererRegionID=[$14], URLCategoryID=[$15], LocalEventTime=[$16], EventTime=[$17], UTMTerm=[$18], AdvEngineID=[$19], UserAgentMinor=[$20], UserAgentMajor=[$21], RemoteIP=[$22], Sex=[$23], JavaEnable=[$24], URLHash=[$25], URL=[$26], ParamOrderID=[$27], OpenstatSourceID=[$28], HTTPError=[$29], SilverlightVersion3=[$30], MobilePhoneModel=[$31], SilverlightVersion4=[$32], SilverlightVersion1=[$33], SilverlightVersion2=[$34], IsDownload=[$35], IsParameter=[$36], CLID=[$37], FlashMajor=[$38], FlashMinor=[$39], UTMMedium=[$40], WatchID=[$41], DontCountHits=[$42], CookieEnable=[$43], HID=[$44], SocialAction=[$45], WindowName=[$46], ConnectTiming=[$47], PageCharset=[$48], IsLink=[$49], IsArtifical=[$50], JavascriptEnable=[$51], ClientEventTime=[$52], DNSTiming=[$53], CodeVersion=[$54], ResponseEndTiming=[$55], FUniqID=[$56], WindowClientHeight=[$57], OpenstatServiceName=[$58], UTMContent=[$59], HistoryLength=[$60], IsOldCounter=[$61], MobilePhone=[$62], 
SearchPhrase=[$63], FlashMinor2=[$64], SearchEngineID=[$65], IsEvent=[$66], UTMSource=[$67], RegionID=[$68], OpenstatAdID=[$69], UTMCampaign=[$70], GoodEvent=[$71], IsRefresh=[$72], ParamCurrency=[$73], Params=[$74], ResolutionHeight=[$75], ClientIP=[$76], FromTag=[$77], ParamCurrencyID=[$78], ResponseStartTiming=[$79], ResolutionWidth=[$80], SendTiming=[$81], RefererCategoryID=[$82], OpenstatCampaignID=[$83], UserID=[$84], WithHash=[$85], UserAgent=[$86], ParamPrice=[$87], ResolutionDepth=[$88], IsMobile=[$89], Age=[$90], SocialSourceNetworkID=[$91], OpenerName=[$92], OS=[$93], IsNotBounce=[$94], Referer=[$95], NetMinor=[$96], Title=[$97], NetMajor=[$98], IPNetworkID=[$99], FetchTiming=[$100], SocialNetwork=[$101], SocialSourcePage=[$102], CounterID=[$103], WindowClientWidth=[$104], _id=[$105], _index=[$106], _score=[$107], _maxscore=[$108], _sort=[$109], _routing=[$110], m=[EXTRACT('minute':VARCHAR, $17)]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2},count()=COUNT()), SORT_AGG_METRICS->[3 DESC LAST], PROJECT->[count(), UserID, m, SearchPhrase], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"UserID|m|SearchPhrase":{"multi_terms":{"terms":[{"field":"UserID"},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCZXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJFWFRSQUNUIiwKICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMSwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInVkdCI6ICJFWFBSX1RJTUVTVEFNUCIsCiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0sCiAgImNsYXNzIjogIm9yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLmZ1bmN0aW9uLlVzZXJEZWZpbmVkRnVuY3Rpb25CdWlsZGVyJDEiLAogICJ0eXBlIjogewogICAgInR5cGUiOiAiQklHSU5UIiwKICAgICJudWxsYWJsZSI6IHRydWUKICB9LAogICJkZXRlcm1pbmlzdGljIjogdHJ1ZSwKICAiZHluYW1pYyI6IGZhbHNlCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[2,0],"DIGESTS":["minute","EventTime"]}},"value_type":"long"},{"field":"SearchPhrase"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2},count()=COUNT()), PROJECT->[count(), UserID, m, SearchPhrase], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"UserID|m|SearchPhrase":{"multi_terms":{"terms":[{"field":"UserID"},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCZXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJFWFRSQUNUIiwKICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMSwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInVkdCI6ICJFWFBSX1RJTUVTVEFNUCIsCiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0sCiAgImNsYXNzIjogIm9yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLmZ1bmN0aW9uLlVzZXJEZWZpbmVkRnVuY3Rpb25CdWlsZGVyJDEiLAogICJ0eXBlIjogewogICAgInR5cGUiOiAiQklHSU5UIiwKICAgICJudWxsYWJsZSI6IHRydWUKICB9LAogICJkZXRlcm1pbmlzdGljIjogdHJ1ZSwKICAiZHluYW1pYyI6IGZhbHNlCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[2,0],"DIGESTS":["minute","EventTime"]}},"value_type":"long"},{"field":"SearchPhrase"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q22.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q22.yaml index a28945e87d0..edd3dabd8d8 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q22.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q22.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[AND(LIKE($26, '%google%', '\'), <>($63, ''))]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], 
PushDownContext=[[FILTER->AND(LIKE($0, '%google%', '\'), <>($1, '')), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},c=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[c, SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"wildcard":{"URL":{"wildcard":"*google*","boost":1.0}}},{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"SearchPhrase":{"terms":{"field":"SearchPhrase","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(LIKE($0, '%google%', '\'), <>($1, '')), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},c=COUNT()), PROJECT->[c, SearchPhrase], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"wildcard":{"URL":{"wildcard":"*google*","boost":1.0}}},{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"SearchPhrase":{"terms":{"field":"SearchPhrase","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q23.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q23.yaml index f5b8ec9c184..242d499a2fe 100644 --- 
a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q23.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q23.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[AND(LIKE($97, '%Google%', '\'), <>($63, ''), NOT(LIKE($26, '%.google.%', '\')))]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(LIKE($3, '%Google%', '\'), <>($1, ''), NOT(LIKE($0, '%.google.%', '\'))), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},c=COUNT(),dc(UserID)=COUNT(DISTINCT $2)), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[c, dc(UserID), SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"wildcard":{"Title":{"wildcard":"*Google*","boost":1.0}}},{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"bool":{"must_not":[{"wildcard":{"URL":{"wildcard":"*.google.*","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"SearchPhrase":{"terms":{"field":"SearchPhrase","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"dc(UserID)":{"cardinality":{"field":"UserID"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(LIKE($3, '%Google%', '\'), <>($1, ''), NOT(LIKE($0, '%.google.%', '\'))), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},c=COUNT(),dc(UserID)=COUNT(DISTINCT $2)), PROJECT->[c, dc(UserID), SearchPhrase], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"wildcard":{"Title":{"wildcard":"*Google*","boost":1.0}}},{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"bool":{"must_not":[{"wildcard":{"URL":{"wildcard":"*.google.*","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"SearchPhrase":{"terms":{"field":"SearchPhrase","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"dc(UserID)":{"cardinality":{"field":"UserID"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q31.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q31.yaml index a8ac7eaf9b7..bf40fe857ed 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q31.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q31.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[<>($63, '')]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(<>($0, ''), IS NOT NULL($1), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1, 3},c=COUNT(),sum(IsRefresh)=SUM($2),avg(ResolutionWidth)=AVG($4)), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[c, sum(IsRefresh), avg(ResolutionWidth), SearchEngineID, ClientIP], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"SearchEngineID","boost":1.0}},{"exists":{"field":"ClientIP","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"SearchEngineID|ClientIP":{"multi_terms":{"terms":[{"field":"SearchEngineID"},{"field":"ClientIP"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(IsRefresh)":{"sum":{"field":"IsRefresh"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(<>($0, ''), IS NOT NULL($1), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1, 3},c=COUNT(),sum(IsRefresh)=SUM($2),avg(ResolutionWidth)=AVG($4)), PROJECT->[c, sum(IsRefresh), avg(ResolutionWidth), SearchEngineID, ClientIP], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"SearchEngineID","boost":1.0}},{"exists":{"field":"ClientIP","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"SearchEngineID|ClientIP":{"multi_terms":{"terms":[{"field":"SearchEngineID"},{"field":"ClientIP"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(IsRefresh)":{"sum":{"field":"IsRefresh"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q32.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q32.yaml index 5cf8f54b258..81236b33d51 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q32.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q32.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[<>($63, '')]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(<>($1, ''), IS NOT NULL($0), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 3},c=COUNT(),sum(IsRefresh)=SUM($2),avg(ResolutionWidth)=AVG($4)), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[c, sum(IsRefresh), avg(ResolutionWidth), WatchID, ClientIP], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"WatchID","boost":1.0}},{"exists":{"field":"ClientIP","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"WatchID|ClientIP":{"multi_terms":{"terms":[{"field":"WatchID"},{"field":"ClientIP"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(IsRefresh)":{"sum":{"field":"IsRefresh"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(<>($1, ''), IS NOT NULL($0), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 3},c=COUNT(),sum(IsRefresh)=SUM($2),avg(ResolutionWidth)=AVG($4)), PROJECT->[c, sum(IsRefresh), avg(ResolutionWidth), WatchID, ClientIP], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"WatchID","boost":1.0}},{"exists":{"field":"ClientIP","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"WatchID|ClientIP":{"multi_terms":{"terms":[{"field":"WatchID"},{"field":"ClientIP"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(IsRefresh)":{"sum":{"field":"IsRefresh"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q33.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q33.yaml index e9b5c203f20..ccda84ba38a 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q33.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q33.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[AND(IS NOT NULL($41), IS NOT NULL($76))]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 2},c=COUNT(),sum(IsRefresh)=SUM($1),avg(ResolutionWidth)=AVG($3)), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[c, sum(IsRefresh), avg(ResolutionWidth), WatchID, ClientIP], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"WatchID|ClientIP":{"multi_terms":{"terms":[{"field":"WatchID"},{"field":"ClientIP"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(IsRefresh)":{"sum":{"field":"IsRefresh"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 2},c=COUNT(),sum(IsRefresh)=SUM($1),avg(ResolutionWidth)=AVG($3)), PROJECT->[c, sum(IsRefresh), avg(ResolutionWidth), WatchID, ClientIP], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"WatchID|ClientIP":{"multi_terms":{"terms":[{"field":"WatchID"},{"field":"ClientIP"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(IsRefresh)":{"sum":{"field":"IsRefresh"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q34.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q34.yaml index e2fd395e0ec..69dc8c94239 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q34.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q34.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($26)]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], 
PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[c, URL], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"URL":{"terms":{"field":"URL","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), PROJECT->[c, URL], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"URL":{"terms":{"field":"URL","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q37.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q37.yaml index 44a4218baf5..6ea79300182 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q37.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q37.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[AND(=($103, 62), >=($0, TIMESTAMP('2013-07-01 00:00:00':VARCHAR)), <=($0, TIMESTAMP('2013-07-31 00:00:00':VARCHAR)), =($42, 0), =($72, 0), <>($26, ''))]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($4, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($2, 0), =($3, 0), <>($1, '')), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},PageViews=COUNT()), SORT_AGG_METRICS->[1 
DESC LAST], PROJECT->[PageViews, URL], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"bool":{"must":[{"exists":{"field":"URL","boost":1.0}}],"must_not":[{"term":{"URL":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"URL":{"terms":{"field":"URL","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($4, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($2, 0), =($3, 0), <>($1, '')), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},PageViews=COUNT()), PROJECT->[PageViews, URL], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"bool":{"must":[{"exists":{"field":"URL","boost":1.0}}],"must_not":[{"term":{"URL":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"URL":{"terms":{"field":"URL","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q38.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q38.yaml index cd15e03f941..d4cbbe1fc48 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q38.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q38.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[AND(=($103, 62), >=($0, TIMESTAMP('2013-07-01 00:00:00':VARCHAR)), <=($0, TIMESTAMP('2013-07-31 00:00:00':VARCHAR)), =($42, 0), =($72, 0), <>($97, ''))]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($4, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($1, 0), =($2, 0), <>($3, '')), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={3},PageViews=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[PageViews, Title], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"bool":{"must":[{"exists":{"field":"Title","boost":1.0}}],"must_not":[{"term":{"Title":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"Title":{"terms":{"field":"Title","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($4, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($1, 0), =($2, 0), <>($3, '')), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={3},PageViews=COUNT()), PROJECT->[PageViews, Title], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"bool":{"must":[{"exists":{"field":"Title","boost":1.0}}],"must_not":[{"term":{"Title":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"Title":{"terms":{"field":"Title","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q39.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q39.yaml index c4bc303bfb2..6b85d93899f 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q39.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q39.yaml @@ -11,4 +11,4 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableLimit(offset=[1000], fetch=[10]) - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($4, 0), <>($3, 0), =($2, 0), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},PageViews=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[PageViews, URL], LIMIT->[10 from 1000]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"bool":{"must":[{"exists":{"field":"IsLink","boost":1.0}}],"must_not":[{"term":{"IsLink":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"term":{"IsDownload":{"value":0,"boost":1.0}}},{"exists":{"field":"URL","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"URL":{"terms":{"field":"URL","size":1010,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($4, 0), <>($3, 0), =($2, 0), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},PageViews=COUNT()), PROJECT->[PageViews, URL], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->[10 from 1000]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"bool":{"must":[{"exists":{"field":"IsLink","boost":1.0}}],"must_not":[{"term":{"IsLink":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"term":{"IsDownload":{"value":0,"boost":1.0}}},{"exists":{"field":"URL","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"URL":{"terms":{"field":"URL","size":1010,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q40.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q40.yaml index e9eefc046b2..d9eb1a4c263 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q40.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q40.yaml @@ -12,4 +12,4 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableLimit(offset=[1000], fetch=[10]) - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($7, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($5, 0)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2, 3, 4},PageViews=COUNT()), SORT_AGG_METRICS->[5 DESC LAST], PROJECT->[PageViews, TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst], LIMIT->[10 from 1000]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"TraficSourceID|AdvEngineID|SearchEngineID|Src|Dst":{"multi_terms":{"terms":[{"field":"TraficSourceID"},{"field":"AdvEngineID"},{"field":"SearchEngineID"},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQGAXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiQU5EIiwKICAgICAgICAia2luZCI6ICJBTkQiLAogICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgIm9wIjogewogICAgICAgICAgICAibmFtZSI6ICI9IiwKICAgICAgICAgICAgImtpbmQiOiAiRVFVQUxTIiwKICAgICAgICAgICAgInN5bnRheCI6ICJCSU5BUlkiCiAgICAgICAgICB9LAogICAgICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAgICAgICAgICAgIm51bGxhYmxlIjogdHJ1ZQogICAgICAgICAgICAgIH0KICAgICAgICAgICAgfSwKICAgICAgICAgICAgewogICAgICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiPSIsCiAgICAgICAgICAgICJraW5kIjogIkVRVUFMUyIsCiAgICAgICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICAgICAgfSwKICAgICAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICAgICAgewogICAgICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAyLAogICAgICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydW
UKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0sCiAgICAgICAgICAgIHsKICAgICAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMywKICAgICAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAgICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgICAgICAgICAgfQogICAgICAgICAgICB9CiAgICAgICAgICBdCiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogNCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDUsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0KICBdCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2,0,2,0,2],"DIGESTS":["SearchEngineID",0,"AdvEngineID",0,"Referer",""]}}},{"field":"URL"}],"size":1010,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($7, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($5, 0)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2, 3, 4},PageViews=COUNT()), PROJECT->[PageViews, TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->[10 from 1000]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"TraficSourceID|AdvEngineID|SearchEngineID|Src|Dst":{"multi_terms":{"terms":[{"field":"TraficSourceID"},{"field":"AdvEngineID"},{"field":"SearchEngineID"},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQGAXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiQU5EIiwKICAgICAgICAia2luZCI6ICJBTkQiLAogICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgIm9wIjogewogICAgICAgICAgICAibmFtZSI6ICI9IiwKICAgICAgICAgICAgImtpbmQiOiAiRVFVQUxTIiwKICAgICAgICAgICAgInN5bnRheCI6ICJCSU5BUlkiCiAgICAgICAgICB9LAogICAgICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAgICAgICAgICAgIm51bGxhYmxlIjogdHJ1ZQogICAgICAgICAgICAgIH0KICAgICAgICAgICAgfSwKICAgICAgICAgICAgewogICAgICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiPSIsCiAgICAgICAgICAgICJraW5kIjogIkVRVUFMUyIsCiAgICAgICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICAgICAgfSwKICAgICAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICAgICAgewogICAgICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAyLAogICAgICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydW
UKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0sCiAgICAgICAgICAgIHsKICAgICAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMywKICAgICAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAgICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgICAgICAgICAgfQogICAgICAgICAgICB9CiAgICAgICAgICBdCiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogNCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDUsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0KICBdCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2,0,2,0,2],"DIGESTS":["SearchEngineID",0,"AdvEngineID",0,"Referer",""]}}},{"field":"URL"}],"size":1010,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q41.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q41.yaml index c23839c1674..6482c38bddb 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q41.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q41.yaml @@ -11,4 +11,4 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableLimit(offset=[100], fetch=[10]) - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($4, 0), SEARCH($2, Sarg[-1, 6]), =($1, 3594120000172545465), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 3},PageViews=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[PageViews, URLHash, EventDate], LIMIT->[10 from 100]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"terms":{"TraficSourceID":[-1.0,6.0],"boost":1.0}},{"term":{"RefererHash":{"value":3594120000172545465,"boost":1.0}}},{"exists":{"field":"URLHash","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"EventDate|URLHash":{"multi_terms":{"terms":[{"field":"EventDate","value_type":"long"},{"field":"URLHash"}],"size":110,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($4, 0), SEARCH($2, Sarg[-1, 6]), =($1, 3594120000172545465), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 3},PageViews=COUNT()), PROJECT->[PageViews, URLHash, EventDate], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->[10 from 100]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"terms":{"TraficSourceID":[-1.0,6.0],"boost":1.0}},{"term":{"RefererHash":{"value":3594120000172545465,"boost":1.0}}},{"exists":{"field":"URLHash","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"EventDate|URLHash":{"multi_terms":{"terms":[{"field":"EventDate","value_type":"long"},{"field":"URLHash"}],"size":110,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q42.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q42.yaml index 7a7d97c857a..96ee9b2a304 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q42.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q42.yaml @@ -11,4 +11,4 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableLimit(offset=[10000], fetch=[10]) - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($4, 0), =($2, 0), =($1, 2868770270353813622), IS NOT NULL($6), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={3, 6},PageViews=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[PageViews, WindowClientWidth, WindowClientHeight]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"term":{"URLHash":{"value":2868770270353813622,"boost":1.0}}},{"exists":{"field":"WindowClientWidth","boost":1.0}},{"exists":{"field":"WindowClientHeight","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"WindowClientHeight|WindowClientWidth":{"multi_terms":{"terms":[{"field":"WindowClientHeight"},{"field":"WindowClientWidth"}],"size":10000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($4, 0), =($2, 0), =($1, 2868770270353813622), IS NOT NULL($6), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={3, 6},PageViews=COUNT()), PROJECT->[PageViews, WindowClientWidth, WindowClientHeight], SORT_AGG_METRICS->[0 DESC LAST]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"term":{"URLHash":{"value":2868770270353813622,"boost":1.0}}},{"exists":{"field":"WindowClientWidth","boost":1.0}},{"exists":{"field":"WindowClientHeight","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"WindowClientHeight|WindowClientWidth":{"multi_terms":{"terms":[{"field":"WindowClientHeight"},{"field":"WindowClientWidth"}],"size":10000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q8.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q8.yaml index a71532d4271..05de3cbdcf9 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q8.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q8.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[<>($19, 0)]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER-><>($0, 0), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[count(), AdvEngineID], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"AdvEngineID","boost":1.0}}],"must_not":[{"term":{"AdvEngineID":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"AdvEngineID":{"terms":{"field":"AdvEngineID","size":10000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER-><>($0, 0), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), AdvEngineID], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"AdvEngineID","boost":1.0}}],"must_not":[{"term":{"AdvEngineID":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"AdvEngineID":{"terms":{"field":"AdvEngineID","size":10000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q9.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q9.yaml index 6ea001905ce..5e6bc1617c5 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q9.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q9.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($68)]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},u=COUNT(DISTINCT $1)), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[u, RegionID], 
LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"RegionID":{"terms":{"field":"RegionID","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"u":"desc"},{"_key":"asc"}]},"aggregations":{"u":{"cardinality":{"field":"UserID"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},u=COUNT(DISTINCT $1)), PROJECT->[u, RegionID], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"RegionID":{"terms":{"field":"RegionID","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"u":"desc"},{"_key":"asc"}]},"aggregations":{"u":{"cardinality":{"field":"UserID"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure3.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure3.yaml index b8fcb2e28df..b48e10e20c8 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure3.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure3.yaml @@ -9,4 +9,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | EnumerableLimit(fetch=[10000]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},cnt=COUNT()), PROJECT->[cnt, span(birthdate,1d)], SORT_AGG_METRICS->[0 DESC LAST]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"span(birthdate,1d)":{"date_histogram":{"field":"birthdate","fixed_interval":"1d","offset":0,"order":[{"_count":"desc"},{"_key":"asc"}],"keyed":false,"min_doc_count":0}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},cnt=COUNT()), PROJECT->[cnt, span(birthdate,1d)], SORT_AGG_METRICS->[0 DESC LAST]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"span(birthdate,1d)":{"date_histogram":{"field":"birthdate","fixed_interval":"1d","offset":0,"order":[{"_count":"desc"},{"_key":"asc"}],"keyed":false,"min_doc_count":1}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure4.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure4.yaml index f02b46a0938..f2105ce0d3c 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure4.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure4.yaml @@ -9,4 +9,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | EnumerableLimit(fetch=[10000]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},sum(balance)=SUM($0)), PROJECT->[sum(balance), span(age,5)], SORT_AGG_METRICS->[0 DESC LAST]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"span(age,5)":{"histogram":{"field":"age","interval":5.0,"offset":0.0,"order":[{"sum(balance)":"desc"},{"_key":"asc"}],"keyed":false,"min_doc_count":0},"aggregations":{"sum(balance)":{"sum":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},sum(balance)=SUM($0)), PROJECT->[sum(balance), span(age,5)], SORT_AGG_METRICS->[0 DESC LAST]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"span(age,5)":{"histogram":{"field":"age","interval":5.0,"offset":0.0,"order":[{"sum(balance)":"desc"},{"_key":"asc"}],"keyed":false,"min_doc_count":1},"aggregations":{"sum(balance)":{"sum":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java index 2acf79477ee..936c9de71cc 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java @@ -96,6 +96,19 @@ private MultiTermsAggregationBuilderCopy(MultiTermsAggregationBuilder source) { } } + private static final class DateHistogramAggregationBuilderCopy + extends DateHistogramAggregationBuilder { + private DateHistogramAggregationBuilderCopy(DateHistogramAggregationBuilder source) { + super(source, copySubAggregations(source), copyMetadataOrNull(source)); + } + } + + private static final class HistogramAggregationBuilderCopy extends HistogramAggregationBuilder { + private 
HistogramAggregationBuilderCopy(HistogramAggregationBuilder source) { + super(source, copySubAggregations(source), copyMetadataOrNull(source)); + } + } + private static final class TopHitsAggregationBuilderCopy extends TopHitsAggregationBuilder { private TopHitsAggregationBuilderCopy(TopHitsAggregationBuilder source) { super(source, copySubAggregations(source), copyMetadataOrNull(source)); @@ -123,6 +136,16 @@ private static MultiTermsAggregationBuilder copyMultiTermsAggregationBuilder( return new MultiTermsAggregationBuilderCopy(source); } + private static DateHistogramAggregationBuilder copyDateHistogramAggregationBuilder( + DateHistogramAggregationBuilder source) { + return new DateHistogramAggregationBuilderCopy(source); + } + + private static HistogramAggregationBuilder copyHistogramAggregationBuilder( + HistogramAggregationBuilder source) { + return new HistogramAggregationBuilderCopy(source); + } + private static TopHitsAggregationBuilder copyTopHitsAggregationBuilder( TopHitsAggregationBuilder source) { return new TopHitsAggregationBuilderCopy(source); @@ -143,6 +166,12 @@ private static AggregationBuilder copyAggregationBuilder(AggregationBuilder buil if (builder instanceof MultiTermsAggregationBuilder multiTerms) { return copyMultiTermsAggregationBuilder(multiTerms); } + if (builder instanceof DateHistogramAggregationBuilder dateHistogram) { + return copyDateHistogramAggregationBuilder(dateHistogram); + } + if (builder instanceof HistogramAggregationBuilder histogram) { + return copyHistogramAggregationBuilder(histogram); + } if (builder instanceof TopHitsAggregationBuilder topHits) { return copyTopHitsAggregationBuilder(topHits); } @@ -409,22 +438,56 @@ private static void copyDateHistogramInterval( } } + private static void copyDateHistogramBucketOptions( + DateHistogramValuesSourceBuilder source, DateHistogramAggregationBuilder target) { + if (source.field() != null) { + target.field(source.field()); + } + if (source.script() != null) { + 
target.script(source.script()); + } + copyDateHistogramInterval(source, target::fixedInterval, target::calendarInterval); + if (source.userValuetypeHint() != null) { + target.userValueTypeHint(source.userValuetypeHint()); + } + if (source.timeZone() != null) { + target.timeZone(source.timeZone()); + } + if (source.offset() != 0) { + target.offset(source.offset()); + } + if (source.format() != null) { + target.format(source.format()); + } + // Composite group-by only returns buckets with documents. Preserve that when rewriting. + target.minDocCount(1); + } + + private static void copyHistogramBucketOptions( + HistogramValuesSourceBuilder source, HistogramAggregationBuilder target) { + if (source.field() != null) { + target.field(source.field()); + } + if (source.script() != null) { + target.script(source.script()); + } + target.interval(source.interval()); + if (source.userValuetypeHint() != null) { + target.userValueTypeHint(source.userValuetypeHint()); + } + if (source.format() != null) { + target.format(source.format()); + } + // Composite group-by only returns buckets with documents. Preserve that when rewriting. 
+ target.minDocCount(1); + } + /** Build a {@link DateHistogramAggregationBuilder} by {@link DateHistogramValuesSourceBuilder} */ private DateHistogramAggregationBuilder buildDateHistogramAggregationBuilder( DateHistogramValuesSourceBuilder dateHisto, BucketOrder bucketOrder) { DateHistogramAggregationBuilder dateHistoBuilder = new DateHistogramAggregationBuilder(dateHisto.name()); - if (dateHisto.field() != null) { - dateHistoBuilder.field(dateHisto.field()); - } - if (dateHisto.script() != null) { - dateHistoBuilder.script(dateHisto.script()); - } - copyDateHistogramInterval( - dateHisto, dateHistoBuilder::fixedInterval, dateHistoBuilder::calendarInterval); - if (dateHisto.userValuetypeHint() != null) { - dateHistoBuilder.userValueTypeHint(dateHisto.userValuetypeHint()); - } + copyDateHistogramBucketOptions(dateHisto, dateHistoBuilder); dateHistoBuilder.order(bucketOrder); return dateHistoBuilder; } @@ -433,16 +496,7 @@ private DateHistogramAggregationBuilder buildDateHistogramAggregationBuilder( private HistogramAggregationBuilder buildHistogramAggregationBuilder( HistogramValuesSourceBuilder histo, BucketOrder bucketOrder) { HistogramAggregationBuilder histoBuilder = new HistogramAggregationBuilder(histo.name()); - if (histo.field() != null) { - histoBuilder.field(histo.field()); - } - if (histo.script() != null) { - histoBuilder.script(histo.script()); - } - histoBuilder.interval(histo.interval()); - if (histo.userValuetypeHint() != null) { - histoBuilder.userValueTypeHint(histo.userValuetypeHint()); - } + copyHistogramBucketOptions(histo, histoBuilder); histoBuilder.order(bucketOrder); return histoBuilder; } From 9fe51926b8d966d0747e005f852bb42589d16687 Mon Sep 17 00:00:00 2001 From: Songkan Tang Date: Mon, 16 Mar 2026 16:59:29 +0800 Subject: [PATCH 3/8] Refactor aggregation pushdown state to use immutable AggSpec Signed-off-by: Songkan Tang --- .../request/OpenSearchRequestBuilder.java | 4 + .../scan/AbstractCalciteIndexScan.java | 20 +- 
.../storage/scan/CalciteLogicalIndexScan.java | 76 +-- .../scan/context/AggPushDownAction.java | 274 ++--------- .../storage/scan/context/AggSpec.java | 437 ++++++++++++++++++ .../context/AggregationBuilderAction.java | 16 - .../storage/scan/context/PushDownContext.java | 36 +- 7 files changed, 531 insertions(+), 332 deletions(-) create mode 100644 opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggSpec.java delete mode 100644 opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggregationBuilderAction.java diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java index 435cef22ef4..a694e0fea06 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java @@ -78,6 +78,10 @@ public static class PushDownUnSupportedException extends RuntimeException { public PushDownUnSupportedException(String message) { super(message); } + + public PushDownUnSupportedException(String message, Throwable cause) { + super(message, cause); + } } /** Constructor. 
*/ diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java index 3ab40caee27..7e9a4cfa237 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java @@ -57,8 +57,6 @@ import org.opensearch.sql.opensearch.request.PredicateAnalyzer; import org.opensearch.sql.opensearch.storage.OpenSearchIndex; import org.opensearch.sql.opensearch.storage.scan.context.AbstractAction; -import org.opensearch.sql.opensearch.storage.scan.context.AggPushDownAction; -import org.opensearch.sql.opensearch.storage.scan.context.AggregationBuilderAction; import org.opensearch.sql.opensearch.storage.scan.context.FilterDigest; import org.opensearch.sql.opensearch.storage.scan.context.LimitDigest; import org.opensearch.sql.opensearch.storage.scan.context.OSRequestBuilderAction; @@ -174,7 +172,7 @@ public double estimateRowCount(RelMetadataQuery mq) { switch (operation.type()) { case AGGREGATION -> { dRows = mq.getRowCount((RelNode) operation.digest()); - dCpu += dRows * getAggMultiplier(operation); + dCpu += dRows * getAggMultiplier(operation, pushDownContext); } // Ignored Project in cost accumulation, but it will affect the external cost case PROJECT -> {} @@ -233,7 +231,8 @@ public double estimateRowCount(RelMetadataQuery mq) { } /** See source in {@link org.apache.calcite.rel.core.Aggregate::computeSelfCost} */ - private static float getAggMultiplier(PushDownOperation operation) { + private static float getAggMultiplier( + PushDownOperation operation, PushDownContext pushDownContext) { // START CALCITE List aggCalls = ((Aggregate) operation.digest()).getAggCallList(); float multiplier = 1f + (float) aggCalls.size() * 0.125f; @@ -248,7 +247,9 @@ private static float 
getAggMultiplier(PushDownOperation operation) { // For script aggregation, we need to multiply the multiplier by 1.1 to make up the cost. As we // prefer to have non-script agg push down after optimized by {@link PPLAggregateConvertRule} - multiplier *= (float) Math.pow(1.1f, ((AggPushDownAction) operation.action()).getScriptCount()); + long scriptCount = + pushDownContext.getAggSpec() == null ? 0 : pushDownContext.getAggSpec().getScriptCount(); + multiplier *= (float) Math.pow(1.1f, scriptCount); return multiplier; } @@ -325,10 +326,11 @@ && isAnyCollationNameInAggregators(collationNames)) { Object digest; if (pushDownContext.isAggregatePushed()) { // Push down the sort into the aggregation bucket - action = - (AggregationBuilderAction) - aggAction -> - aggAction.pushDownSortIntoAggBucket(collations, getRowType().getFieldNames()); + pushDownContextWithoutSort.setAggSpec( + pushDownContextWithoutSort + .getAggSpec() + .withBucketSort(collations, getRowType().getFieldNames())); + action = (OSRequestBuilderAction) requestBuilder -> {}; digest = collations; pushDownContextWithoutSort.add(PushDownType.SORT, digest, action); return buildScan( diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java index 248339e8bcf..b21d24d2c66 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java @@ -39,7 +39,6 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.opensearch.search.aggregations.AggregationBuilder; -import org.opensearch.search.aggregations.bucket.composite.CompositeAggregationBuilder; import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory; import org.opensearch.sql.calcite.utils.PPLHintUtils; import 
org.opensearch.sql.common.setting.Settings; @@ -54,8 +53,7 @@ import org.opensearch.sql.opensearch.response.agg.OpenSearchAggregationResponseParser; import org.opensearch.sql.opensearch.storage.OpenSearchIndex; import org.opensearch.sql.opensearch.storage.scan.context.AbstractAction; -import org.opensearch.sql.opensearch.storage.scan.context.AggPushDownAction; -import org.opensearch.sql.opensearch.storage.scan.context.AggregationBuilderAction; +import org.opensearch.sql.opensearch.storage.scan.context.AggSpec; import org.opensearch.sql.opensearch.storage.scan.context.FilterDigest; import org.opensearch.sql.opensearch.storage.scan.context.LimitDigest; import org.opensearch.sql.opensearch.storage.scan.context.OSRequestBuilderAction; @@ -274,7 +272,7 @@ public CalciteLogicalIndexScan pushDownProject(List selectedColumns) { AbstractAction action; if (pushDownContext.isAggregatePushed()) { // For aggregate, we do nothing on query builder but only change the schema of the scan. - action = (AggregationBuilderAction) aggAction -> {}; + action = (OSRequestBuilderAction) requestBuilder -> {}; } else { action = (OSRequestBuilderAction) @@ -308,13 +306,8 @@ private RelTraitSet reIndexCollations(List selectedColumns) { public CalciteLogicalIndexScan pushDownSortAggregateMeasure(Sort sort) { try { - if (!pushDownContext.isAggregatePushed()) return null; - List aggregationBuilders = - pushDownContext.getAggPushDownAction().getBuilderAndParser().getLeft(); - if (aggregationBuilders.size() != 1) { - return null; - } - if (!(aggregationBuilders.getFirst() instanceof CompositeAggregationBuilder)) { + AggSpec aggSpec = pushDownContext.getAggSpec(); + if (aggSpec == null || !aggSpec.isCompositeAggregation()) { return null; } List collationNames = getCollationNames(sort.getCollation().getFieldCollations()); @@ -322,11 +315,9 @@ public CalciteLogicalIndexScan pushDownSortAggregateMeasure(Sort sort) { return null; } CalciteLogicalIndexScan newScan = 
copyWithNewTraitSet(sort.getTraitSet()); - newScan - .pushDownContext - .getAggPushDownAction() - .rePushDownSortAggMeasure( - sort.getCollation().getFieldCollations(), rowType.getFieldNames()); + newScan.pushDownContext.setAggSpec( + aggSpec.withSortMeasure( + sort.getCollation().getFieldCollations(), rowType.getFieldNames())); AbstractAction action = (OSRequestBuilderAction) requestAction -> requestAction.resetRequestTotal(); Object digest = sort.getCollation().getFieldCollations(); @@ -343,7 +334,7 @@ public CalciteLogicalIndexScan pushDownSortAggregateMeasure(Sort sort) { public CalciteLogicalIndexScan pushDownRareTop(Project project, RareTopDigest digest) { try { CalciteLogicalIndexScan newScan = copyWithNewSchema(project.getRowType()); - newScan.pushDownContext.getAggPushDownAction().rePushDownRareTop(digest); + newScan.pushDownContext.setAggSpec(pushDownContext.getAggSpec().withRareTop(digest)); AbstractAction action = (OSRequestBuilderAction) requestAction -> requestAction.resetRequestTotal(); newScan.pushDownContext.add(PushDownType.RARE_TOP, digest, action); @@ -400,9 +391,22 @@ public AbstractRelNode pushDownAggregate(Aggregate aggregate, @Nullable Project OpenSearchDataType.of( OpenSearchTypeFactory.convertRelDataTypeToExprType( field.getType())))); - AggPushDownAction action = - new AggPushDownAction(builderAndParser, extendedTypeMapping, bucketNames); - newScan.pushDownContext.add(PushDownType.AGGREGATION, aggregate, action); + AggSpec aggSpec = + AggSpec.create( + aggregate, + project, + outputFields, + getRowType(), + fieldTypes, + getCluster(), + bucketNullable, + queryBucketSize, + extendedTypeMapping, + bucketNames, + builderAndParser); + newScan.pushDownContext.setAggSpec(aggSpec); + newScan.pushDownContext.add( + PushDownType.AGGREGATION, aggregate, (OSRequestBuilderAction) requestBuilder -> {}); return newScan; } catch (Exception e) { if (LOG.isDebugEnabled()) { @@ -416,9 +420,7 @@ public AbstractRelNode pushDownLimit(LogicalSort sort, Integer 
limit, Integer of try { if (pushDownContext.isAggregatePushed()) { int totalSize = limit + offset; - // Since the AggPushDownAction is shared among different PushDownContext, its size() may be - // inaccurate(<= the actual size). - // So take the previous limit into account to decide whether it can update the context. + AggSpec aggSpec = pushDownContext.getAggSpec(); boolean canReduceEstimatedRowsCount = !pushDownContext.isLimitPushed() || pushDownContext.getQueue().reversed().stream() @@ -428,27 +430,33 @@ public AbstractRelNode pushDownLimit(LogicalSort sort, Integer limit, Integer of .map(op -> (LimitDigest) op.digest()) .map(d -> totalSize < d.offset() + d.limit()) .orElse(true); + boolean canUpdateBuilder = aggSpec.canPushDownLimitIntoBucketSize(totalSize); + boolean alreadyBoundedByCurrentBucketSize = + aggSpec.getBucketSize() != null && totalSize <= aggSpec.getBucketSize(); + boolean alreadyEnforcedByExistingLimit = + pushDownContext.isLimitPushed() && !canReduceEstimatedRowsCount; + boolean canEnforceLimit = + aggSpec.isCompositeAggregation() + || canUpdateBuilder + || alreadyBoundedByCurrentBucketSize + || alreadyEnforcedByExistingLimit + || (aggSpec.isSingleRowAggregation() && offset == 0); // Push down the limit into the aggregation bucket in advance to detect whether the limit // can update the aggregation builder - boolean canUpdate = - canReduceEstimatedRowsCount - || pushDownContext.getAggPushDownAction().canPushDownLimitIntoBucketSize(totalSize); - if (!canUpdate && offset > 0) return null; + boolean canUpdate = canReduceEstimatedRowsCount || canUpdateBuilder; + if (!canEnforceLimit || (!canUpdate && offset > 0)) return null; CalciteLogicalIndexScan newScan = this.copyWithNewSchema(getRowType()); - if (canUpdate) { - newScan - .pushDownContext - .getAggPushDownAction() - .pushDownLimitIntoBucketSize(limit + offset); + if (canUpdateBuilder) { + newScan.pushDownContext.setAggSpec(aggSpec.withLimit(limit + offset)); } AbstractAction action; - if 
(pushDownContext.getAggPushDownAction().isCompositeAggregation()) { + if (newScan.pushDownContext.getAggSpec().isCompositeAggregation()) { action = (OSRequestBuilderAction) requestBuilder -> requestBuilder.pushDownLimitToRequestTotal(limit, offset); } else { - action = (AggregationBuilderAction) aggAction -> {}; + action = (OSRequestBuilderAction) requestBuilder -> {}; } newScan.pushDownContext.add(PushDownType.LIMIT, new LimitDigest(limit, offset), action); return offset > 0 ? sort.copy(sort.getTraitSet(), List.of(newScan)) : newScan; diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java index 936c9de71cc..64ad826dffe 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java @@ -13,7 +13,6 @@ import java.util.List; import java.util.Map; import java.util.function.Consumer; -import java.util.stream.Collectors; import lombok.EqualsAndHashCode; import lombok.Getter; import org.apache.calcite.rel.RelFieldCollation; @@ -34,7 +33,6 @@ import org.opensearch.search.aggregations.bucket.nested.NestedAggregationBuilder; import org.opensearch.search.aggregations.bucket.terms.MultiTermsAggregationBuilder; import org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder; -import org.opensearch.search.aggregations.metrics.TopHitsAggregationBuilder; import org.opensearch.search.aggregations.support.MultiTermsValuesSourceConfig; import org.opensearch.search.aggregations.support.ValuesSourceAggregationBuilder; import org.opensearch.search.sort.SortOrder; @@ -73,114 +71,6 @@ private static AggregatorFactories.Builder copySubAggregations(AggregationBuilde return copiedFactories; } - private static Map copyMetadataOrNull(AggregationBuilder source) { - Map 
metadata = source.getMetadata(); - return metadata == null || metadata.isEmpty() ? null : metadata; - } - - private static final class CompositeAggregationBuilderCopy extends CompositeAggregationBuilder { - private CompositeAggregationBuilderCopy(CompositeAggregationBuilder source) { - super(source, copySubAggregations(source), copyMetadataOrNull(source)); - } - } - - private static final class TermsAggregationBuilderCopy extends TermsAggregationBuilder { - private TermsAggregationBuilderCopy(TermsAggregationBuilder source) { - super(source, copySubAggregations(source), copyMetadataOrNull(source)); - } - } - - private static final class MultiTermsAggregationBuilderCopy extends MultiTermsAggregationBuilder { - private MultiTermsAggregationBuilderCopy(MultiTermsAggregationBuilder source) { - super(source, copySubAggregations(source), copyMetadataOrNull(source)); - } - } - - private static final class DateHistogramAggregationBuilderCopy - extends DateHistogramAggregationBuilder { - private DateHistogramAggregationBuilderCopy(DateHistogramAggregationBuilder source) { - super(source, copySubAggregations(source), copyMetadataOrNull(source)); - } - } - - private static final class HistogramAggregationBuilderCopy extends HistogramAggregationBuilder { - private HistogramAggregationBuilderCopy(HistogramAggregationBuilder source) { - super(source, copySubAggregations(source), copyMetadataOrNull(source)); - } - } - - private static final class TopHitsAggregationBuilderCopy extends TopHitsAggregationBuilder { - private TopHitsAggregationBuilderCopy(TopHitsAggregationBuilder source) { - super(source, copySubAggregations(source), copyMetadataOrNull(source)); - } - } - - private static final class NestedAggregationBuilderCopy extends NestedAggregationBuilder { - private NestedAggregationBuilderCopy(NestedAggregationBuilder source) { - super(source, copySubAggregations(source), copyMetadataOrNull(source)); - } - } - - private static CompositeAggregationBuilder 
copyCompositeAggregationBuilder( - CompositeAggregationBuilder source) { - return new CompositeAggregationBuilderCopy(source); - } - - private static TermsAggregationBuilder copyTermsAggregationBuilder( - TermsAggregationBuilder source) { - return new TermsAggregationBuilderCopy(source); - } - - private static MultiTermsAggregationBuilder copyMultiTermsAggregationBuilder( - MultiTermsAggregationBuilder source) { - return new MultiTermsAggregationBuilderCopy(source); - } - - private static DateHistogramAggregationBuilder copyDateHistogramAggregationBuilder( - DateHistogramAggregationBuilder source) { - return new DateHistogramAggregationBuilderCopy(source); - } - - private static HistogramAggregationBuilder copyHistogramAggregationBuilder( - HistogramAggregationBuilder source) { - return new HistogramAggregationBuilderCopy(source); - } - - private static TopHitsAggregationBuilder copyTopHitsAggregationBuilder( - TopHitsAggregationBuilder source) { - return new TopHitsAggregationBuilderCopy(source); - } - - private static NestedAggregationBuilder copyNestedAggregationBuilder( - NestedAggregationBuilder source) { - return new NestedAggregationBuilderCopy(source); - } - - private static AggregationBuilder copyAggregationBuilder(AggregationBuilder builder) { - if (builder instanceof CompositeAggregationBuilder composite) { - return copyCompositeAggregationBuilder(composite); - } - if (builder instanceof TermsAggregationBuilder terms) { - return copyTermsAggregationBuilder(terms); - } - if (builder instanceof MultiTermsAggregationBuilder multiTerms) { - return copyMultiTermsAggregationBuilder(multiTerms); - } - if (builder instanceof DateHistogramAggregationBuilder dateHistogram) { - return copyDateHistogramAggregationBuilder(dateHistogram); - } - if (builder instanceof HistogramAggregationBuilder histogram) { - return copyHistogramAggregationBuilder(histogram); - } - if (builder instanceof TopHitsAggregationBuilder topHits) { - return 
copyTopHitsAggregationBuilder(topHits); - } - if (builder instanceof NestedAggregationBuilder nested) { - return copyNestedAggregationBuilder(nested); - } - return builder; - } - private static AggregationBuilder unwrapNestedBuilder(AggregationBuilder rootBuilder) { if (rootBuilder instanceof NestedAggregationBuilder nested && !nested.getSubAggregations().isEmpty()) { @@ -201,21 +91,6 @@ private void replaceRootBuilder( Pair.of(Collections.singletonList(finalBuilder), builderAndParser.getRight()); } - /** - * Create a deep copy of this action. New lists are created for builders and bucketNames so that - * mutations (sort/limit pushdown) on the copy do not affect the original. - */ - public AggPushDownAction copy() { - List copiedBuilders = - builderAndParser.getLeft().stream() - .map(AggPushDownAction::copyAggregationBuilder) - .collect(Collectors.toCollection(ArrayList::new)); - return new AggPushDownAction( - Pair.of(copiedBuilders, builderAndParser.getRight()), - extendedTypeMapping, - new ArrayList<>(bucketNames)); - } - private static int getScriptCount(AggregationBuilder aggBuilder) { if (aggBuilder instanceof NestedAggregationBuilder) { aggBuilder = aggBuilder.getSubAggregations().iterator().next(); @@ -260,7 +135,8 @@ private String multiTermsBucketNameAsString(CompositeAggregationBuilder composit return composite.sources().stream() .map(TermsValuesSourceBuilder.class::cast) .map(TermsValuesSourceBuilder::name) - .collect(Collectors.joining("|")); // PIPE cannot be used in identifier + .reduce((left, right) -> left + "|" + right) + .orElse(""); // PIPE cannot be used in identifier } /** Re-pushdown a sort aggregation measure to replace the pushed composite aggregation */ @@ -408,17 +284,6 @@ private TermsAggregationBuilder buildTermsAggregationBuilder( return termsBuilder; } - /** - * Build a new {@link TermsAggregationBuilder} by copying from an existing one with a new size. - * This keeps all existing terms options (e.g. 
include/exclude, collect mode, shard sizing). - */ - private static TermsAggregationBuilder buildTermsAggregationBuilder( - TermsAggregationBuilder source, int newSize) { - TermsAggregationBuilder termsBuilder = copyTermsAggregationBuilder(source); - termsBuilder.size(newSize); - return termsBuilder; - } - private static void copyDateHistogramInterval( DateHistogramValuesSourceBuilder source, Consumer fixedIntervalSetter, @@ -556,51 +421,6 @@ private AggregationBuilder attachSubAggregations( return aggregationBuilder; } - /** - * Create a copy of a {@link CompositeValuesSourceBuilder} to avoid in-place mutation of shared - * bucket objects across different PushDownContext instances. - */ - @SuppressWarnings("unchecked") - private static CompositeValuesSourceBuilder copyCompositeBucket( - CompositeValuesSourceBuilder bucket) { - CompositeValuesSourceBuilder copy; - if (bucket instanceof TermsValuesSourceBuilder terms) { - TermsValuesSourceBuilder termsCopy = new TermsValuesSourceBuilder(terms.name()); - if (terms.field() != null) termsCopy.field(terms.field()); - if (terms.script() != null) termsCopy.script(terms.script()); - if (terms.userValuetypeHint() != null) termsCopy.userValuetypeHint(terms.userValuetypeHint()); - if (terms.format() != null) termsCopy.format(terms.format()); - copy = termsCopy; - } else if (bucket instanceof DateHistogramValuesSourceBuilder dateHisto) { - DateHistogramValuesSourceBuilder dhCopy = - new DateHistogramValuesSourceBuilder(dateHisto.name()); - if (dateHisto.field() != null) dhCopy.field(dateHisto.field()); - if (dateHisto.script() != null) dhCopy.script(dateHisto.script()); - if (dateHisto.userValuetypeHint() != null) - dhCopy.userValuetypeHint(dateHisto.userValuetypeHint()); - copyDateHistogramInterval(dateHisto, dhCopy::fixedInterval, dhCopy::calendarInterval); - if (dateHisto.timeZone() != null) dhCopy.timeZone(dateHisto.timeZone()); - if (dateHisto.offset() != 0) dhCopy.offset(dateHisto.offset()); - if (dateHisto.format() != 
null) dhCopy.format(dateHisto.format()); - copy = dhCopy; - } else if (bucket instanceof HistogramValuesSourceBuilder histo) { - HistogramValuesSourceBuilder hCopy = new HistogramValuesSourceBuilder(histo.name()); - if (histo.field() != null) hCopy.field(histo.field()); - if (histo.script() != null) hCopy.script(histo.script()); - if (histo.userValuetypeHint() != null) hCopy.userValuetypeHint(histo.userValuetypeHint()); - hCopy.interval(histo.interval()); - if (histo.format() != null) hCopy.format(histo.format()); - copy = hCopy; - } else { - throw new OpenSearchRequestBuilder.PushDownUnSupportedException( - "Unsupported CompositeValuesSourceBuilder: " + bucket.getClass()); - } - copy.missingBucket(bucket.missingBucket()); - copy.missingOrder(bucket.missingOrder()); - copy.order(bucket.order()); - return copy; - } - public void pushDownSortIntoAggBucket( List collations, List fieldNames) { // aggregationBuilder.getLeft() could be empty when count agg optimization works @@ -646,17 +466,16 @@ private void pushDownSortIntoCompositeBucket( RelFieldCollation.Direction.DESCENDING.equals(direction) ? 
SortOrder.DESC : SortOrder.ASC; - CompositeValuesSourceBuilder newBucket = copyCompositeBucket(bucket); - if (newBucket.missingBucket()) { + if (bucket.missingBucket()) { MissingOrder missingOrder = switch (nullDirection) { case FIRST -> MissingOrder.FIRST; case LAST -> MissingOrder.LAST; default -> MissingOrder.DEFAULT; }; - newBucket.missingOrder(missingOrder); + bucket.missingOrder(missingOrder); } - newBuckets.add(newBucket.order(order)); + newBuckets.add(bucket.order(order)); newBucketNames.add(bucketName); selected.add(bucketName); }); @@ -682,41 +501,7 @@ private void pushDownSortIntoTermsBucket( AggregationBuilder original, TermsAggregationBuilder termsAggBuilder, List collations) { - TermsAggregationBuilder newTermsBuilder = - buildTermsAggregationBuilder(termsAggBuilder, termsAggBuilder.size()); - newTermsBuilder.order(BucketOrder.key(!collations.getFirst().getDirection().isDescending())); - replaceRootBuilder(original, newTermsBuilder); - } - - public boolean isCompositeAggregation() { - return builderAndParser.getLeft().stream() - .anyMatch( - builder -> - builder instanceof CompositeAggregationBuilder - || (builder instanceof NestedAggregationBuilder - && builder.getSubAggregations().iterator().next() - instanceof CompositeAggregationBuilder)); - } - - private static CompositeAggregationBuilder copyAndResizeCompositeAggregationBuilder( - CompositeAggregationBuilder source, int size) { - CompositeAggregationBuilder copy = copyCompositeAggregationBuilder(source); - copy.size(size); - return copy; - } - - private static MultiTermsAggregationBuilder copyAndResizeMultiTermsAggregationBuilder( - MultiTermsAggregationBuilder source, int size) { - MultiTermsAggregationBuilder copy = copyMultiTermsAggregationBuilder(source); - copy.size(size); - return copy; - } - - private static TopHitsAggregationBuilder copyAndResizeTopHitsAggregationBuilder( - TopHitsAggregationBuilder source, int size) { - TopHitsAggregationBuilder copy = 
copyTopHitsAggregationBuilder(source); - copy.size(size); - return copy; + termsAggBuilder.order(BucketOrder.key(!collations.getFirst().getDirection().isDescending())); } private static Integer getBucketSize(AggregationBuilder builder) { @@ -729,25 +514,32 @@ private static Integer getBucketSize(AggregationBuilder builder) { if (builder instanceof MultiTermsAggregationBuilder multiTermsAggBuilder) { return multiTermsAggBuilder.size(); } - if (builder instanceof TopHitsAggregationBuilder topHitsAggBuilder) { + if (builder + instanceof + org.opensearch.search.aggregations.metrics.TopHitsAggregationBuilder topHitsAggBuilder) { return topHitsAggBuilder.size(); } return null; } - private static AggregationBuilder copyAndResizeBucketBuilder( - AggregationBuilder builder, int size) { + private static void resizeBucketBuilder(AggregationBuilder builder, int size) { if (builder instanceof CompositeAggregationBuilder compositeAggBuilder) { - return copyAndResizeCompositeAggregationBuilder(compositeAggBuilder, size); + compositeAggBuilder.size(size); + return; } if (builder instanceof TermsAggregationBuilder termsAggBuilder) { - return buildTermsAggregationBuilder(termsAggBuilder, size); + termsAggBuilder.size(size); + return; } if (builder instanceof MultiTermsAggregationBuilder multiTermsAggBuilder) { - return copyAndResizeMultiTermsAggregationBuilder(multiTermsAggBuilder, size); + multiTermsAggBuilder.size(size); + return; } - if (builder instanceof TopHitsAggregationBuilder topHitsAggBuilder) { - return copyAndResizeTopHitsAggregationBuilder(topHitsAggBuilder, size); + if (builder + instanceof + org.opensearch.search.aggregations.metrics.TopHitsAggregationBuilder topHitsAggBuilder) { + topHitsAggBuilder.size(size); + return; } throw new IllegalStateException( "Not a resizable bucket aggregation builder: " + builder.getClass().getSimpleName()); @@ -756,7 +548,11 @@ private static AggregationBuilder copyAndResizeBucketBuilder( private AggregationBuilder 
resizeAggregationForLimit(AggregationBuilder builder, int size) { Integer bucketSize = getBucketSize(builder); if (bucketSize != null) { - return size < bucketSize ? copyAndResizeBucketBuilder(builder, size) : null; + if (size < bucketSize) { + resizeBucketBuilder(builder, size); + return builder; + } + return null; } if (builder instanceof ValuesSourceAggregationBuilder.LeafOnly) { // all metric aggregations generate one row and are effectively already limited. @@ -766,24 +562,6 @@ private AggregationBuilder resizeAggregationForLimit(AggregationBuilder builder, "Unknown aggregation builder " + builder.getClass().getSimpleName()); } - /** - * Read-only probe: check if the limit can be pushed down into aggregation bucket without - * modifying any builder state. - */ - public boolean canPushDownLimitIntoBucketSize(Integer size) { - if (builderAndParser.getLeft().isEmpty()) return false; - AggregationBuilder builder = unwrapNestedBuilder(builderAndParser.getLeft().getFirst()); - Integer bucketSize = getBucketSize(builder); - if (bucketSize != null) { - return size < bucketSize; - } - if (builder instanceof ValuesSourceAggregationBuilder.LeafOnly) { - return true; - } - throw new OpenSearchRequestBuilder.PushDownUnSupportedException( - "Unknown aggregation builder " + builder.getClass().getSimpleName()); - } - /** * Check if the limit can be pushed down into aggregation bucket when the limit size is less than * bucket number. 
diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggSpec.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggSpec.java new file mode 100644 index 00000000000..96eae23afcb --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggSpec.java @@ -0,0 +1,437 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.scan.context; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import javax.annotation.Nullable; +import lombok.Getter; +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.rel.RelFieldCollation; +import org.apache.calcite.rel.core.Aggregate; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.commons.lang3.tuple.Pair; +import org.opensearch.search.aggregations.AggregationBuilder; +import org.opensearch.search.aggregations.bucket.composite.CompositeAggregationBuilder; +import org.opensearch.search.aggregations.bucket.composite.CompositeValuesSourceBuilder; +import org.opensearch.search.aggregations.bucket.composite.DateHistogramValuesSourceBuilder; +import org.opensearch.search.aggregations.bucket.composite.HistogramValuesSourceBuilder; +import org.opensearch.search.aggregations.bucket.composite.TermsValuesSourceBuilder; +import org.opensearch.search.aggregations.bucket.nested.NestedAggregationBuilder; +import org.opensearch.search.aggregations.bucket.terms.MultiTermsAggregationBuilder; +import org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder; +import org.opensearch.search.aggregations.metrics.TopHitsAggregationBuilder; +import org.opensearch.sql.data.type.ExprType; +import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; +import org.opensearch.sql.opensearch.request.AggregateAnalyzer; +import 
org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder; +import org.opensearch.sql.opensearch.response.agg.OpenSearchAggregationResponseParser; + +/** Immutable aggregation pushdown specification used during planning. */ +@Getter +public final class AggSpec { + private enum AggKind { + OTHER, + COMPOSITE, + TERMS, + MULTI_TERMS, + DATE_HISTOGRAM, + HISTOGRAM, + TOP_HITS, + RARE_TOP + } + + private final Aggregate aggregate; + @Nullable private final Project project; + private final List outputFields; + private final RelDataType rowType; + private final Map fieldTypes; + private final RelOptCluster cluster; + private final boolean bucketNullable; + private final int queryBucketSize; + private final Map extendedTypeMapping; + private final List initialBucketNames; + private final List bucketNames; + private final long scriptCount; + private final AggKind kind; + @Nullable private final AggKind measureSortTarget; + @Nullable private final List bucketSortCollations; + @Nullable private final List bucketSortFieldNames; + @Nullable private final List measureSortCollations; + @Nullable private final List measureSortFieldNames; + @Nullable private final RareTopDigest rareTopDigest; + @Nullable private final Integer bucketSize; + + private AggSpec( + Aggregate aggregate, + @Nullable Project project, + List outputFields, + RelDataType rowType, + Map fieldTypes, + RelOptCluster cluster, + boolean bucketNullable, + int queryBucketSize, + Map extendedTypeMapping, + List initialBucketNames, + List bucketNames, + long scriptCount, + AggKind kind, + @Nullable AggKind measureSortTarget, + @Nullable List bucketSortCollations, + @Nullable List bucketSortFieldNames, + @Nullable List measureSortCollations, + @Nullable List measureSortFieldNames, + @Nullable RareTopDigest rareTopDigest, + @Nullable Integer bucketSize) { + this.aggregate = aggregate; + this.project = project; + this.outputFields = List.copyOf(outputFields); + this.rowType = rowType; + this.fieldTypes = 
Map.copyOf(fieldTypes); + this.cluster = cluster; + this.bucketNullable = bucketNullable; + this.queryBucketSize = queryBucketSize; + this.extendedTypeMapping = Map.copyOf(extendedTypeMapping); + this.initialBucketNames = List.copyOf(initialBucketNames); + this.bucketNames = List.copyOf(bucketNames); + this.scriptCount = scriptCount; + this.kind = kind; + this.measureSortTarget = measureSortTarget; + this.bucketSortCollations = + bucketSortCollations == null ? null : List.copyOf(bucketSortCollations); + this.bucketSortFieldNames = + bucketSortFieldNames == null ? null : List.copyOf(bucketSortFieldNames); + this.measureSortCollations = + measureSortCollations == null ? null : List.copyOf(measureSortCollations); + this.measureSortFieldNames = + measureSortFieldNames == null ? null : List.copyOf(measureSortFieldNames); + this.rareTopDigest = rareTopDigest; + this.bucketSize = bucketSize; + } + + public static AggSpec create( + Aggregate aggregate, + @Nullable Project project, + List outputFields, + RelDataType rowType, + Map fieldTypes, + RelOptCluster cluster, + boolean bucketNullable, + int queryBucketSize, + Map extendedTypeMapping, + List bucketNames, + Pair, OpenSearchAggregationResponseParser> builderAndParser) { + AggregationBuilder rootBuilder = + builderAndParser.getLeft().isEmpty() ? 
null : builderAndParser.getLeft().getFirst(); + AggKind kind = inferKind(rootBuilder); + return new AggSpec( + aggregate, + project, + outputFields, + rowType, + fieldTypes, + cluster, + bucketNullable, + queryBucketSize, + extendedTypeMapping, + bucketNames, + bucketNames, + new AggPushDownAction(builderAndParser, extendedTypeMapping, bucketNames).getScriptCount(), + kind, + inferMeasureSortTarget(rootBuilder), + null, + null, + null, + null, + null, + inferBucketSize(rootBuilder)); + } + + public boolean isCompositeAggregation() { + return kind == AggKind.COMPOSITE; + } + + public boolean isSingleRowAggregation() { + return aggregate.getGroupSet().isEmpty(); + } + + public boolean canPushDownLimitIntoBucketSize(int size) { + return bucketSize != null && size < bucketSize; + } + + public AggSpec withBucketSort(List collations, List fieldNames) { + if (kind != AggKind.COMPOSITE && kind != AggKind.TERMS) { + throw new OpenSearchRequestBuilder.PushDownUnSupportedException( + "Cannot pushdown sort into aggregation bucket"); + } + List newBucketNames = bucketNames; + if (kind == AggKind.COMPOSITE) { + List reordered = new ArrayList<>(bucketNames.size()); + List selected = new ArrayList<>(collations.size()); + for (RelFieldCollation collation : collations) { + String bucketName = fieldNames.get(collation.getFieldIndex()); + if (!bucketNames.contains(bucketName)) { + throw new OpenSearchRequestBuilder.PushDownUnSupportedException( + "Cannot pushdown sort into aggregation bucket"); + } + reordered.add(bucketName); + selected.add(bucketName); + } + for (String name : bucketNames) { + if (!selected.contains(name)) { + reordered.add(name); + } + } + newBucketNames = reordered; + } + return new AggSpec( + aggregate, + project, + outputFields, + rowType, + fieldTypes, + cluster, + bucketNullable, + queryBucketSize, + extendedTypeMapping, + initialBucketNames, + newBucketNames, + scriptCount, + kind, + measureSortTarget, + collations, + fieldNames, + measureSortCollations, + 
measureSortFieldNames, + rareTopDigest, + bucketSize); + } + + public AggSpec withoutBucketSort() { + if (bucketSortCollations == null) { + return this; + } + return new AggSpec( + aggregate, + project, + outputFields, + rowType, + fieldTypes, + cluster, + bucketNullable, + queryBucketSize, + extendedTypeMapping, + initialBucketNames, + initialBucketNames, + scriptCount, + kind, + measureSortTarget, + null, + null, + measureSortCollations, + measureSortFieldNames, + rareTopDigest, + bucketSize); + } + + public AggSpec withSortMeasure(List collations, List fieldNames) { + if (kind != AggKind.COMPOSITE || measureSortTarget == null) { + throw new OpenSearchRequestBuilder.PushDownUnSupportedException( + "Cannot pushdown sort aggregate measure"); + } + Integer resizedBucketSize = + switch (measureSortTarget) { + case TERMS, MULTI_TERMS -> bucketSize; + default -> null; + }; + return new AggSpec( + aggregate, + project, + outputFields, + rowType, + fieldTypes, + cluster, + bucketNullable, + queryBucketSize, + extendedTypeMapping, + initialBucketNames, + bucketNames, + scriptCount, + measureSortTarget, + null, + bucketSortCollations, + bucketSortFieldNames, + collations, + fieldNames, + rareTopDigest, + resizedBucketSize); + } + + public AggSpec withRareTop(RareTopDigest digest) { + if (kind != AggKind.COMPOSITE) { + throw new OpenSearchRequestBuilder.PushDownUnSupportedException("Cannot pushdown " + digest); + } + return new AggSpec( + aggregate, + project, + outputFields, + rowType, + fieldTypes, + cluster, + bucketNullable, + queryBucketSize, + extendedTypeMapping, + initialBucketNames, + bucketNames, + scriptCount, + AggKind.RARE_TOP, + null, + bucketSortCollations, + bucketSortFieldNames, + measureSortCollations, + measureSortFieldNames, + digest, + null); + } + + public AggSpec withLimit(int size) { + if (!canPushDownLimitIntoBucketSize(size)) { + return this; + } + return new AggSpec( + aggregate, + project, + outputFields, + rowType, + fieldTypes, + cluster, + 
bucketNullable, + queryBucketSize, + extendedTypeMapping, + initialBucketNames, + bucketNames, + scriptCount, + kind, + measureSortTarget, + bucketSortCollations, + bucketSortFieldNames, + measureSortCollations, + measureSortFieldNames, + rareTopDigest, + size); + } + + public Pair, OpenSearchAggregationResponseParser> build() { + try { + AggregateAnalyzer.AggregateBuilderHelper helper = + new AggregateAnalyzer.AggregateBuilderHelper( + rowType, fieldTypes, cluster, bucketNullable, queryBucketSize); + Pair, OpenSearchAggregationResponseParser> builderAndParser = + AggregateAnalyzer.analyze(aggregate, project, outputFields, helper); + AggPushDownAction temp = + new AggPushDownAction( + builderAndParser, extendedTypeMapping, new ArrayList<>(initialBucketNames)); + if (bucketSortCollations != null) { + temp.pushDownSortIntoAggBucket(bucketSortCollations, bucketSortFieldNames); + } + if (measureSortCollations != null) { + temp.rePushDownSortAggMeasure(measureSortCollations, measureSortFieldNames); + } + if (rareTopDigest != null) { + temp.rePushDownRareTop(rareTopDigest); + } + if (bucketSize != null) { + temp.pushDownLimitIntoBucketSize(bucketSize); + } + return temp.getBuilderAndParser(); + } catch (AggregateAnalyzer.ExpressionNotAnalyzableException e) { + throw new OpenSearchRequestBuilder.PushDownUnSupportedException( + "Cannot materialize aggregation pushdown", e); + } + } + + private static AggKind inferKind(@Nullable AggregationBuilder rootBuilder) { + AggregationBuilder builder = unwrapNestedBuilder(rootBuilder); + if (builder instanceof CompositeAggregationBuilder) { + return AggKind.COMPOSITE; + } + if (builder instanceof TermsAggregationBuilder) { + return AggKind.TERMS; + } + if (builder instanceof MultiTermsAggregationBuilder) { + return AggKind.MULTI_TERMS; + } + if (builder + instanceof + org.opensearch.search.aggregations.bucket.histogram.DateHistogramAggregationBuilder) { + return AggKind.DATE_HISTOGRAM; + } + if (builder + instanceof + 
org.opensearch.search.aggregations.bucket.histogram.HistogramAggregationBuilder) { + return AggKind.HISTOGRAM; + } + if (builder instanceof TopHitsAggregationBuilder) { + return AggKind.TOP_HITS; + } + return AggKind.OTHER; + } + + @Nullable + private static AggKind inferMeasureSortTarget(@Nullable AggregationBuilder rootBuilder) { + AggregationBuilder builder = unwrapNestedBuilder(rootBuilder); + if (!(builder instanceof CompositeAggregationBuilder composite)) { + return null; + } + if (composite.sources().size() == 1) { + CompositeValuesSourceBuilder source = composite.sources().getFirst(); + if (source instanceof TermsValuesSourceBuilder terms && !terms.missingBucket()) { + return AggKind.TERMS; + } + if (source instanceof DateHistogramValuesSourceBuilder) { + return AggKind.DATE_HISTOGRAM; + } + if (source instanceof HistogramValuesSourceBuilder histo && !histo.missingBucket()) { + return AggKind.HISTOGRAM; + } + return null; + } + return composite.sources().stream() + .allMatch( + src -> src instanceof TermsValuesSourceBuilder terms && !terms.missingBucket()) + ? 
AggKind.MULTI_TERMS + : null; + } + + @Nullable + private static Integer inferBucketSize(@Nullable AggregationBuilder rootBuilder) { + AggregationBuilder builder = unwrapNestedBuilder(rootBuilder); + if (builder instanceof CompositeAggregationBuilder composite) { + return composite.size(); + } + if (builder instanceof TermsAggregationBuilder terms) { + return terms.size(); + } + if (builder instanceof MultiTermsAggregationBuilder multiTerms) { + return multiTerms.size(); + } + if (builder instanceof TopHitsAggregationBuilder topHits) { + return topHits.size(); + } + return null; + } + + @Nullable + private static AggregationBuilder unwrapNestedBuilder(@Nullable AggregationBuilder rootBuilder) { + if (rootBuilder instanceof NestedAggregationBuilder nested + && !nested.getSubAggregations().isEmpty()) { + return nested.getSubAggregations().iterator().next(); + } + return rootBuilder; + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggregationBuilderAction.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggregationBuilderAction.java deleted file mode 100644 index f9f43c89a7b..00000000000 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggregationBuilderAction.java +++ /dev/null @@ -1,16 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.opensearch.storage.scan.context; - -/** A lambda action to apply on the {@link AggPushDownAction} */ -public interface AggregationBuilderAction extends AbstractAction { - default void pushOperation(PushDownContext context, PushDownOperation operation) { - // Apply transformation to aggregation builder in the optimization phase as some transformation - // may cause exception. We need to detect that exception in advance. 
- apply(context.getAggPushDownAction()); - context.addOperationForAgg(operation); - } -} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java index fb95a11414e..c887c9d075d 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java @@ -11,6 +11,7 @@ import java.util.List; import javax.annotation.Nullable; import lombok.Getter; +import lombok.Setter; import org.apache.calcite.rel.core.Aggregate; import org.apache.calcite.rel.core.Project; import org.jetbrains.annotations.NotNull; @@ -27,7 +28,7 @@ public class PushDownContext extends AbstractCollection { private ArrayDeque operationsForRequestBuilder; private boolean isAggregatePushed = false; - private AggPushDownAction aggPushDownAction; + @Setter private AggSpec aggSpec; private ArrayDeque operationsForAgg; // Records the start pos of the query, which is updated by new added limit operations. @@ -50,8 +51,9 @@ public PushDownContext(OpenSearchIndex osIndex) { public PushDownContext clone() { PushDownContext newContext = new PushDownContext(osIndex); for (PushDownOperation operation : this) { - newContext.add(maybeCopyAggOperation(operation)); + newContext.add(operation); } + newContext.aggSpec = aggSpec; return newContext; } @@ -64,9 +66,10 @@ public PushDownContext cloneWithoutSort() { PushDownContext newContext = new PushDownContext(osIndex); for (PushDownOperation action : this) { if (action.type() != PushDownType.SORT && action.type() != PushDownType.SORT_EXPR) { - newContext.add(maybeCopyAggOperation(action)); + newContext.add(action); } } + newContext.aggSpec = aggSpec == null ? 
null : aggSpec.withoutBucketSort(); return newContext; } @@ -110,23 +113,11 @@ public PushDownContext cloneForAggregate(Aggregate aggregate, @Nullable Project continue; } } - newContext.add(maybeCopyAggOperation(operation)); + newContext.add(operation); } return newContext; } - /** - * Deep-copy AGGREGATION operations so that the cloned context gets its own AggPushDownAction - * instance. Other operation types are immutable lambdas and can be shared safely. - */ - private PushDownOperation maybeCopyAggOperation(PushDownOperation operation) { - if (operation.type() == PushDownType.AGGREGATION - && operation.action() instanceof AggPushDownAction aggAction) { - return new PushDownOperation(operation.type(), operation.digest(), aggAction.copy()); - } - return operation; - } - @NotNull @Override public Iterator iterator() { @@ -146,20 +137,11 @@ void addOperationForRequestBuilder(PushDownOperation operation) { queue.add(operation); } - void addOperationForAgg(PushDownOperation operation) { - if (operationsForAgg == null) { - this.operationsForAgg = new ArrayDeque<>(); - } - operationsForAgg.add(operation); - queue.add(operation); - } - @Override public boolean add(PushDownOperation operation) { operation.action().pushOperation(this, operation); if (operation.type() == PushDownType.AGGREGATION) { isAggregatePushed = true; - this.aggPushDownAction = (AggPushDownAction) operation.action(); } if (operation.type() == PushDownType.LIMIT) { startFrom += ((LimitDigest) operation.digest()).offset(); @@ -228,6 +210,10 @@ public OpenSearchRequestBuilder createRequestBuilder() { operationsForRequestBuilder.forEach( operation -> ((OSRequestBuilderAction) operation.action()).apply(newRequestBuilder)); } + if (aggSpec != null) { + newRequestBuilder.pushDownAggregation(aggSpec.build()); + newRequestBuilder.pushTypeMapping(aggSpec.getExtendedTypeMapping()); + } return newRequestBuilder; } } From 7df5f941e24d5c5f1e363b2d60224affa9080653 Mon Sep 17 00:00:00 2001 From: Songkan Tang Date: 
Tue, 17 Mar 2026 11:02:02 +0800 Subject: [PATCH 4/8] Fix tests Signed-off-by: Songkan Tang --- .../calcite/agg_filter_nested.yaml | 3 +- .../calcite/explain_agg_sort_on_measure1.yaml | 3 +- .../calcite/explain_agg_sort_on_measure2.yaml | 3 +- .../explain_agg_sort_on_measure_complex1.yaml | 3 +- .../explain_agg_sort_on_measure_complex2.yaml | 3 +- ...plain_agg_sort_on_measure_multi_terms.yaml | 3 +- ...gg_sort_on_measure_multi_terms_script.yaml | 3 +- .../explain_agg_sort_on_measure_script.yaml | 3 +- .../explain_count_eval_complex_push.json | 4 +- .../calcite/explain_count_eval_push.json | 4 +- .../calcite/explain_nested_agg_top_push.yaml | 3 +- .../storage/scan/context/AggSpec.java | 33 +++++++++- .../scan/CalciteIndexScanCostTest.java | 62 +++++-------------- 13 files changed, 67 insertions(+), 63 deletions(-) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_filter_nested.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_filter_nested.yaml index c566e7e18f4..840f9d39352 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/agg_filter_nested.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_filter_nested.yaml @@ -5,5 +5,4 @@ calcite: LogicalProject($f1=[CASE(<($7, 'K'), 1, null:NULL)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_cascaded_nested]]) physical: | - EnumerableLimit(fetch=[10000]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_cascaded_nested]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={},george_and_jk=COUNT() FILTER $0)], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"george_and_jk":{"filter":{"nested":{"query":{"range":{"author.name":{"from":null,"to":"K","include_lower":true,"include_upper":false,"boost":1.0}}},"path":"author","ignore_unmapped":false,"score_mode":"none","boost":1.0}},"aggregations":{"george_and_jk":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_cascaded_nested]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={},george_and_jk=COUNT() FILTER $0), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"george_and_jk":{"filter":{"nested":{"query":{"range":{"author.name":{"from":null,"to":"K","include_lower":true,"include_upper":false,"boost":1.0}}},"path":"author","ignore_unmapped":false,"score_mode":"none","boost":1.0}},"aggregations":{"george_and_jk":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure1.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure1.yaml index c08c533bc60..acb81c11dc3 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure1.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure1.yaml @@ -8,4 +8,5 @@ calcite: LogicalFilter(condition=[IS NOT NULL($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), state], SORT_AGG_METRICS->[0 ASC FIRST], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"state":{"terms":{"field":"state.keyword","size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + EnumerableLimit(fetch=[10000]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), state], SORT_AGG_METRICS->[0 ASC FIRST]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"state":{"terms":{"field":"state.keyword","size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure2.yaml index 9c41efa9139..67310fbb71a 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure2.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure2.yaml @@ -8,4 +8,5 @@ calcite: LogicalFilter(condition=[IS NOT NULL($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},sum=SUM($0)), PROJECT->[sum, state], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"state":{"terms":{"field":"state.keyword","size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"sum":"desc"},{"_key":"asc"}]},"aggregations":{"sum":{"sum":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + EnumerableLimit(fetch=[10000]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},sum=SUM($0)), PROJECT->[sum, state], SORT_AGG_METRICS->[0 DESC LAST]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"state":{"terms":{"field":"state.keyword","size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"sum":"desc"},{"_key":"asc"}]},"aggregations":{"sum":{"sum":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex1.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex1.yaml index cd0355241fe..22c03ba4519 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex1.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex1.yaml @@ -8,4 +8,5 @@ calcite: LogicalFilter(condition=[IS NOT NULL($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={2},sum(balance)=SUM($0),c=COUNT(),dc(employer)=COUNT(DISTINCT $1)), PROJECT->[sum(balance), c, dc(employer), state], SORT_AGG_METRICS->[1 DESC LAST], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"state":{"terms":{"field":"state.keyword","size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(balance)":{"sum":{"field":"balance"}},"dc(employer)":{"cardinality":{"field":"employer.keyword"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + EnumerableLimit(fetch=[10000]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={2},sum(balance)=SUM($0),c=COUNT(),dc(employer)=COUNT(DISTINCT $1)), PROJECT->[sum(balance), c, dc(employer), state], SORT_AGG_METRICS->[1 DESC LAST]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"state":{"terms":{"field":"state.keyword","size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(balance)":{"sum":{"field":"balance"}},"dc(employer)":{"cardinality":{"field":"employer.keyword"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex2.yaml index 59cd137ca59..1c8c0ec5ee9 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex2.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex2.yaml @@ -9,4 +9,5 @@ calcite: LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], 
_maxscore=[$14], _sort=[$15], _routing=[$16], new_state=[LOWER($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},sum(balance)=SUM($2),count()=COUNT(),d=COUNT(DISTINCT $3)), PROJECT->[sum(balance), count(), d, gender, new_state], SORT_AGG_METRICS->[2 DESC LAST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"gender|new_state":{"multi_terms":{"terms":[{"field":"gender.keyword"},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}}}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"d":"desc"},{"_key":"asc"}]},"aggregations":{"sum(balance)":{"sum":{"field":"balance"}},"d":{"cardinality":{"field":"employer.keyword"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + EnumerableLimit(fetch=[10000]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},sum(balance)=SUM($2),count()=COUNT(),d=COUNT(DISTINCT $3)), PROJECT->[sum(balance), count(), d, gender, new_state], SORT_AGG_METRICS->[2 DESC LAST]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"gender|new_state":{"multi_terms":{"terms":[{"field":"gender.keyword"},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}}}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"d":"desc"},{"_key":"asc"}]},"aggregations":{"sum(balance)":{"sum":{"field":"balance"}},"d":{"cardinality":{"field":"employer.keyword"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms.yaml index b584249d91a..2047c63541e 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms.yaml @@ -8,4 +8,5 @@ calcite: LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($7))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), gender, state], SORT_AGG_METRICS->[0 ASC FIRST], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"gender|state":{"multi_terms":{"terms":[{"field":"gender.keyword"},{"field":"state.keyword"}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + EnumerableLimit(fetch=[10000]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), gender, state], SORT_AGG_METRICS->[0 ASC FIRST]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"gender|state":{"multi_terms":{"terms":[{"field":"gender.keyword"},{"field":"state.keyword"}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms_script.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms_script.yaml index 44a51b2171d..26c51980831 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms_script.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms_script.yaml @@ -9,4 +9,5 @@ calcite: LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], new_gender=[LOWER($4)], new_state=[LOWER($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - 
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), new_gender, new_state], SORT_AGG_METRICS->[0 ASC FIRST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"new_gender|new_state":{"multi_terms":{"terms":[{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}}},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}}}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + EnumerableLimit(fetch=[10000]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), new_gender, new_state], SORT_AGG_METRICS->[0 ASC FIRST]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"new_gender|new_state":{"multi_terms":{"terms":[{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}}},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}}}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_script.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_script.yaml index e24043592fe..9f9e12dad44 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_script.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_script.yaml @@ -9,4 +9,5 @@ calcite: LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], 
lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], new_state=[LOWER($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), new_state], SORT_AGG_METRICS->[0 ASC FIRST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + EnumerableLimit(fetch=[10000]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), new_state], SORT_AGG_METRICS->[0 ASC FIRST]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_count_eval_complex_push.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_count_eval_complex_push.json index 8e429a7f610..7132c66c807 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_count_eval_complex_push.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_count_eval_complex_push.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalAggregate(group=[{}], mature_count=[COUNT($0)])\n LogicalProject($f1=[CASE(SEARCH($10, Sarg[(30..50)]), 1, null:NULL)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={},mature_count=COUNT() FILTER $0)], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"mature_count\":{\"filter\":{\"range\":{\"age\":{\"from\":30.0,\"to\":50.0,\"include_lower\":false,\"include_upper\":false,\"boost\":1.0}}},\"aggregations\":{\"mature_count\":{\"value_count\":{\"field\":\"_index\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={},mature_count=COUNT() FILTER $0), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"mature_count\":{\"filter\":{\"range\":{\"age\":{\"from\":30.0,\"to\":50.0,\"include_lower\":false,\"include_upper\":false,\"boost\":1.0}}},\"aggregations\":{\"mature_count\":{\"value_count\":{\"field\":\"_index\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" } -} \ No newline at end of file +} diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_count_eval_push.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_count_eval_push.json index f0b75595a56..46adc8ff8f0 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_count_eval_push.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_count_eval_push.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalAggregate(group=[{}], mature_count=[COUNT($0)])\n LogicalProject($f1=[CASE(>($10, 30), 1, null:NULL)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={},mature_count=COUNT() FILTER $0)], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"mature_count\":{\"filter\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"aggregations\":{\"mature_count\":{\"value_count\":{\"field\":\"_index\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={},mature_count=COUNT() FILTER $0), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"mature_count\":{\"filter\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"aggregations\":{\"mature_count\":{\"value_count\":{\"field\":\"_index\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" } -} \ No newline at end of file +} diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_nested_agg_top_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_nested_agg_top_push.yaml index 65f3ed320fc..ab1a67d814d 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_nested_agg_top_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_nested_agg_top_push.yaml @@ -9,4 +9,5 @@ calcite: LogicalFilter(condition=[IS NOT NULL($3)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_nested_simple]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_nested_simple]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count=COUNT()), RARE_TOP->top 10 address.city, PROJECT->[address.city, count], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"nested_composite_buckets":{"nested":{"path":"address"},"aggregations":{"address.city":{"terms":{"field":"address.city.keyword","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + EnumerableLimit(fetch=[10000]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_nested_simple]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count=COUNT()), RARE_TOP->top 10 address.city, PROJECT->[address.city, count]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"nested_composite_buckets":{"nested":{"path":"address"},"aggregations":{"address.city":{"terms":{"field":"address.city.keyword","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggSpec.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggSpec.java index 96eae23afcb..4d9975c4f65 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggSpec.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggSpec.java @@ -5,6 +5,8 @@ package org.opensearch.sql.opensearch.storage.scan.context; +import static org.opensearch.search.aggregations.MultiBucketConsumerService.DEFAULT_MAX_BUCKETS; + import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -26,6 +28,7 @@ import org.opensearch.search.aggregations.bucket.terms.MultiTermsAggregationBuilder; import 
org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder; import org.opensearch.search.aggregations.metrics.TopHitsAggregationBuilder; +import org.opensearch.search.aggregations.support.ValuesSourceAggregationBuilder; import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; import org.opensearch.sql.opensearch.request.AggregateAnalyzer; @@ -60,6 +63,7 @@ private enum AggKind { private final long scriptCount; private final AggKind kind; @Nullable private final AggKind measureSortTarget; + private final boolean rareTopSupported; @Nullable private final List bucketSortCollations; @Nullable private final List bucketSortFieldNames; @Nullable private final List measureSortCollations; @@ -82,6 +86,7 @@ private AggSpec( long scriptCount, AggKind kind, @Nullable AggKind measureSortTarget, + boolean rareTopSupported, @Nullable List bucketSortCollations, @Nullable List bucketSortFieldNames, @Nullable List measureSortCollations, @@ -102,6 +107,7 @@ private AggSpec( this.scriptCount = scriptCount; this.kind = kind; this.measureSortTarget = measureSortTarget; + this.rareTopSupported = rareTopSupported; this.bucketSortCollations = bucketSortCollations == null ? 
null : List.copyOf(bucketSortCollations); this.bucketSortFieldNames = @@ -144,6 +150,7 @@ public static AggSpec create( new AggPushDownAction(builderAndParser, extendedTypeMapping, bucketNames).getScriptCount(), kind, inferMeasureSortTarget(rootBuilder), + isRareTopSupported(rootBuilder), null, null, null, @@ -204,6 +211,7 @@ public AggSpec withBucketSort(List collations, List f scriptCount, kind, measureSortTarget, + rareTopSupported, collations, fieldNames, measureSortCollations, @@ -231,6 +239,7 @@ public AggSpec withoutBucketSort() { scriptCount, kind, measureSortTarget, + rareTopSupported, null, null, measureSortCollations, @@ -264,6 +273,7 @@ public AggSpec withSortMeasure(List collations, List scriptCount, measureSortTarget, null, + rareTopSupported, bucketSortCollations, bucketSortFieldNames, collations, @@ -273,7 +283,7 @@ public AggSpec withSortMeasure(List collations, List } public AggSpec withRareTop(RareTopDigest digest) { - if (kind != AggKind.COMPOSITE) { + if (kind != AggKind.COMPOSITE || !rareTopSupported) { throw new OpenSearchRequestBuilder.PushDownUnSupportedException("Cannot pushdown " + digest); } return new AggSpec( @@ -291,12 +301,13 @@ public AggSpec withRareTop(RareTopDigest digest) { scriptCount, AggKind.RARE_TOP, null, + rareTopSupported, bucketSortCollations, bucketSortFieldNames, measureSortCollations, measureSortFieldNames, digest, - null); + digest.byList().isEmpty() ? 
digest.number() : DEFAULT_MAX_BUCKETS); } public AggSpec withLimit(int size) { @@ -318,6 +329,7 @@ public AggSpec withLimit(int size) { scriptCount, kind, measureSortTarget, + rareTopSupported, bucketSortCollations, bucketSortFieldNames, measureSortCollations, @@ -382,12 +394,29 @@ private static AggKind inferKind(@Nullable AggregationBuilder rootBuilder) { return AggKind.OTHER; } + private static boolean isRareTopSupported(@Nullable AggregationBuilder rootBuilder) { + AggregationBuilder builder = unwrapNestedBuilder(rootBuilder); + if (!(builder instanceof CompositeAggregationBuilder composite)) { + return false; + } + if (composite.sources().size() == 1) { + return composite.sources().getFirst() instanceof TermsValuesSourceBuilder terms + && !terms.missingBucket(); + } + return composite.sources().stream() + .allMatch(src -> src instanceof TermsValuesSourceBuilder terms && !terms.missingBucket()); + } + @Nullable private static AggKind inferMeasureSortTarget(@Nullable AggregationBuilder rootBuilder) { AggregationBuilder builder = unwrapNestedBuilder(rootBuilder); if (!(builder instanceof CompositeAggregationBuilder composite)) { return null; } + if (composite.getSubAggregations().stream() + .anyMatch(metric -> !(metric instanceof ValuesSourceAggregationBuilder.LeafOnly))) { + return null; + } if (composite.sources().size() == 1) { CompositeValuesSourceBuilder source = composite.sources().getFirst(); if (source instanceof TermsValuesSourceBuilder terms && !terms.missingBucket()) { diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/CalciteIndexScanCostTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/CalciteIndexScanCostTest.java index 021a64aad7d..456cea13150 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/CalciteIndexScanCostTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/CalciteIndexScanCostTest.java @@ -39,7 +39,6 @@ import 
org.apache.calcite.sql.type.SqlTypeFactoryImpl; import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.util.ImmutableBitSet; -import org.apache.commons.lang3.tuple.Pair; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; @@ -47,10 +46,8 @@ import org.mockito.junit.jupiter.MockitoExtension; import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.common.setting.Settings.Key; -import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder; import org.opensearch.sql.opensearch.storage.OpenSearchIndex; -import org.opensearch.sql.opensearch.storage.scan.context.AggPushDownAction; -import org.opensearch.sql.opensearch.storage.scan.context.AggregationBuilderAction; +import org.opensearch.sql.opensearch.storage.scan.context.AggSpec; import org.opensearch.sql.opensearch.storage.scan.context.FilterDigest; import org.opensearch.sql.opensearch.storage.scan.context.LimitDigest; import org.opensearch.sql.opensearch.storage.scan.context.OSRequestBuilderAction; @@ -60,6 +57,7 @@ @ExtendWith(MockitoExtension.class) public class CalciteIndexScanCostTest { static final RelDataTypeFactory typeFactory = new SqlTypeFactoryImpl(RelDataTypeSystem.DEFAULT); + private static final OSRequestBuilderAction NO_OP_ACTION = req -> {}; final RexBuilder builder = new RexBuilder(typeFactory); @Mock private static RelOptCluster cluster; @@ -210,17 +208,12 @@ void test_cost_on_aggregate_pushdown() { null, List.of()); when(mq.getRowCount(aggregate)).thenReturn(1000d); - AggPushDownAction action = - new AggPushDownAction(Pair.of(List.of(), null), null, List.of()) { - @Override - public void apply(OpenSearchRequestBuilder requestBuilder) {} - }; lenient().when(relDataType.getFieldList()).thenReturn(new MockFieldList(1)); lenient().when(relDataType.getFieldCount()).thenReturn(1); lenient().when(table.getRowType()).thenReturn(relDataType); scan.getPushDownContext() - .add(new 
PushDownOperation(PushDownType.AGGREGATION, aggregate, action)); + .add(new PushDownOperation(PushDownType.AGGREGATION, aggregate, NO_OP_ACTION)); assertEquals(1800, Objects.requireNonNull(scan.computeSelfCost(planner, mq)).getRows()); } @@ -233,11 +226,6 @@ void test_cost_on_aggregate_pushdown_with_one_aggCall() { lenient().when(table.getRowType()).thenReturn(relDataType); CalciteLogicalIndexScan scan = new CalciteLogicalIndexScan(cluster, table, osIndex); - AggPushDownAction action = - new AggPushDownAction(Pair.of(List.of(), null), null, List.of()) { - @Override - public void apply(OpenSearchRequestBuilder requestBuilder) {} - }; AggregateCall countCall = AggregateCall.create( SqlStdOperatorTable.COUNT, @@ -266,7 +254,7 @@ public void apply(OpenSearchRequestBuilder requestBuilder) {} lenient().when(table.getRowType()).thenReturn(relDataType); scan.getPushDownContext() - .add(new PushDownOperation(PushDownType.AGGREGATION, aggregate, action)); + .add(new PushDownOperation(PushDownType.AGGREGATION, aggregate, NO_OP_ACTION)); assertEquals(2812.5, Objects.requireNonNull(scan.computeSelfCost(planner, mq)).getRows()); } @@ -279,11 +267,6 @@ void test_cost_on_aggregate_pushdown_with_two_aggCall() { lenient().when(table.getRowType()).thenReturn(relDataType); CalciteLogicalIndexScan scan = new CalciteLogicalIndexScan(cluster, table, osIndex); - AggPushDownAction action = - new AggPushDownAction(Pair.of(List.of(), null), null, List.of()) { - @Override - public void apply(OpenSearchRequestBuilder requestBuilder) {} - }; AggregateCall countCall = AggregateCall.create( SqlStdOperatorTable.COUNT, @@ -325,7 +308,7 @@ public void apply(OpenSearchRequestBuilder requestBuilder) {} lenient().when(table.getRowType()).thenReturn(relDataType); scan.getPushDownContext() - .add(new PushDownOperation(PushDownType.AGGREGATION, aggregate, action)); + .add(new PushDownOperation(PushDownType.AGGREGATION, aggregate, NO_OP_ACTION)); assertEquals( 3836.2500429153442, 
Objects.requireNonNull(scan.computeSelfCost(planner, mq)).getRows()); } @@ -339,16 +322,8 @@ void test_cost_on_aggregate_pushdown_with_one_aggCall_with_script() { lenient().when(table.getRowType()).thenReturn(relDataType); CalciteLogicalIndexScan scan = new CalciteLogicalIndexScan(cluster, table, osIndex); - AggPushDownAction action = - new AggPushDownAction(Pair.of(List.of(), null), null, List.of()) { - @Override - public void apply(OpenSearchRequestBuilder requestBuilder) {} - - @Override - public long getScriptCount() { - return 1; - } - }; + AggSpec aggSpec = mock(AggSpec.class); + when(aggSpec.getScriptCount()).thenReturn(1L); AggregateCall countCall = AggregateCall.create( SqlStdOperatorTable.COUNT, @@ -375,9 +350,10 @@ public long getScriptCount() { lenient().when(relDataType.getFieldList()).thenReturn(new MockFieldList(2)); lenient().when(relDataType.getFieldCount()).thenReturn(2); lenient().when(table.getRowType()).thenReturn(relDataType); + scan.getPushDownContext().setAggSpec(aggSpec); scan.getPushDownContext() - .add(new PushDownOperation(PushDownType.AGGREGATION, aggregate, action)); + .add(new PushDownOperation(PushDownType.AGGREGATION, aggregate, NO_OP_ACTION)); assertEquals( 2913.7500643730164, Objects.requireNonNull(scan.computeSelfCost(planner, mq)).getRows()); } @@ -458,16 +434,8 @@ void test_cost_on_aggregate_pushdown_along_with_others() { lenient().when(table.getRowType()).thenReturn(relDataType); CalciteLogicalIndexScan scan = new CalciteLogicalIndexScan(cluster, table, osIndex); - AggPushDownAction action = - new AggPushDownAction(Pair.of(List.of(), null), null, List.of()) { - @Override - public void apply(OpenSearchRequestBuilder requestBuilder) {} - - @Override - public long getScriptCount() { - return 1; - } - }; + AggSpec aggSpec = mock(AggSpec.class); + when(aggSpec.getScriptCount()).thenReturn(1L); AggregateCall countCall = AggregateCall.create( SqlStdOperatorTable.COUNT, @@ -494,6 +462,7 @@ public long getScriptCount() { 
lenient().when(relDataType.getFieldList()).thenReturn(new MockFieldList(2)); lenient().when(relDataType.getFieldCount()).thenReturn(2); lenient().when(table.getRowType()).thenReturn(relDataType); + scan.getPushDownContext().setAggSpec(aggSpec); List projectDigest1 = List.of("A", "B"); scan.getPushDownContext() @@ -501,19 +470,18 @@ public long getScriptCount() { new PushDownOperation( PushDownType.PROJECT, projectDigest1, (OSRequestBuilderAction) req -> {})); scan.getPushDownContext() - .add(new PushDownOperation(PushDownType.AGGREGATION, aggregate, action)); + .add(new PushDownOperation(PushDownType.AGGREGATION, aggregate, NO_OP_ACTION)); List projectDigest2 = List.of("COUNT"); scan.getPushDownContext() .add( new PushDownOperation( - PushDownType.PROJECT, projectDigest2, (AggregationBuilderAction) req -> {})); + PushDownType.PROJECT, projectDigest2, NO_OP_ACTION)); scan.getPushDownContext() .add(new PushDownOperation(PushDownType.SORT, null, (OSRequestBuilderAction) req -> {})); LimitDigest limitDigest = new LimitDigest(100, 0); scan.getPushDownContext() .add( - new PushDownOperation( - PushDownType.LIMIT, limitDigest, (AggregationBuilderAction) req -> {})); + new PushDownOperation(PushDownType.LIMIT, limitDigest, NO_OP_ACTION)); lenient().when(relDataType.getFieldList()).thenReturn(new MockFieldList(projectDigest2.size())); assertEquals( 2102.8500643730163, Objects.requireNonNull(scan.computeSelfCost(planner, mq)).getRows()); From f36c20adac5777ecf76aa145d8045dbadd8a966d Mon Sep 17 00:00:00 2001 From: Songkan Tang Date: Tue, 17 Mar 2026 11:11:35 +0800 Subject: [PATCH 5/8] Fix spotless check Signed-off-by: Songkan Tang --- .../opensearch/storage/scan/CalciteIndexScanCostTest.java | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/CalciteIndexScanCostTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/CalciteIndexScanCostTest.java index 
456cea13150..cc2a9051b69 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/CalciteIndexScanCostTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/CalciteIndexScanCostTest.java @@ -473,15 +473,12 @@ void test_cost_on_aggregate_pushdown_along_with_others() { .add(new PushDownOperation(PushDownType.AGGREGATION, aggregate, NO_OP_ACTION)); List projectDigest2 = List.of("COUNT"); scan.getPushDownContext() - .add( - new PushDownOperation( - PushDownType.PROJECT, projectDigest2, NO_OP_ACTION)); + .add(new PushDownOperation(PushDownType.PROJECT, projectDigest2, NO_OP_ACTION)); scan.getPushDownContext() .add(new PushDownOperation(PushDownType.SORT, null, (OSRequestBuilderAction) req -> {})); LimitDigest limitDigest = new LimitDigest(100, 0); scan.getPushDownContext() - .add( - new PushDownOperation(PushDownType.LIMIT, limitDigest, NO_OP_ACTION)); + .add(new PushDownOperation(PushDownType.LIMIT, limitDigest, NO_OP_ACTION)); lenient().when(relDataType.getFieldList()).thenReturn(new MockFieldList(projectDigest2.size())); assertEquals( 2102.8500643730163, Objects.requireNonNull(scan.computeSelfCost(planner, mq)).getRows()); From 84892bc2456d5098d999c46a809243cfb2e068fa Mon Sep 17 00:00:00 2001 From: Songkan Tang Date: Fri, 20 Mar 2026 15:03:38 +0800 Subject: [PATCH 6/8] Keep the pushdown limit logic as before Signed-off-by: Songkan Tang --- .../calcite/agg_filter_nested.yaml | 3 +- .../chart_timestamp_span_and_category.yaml | 2 +- .../calcite/explain_agg_paginating_join1.yaml | 2 +- .../calcite/explain_agg_sort_on_measure1.yaml | 3 +- .../calcite/explain_agg_sort_on_measure2.yaml | 3 +- .../explain_agg_sort_on_measure_complex1.yaml | 3 +- .../explain_agg_sort_on_measure_complex2.yaml | 3 +- ...plain_agg_sort_on_measure_multi_terms.yaml | 3 +- ...gg_sort_on_measure_multi_terms_script.yaml | 3 +- .../explain_agg_sort_on_measure_script.yaml | 3 +- .../explain_count_eval_complex_push.json | 4 +- 
.../calcite/explain_count_eval_push.json | 4 +- .../calcite/explain_nested_agg_top_push.yaml | 3 +- .../storage/scan/CalciteLogicalIndexScan.java | 21 ++----- .../storage/scan/context/AggSpec.java | 60 ++++++++++++++++++- 15 files changed, 77 insertions(+), 43 deletions(-) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_filter_nested.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_filter_nested.yaml index 840f9d39352..c566e7e18f4 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/agg_filter_nested.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_filter_nested.yaml @@ -5,4 +5,5 @@ calcite: LogicalProject($f1=[CASE(<($7, 'K'), 1, null:NULL)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_cascaded_nested]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_cascaded_nested]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={},george_and_jk=COUNT() FILTER $0), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"george_and_jk":{"filter":{"nested":{"query":{"range":{"author.name":{"from":null,"to":"K","include_lower":true,"include_upper":false,"boost":1.0}}},"path":"author","ignore_unmapped":false,"score_mode":"none","boost":1.0}},"aggregations":{"george_and_jk":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + EnumerableLimit(fetch=[10000]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_cascaded_nested]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={},george_and_jk=COUNT() FILTER $0)], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"george_and_jk":{"filter":{"nested":{"query":{"range":{"author.name":{"from":null,"to":"K","include_lower":true,"include_upper":false,"boost":1.0}}},"path":"author","ignore_unmapped":false,"score_mode":"none","boost":1.0}},"aggregations":{"george_and_jk":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/chart_timestamp_span_and_category.yaml b/integ-test/src/test/resources/expectedOutput/calcite/chart_timestamp_span_and_category.yaml index 610d0b3f11b..9267e6faab1 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/chart_timestamp_span_and_category.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/chart_timestamp_span_and_category.yaml @@ -28,4 +28,4 @@ calcite: EnumerableCalc(expr#0..2=[{inputs}], category=[$t0], $1=[$t2]) EnumerableWindow(window#0=[window(order by [1 DESC-nulls-last] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) EnumerableAggregate(group=[{0}], __grand_total__=[SUM($1)]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[FILTER->AND(IS NOT NULL($2), IS NOT NULL($1)), FILTER->IS NOT NULL($0), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 2},max(value)=MAX($1)), PROJECT->[category, max(value)]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"filter":[{"bool":{"must":[{"exists":{"field":"timestamp","boost":1.0}},{"exists":{"field":"value","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"category","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"category":{"terms":{"field":"category","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"timestamp0":{"date_histogram":{"field":"timestamp","missing_bucket":false,"order":"asc","calendar_interval":"1w"}}}]},"aggregations":{"max(value)":{"max":{"field":"value"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[FILTER->AND(IS NOT NULL($2), IS NOT NULL($1)), FILTER->IS NOT NULL($0), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 2},max(value)=MAX($1)), PROJECT->[category, max(value)]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"filter":[{"bool":{"must":[{"exists":{"field":"timestamp","boost":1.0}},{"exists":{"field":"value","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"category","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"category":{"terms":{"field":"category","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"timestamp0":{"date_histogram":{"field":"timestamp","missing_bucket":false,"order":"asc","calendar_interval":"1w"}}}]},"aggregations":{"max(value)":{"max":{"field":"value"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join1.yaml 
b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join1.yaml index e47f3210546..ea76cdee61e 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join1.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join1.yaml @@ -16,4 +16,4 @@ calcite: EnumerableLimit(fetch=[10000]) EnumerableMergeJoin(condition=[=($1, $3)], joinType=[inner]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), PROJECT->[c, state], SORT->[1]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":2,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"last","order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), PROJECT->[c, state], LIMIT->50000, SORT->[1]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":2,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"last","order":"asc"}}}]}}}}, requestedTotalSize=50000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), PROJECT->[c, state], LIMIT->50000, SORT->[1]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":2,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"last","order":"asc"}}}]}}}}, 
requestedTotalSize=50000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure1.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure1.yaml index acb81c11dc3..c3998eb9e90 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure1.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure1.yaml @@ -8,5 +8,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - EnumerableLimit(fetch=[10000]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), state], SORT_AGG_METRICS->[0 ASC FIRST]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"state":{"terms":{"field":"state.keyword","size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), state], SORT_AGG_METRICS->[0 ASC FIRST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"state":{"terms":{"field":"state.keyword","size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure2.yaml 
b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure2.yaml index 67310fbb71a..9c41efa9139 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure2.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure2.yaml @@ -8,5 +8,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - EnumerableLimit(fetch=[10000]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},sum=SUM($0)), PROJECT->[sum, state], SORT_AGG_METRICS->[0 DESC LAST]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"state":{"terms":{"field":"state.keyword","size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"sum":"desc"},{"_key":"asc"}]},"aggregations":{"sum":{"sum":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},sum=SUM($0)), PROJECT->[sum, state], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"state":{"terms":{"field":"state.keyword","size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"sum":"desc"},{"_key":"asc"}]},"aggregations":{"sum":{"sum":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex1.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex1.yaml index 22c03ba4519..cd0355241fe 
100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex1.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex1.yaml @@ -8,5 +8,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - EnumerableLimit(fetch=[10000]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={2},sum(balance)=SUM($0),c=COUNT(),dc(employer)=COUNT(DISTINCT $1)), PROJECT->[sum(balance), c, dc(employer), state], SORT_AGG_METRICS->[1 DESC LAST]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"state":{"terms":{"field":"state.keyword","size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(balance)":{"sum":{"field":"balance"}},"dc(employer)":{"cardinality":{"field":"employer.keyword"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={2},sum(balance)=SUM($0),c=COUNT(),dc(employer)=COUNT(DISTINCT $1)), PROJECT->[sum(balance), c, dc(employer), state], SORT_AGG_METRICS->[1 DESC LAST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"state":{"terms":{"field":"state.keyword","size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(balance)":{"sum":{"field":"balance"}},"dc(employer)":{"cardinality":{"field":"employer.keyword"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, 
pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex2.yaml index 1c8c0ec5ee9..59cd137ca59 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex2.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex2.yaml @@ -9,5 +9,4 @@ calcite: LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], new_state=[LOWER($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - EnumerableLimit(fetch=[10000]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},sum(balance)=SUM($2),count()=COUNT(),d=COUNT(DISTINCT $3)), PROJECT->[sum(balance), count(), d, gender, new_state], SORT_AGG_METRICS->[2 DESC LAST]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"gender|new_state":{"multi_terms":{"terms":[{"field":"gender.keyword"},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 
0,"SOURCES":[0],"DIGESTS":["state.keyword"]}}}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"d":"desc"},{"_key":"asc"}]},"aggregations":{"sum(balance)":{"sum":{"field":"balance"}},"d":{"cardinality":{"field":"employer.keyword"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},sum(balance)=SUM($2),count()=COUNT(),d=COUNT(DISTINCT $3)), PROJECT->[sum(balance), count(), d, gender, new_state], SORT_AGG_METRICS->[2 DESC LAST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"gender|new_state":{"multi_terms":{"terms":[{"field":"gender.keyword"},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}}}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"d":"desc"},{"_key":"asc"}]},"aggregations":{"sum(balance)":{"sum":{"field":"balance"}},"d":{"cardinality":{"field":"employer.keyword"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms.yaml index 2047c63541e..887f35fbf85 100644 --- 
a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms.yaml @@ -8,5 +8,4 @@ calcite: LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($7))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - EnumerableLimit(fetch=[10000]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), gender, state], SORT_AGG_METRICS->[0 ASC FIRST]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"gender|state":{"multi_terms":{"terms":[{"field":"gender.keyword"},{"field":"state.keyword"}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), gender, state], SORT_AGG_METRICS->[0 ASC FIRST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"gender|state":{"multi_terms":{"terms":[{"field":"gender.keyword"},{"field":"state.keyword"}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms_script.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms_script.yaml index 26c51980831..dff3dd78359 100644 --- 
a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms_script.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms_script.yaml @@ -9,5 +9,4 @@ calcite: LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], new_gender=[LOWER($4)], new_state=[LOWER($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - EnumerableLimit(fetch=[10000]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), new_gender, new_state], SORT_AGG_METRICS->[0 ASC FIRST]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"new_gender|new_state":{"multi_terms":{"terms":[{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 
0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}}},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}}}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), new_gender, new_state], SORT_AGG_METRICS->[0 ASC FIRST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"new_gender|new_state":{"multi_terms":{"terms":[{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 
0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}}},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}}}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_script.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_script.yaml index 9f9e12dad44..cea7ee2546f 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_script.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_script.yaml @@ -9,5 +9,4 @@ calcite: LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], new_state=[LOWER($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - EnumerableLimit(fetch=[10000]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), new_state], SORT_AGG_METRICS->[0 ASC FIRST]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), new_state], SORT_AGG_METRICS->[0 ASC FIRST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git 
a/integ-test/src/test/resources/expectedOutput/calcite/explain_count_eval_complex_push.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_count_eval_complex_push.json index 7132c66c807..8e429a7f610 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_count_eval_complex_push.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_count_eval_complex_push.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalAggregate(group=[{}], mature_count=[COUNT($0)])\n LogicalProject($f1=[CASE(SEARCH($10, Sarg[(30..50)]), 1, null:NULL)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={},mature_count=COUNT() FILTER $0), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"mature_count\":{\"filter\":{\"range\":{\"age\":{\"from\":30.0,\"to\":50.0,\"include_lower\":false,\"include_upper\":false,\"boost\":1.0}}},\"aggregations\":{\"mature_count\":{\"value_count\":{\"field\":\"_index\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + "physical": "EnumerableLimit(fetch=[10000])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={},mature_count=COUNT() FILTER $0)], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"mature_count\":{\"filter\":{\"range\":{\"age\":{\"from\":30.0,\"to\":50.0,\"include_lower\":false,\"include_upper\":false,\"boost\":1.0}}},\"aggregations\":{\"mature_count\":{\"value_count\":{\"field\":\"_index\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" } -} +} \ No 
newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_count_eval_push.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_count_eval_push.json index 46adc8ff8f0..f0b75595a56 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_count_eval_push.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_count_eval_push.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalAggregate(group=[{}], mature_count=[COUNT($0)])\n LogicalProject($f1=[CASE(>($10, 30), 1, null:NULL)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={},mature_count=COUNT() FILTER $0), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"mature_count\":{\"filter\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"aggregations\":{\"mature_count\":{\"value_count\":{\"field\":\"_index\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + "physical": "EnumerableLimit(fetch=[10000])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={},mature_count=COUNT() FILTER $0)], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"mature_count\":{\"filter\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"aggregations\":{\"mature_count\":{\"value_count\":{\"field\":\"_index\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" } -} +} \ No newline at end of file diff 
--git a/integ-test/src/test/resources/expectedOutput/calcite/explain_nested_agg_top_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_nested_agg_top_push.yaml index ab1a67d814d..46611b40abb 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_nested_agg_top_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_nested_agg_top_push.yaml @@ -9,5 +9,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($3)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_nested_simple]]) physical: | - EnumerableLimit(fetch=[10000]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_nested_simple]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count=COUNT()), RARE_TOP->top 10 address.city, PROJECT->[address.city, count]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"nested_composite_buckets":{"nested":{"path":"address"},"aggregations":{"address.city":{"terms":{"field":"address.city.keyword","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_nested_simple]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count=COUNT()), RARE_TOP->top 10 address.city, PROJECT->[address.city, count], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"nested_composite_buckets":{"nested":{"path":"address"},"aggregations":{"address.city":{"terms":{"field":"address.city.keyword","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git 
a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java index b21d24d2c66..ac93c7a9d68 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java @@ -430,24 +430,11 @@ public AbstractRelNode pushDownLimit(LogicalSort sort, Integer limit, Integer of .map(op -> (LimitDigest) op.digest()) .map(d -> totalSize < d.offset() + d.limit()) .orElse(true); - boolean canUpdateBuilder = aggSpec.canPushDownLimitIntoBucketSize(totalSize); - boolean alreadyBoundedByCurrentBucketSize = - aggSpec.getBucketSize() != null && totalSize <= aggSpec.getBucketSize(); - boolean alreadyEnforcedByExistingLimit = - pushDownContext.isLimitPushed() && !canReduceEstimatedRowsCount; - boolean canEnforceLimit = - aggSpec.isCompositeAggregation() - || canUpdateBuilder - || alreadyBoundedByCurrentBucketSize - || alreadyEnforcedByExistingLimit - || (aggSpec.isSingleRowAggregation() && offset == 0); - - // Push down the limit into the aggregation bucket in advance to detect whether the limit - // can update the aggregation builder - boolean canUpdate = canReduceEstimatedRowsCount || canUpdateBuilder; - if (!canEnforceLimit || (!canUpdate && offset > 0)) return null; + boolean canUpdate = + canReduceEstimatedRowsCount || aggSpec.canPushDownLimitIntoBucketSize(totalSize); + if (!canUpdate && offset > 0) return null; CalciteLogicalIndexScan newScan = this.copyWithNewSchema(getRowType()); - if (canUpdateBuilder) { + if (canUpdate) { newScan.pushDownContext.setAggSpec(aggSpec.withLimit(limit + offset)); } AbstractAction action; diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggSpec.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggSpec.java 
index 4d9975c4f65..daad72fa014 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggSpec.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggSpec.java @@ -49,6 +49,13 @@ private enum AggKind { RARE_TOP } + private enum LimitPushdownMode { + UNSUPPORTED, + ESTIMATE_ONLY, + LEAF_METRIC, + BUCKET_SIZE + } + private final Aggregate aggregate; @Nullable private final Project project; private final List outputFields; @@ -62,6 +69,7 @@ private enum AggKind { private final List bucketNames; private final long scriptCount; private final AggKind kind; + private final LimitPushdownMode limitPushdownMode; @Nullable private final AggKind measureSortTarget; private final boolean rareTopSupported; @Nullable private final List bucketSortCollations; @@ -85,6 +93,7 @@ private AggSpec( List bucketNames, long scriptCount, AggKind kind, + LimitPushdownMode limitPushdownMode, @Nullable AggKind measureSortTarget, boolean rareTopSupported, @Nullable List bucketSortCollations, @@ -106,6 +115,7 @@ private AggSpec( this.bucketNames = List.copyOf(bucketNames); this.scriptCount = scriptCount; this.kind = kind; + this.limitPushdownMode = limitPushdownMode; this.measureSortTarget = measureSortTarget; this.rareTopSupported = rareTopSupported; this.bucketSortCollations = @@ -149,6 +159,7 @@ public static AggSpec create( bucketNames, new AggPushDownAction(builderAndParser, extendedTypeMapping, bucketNames).getScriptCount(), kind, + inferLimitPushdownMode(builderAndParser.getLeft(), rootBuilder), inferMeasureSortTarget(rootBuilder), isRareTopSupported(rootBuilder), null, @@ -168,7 +179,14 @@ public boolean isSingleRowAggregation() { } public boolean canPushDownLimitIntoBucketSize(int size) { - return bucketSize != null && size < bucketSize; + return switch (limitPushdownMode) { + case BUCKET_SIZE -> bucketSize != null && size < bucketSize; + case LEAF_METRIC -> true; + case ESTIMATE_ONLY -> false; + case UNSUPPORTED -> + 
throw new OpenSearchRequestBuilder.PushDownUnSupportedException( + "Cannot pushdown limit into aggregation bucket"); + }; } public AggSpec withBucketSort(List collations, List fieldNames) { @@ -210,6 +228,7 @@ public AggSpec withBucketSort(List collations, List f newBucketNames, scriptCount, kind, + limitPushdownMode, measureSortTarget, rareTopSupported, collations, @@ -238,6 +257,7 @@ public AggSpec withoutBucketSort() { initialBucketNames, scriptCount, kind, + limitPushdownMode, measureSortTarget, rareTopSupported, null, @@ -272,6 +292,7 @@ public AggSpec withSortMeasure(List collations, List bucketNames, scriptCount, measureSortTarget, + inferLimitPushdownMode(measureSortTarget), null, rareTopSupported, bucketSortCollations, @@ -300,6 +321,7 @@ public AggSpec withRareTop(RareTopDigest digest) { bucketNames, scriptCount, AggKind.RARE_TOP, + LimitPushdownMode.BUCKET_SIZE, null, rareTopSupported, bucketSortCollations, @@ -311,8 +333,17 @@ public AggSpec withRareTop(RareTopDigest digest) { } public AggSpec withLimit(int size) { - if (!canPushDownLimitIntoBucketSize(size)) { - return this; + switch (limitPushdownMode) { + case ESTIMATE_ONLY, LEAF_METRIC: + return this; + case UNSUPPORTED: + throw new OpenSearchRequestBuilder.PushDownUnSupportedException( + "Cannot pushdown limit into aggregation bucket"); + case BUCKET_SIZE: + if (!canPushDownLimitIntoBucketSize(size)) { + return this; + } + break; } return new AggSpec( aggregate, @@ -328,6 +359,7 @@ public AggSpec withLimit(int size) { bucketNames, scriptCount, kind, + limitPushdownMode, measureSortTarget, rareTopSupported, bucketSortCollations, @@ -394,6 +426,28 @@ private static AggKind inferKind(@Nullable AggregationBuilder rootBuilder) { return AggKind.OTHER; } + private static LimitPushdownMode inferLimitPushdownMode(AggKind kind) { + return switch (kind) { + case COMPOSITE, TERMS, MULTI_TERMS, TOP_HITS, RARE_TOP -> LimitPushdownMode.BUCKET_SIZE; + case OTHER, DATE_HISTOGRAM, HISTOGRAM -> 
LimitPushdownMode.UNSUPPORTED; + }; + } + + private static LimitPushdownMode inferLimitPushdownMode( + List builders, @Nullable AggregationBuilder rootBuilder) { + if (builders.isEmpty()) { + // count() optimization uses hits.total and leaves the builder list empty. Main still keeps + // LIMIT in PushDownContext for these cases even though no request-side limit is applied. + return LimitPushdownMode.ESTIMATE_ONLY; + } + AggregationBuilder builder = unwrapNestedBuilder(rootBuilder); + if (builder instanceof ValuesSourceAggregationBuilder.LeafOnly) { + // Main treats leaf metric aggregations as limit-pushable because they produce a single row. + return LimitPushdownMode.LEAF_METRIC; + } + return inferLimitPushdownMode(inferKind(rootBuilder)); + } + private static boolean isRareTopSupported(@Nullable AggregationBuilder rootBuilder) { AggregationBuilder builder = unwrapNestedBuilder(rootBuilder); if (!(builder instanceof CompositeAggregationBuilder composite)) { From 05a2ed23835ae4ead3bd03b2237f536ec0cf3186 Mon Sep 17 00:00:00 2001 From: Songkan Tang Date: Mon, 23 Mar 2026 16:03:20 +0800 Subject: [PATCH 7/8] Minor change to reduce unnecessary changes Signed-off-by: Songkan Tang --- .../calcite/explain_agg_sort_on_measure1.yaml | 2 +- ...xplain_agg_sort_on_measure_multi_terms.yaml | 2 +- ...agg_sort_on_measure_multi_terms_script.yaml | 2 +- .../explain_agg_sort_on_measure_script.yaml | 2 +- .../calcite/explain_nested_agg_top_push.yaml | 2 +- .../storage/scan/CalciteLogicalIndexScan.java | 2 ++ .../scan/context/AggPushDownAction.java | 18 ++++-------------- .../storage/scan/context/AggSpec.java | 4 ++-- 8 files changed, 13 insertions(+), 21 deletions(-) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure1.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure1.yaml index c3998eb9e90..c08c533bc60 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure1.yaml +++
b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure1.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), state], SORT_AGG_METRICS->[0 ASC FIRST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"state":{"terms":{"field":"state.keyword","size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), state], SORT_AGG_METRICS->[0 ASC FIRST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"state":{"terms":{"field":"state.keyword","size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms.yaml index 887f35fbf85..b584249d91a 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[AND(IS NOT NULL($4), IS 
NOT NULL($7))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), gender, state], SORT_AGG_METRICS->[0 ASC FIRST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"gender|state":{"multi_terms":{"terms":[{"field":"gender.keyword"},{"field":"state.keyword"}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), gender, state], SORT_AGG_METRICS->[0 ASC FIRST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"gender|state":{"multi_terms":{"terms":[{"field":"gender.keyword"},{"field":"state.keyword"}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms_script.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms_script.yaml index dff3dd78359..44a51b2171d 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms_script.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms_script.yaml @@ -9,4 +9,4 @@ calcite: LogicalProject(account_number=[$0], 
firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], new_gender=[LOWER($4)], new_state=[LOWER($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), new_gender, new_state], SORT_AGG_METRICS->[0 ASC FIRST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"new_gender|new_state":{"multi_terms":{"terms":[{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}}},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}}}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, 
pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), new_gender, new_state], SORT_AGG_METRICS->[0 ASC FIRST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"new_gender|new_state":{"multi_terms":{"terms":[{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}}},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}}}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_script.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_script.yaml index cea7ee2546f..e24043592fe 100644 --- 
a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_script.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_script.yaml @@ -9,4 +9,4 @@ calcite: LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], new_state=[LOWER($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), new_state], SORT_AGG_METRICS->[0 ASC FIRST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), new_state], SORT_AGG_METRICS->[0 ASC FIRST], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_nested_agg_top_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_nested_agg_top_push.yaml index 46611b40abb..65f3ed320fc 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_nested_agg_top_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_nested_agg_top_push.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($3)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_nested_simple]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_nested_simple]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count=COUNT()), RARE_TOP->top 10 address.city, PROJECT->[address.city, count], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"nested_composite_buckets":{"nested":{"path":"address"},"aggregations":{"address.city":{"terms":{"field":"address.city.keyword","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_nested_simple]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count=COUNT()), RARE_TOP->top 10 address.city, PROJECT->[address.city, count], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"nested_composite_buckets":{"nested":{"path":"address"},"aggregations":{"address.city":{"terms":{"field":"address.city.keyword","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java index 85ec1943a5d..c7855c46844 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java @@ -428,6 +428,8 @@ public AbstractRelNode pushDownAggregate(Aggregate aggregate, @Nullable Project extendedTypeMapping, bucketNames, builderAndParser); + // Now agg state and request is lazily built by AggSpec.build(). 
Agg operation in + // PushDownContext is a no_op marker newScan.pushDownContext.setAggSpec(aggSpec); newScan.pushDownContext.add( PushDownType.AGGREGATION, aggregate, (OSRequestBuilderAction) requestBuilder -> {}); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java index 64ad826dffe..9666ea317e5 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java @@ -33,6 +33,7 @@ import org.opensearch.search.aggregations.bucket.nested.NestedAggregationBuilder; import org.opensearch.search.aggregations.bucket.terms.MultiTermsAggregationBuilder; import org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder; +import org.opensearch.search.aggregations.metrics.TopHitsAggregationBuilder; import org.opensearch.search.aggregations.support.MultiTermsValuesSourceConfig; import org.opensearch.search.aggregations.support.ValuesSourceAggregationBuilder; import org.opensearch.search.sort.SortOrder; @@ -432,7 +433,7 @@ public void pushDownSortIntoAggBucket( return; } if (builder instanceof TermsAggregationBuilder termsAggBuilder) { - pushDownSortIntoTermsBucket(original, termsAggBuilder, collations); + termsAggBuilder.order(BucketOrder.key(!collations.getFirst().getDirection().isDescending())); } // TODO for MultiTermsAggregationBuilder } @@ -497,13 +498,6 @@ private void pushDownSortIntoCompositeBucket( bucketNames = newBucketNames; } - private void pushDownSortIntoTermsBucket( - AggregationBuilder original, - TermsAggregationBuilder termsAggBuilder, - List collations) { - termsAggBuilder.order(BucketOrder.key(!collations.getFirst().getDirection().isDescending())); - } - private static Integer getBucketSize(AggregationBuilder builder) { if (builder 
instanceof CompositeAggregationBuilder compositeAggBuilder) { return compositeAggBuilder.size(); @@ -514,9 +508,7 @@ private static Integer getBucketSize(AggregationBuilder builder) { if (builder instanceof MultiTermsAggregationBuilder multiTermsAggBuilder) { return multiTermsAggBuilder.size(); } - if (builder - instanceof - org.opensearch.search.aggregations.metrics.TopHitsAggregationBuilder topHitsAggBuilder) { + if (builder instanceof TopHitsAggregationBuilder topHitsAggBuilder) { return topHitsAggBuilder.size(); } return null; @@ -535,9 +527,7 @@ private static void resizeBucketBuilder(AggregationBuilder builder, int size) { multiTermsAggBuilder.size(size); return; } - if (builder - instanceof - org.opensearch.search.aggregations.metrics.TopHitsAggregationBuilder topHitsAggBuilder) { + if (builder instanceof TopHitsAggregationBuilder topHitsAggBuilder) { topHitsAggBuilder.size(size); return; } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggSpec.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggSpec.java index daad72fa014..8bf47f44c89 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggSpec.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggSpec.java @@ -436,13 +436,13 @@ private static LimitPushdownMode inferLimitPushdownMode(AggKind kind) { private static LimitPushdownMode inferLimitPushdownMode( List builders, @Nullable AggregationBuilder rootBuilder) { if (builders.isEmpty()) { - // count() optimization uses hits.total and leaves the builder list empty. Main still keeps + // count() optimization uses hits.total and leaves the builder list empty. Keeps // LIMIT in PushDownContext for these cases even though no request-side limit is applied. 
return LimitPushdownMode.ESTIMATE_ONLY; } AggregationBuilder builder = unwrapNestedBuilder(rootBuilder); if (builder instanceof ValuesSourceAggregationBuilder.LeafOnly) { - // Main treats leaf metric aggregations as limit-pushable because they produce a single row. + // Treats leaf metric aggregations as limit-pushable because they produce a single row. return LimitPushdownMode.LEAF_METRIC; } return inferLimitPushdownMode(inferKind(rootBuilder)); From c2bd684d59820ab78c9f9d4ef022956068f30c1b Mon Sep 17 00:00:00 2001 From: Songkan Tang Date: Tue, 24 Mar 2026 16:48:44 +0800 Subject: [PATCH 8/8] Address comments Signed-off-by: Songkan Tang --- .../storage/scan/CalciteLogicalIndexScan.java | 19 +- .../scan/context/AggPushDownAction.java | 270 +++++++-------- .../storage/scan/context/AggSpec.java | 320 +++++------------- .../storage/scan/context/PushDownContext.java | 4 +- 4 files changed, 214 insertions(+), 399 deletions(-) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java index c7855c46844..740801ff418 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java @@ -415,21 +415,10 @@ public AbstractRelNode pushDownAggregate(Aggregate aggregate, @Nullable Project OpenSearchDataType.of( OpenSearchTypeFactory.convertRelDataTypeToExprType( field.getType())))); - AggSpec aggSpec = - AggSpec.create( - aggregate, - project, - outputFields, - getRowType(), - fieldTypes, - getCluster(), - bucketNullable, - queryBucketSize, - extendedTypeMapping, - bucketNames, - builderAndParser); - // Now agg state and request is lazily built by AggSpec.build(). 
Agg operation in - // PushDownContext is a no_op marker + AggSpec aggSpec = AggSpec.create(extendedTypeMapping, bucketNames, builderAndParser); + // AggPushDownAction is lazily materialized by AggSpec.buildAction() and then this action + // will materialize agg request builder. + // The AGGREGATION pushdown operation in PushDownContext remains a no-op marker here. newScan.pushDownContext.setAggSpec(aggSpec); newScan.pushDownContext.add( PushDownType.AGGREGATION, aggregate, (OSRequestBuilderAction) requestBuilder -> {}); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java index 9666ea317e5..35a6c1f26cf 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java @@ -13,6 +13,7 @@ import java.util.List; import java.util.Map; import java.util.function.Consumer; +import java.util.stream.Collectors; import lombok.EqualsAndHashCode; import lombok.Getter; import org.apache.calcite.rel.RelFieldCollation; @@ -50,7 +51,6 @@ public class AggPushDownAction implements OSRequestBuilderAction { private Pair, OpenSearchAggregationResponseParser> builderAndParser; private final Map extendedTypeMapping; - private final long scriptCount; // Record the output field names of all buckets as the sequence of buckets private List bucketNames; @@ -60,39 +60,10 @@ public AggPushDownAction( List bucketNames) { this.builderAndParser = builderAndParser; this.extendedTypeMapping = extendedTypeMapping; - this.scriptCount = - builderAndParser.getLeft().stream().mapToInt(AggPushDownAction::getScriptCount).sum(); this.bucketNames = bucketNames; } - private static AggregatorFactories.Builder copySubAggregations(AggregationBuilder source) { - AggregatorFactories.Builder copiedFactories = 
new AggregatorFactories.Builder(); - source.getSubAggregations().forEach(copiedFactories::addAggregator); - source.getPipelineAggregations().forEach(copiedFactories::addPipelineAggregator); - return copiedFactories; - } - - private static AggregationBuilder unwrapNestedBuilder(AggregationBuilder rootBuilder) { - if (rootBuilder instanceof NestedAggregationBuilder nested - && !nested.getSubAggregations().isEmpty()) { - return nested.getSubAggregations().iterator().next(); - } - return rootBuilder; - } - - private void replaceRootBuilder( - AggregationBuilder originalRoot, AggregationBuilder newInnerBuilder) { - AggregationBuilder finalBuilder = newInnerBuilder; - if (originalRoot instanceof NestedAggregationBuilder nested) { - finalBuilder = - AggregationBuilders.nested(nested.getName(), nested.path()) - .subAggregation(newInnerBuilder); - } - builderAndParser = - Pair.of(Collections.singletonList(finalBuilder), builderAndParser.getRight()); - } - - private static int getScriptCount(AggregationBuilder aggBuilder) { + static int getScriptCount(AggregationBuilder aggBuilder) { if (aggBuilder instanceof NestedAggregationBuilder) { aggBuilder = aggBuilder.getSubAggregations().iterator().next(); } @@ -113,6 +84,25 @@ private static int getScriptCount(AggregationBuilder aggBuilder) { return 0; } + private static AggregatorFactories.Builder copySubAggregations(AggregationBuilder source) { + AggregatorFactories.Builder copiedFactories = new AggregatorFactories.Builder(); + source.getSubAggregations().forEach(copiedFactories::addAggregator); + source.getPipelineAggregations().forEach(copiedFactories::addPipelineAggregator); + return copiedFactories; + } + + private void replaceRootBuilder( + AggregationBuilder originalRoot, AggregationBuilder newInnerBuilder) { + AggregationBuilder finalBuilder = newInnerBuilder; + if (originalRoot instanceof NestedAggregationBuilder nested) { + finalBuilder = + AggregationBuilders.nested(nested.getName(), nested.path()) + 
.subAggregation(newInnerBuilder); + } + builderAndParser = + Pair.of(Collections.singletonList(finalBuilder), builderAndParser.getRight()); + } + @Override public void apply(OpenSearchRequestBuilder requestBuilder) { requestBuilder.pushDownAggregation(builderAndParser); @@ -136,8 +126,7 @@ private String multiTermsBucketNameAsString(CompositeAggregationBuilder composit return composite.sources().stream() .map(TermsValuesSourceBuilder.class::cast) .map(TermsValuesSourceBuilder::name) - .reduce((left, right) -> left + "|" + right) - .orElse(""); // PIPE cannot be used in identifier + .collect(Collectors.joining("|")); // PIPE cannot be used in identifier } /** Re-pushdown a sort aggregation measure to replace the pushed composite aggregation */ @@ -285,6 +274,7 @@ private TermsAggregationBuilder buildTermsAggregationBuilder( return termsBuilder; } + /** Build a {@link DateHistogramAggregationBuilder} by {@link DateHistogramValuesSourceBuilder} */ private static void copyDateHistogramInterval( DateHistogramValuesSourceBuilder source, Consumer fixedIntervalSetter, @@ -348,7 +338,6 @@ private static void copyHistogramBucketOptions( target.minDocCount(1); } - /** Build a {@link DateHistogramAggregationBuilder} by {@link DateHistogramValuesSourceBuilder} */ private DateHistogramAggregationBuilder buildDateHistogramAggregationBuilder( DateHistogramValuesSourceBuilder dateHisto, BucketOrder bucketOrder) { DateHistogramAggregationBuilder dateHistoBuilder = @@ -427,10 +416,63 @@ public void pushDownSortIntoAggBucket( // aggregationBuilder.getLeft() could be empty when count agg optimization works if (builderAndParser.getLeft().isEmpty()) return; AggregationBuilder original = builderAndParser.getLeft().getFirst(); - AggregationBuilder builder = unwrapNestedBuilder(original); + AggregationBuilder builder; + if (original instanceof NestedAggregationBuilder) { + builder = original.getSubAggregations().iterator().next(); + } else { + builder = original; + } + List selected = new 
ArrayList<>(collations.size()); if (builder instanceof CompositeAggregationBuilder compositeAggBuilder) { - pushDownSortIntoCompositeBucket(original, compositeAggBuilder, collations, fieldNames); - return; + // It will always use a single CompositeAggregationBuilder for the aggregation with GroupBy + // See {@link AggregateAnalyzer} + List> buckets = compositeAggBuilder.sources(); + List> newBuckets = new ArrayList<>(buckets.size()); + List newBucketNames = new ArrayList<>(buckets.size()); + // Have to put the collation required buckets first, then the rest of buckets. + collations.forEach( + collation -> { + /* + Must find the bucket by field name because: + 1. The sequence of buckets may have changed after sort push-down. + 2. The schema of scan operator may be inconsistent with the sequence of buckets + after project push-down. + */ + String bucketName = fieldNames.get(collation.getFieldIndex()); + CompositeValuesSourceBuilder bucket = buckets.get(bucketNames.indexOf(bucketName)); + RelFieldCollation.Direction direction = collation.getDirection(); + RelFieldCollation.NullDirection nullDirection = collation.nullDirection; + SortOrder order = + RelFieldCollation.Direction.DESCENDING.equals(direction) + ? 
SortOrder.DESC + : SortOrder.ASC; + if (bucket.missingBucket()) { + MissingOrder missingOrder = + switch (nullDirection) { + case FIRST -> MissingOrder.FIRST; + case LAST -> MissingOrder.LAST; + default -> MissingOrder.DEFAULT; + }; + bucket.missingOrder(missingOrder); + } + newBuckets.add(bucket.order(order)); + newBucketNames.add(bucketName); + selected.add(bucketName); + }); + buckets.stream() + .map(CompositeValuesSourceBuilder::name) + .filter(name -> !selected.contains(name)) + .forEach( + name -> { + newBuckets.add(buckets.get(bucketNames.indexOf(name))); + newBucketNames.add(name); + }); + AggregationBuilder finalBuilder = + AggregationBuilders.composite(compositeAggBuilder.getName(), newBuckets) + .subAggregations(copySubAggregations(compositeAggBuilder)) + .size(compositeAggBuilder.size()); + replaceRootBuilder(original, finalBuilder); + bucketNames = newBucketNames; } if (builder instanceof TermsAggregationBuilder termsAggBuilder) { termsAggBuilder.order(BucketOrder.key(!collations.getFirst().getDirection().isDescending())); @@ -438,136 +480,56 @@ public void pushDownSortIntoAggBucket( // TODO for MultiTermsAggregationBuilder } - private void pushDownSortIntoCompositeBucket( - AggregationBuilder original, - CompositeAggregationBuilder compositeAggBuilder, - List collations, - List fieldNames) { - // It will always use a single CompositeAggregationBuilder for the aggregation with GroupBy. - // See {@link AggregateAnalyzer} - List> buckets = compositeAggBuilder.sources(); - List> newBuckets = new ArrayList<>(buckets.size()); - List newBucketNames = new ArrayList<>(buckets.size()); - List selected = new ArrayList<>(collations.size()); - - // Have to put the collation required buckets first, then the rest of buckets. - collations.forEach( - collation -> { - /* - Must find the bucket by field name because: - 1. The sequence of buckets may have changed after sort push-down. - 2. 
The schema of scan operator may be inconsistent with the sequence of buckets - after project push-down. - */ - String bucketName = fieldNames.get(collation.getFieldIndex()); - CompositeValuesSourceBuilder bucket = buckets.get(bucketNames.indexOf(bucketName)); - RelFieldCollation.Direction direction = collation.getDirection(); - RelFieldCollation.NullDirection nullDirection = collation.nullDirection; - SortOrder order = - RelFieldCollation.Direction.DESCENDING.equals(direction) - ? SortOrder.DESC - : SortOrder.ASC; - if (bucket.missingBucket()) { - MissingOrder missingOrder = - switch (nullDirection) { - case FIRST -> MissingOrder.FIRST; - case LAST -> MissingOrder.LAST; - default -> MissingOrder.DEFAULT; - }; - bucket.missingOrder(missingOrder); - } - newBuckets.add(bucket.order(order)); - newBucketNames.add(bucketName); - selected.add(bucketName); - }); - - buckets.stream() - .map(CompositeValuesSourceBuilder::name) - .filter(name -> !selected.contains(name)) - .forEach( - name -> { - newBuckets.add(buckets.get(bucketNames.indexOf(name))); - newBucketNames.add(name); - }); - - AggregationBuilder finalBuilder = - AggregationBuilders.composite(compositeAggBuilder.getName(), newBuckets) - .subAggregations(copySubAggregations(compositeAggBuilder)) - .size(compositeAggBuilder.size()); - replaceRootBuilder(original, finalBuilder); - bucketNames = newBucketNames; - } - - private static Integer getBucketSize(AggregationBuilder builder) { - if (builder instanceof CompositeAggregationBuilder compositeAggBuilder) { - return compositeAggBuilder.size(); - } - if (builder instanceof TermsAggregationBuilder termsAggBuilder) { - return termsAggBuilder.size(); - } - if (builder instanceof MultiTermsAggregationBuilder multiTermsAggBuilder) { - return multiTermsAggBuilder.size(); - } - if (builder instanceof TopHitsAggregationBuilder topHitsAggBuilder) { - return topHitsAggBuilder.size(); + /** + * Check if the limit can be pushed down into aggregation bucket when the limit size is 
less than + * bucket number. + */ + public boolean pushDownLimitIntoBucketSize(Integer size) { + // aggregationBuilder.getLeft() could be empty when count agg optimization works + if (builderAndParser.getLeft().isEmpty()) return false; + AggregationBuilder builder = builderAndParser.getLeft().getFirst(); + if (builder instanceof NestedAggregationBuilder) { + builder = builder.getSubAggregations().iterator().next(); } - return null; - } - - private static void resizeBucketBuilder(AggregationBuilder builder, int size) { if (builder instanceof CompositeAggregationBuilder compositeAggBuilder) { - compositeAggBuilder.size(size); - return; + if (size < compositeAggBuilder.size()) { + compositeAggBuilder.size(size); + return true; + } else { + return false; + } } if (builder instanceof TermsAggregationBuilder termsAggBuilder) { - termsAggBuilder.size(size); - return; + if (size < termsAggBuilder.size()) { + termsAggBuilder.size(size); + return true; + } else { + return false; + } } if (builder instanceof MultiTermsAggregationBuilder multiTermsAggBuilder) { - multiTermsAggBuilder.size(size); - return; + if (size < multiTermsAggBuilder.size()) { + multiTermsAggBuilder.size(size); + return true; + } else { + return false; + } } if (builder instanceof TopHitsAggregationBuilder topHitsAggBuilder) { - topHitsAggBuilder.size(size); - return; - } - throw new IllegalStateException( - "Not a resizable bucket aggregation builder: " + builder.getClass().getSimpleName()); - } - - private AggregationBuilder resizeAggregationForLimit(AggregationBuilder builder, int size) { - Integer bucketSize = getBucketSize(builder); - if (bucketSize != null) { - if (size < bucketSize) { - resizeBucketBuilder(builder, size); - return builder; + if (size < topHitsAggBuilder.size()) { + topHitsAggBuilder.size(size); + return true; + } else { + return false; } - return null; } + // now we only have Composite, Terms and MultiTerms bucket aggregations, + // add code here when we could support more in the 
future. if (builder instanceof ValuesSourceAggregationBuilder.LeafOnly) { - // all metric aggregations generate one row and are effectively already limited. - return builder; + // Note: all metric aggregations will be treated as pushed since it generates only one row. + return true; } throw new OpenSearchRequestBuilder.PushDownUnSupportedException( "Unknown aggregation builder " + builder.getClass().getSimpleName()); } - - /** - * Check if the limit can be pushed down into aggregation bucket when the limit size is less than - * bucket number. - */ - public boolean pushDownLimitIntoBucketSize(Integer size) { - // aggregationBuilder.getLeft() could be empty when count agg optimization works - if (builderAndParser.getLeft().isEmpty()) return false; - AggregationBuilder original = builderAndParser.getLeft().getFirst(); - AggregationBuilder builder = unwrapNestedBuilder(original); - AggregationBuilder resizedBuilder = resizeAggregationForLimit(builder, size); - if (resizedBuilder == null) { - return false; - } - if (resizedBuilder != builder) { - replaceRootBuilder(original, resizedBuilder); - } - return true; - } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggSpec.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggSpec.java index 8bf47f44c89..7c0b0f15f8b 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggSpec.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggSpec.java @@ -12,11 +12,7 @@ import java.util.Map; import javax.annotation.Nullable; import lombok.Getter; -import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.rel.RelFieldCollation; -import org.apache.calcite.rel.core.Aggregate; -import org.apache.calcite.rel.core.Project; -import org.apache.calcite.rel.type.RelDataType; import org.apache.commons.lang3.tuple.Pair; import org.opensearch.search.aggregations.AggregationBuilder; import 
org.opensearch.search.aggregations.bucket.composite.CompositeAggregationBuilder; @@ -24,18 +20,18 @@ import org.opensearch.search.aggregations.bucket.composite.DateHistogramValuesSourceBuilder; import org.opensearch.search.aggregations.bucket.composite.HistogramValuesSourceBuilder; import org.opensearch.search.aggregations.bucket.composite.TermsValuesSourceBuilder; +import org.opensearch.search.aggregations.bucket.histogram.DateHistogramAggregationBuilder; +import org.opensearch.search.aggregations.bucket.histogram.HistogramAggregationBuilder; import org.opensearch.search.aggregations.bucket.nested.NestedAggregationBuilder; import org.opensearch.search.aggregations.bucket.terms.MultiTermsAggregationBuilder; import org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder; import org.opensearch.search.aggregations.metrics.TopHitsAggregationBuilder; import org.opensearch.search.aggregations.support.ValuesSourceAggregationBuilder; -import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; -import org.opensearch.sql.opensearch.request.AggregateAnalyzer; import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder; import org.opensearch.sql.opensearch.response.agg.OpenSearchAggregationResponseParser; -/** Immutable aggregation pushdown specification used during planning. */ +/** Immutable aggregation pushdown state and ordered replay plan. 
*/ @Getter public final class AggSpec { private enum AggKind { @@ -56,89 +52,47 @@ private enum LimitPushdownMode { BUCKET_SIZE } - private final Aggregate aggregate; - @Nullable private final Project project; - private final List outputFields; - private final RelDataType rowType; - private final Map fieldTypes; - private final RelOptCluster cluster; - private final boolean bucketNullable; - private final int queryBucketSize; + private interface BuildAction extends AbstractAction { + @Override + default void pushOperation(PushDownContext context, PushDownOperation operation) { + throw new UnsupportedOperationException("Internal aggregation build action cannot be queued"); + } + } + + private final Pair, OpenSearchAggregationResponseParser> + baseBuilderAndParser; private final Map extendedTypeMapping; private final List initialBucketNames; - private final List bucketNames; + // Cost model uses the script count of the base logical aggregation. Supported rewrites keep the + // same scripted sources/metrics semantically, while replay-time builders are request-scoped and + // may not preserve a structure that can be re-counted accurately after rewrite. 
private final long scriptCount; private final AggKind kind; private final LimitPushdownMode limitPushdownMode; - @Nullable private final AggKind measureSortTarget; - private final boolean rareTopSupported; - @Nullable private final List bucketSortCollations; - @Nullable private final List bucketSortFieldNames; - @Nullable private final List measureSortCollations; - @Nullable private final List measureSortFieldNames; - @Nullable private final RareTopDigest rareTopDigest; + // The pushdown operation queue to rewrite base agg + private final List operationsForAgg; @Nullable private final Integer bucketSize; private AggSpec( - Aggregate aggregate, - @Nullable Project project, - List outputFields, - RelDataType rowType, - Map fieldTypes, - RelOptCluster cluster, - boolean bucketNullable, - int queryBucketSize, + Pair, OpenSearchAggregationResponseParser> baseBuilderAndParser, Map extendedTypeMapping, List initialBucketNames, - List bucketNames, long scriptCount, AggKind kind, LimitPushdownMode limitPushdownMode, - @Nullable AggKind measureSortTarget, - boolean rareTopSupported, - @Nullable List bucketSortCollations, - @Nullable List bucketSortFieldNames, - @Nullable List measureSortCollations, - @Nullable List measureSortFieldNames, - @Nullable RareTopDigest rareTopDigest, + List operationsForAgg, @Nullable Integer bucketSize) { - this.aggregate = aggregate; - this.project = project; - this.outputFields = List.copyOf(outputFields); - this.rowType = rowType; - this.fieldTypes = Map.copyOf(fieldTypes); - this.cluster = cluster; - this.bucketNullable = bucketNullable; - this.queryBucketSize = queryBucketSize; + this.baseBuilderAndParser = baseBuilderAndParser; this.extendedTypeMapping = Map.copyOf(extendedTypeMapping); this.initialBucketNames = List.copyOf(initialBucketNames); - this.bucketNames = List.copyOf(bucketNames); this.scriptCount = scriptCount; this.kind = kind; this.limitPushdownMode = limitPushdownMode; - this.measureSortTarget = measureSortTarget; - 
this.rareTopSupported = rareTopSupported; - this.bucketSortCollations = - bucketSortCollations == null ? null : List.copyOf(bucketSortCollations); - this.bucketSortFieldNames = - bucketSortFieldNames == null ? null : List.copyOf(bucketSortFieldNames); - this.measureSortCollations = - measureSortCollations == null ? null : List.copyOf(measureSortCollations); - this.measureSortFieldNames = - measureSortFieldNames == null ? null : List.copyOf(measureSortFieldNames); - this.rareTopDigest = rareTopDigest; + this.operationsForAgg = List.copyOf(operationsForAgg); this.bucketSize = bucketSize; } public static AggSpec create( - Aggregate aggregate, - @Nullable Project project, - List outputFields, - RelDataType rowType, - Map fieldTypes, - RelOptCluster cluster, - boolean bucketNullable, - int queryBucketSize, Map extendedTypeMapping, List bucketNames, Pair, OpenSearchAggregationResponseParser> builderAndParser) { @@ -146,27 +100,13 @@ public static AggSpec create( builderAndParser.getLeft().isEmpty() ? 
null : builderAndParser.getLeft().getFirst(); AggKind kind = inferKind(rootBuilder); return new AggSpec( - aggregate, - project, - outputFields, - rowType, - fieldTypes, - cluster, - bucketNullable, - queryBucketSize, + builderAndParser, extendedTypeMapping, bucketNames, - bucketNames, - new AggPushDownAction(builderAndParser, extendedTypeMapping, bucketNames).getScriptCount(), + builderAndParser.getLeft().stream().mapToInt(AggPushDownAction::getScriptCount).sum(), kind, - inferLimitPushdownMode(builderAndParser.getLeft(), rootBuilder), - inferMeasureSortTarget(rootBuilder), - isRareTopSupported(rootBuilder), - null, - null, - null, - null, - null, + inferBaseLimitPushdownMode(rootBuilder, kind), + List.of(), inferBucketSize(rootBuilder)); } @@ -174,10 +114,6 @@ public boolean isCompositeAggregation() { return kind == AggKind.COMPOSITE; } - public boolean isSingleRowAggregation() { - return aggregate.getGroupSet().isEmpty(); - } - public boolean canPushDownLimitIntoBucketSize(int size) { return switch (limitPushdownMode) { case BUCKET_SIZE -> bucketSize != null && size < bucketSize; @@ -194,141 +130,82 @@ public AggSpec withBucketSort(List collations, List f throw new OpenSearchRequestBuilder.PushDownUnSupportedException( "Cannot pushdown sort into aggregation bucket"); } - List newBucketNames = bucketNames; if (kind == AggKind.COMPOSITE) { - List reordered = new ArrayList<>(bucketNames.size()); - List selected = new ArrayList<>(collations.size()); for (RelFieldCollation collation : collations) { String bucketName = fieldNames.get(collation.getFieldIndex()); - if (!bucketNames.contains(bucketName)) { + if (!initialBucketNames.contains(bucketName)) { throw new OpenSearchRequestBuilder.PushDownUnSupportedException( "Cannot pushdown sort into aggregation bucket"); } - reordered.add(bucketName); - selected.add(bucketName); } - for (String name : bucketNames) { - if (!selected.contains(name)) { - reordered.add(name); - } - } - newBucketNames = reordered; } return new 
AggSpec( - aggregate, - project, - outputFields, - rowType, - fieldTypes, - cluster, - bucketNullable, - queryBucketSize, + baseBuilderAndParser, extendedTypeMapping, initialBucketNames, - newBucketNames, scriptCount, kind, limitPushdownMode, - measureSortTarget, - rareTopSupported, - collations, - fieldNames, - measureSortCollations, - measureSortFieldNames, - rareTopDigest, + replaceOperations( + PushDownType.SORT, + collations, + action -> action.pushDownSortIntoAggBucket(collations, fieldNames)), bucketSize); } public AggSpec withoutBucketSort() { - if (bucketSortCollations == null) { + if (operationsForAgg.stream().noneMatch(operation -> operation.type() == PushDownType.SORT)) { return this; } return new AggSpec( - aggregate, - project, - outputFields, - rowType, - fieldTypes, - cluster, - bucketNullable, - queryBucketSize, + baseBuilderAndParser, extendedTypeMapping, initialBucketNames, - initialBucketNames, scriptCount, kind, limitPushdownMode, - measureSortTarget, - rareTopSupported, - null, - null, - measureSortCollations, - measureSortFieldNames, - rareTopDigest, + removeOperations(PushDownType.SORT), bucketSize); } public AggSpec withSortMeasure(List collations, List fieldNames) { - if (kind != AggKind.COMPOSITE || measureSortTarget == null) { + AggKind rewriteTarget = inferMeasureSortTarget(); + if (rewriteTarget == null) { throw new OpenSearchRequestBuilder.PushDownUnSupportedException( "Cannot pushdown sort aggregate measure"); } Integer resizedBucketSize = - switch (measureSortTarget) { + switch (rewriteTarget) { case TERMS, MULTI_TERMS -> bucketSize; default -> null; }; return new AggSpec( - aggregate, - project, - outputFields, - rowType, - fieldTypes, - cluster, - bucketNullable, - queryBucketSize, + baseBuilderAndParser, extendedTypeMapping, initialBucketNames, - bucketNames, scriptCount, - measureSortTarget, - inferLimitPushdownMode(measureSortTarget), - null, - rareTopSupported, - bucketSortCollations, - bucketSortFieldNames, - collations, - 
fieldNames, - rareTopDigest, + rewriteTarget, + inferLimitPushdownMode(rewriteTarget), + replaceOperations( + PushDownType.SORT_AGG_METRICS, + collations, + action -> action.rePushDownSortAggMeasure(collations, fieldNames)), resizedBucketSize); } public AggSpec withRareTop(RareTopDigest digest) { - if (kind != AggKind.COMPOSITE || !rareTopSupported) { + if (!supportsCurrentRareTop()) { throw new OpenSearchRequestBuilder.PushDownUnSupportedException("Cannot pushdown " + digest); } return new AggSpec( - aggregate, - project, - outputFields, - rowType, - fieldTypes, - cluster, - bucketNullable, - queryBucketSize, + baseBuilderAndParser, extendedTypeMapping, initialBucketNames, - bucketNames, scriptCount, AggKind.RARE_TOP, - LimitPushdownMode.BUCKET_SIZE, - null, - rareTopSupported, - bucketSortCollations, - bucketSortFieldNames, - measureSortCollations, - measureSortFieldNames, - digest, + inferLimitPushdownMode(AggKind.RARE_TOP), + replaceOperations( + PushDownType.RARE_TOP, digest, action -> action.rePushDownRareTop(digest)), digest.byList().isEmpty() ? 
digest.number() : DEFAULT_MAX_BUCKETS); } @@ -346,57 +223,37 @@ public AggSpec withLimit(int size) { break; } return new AggSpec( - aggregate, - project, - outputFields, - rowType, - fieldTypes, - cluster, - bucketNullable, - queryBucketSize, + baseBuilderAndParser, extendedTypeMapping, initialBucketNames, - bucketNames, scriptCount, kind, limitPushdownMode, - measureSortTarget, - rareTopSupported, - bucketSortCollations, - bucketSortFieldNames, - measureSortCollations, - measureSortFieldNames, - rareTopDigest, + replaceOperations( + PushDownType.LIMIT, + new LimitDigest(size, 0), + action -> action.pushDownLimitIntoBucketSize(size)), size); } - public Pair, OpenSearchAggregationResponseParser> build() { - try { - AggregateAnalyzer.AggregateBuilderHelper helper = - new AggregateAnalyzer.AggregateBuilderHelper( - rowType, fieldTypes, cluster, bucketNullable, queryBucketSize); - Pair, OpenSearchAggregationResponseParser> builderAndParser = - AggregateAnalyzer.analyze(aggregate, project, outputFields, helper); - AggPushDownAction temp = - new AggPushDownAction( - builderAndParser, extendedTypeMapping, new ArrayList<>(initialBucketNames)); - if (bucketSortCollations != null) { - temp.pushDownSortIntoAggBucket(bucketSortCollations, bucketSortFieldNames); - } - if (measureSortCollations != null) { - temp.rePushDownSortAggMeasure(measureSortCollations, measureSortFieldNames); - } - if (rareTopDigest != null) { - temp.rePushDownRareTop(rareTopDigest); - } - if (bucketSize != null) { - temp.pushDownLimitIntoBucketSize(bucketSize); - } - return temp.getBuilderAndParser(); - } catch (AggregateAnalyzer.ExpressionNotAnalyzableException e) { - throw new OpenSearchRequestBuilder.PushDownUnSupportedException( - "Cannot materialize aggregation pushdown", e); - } + public AggPushDownAction buildAction() { + AggPushDownAction action = + new AggPushDownAction( + baseBuilderAndParser, extendedTypeMapping, new ArrayList<>(initialBucketNames)); + operationsForAgg.forEach(operation -> 
((BuildAction) operation.action()).apply(action)); + return action; + } + + private List replaceOperations( + PushDownType type, Object digest, BuildAction action) { + List newOperations = removeOperations(type); + newOperations.add(new PushDownOperation(type, digest, action)); + return newOperations; + } + + private List removeOperations(PushDownType type) { + return new ArrayList<>( + operationsForAgg.stream().filter(operation -> operation.type() != type).toList()); } private static AggKind inferKind(@Nullable AggregationBuilder rootBuilder) { @@ -410,14 +267,10 @@ private static AggKind inferKind(@Nullable AggregationBuilder rootBuilder) { if (builder instanceof MultiTermsAggregationBuilder) { return AggKind.MULTI_TERMS; } - if (builder - instanceof - org.opensearch.search.aggregations.bucket.histogram.DateHistogramAggregationBuilder) { + if (builder instanceof DateHistogramAggregationBuilder) { return AggKind.DATE_HISTOGRAM; } - if (builder - instanceof - org.opensearch.search.aggregations.bucket.histogram.HistogramAggregationBuilder) { + if (builder instanceof HistogramAggregationBuilder) { return AggKind.HISTOGRAM; } if (builder instanceof TopHitsAggregationBuilder) { @@ -433,9 +286,9 @@ private static LimitPushdownMode inferLimitPushdownMode(AggKind kind) { }; } - private static LimitPushdownMode inferLimitPushdownMode( - List builders, @Nullable AggregationBuilder rootBuilder) { - if (builders.isEmpty()) { + private static LimitPushdownMode inferBaseLimitPushdownMode( + @Nullable AggregationBuilder rootBuilder, AggKind kind) { + if (rootBuilder == null) { // count() optimization uses hits.total and leaves the builder list empty. Keeps // LIMIT in PushDownContext for these cases even though no request-side limit is applied. return LimitPushdownMode.ESTIMATE_ONLY; @@ -445,10 +298,10 @@ private static LimitPushdownMode inferLimitPushdownMode( // Treats leaf metric aggregations as limit-pushable because they produce a single row. 
return LimitPushdownMode.LEAF_METRIC; } - return inferLimitPushdownMode(inferKind(rootBuilder)); + return inferLimitPushdownMode(kind); } - private static boolean isRareTopSupported(@Nullable AggregationBuilder rootBuilder) { + private static boolean supportsBaseRareTop(@Nullable AggregationBuilder rootBuilder) { AggregationBuilder builder = unwrapNestedBuilder(rootBuilder); if (!(builder instanceof CompositeAggregationBuilder composite)) { return false; @@ -462,7 +315,12 @@ private static boolean isRareTopSupported(@Nullable AggregationBuilder rootBuild } @Nullable - private static AggKind inferMeasureSortTarget(@Nullable AggregationBuilder rootBuilder) { + private AggKind inferMeasureSortTarget() { + if (kind != AggKind.COMPOSITE) { + return null; + } + AggregationBuilder rootBuilder = + baseBuilderAndParser.getLeft().isEmpty() ? null : baseBuilderAndParser.getLeft().getFirst(); AggregationBuilder builder = unwrapNestedBuilder(rootBuilder); if (!(builder instanceof CompositeAggregationBuilder composite)) { return null; @@ -491,6 +349,14 @@ private static AggKind inferMeasureSortTarget(@Nullable AggregationBuilder rootB : null; } + private boolean supportsCurrentRareTop() { + return kind == AggKind.COMPOSITE + && supportsBaseRareTop( + baseBuilderAndParser.getLeft().isEmpty() + ? 
null + : baseBuilderAndParser.getLeft().getFirst()); + } + @Nullable private static Integer inferBucketSize(@Nullable AggregationBuilder rootBuilder) { AggregationBuilder builder = unwrapNestedBuilder(rootBuilder); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java index c887c9d075d..a622f948efb 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java @@ -29,7 +29,6 @@ public class PushDownContext extends AbstractCollection { private boolean isAggregatePushed = false; @Setter private AggSpec aggSpec; - private ArrayDeque operationsForAgg; // Records the start pos of the query, which is updated by new added limit operations. private int startFrom = 0; @@ -211,8 +210,7 @@ public OpenSearchRequestBuilder createRequestBuilder() { operation -> ((OSRequestBuilderAction) operation.action()).apply(newRequestBuilder)); } if (aggSpec != null) { - newRequestBuilder.pushDownAggregation(aggSpec.build()); - newRequestBuilder.pushTypeMapping(aggSpec.getExtendedTypeMapping()); + aggSpec.buildAction().apply(newRequestBuilder); } return newRequestBuilder; }