diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java b/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java index 8dfe963081c..52f2bdde6f4 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java @@ -223,7 +223,7 @@ public static ExprType convertSqlTypeNameToExprType(SqlTypeName sqlTypeName) { case BIGINT -> LONG; case FLOAT, REAL -> FLOAT; case DOUBLE, DECIMAL -> DOUBLE; // TODO the decimal is only used for literal - case CHAR, VARCHAR, MULTISET -> STRING; // call toString() for MULTISET + case CHAR, VARCHAR -> STRING; case BOOLEAN -> BOOLEAN; case DATE -> DATE; case TIME, TIME_TZ, TIME_WITH_LOCAL_TIME_ZONE -> TIME; @@ -242,7 +242,7 @@ public static ExprType convertSqlTypeNameToExprType(SqlTypeName sqlTypeName) { INTERVAL_MINUTE_SECOND, INTERVAL_SECOND -> INTERVAL; - case ARRAY -> ARRAY; + case ARRAY, MULTISET -> ARRAY; case MAP -> STRUCT; case GEOMETRY -> GEO_POINT; case NULL, ANY, OTHER -> UNDEFINED; diff --git a/docs/user/ppl/cmd/mvcombine.md b/docs/user/ppl/cmd/mvcombine.md index 4ccad724ca7..fc3130371f5 100644 --- a/docs/user/ppl/cmd/mvcombine.md +++ b/docs/user/ppl/cmd/mvcombine.md @@ -27,6 +27,26 @@ mvcombine --- +## Configuration + +The mvcombine command leverages OpenSearch's top hits aggregation pushdown, which requires increasing the index setting `index.max_inner_result_window` to `plugins.query.buckets` (default: 10000) or larger. + +Change the `index.max_inner_result_window` to `10000`: + +```bash ppl +curl -sS -H 'Content-Type: application/json' \ +-X PUT localhost:9200/mvcombine_data/_settings \ +-d '{"index" : {"max_inner_result_window" : "10000"}}' +``` + +```text +{ + "acknowledged": true +} +``` + +--- + ## Example 1: Basic mvcombine Given the following input rows: diff --git a/doctest/markdown_parser.py b/doctest/markdown_parser.py index 00ea5441d49..d899a7cc37b 100644 --- a/doctest/markdown_parser.py +++ b/doctest/markdown_parser.py @@ -34,7 +34,7 @@ class MarkdownDocTestParser: # Regex to match Markdown code fences with optional attributes CODE_FENCE_PATTERN = re.compile( - r'^```(\w+)([^\n]*?)\s*\n' # ```language [attributes] (no newlines in attributes) + r'^```([\w][\w ]*\w|\w+)([^\n]*?)\s*\n' # ```language [attributes] (language may contain spaces, e.g. "bash ppl") r'(.*?)' # code content (non-greedy) r'^```\s*$', # closing ``` re.MULTILINE | re.DOTALL diff --git a/doctest/test_docs.py b/doctest/test_docs.py index 6283252065f..0744d48d7c3 100644 --- a/doctest/test_docs.py +++ b/doctest/test_docs.py @@ -472,9 +472,9 @@ def create_markdown_suite(filepaths, category_name, setup_func): transform=transform, ) - # Prepare globs for bash commands + # Prepare globs for bash commands (needed when input_langs includes "bash ppl") test_globs = {} - if "bash" in category_name: + if "bash" in category_name or "bash ppl" in input_langs: test_globs = { "sh": partial( subprocess.run, diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMvCombineCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMvCombineCommandIT.java index 1cf535bd71b..4a5c227167a 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMvCombineCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMvCombineCommandIT.java @@ -17,6 +17,7 @@ import java.util.List; import org.json.JSONArray; import org.json.JSONObject; +import org.junit.After; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import org.opensearch.client.ResponseException; @@ -31,6 +32,12 @@ public void init() throws Exception { super.init(); enableCalcite(); loadIndex(Index.MVCOMBINE); + updateIndexSettings(INDEX, "{ \"index\": { \"max_inner_result_window\":" + 10000 + " } }"); + } + + @After + public void afterTest() throws IOException { + updateIndexSettings(INDEX, "{ \"index\": { \"max_inner_result_window\":" + 100 + " } }"); } // --------------------------- diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLGraphLookupIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLGraphLookupIT.java index fbaefdb8c3c..ce7530dec15 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLGraphLookupIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLGraphLookupIT.java @@ -29,7 +29,7 @@ * * * diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLMapPathIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLMapPathIT.java index def57bb11d3..c7c5dd4fdcb 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLMapPathIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLMapPathIT.java @@ -150,22 +150,29 @@ public void testAddtotalsOnMapPath() throws IOException { @Test public void testMvcombineOnMapPath() throws IOException { - JSONObject result = - ppl( - """ - source=%s | spath input=doc - | mvcombine doc.user.name - | fields doc.user.name, doc.user.city\ - """, - TEST_INDEX); - verifySchema(result, schema("doc.user.name", "array"), schema("doc.user.city", "string")); - verifyDataRows( - result, - rows(new JSONArray("[\"John\"]"), "NYC"), - rows(new JSONArray("[\"Alice\"]"), "LA"), - rows(new JSONArray("[\"John\"]"), "SF"), - rows(new JSONArray("[\"Bob\"]"), "NYC"), - rows(null, null)); + try { + updateIndexSettings( + TEST_INDEX, "{ \"index\": { \"max_inner_result_window\":" + 10000 + " } }"); + JSONObject result = + ppl( + """ + source=%s | spath input=doc + | mvcombine doc.user.name + | fields doc.user.name, doc.user.city\ + """, + TEST_INDEX); + verifySchema(result, schema("doc.user.name", "array"), schema("doc.user.city", "string")); + verifyDataRows( + result, + rows(new JSONArray("[\"John\"]"), "NYC"), + rows(new JSONArray("[\"Alice\"]"), "LA"), + rows(new JSONArray("[\"John\"]"), "SF"), + rows(new JSONArray("[\"Bob\"]"), "NYC"), + rows(null, null)); + + } finally { + updateIndexSettings(TEST_INDEX, "{ \"index\": { \"max_inner_result_window\":" + 100 + " } }"); + } } @Test diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java index 099b2f7e0cb..559029add7d 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java @@ -430,6 +430,11 @@ public String toString() { protected static JSONObject updateIndexSettings(String indexName, String setting) throws IOException { + return updateIndexSettings(indexName, setting, client()); + } + + protected static JSONObject updateIndexSettings( + String indexName, String setting, RestClient client) throws IOException { Request request = new Request("PUT", "/" + indexName + "/_settings"); if (!isNullOrEmpty(setting)) { request.setJsonEntity(setting); @@ -437,7 +442,7 @@ protected static JSONObject updateIndexSettings(String indexName, String setting RequestOptions.Builder restOptionsBuilder = RequestOptions.DEFAULT.toBuilder(); restOptionsBuilder.addHeader("Content-Type", "application/json"); request.setOptions(restOptionsBuilder); - return new JSONObject(executeRequest(request)); + return new JSONObject(executeRequest(request, client)); } protected String makeRequest(String query) { diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java index a5164ec7fef..9887082d780 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java @@ -246,12 +246,17 @@ private void verifyQuery(JSONObject result) throws IOException { public void testMvCombineUnsupportedInV2() throws IOException { JSONObject result; try { + updateIndexSettings( + TEST_INDEX_BANK, "{ \"index\": { \"max_inner_result_window\":" + 10000 + " } }"); result = executeQuery( String.format( "source=%s | fields state, city, age | mvcombine age", TEST_INDEX_BANK)); } catch (ResponseException e) { result = new JSONObject(TestUtils.getResponseBody(e.getResponse())); + } finally { + updateIndexSettings( + TEST_INDEX_BANK, "{ \"index\": { \"max_inner_result_window\":" + 100 + " } }"); } verifyQuery(result); } diff --git a/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java b/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java index 13dbdce4bce..bf834933f0b 100644 --- a/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java @@ -5,6 +5,7 @@ package org.opensearch.sql.security; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; import static org.opensearch.sql.util.MatcherUtils.columnName; import static org.opensearch.sql.util.MatcherUtils.rows; import static org.opensearch.sql.util.MatcherUtils.schema; @@ -389,20 +390,30 @@ public void testCrossClusterAppend() throws IOException { /** CrossClusterSearchIT Test for mvcombine. */ @Test public void testCrossClusterMvcombine() throws IOException { - - JSONObject result = - executeQuery( - String.format( - "search source=%s | where firstname='Hattie' or firstname='Nanette' " - + "| fields firstname, age | mvcombine age", - TEST_INDEX_BANK_REMOTE)); - - verifyColumn(result, columnName("firstname"), columnName("age")); - - verifyDataRows( - result, - rows("Hattie", new org.json.JSONArray().put(36)), - rows("Nanette", new org.json.JSONArray().put(28))); + try { + updateIndexSettings( + TEST_INDEX_BANK, + "{ \"index\": { \"max_inner_result_window\":" + 10000 + " } }", + remoteClient()); + JSONObject result = + executeQuery( + String.format( + "search source=%s | where firstname='Hattie' or firstname='Nanette' " + + "| fields firstname, age | mvcombine age", + TEST_INDEX_BANK_REMOTE)); + + verifyColumn(result, columnName("firstname"), columnName("age")); + + verifyDataRows( + result, + rows("Hattie", new org.json.JSONArray().put(36)), + rows("Nanette", new org.json.JSONArray().put(28))); + } finally { + updateIndexSettings( + TEST_INDEX_BANK, + "{ \"index\": { \"max_inner_result_window\":" + 100 + " } }", + remoteClient()); + } } /** CrossClusterSearchIT Test for fieldformat. */ diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_mvcombine.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_mvcombine.yaml index ff54f066772..a3d6b8faf90 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_mvcombine.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_mvcombine.yaml @@ -6,7 +6,4 @@ calcite: LogicalProject(state=[$7], city=[$5], age=[$8], $f3=[IS NOT NULL($8)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - EnumerableLimit(fetch=[10000]) - EnumerableAggregate(group=[{0, 1}], age=[ARRAY_AGG($2) FILTER $3]) - EnumerableCalc(expr#0..2=[{inputs}], expr#3=[IS NOT NULL($t2)], proj#0..3=[{exprs}]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[state, city, age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["state","city","age"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},age=ARRAY_AGG($2) FILTER $3), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"city":{"terms":{"field":"city.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"age":{"filter":{"exists":{"field":"age","boost":1.0}},"aggregations":{"age":{"top_hits":{"from":0,"size":1000,"version":false,"seq_no_primary_term":false,"explain":false,"fields":[{"field":"age"}]}}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_simple_pattern_agg_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_simple_pattern_agg_push.yaml index 80677913ecb..3ed2eb3310f 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_simple_pattern_agg_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_simple_pattern_agg_push.yaml @@ -7,4 +7,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | EnumerableCalc(expr#0..2=[{inputs}], expr#3=[PATTERN_PARSER($t0, $t2)], expr#4=['pattern'], expr#5=[ITEM($t3, $t4)], expr#6=[SAFE_CAST($t5)], expr#7=['tokens'], expr#8=[ITEM($t3, $t7)], expr#9=[SAFE_CAST($t8)], patterns_field=[$t6], pattern_count=[$t1], tokens=[$t9], sample_logs=[$t2]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},pattern_count=COUNT($1),sample_logs=TAKE($0, $2)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"patterns_field":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQH2nsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiT1IiLAogICAgICAgICJraW5kIjogIk9SIiwKICAgICAgICAic3ludGF4IjogIkJJTkFSWSIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiSVMgTlVMTCIsCiAgICAgICAgICAgICJraW5kIjogIklTX05VTEwiLAogICAgICAgICAgICAic3ludGF4IjogIlBPU1RGSVgiCiAgICAgICAgICB9LAogICAgICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiPSIsCiAgICAgICAgICAgICJraW5kIjogIkVRVUFMUyIsCiAgICAgICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICAgICAgfSwKICAgICAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICAgICAgewogICAgICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgICAgICAgICAgfQogICAgICAgICAgICB9LAogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDIsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAzLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9LAogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiUkVHRVhQX1JFUExBQ0UiLAogICAgICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICAgICAic3ludGF4IjogIkZVTkNUSU9OIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDQsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDUsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDYsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,0,2,2,0,2,2],"DIGESTS":["email.keyword","email.keyword","","","email.keyword","[a-zA-Z0-9]+","<*>"]}},"missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"pattern_count":{"value_count":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQH2nsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiT1IiLAogICAgICAgICJraW5kIjogIk9SIiwKICAgICAgICAic3ludGF4IjogIkJJTkFSWSIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiSVMgTlVMTCIsCiAgICAgICAgICAgICJraW5kIjogIklTX05VTEwiLAogICAgICAgICAgICAic3ludGF4IjogIlBPU1RGSVgiCiAgICAgICAgICB9LAogICAgICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiPSIsCiAgICAgICAgICAgICJraW5kIjogIkVRVUFMUyIsCiAgICAgICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICAgICAgfSwKICAgICAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICAgICAgewogICAgICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgICAgICAgICAgfQogICAgICAgICAgICB9LAogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDIsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAzLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9LAogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiUkVHRVhQX1JFUExBQ0UiLAogICAgICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICAgICAic3ludGF4IjogIkZVTkNUSU9OIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDQsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDUsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDYsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,0,2,2,0,2,2],"DIGESTS":["email.keyword","email.keyword","","","email.keyword","[a-zA-Z0-9]+","<*>"]}}}},"sample_logs":{"top_hits":{"from":0,"size":10,"version":false,"seq_no_primary_term":false,"explain":false,"_source":false,"fields":[{"field":"email"}]}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},pattern_count=COUNT($1),sample_logs=TAKE($0, $2)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"patterns_field":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQH2nsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiT1IiLAogICAgICAgICJraW5kIjogIk9SIiwKICAgICAgICAic3ludGF4IjogIkJJTkFSWSIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiSVMgTlVMTCIsCiAgICAgICAgICAgICJraW5kIjogIklTX05VTEwiLAogICAgICAgICAgICAic3ludGF4IjogIlBPU1RGSVgiCiAgICAgICAgICB9LAogICAgICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiPSIsCiAgICAgICAgICAgICJraW5kIjogIkVRVUFMUyIsCiAgICAgICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICAgICAgfSwKICAgICAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICAgICAgewogICAgICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgICAgICAgICAgfQogICAgICAgICAgICB9LAogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDIsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAzLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9LAogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiUkVHRVhQX1JFUExBQ0UiLAogICAgICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICAgICAic3ludGF4IjogIkZVTkNUSU9OIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDQsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDUsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDYsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,0,2,2,0,2,2],"DIGESTS":["email.keyword","email.keyword","","","email.keyword","[a-zA-Z0-9]+","<*>"]}},"missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"pattern_count":{"value_count":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQH2nsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiT1IiLAogICAgICAgICJraW5kIjogIk9SIiwKICAgICAgICAic3ludGF4IjogIkJJTkFSWSIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiSVMgTlVMTCIsCiAgICAgICAgICAgICJraW5kIjogIklTX05VTEwiLAogICAgICAgICAgICAic3ludGF4IjogIlBPU1RGSVgiCiAgICAgICAgICB9LAogICAgICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiPSIsCiAgICAgICAgICAgICJraW5kIjogIkVRVUFMUyIsCiAgICAgICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICAgICAgfSwKICAgICAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICAgICAgewogICAgICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgICAgICAgICAgfQogICAgICAgICAgICB9LAogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDIsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAzLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9LAogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiUkVHRVhQX1JFUExBQ0UiLAogICAgICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICAgICAic3ludGF4IjogIkZVTkNUSU9OIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDQsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDUsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDYsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,0,2,2,0,2,2],"DIGESTS":["email.keyword","email.keyword","","","email.keyword","[a-zA-Z0-9]+","<*>"]}}}},"sample_logs":{"top_hits":{"from":0,"size":10,"version":false,"seq_no_primary_term":false,"explain":false,"_source":false,"fields":[{"field":"email"}]}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_take.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_take.yaml index 0bf56db2ed2..a50ae53a6f5 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_take.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_take.yaml @@ -5,4 +5,4 @@ calcite: LogicalProject(firstname=[$1], $f1=[2]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={},take=TAKE($0, $1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"take":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"_source":false,"fields":[{"field":"firstname"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={},take=TAKE($0, $1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"take":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"_source":false,"fields":[{"field":"firstname"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java index 7d8cb8826cd..6e3cc863c16 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java @@ -603,6 +603,12 @@ yield switch (functionName) { aggCall, args, aggName, helper, dedupNumber, false, false, null, null); yield Pair.of(topHitsAggregationBuilder, new TopHitsParser(aggName, false, false)); } + case COLLECT, ARRAY_AGG -> { + TopHitsAggregationBuilder topHitsBuilder = + createTopHitsBuilder( + aggCall, args, aggName, helper, helper.queryBucketSize, false, false, null, null); + yield Pair.of(topHitsBuilder, new TopHitsParser(aggName, false, true)); + } default -> throw new AggregateAnalyzer.AggregateAnalyzerException( String.format("unsupported aggregator %s", aggCall.getAggregation()));