From 6eb5ac5a8d8b1c56b0a4ecf5c12f54073d83cf06 Mon Sep 17 00:00:00 2001 From: opensearchpplteam Date: Fri, 27 Feb 2026 01:43:22 +0000 Subject: [PATCH 1/5] Fix #5150: Fix dedup aggregation pushdown nullifying renamed fields When the DedupPushdownRule converts a dedup to an aggregation-based top_hits query, fields that were renamed (via rename or eval) would return null values. This happened because the TopHitsParser returned results using original OpenSearch field names, but the output schema expected the renamed names. Added a field name mapping to TopHitsParser so it can translate original OS field names to their renamed output names in the LITERAL_AGG (dedup) aggregation response path. Signed-off-by: opensearchpplteam --- .../rest-api-spec/test/issues/5150.yml | 74 +++++++++++++++++++ .../opensearch/request/AggregateAnalyzer.java | 17 ++++- .../response/agg/TopHitsParser.java | 35 ++++++++- ...enSearchAggregationResponseParserTest.java | 51 +++++++++++++ 4 files changed, 175 insertions(+), 2 deletions(-) create mode 100644 integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5150.yml diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5150.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5150.yml new file mode 100644 index 00000000000..1db0c7753e4 --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5150.yml @@ -0,0 +1,74 @@ +setup: + - do: + indices.create: + index: test_5150 + - do: + query.settings: + body: + transient: + plugins.calcite.enabled : true + + - do: + bulk: + index: test_5150 + refresh: true + body: + - '{"index": {}}' + - '{"category":"X","value":100}' + - '{"index": {}}' + - '{"category":"X","value":200}' + - '{"index": {}}' + - '{"category":"Y","value":300}' + - '{"index": {}}' + - '{"category":"Y","value":400}' + +--- +teardown: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled : false + +--- +"5150: Rename non-dedup field then dedup retains renamed values": + - skip: + features: + - headers + - allowed_warnings + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_5150 | rename value as val | dedup category | sort category | fields category, val + + - match: { total: 2 } + - match: { schema: [{"name": "category", "type": "string"}, {"name": "val", "type": "bigint"}] } + - length: { datarows: 2 } + # Each row should have non-null val + - is_true: datarows.0.1 + - is_true: datarows.1.1 + +--- +"5150: Eval new field then dedup on different field retains eval values": + - skip: + features: + - headers + - allowed_warnings + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_5150 | eval doubled = value * 2 | dedup category | sort category | fields category, value, doubled + + - match: { total: 2 } + - length: { datarows: 2 } + # Each row should have non-null doubled + - is_true: datarows.0.2 + - is_true: datarows.1.2 diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java index 7d8cb8826cd..2e98787f3d5 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java @@ -38,6 +38,7 @@ import com.google.common.collect.ImmutableList; import java.util.ArrayList; import java.util.Collections; +import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -601,7 +602,21 @@ yield switch (functionName) { TopHitsAggregationBuilder topHitsAggregationBuilder = createTopHitsBuilder( aggCall, args, aggName, helper, dedupNumber, false, false, null, null); - yield Pair.of(topHitsAggregationBuilder, new TopHitsParser(aggName, false, false)); + // Build field name mapping for renamed fields (e.g., rename value as val). + // The top_hits response uses original OS field names, but the output schema expects + // the renamed names from the project. + Map fieldNameMapping = new HashMap<>(); + for (Pair arg : args) { + if (arg.getKey() instanceof RexInputRef) { + String originalName = helper.inferNamedField(arg.getKey()).getRootName(); + String outputName = arg.getValue(); + if (!originalName.equals(outputName)) { + fieldNameMapping.put(originalName, outputName); + } + } + } + yield Pair.of( + topHitsAggregationBuilder, new TopHitsParser(aggName, false, false, fieldNameMapping)); } default -> throw new AggregateAnalyzer.AggregateAnalyzerException( diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/TopHitsParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/TopHitsParser.java index f9c3d5bb5d2..f4debc1dc3d 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/TopHitsParser.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/TopHitsParser.java @@ -27,10 +27,27 @@ public class TopHitsParser implements MetricParser { private final boolean returnSingleValue; private final boolean returnMergeValue; + /** + * Mapping from original OpenSearch field names to output field names (e.g., renamed via {@code + * rename} command). When a field is renamed (e.g., {@code rename value as val}), the top_hits + * response still contains the original field name ({@code value}), but the output schema expects + * the renamed name ({@code val}). This mapping enables the translation. + */ + private final Map fieldNameMapping; + public TopHitsParser(String name, boolean returnSingleValue, boolean returnMergeValue) { + this(name, returnSingleValue, returnMergeValue, Collections.emptyMap()); + } + + public TopHitsParser( + String name, + boolean returnSingleValue, + boolean returnMergeValue, + Map fieldNameMapping) { this.name = name; this.returnSingleValue = returnSingleValue; this.returnMergeValue = returnMergeValue; + this.fieldNameMapping = fieldNameMapping; } @Override @@ -129,12 +146,28 @@ public List> parse(Aggregation agg) { ? new LinkedHashMap<>() : new LinkedHashMap<>(hit.getSourceAsMap()); hit.getFields().values().forEach(f -> map.put(f.getName(), f.getValue())); - return map; + return applyFieldNameMapping(map); }) .toList(); } } + /** + * Apply field name mapping to translate original OpenSearch field names to output field names. + * Fields not present in the mapping are kept as-is. + */ + private Map applyFieldNameMapping(Map map) { + if (fieldNameMapping.isEmpty()) { + return map; + } + Map result = new LinkedHashMap<>(); + for (Map.Entry entry : map.entrySet()) { + String mappedName = fieldNameMapping.getOrDefault(entry.getKey(), entry.getKey()); + result.put(mappedName, entry.getValue()); + } + return result; + } + private boolean isEmptyHits(SearchHit[] hits) { return isFieldsEmpty(hits) && isSourceEmpty(hits); } diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/response/OpenSearchAggregationResponseParserTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/response/OpenSearchAggregationResponseParserTest.java index 7ba64eaa475..25516da9804 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/response/OpenSearchAggregationResponseParserTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/response/OpenSearchAggregationResponseParserTest.java @@ -570,6 +570,57 @@ void two_bucket_percentiles_should_pass() { ImmutableMap.of("percentiles", List.of(21.0, 27.0, 30.0, 35.0, 55.0, 58.0, 60.0)))); } + /** + * Test for issue #5150: dedup aggregation pushdown with renamed fields. When a field is renamed + * (e.g., rename value as val), the top_hits response uses original field names. The TopHitsParser + * with fieldNameMapping should translate them to the renamed names. + */ + @Test + void dedup_top_hits_with_field_name_mapping_should_remap_fields() { + String response = + "{\n" + + " \"composite#composite_buckets\": {\n" + + " \"buckets\": [\n" + + " {\n" + + " \"key\": {\n" + + " \"category\": \"X\"\n" + + " },\n" + + " \"doc_count\": 2,\n" + + " \"top_hits#dedup\": {\n" + + " \"hits\": {\n" + + " \"total\": { \"value\": 1, \"relation\": \"eq\" },\n" + + " \"max_score\": 1.0,\n" + + " \"hits\": [\n" + + " {\n" + + " \"_index\": \"test\",\n" + + " \"_id\": \"1\",\n" + + " \"_score\": 1.0,\n" + + " \"fields\": {\n" + + " \"category\": [\"X\"],\n" + + " \"value\": [100]\n" + + " }\n" + + " }\n" + + " ]\n" + + " }\n" + + " }\n" + + " }\n" + + " ]\n" + + " }\n" + + "}"; + // "value" is renamed to "val" — the mapping should translate it in the response. + // Use BucketAggregationParser as used by the dedup aggregation pushdown path. + OpenSearchAggregationResponseParser parser = + new BucketAggregationParser( + List.of(new TopHitsParser("dedup", false, false, Map.of("value", "val"))), List.of()); + List> result = parse(parser, response); + assertEquals(1, result.size()); + Map row = result.get(0); + // The renamed field "val" should be present, not the original "value" + assertEquals(100, row.get("val")); + assertNull(row.get("value")); + assertEquals("X", row.get("category")); + } + public List> parse(OpenSearchAggregationResponseParser parser, String json) { return parser.parse(fromJson(json)); } From d82a0a1fc329ed66b46e019044b17db3c9636993 Mon Sep 17 00:00:00 2001 From: opensearchpplteam Date: Fri, 27 Feb 2026 03:24:36 +0000 Subject: [PATCH 2/5] Trigger CI rerun for flaky Java 25 integration test The testNoMvBasic and testNoMvWithEval failures in CalciteExplainIT are unrelated to this PR's dedup pushdown changes and only occur on Java 25. Re-triggering CI. Signed-off-by: opensearchpplteam From e9a24ad8b00926c96795a51985a4388ec233b730 Mon Sep 17 00:00:00 2001 From: opensearchpplteam Date: Fri, 27 Feb 2026 04:52:10 +0000 Subject: [PATCH 3/5] Address review feedback for #5192: document field mapping limitations - Add warning log when HashMap collision is detected in AggregateAnalyzer field name mapping (when same original field maps to multiple output names) - Add clarifying comments in TopHitsParser explaining why field name mapping is only needed in the multi-row return path Signed-off-by: opensearchpplteam --- .../opensearch/request/AggregateAnalyzer.java | 17 ++++++++++++++++- .../opensearch/response/agg/TopHitsParser.java | 3 +++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java index 2e98787f3d5..f85bd134d35 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java @@ -60,6 +60,8 @@ import org.apache.calcite.rex.RexNode; import org.apache.calcite.sql.SqlKind; import org.apache.commons.lang3.tuple.Pair; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.opensearch.script.Script; import org.opensearch.search.aggregations.AggregationBuilder; import org.opensearch.search.aggregations.AggregationBuilders; @@ -110,6 +112,8 @@ */ public class AggregateAnalyzer { + private static final Logger LOG = LogManager.getLogger(); + /** metadata field used when there is no argument. Only apply to COUNT. */ private static final String METADATA_FIELD = "_index"; @@ -605,13 +609,24 @@ yield switch (functionName) { // Build field name mapping for renamed fields (e.g., rename value as val). // The top_hits response uses original OS field names, but the output schema expects // the renamed names from the project. + // Known limitation: if multiple project args reference the same original field with + // different output names (e.g., eval pay2 = salary | rename salary as pay | dedup + // dept_id), the later mapping will overwrite the earlier one in this HashMap. Map fieldNameMapping = new HashMap<>(); for (Pair arg : args) { if (arg.getKey() instanceof RexInputRef) { String originalName = helper.inferNamedField(arg.getKey()).getRootName(); String outputName = arg.getValue(); if (!originalName.equals(outputName)) { - fieldNameMapping.put(originalName, outputName); + String previousMapping = fieldNameMapping.put(originalName, outputName); + if (previousMapping != null) { + LOG.warn( + "Field name mapping collision: field '{}' was mapped to '{}', now" + + " overwritten by '{}'", + originalName, + previousMapping, + outputName); + } } } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/TopHitsParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/TopHitsParser.java index f4debc1dc3d..48eb3f943f0 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/TopHitsParser.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/TopHitsParser.java @@ -60,6 +60,9 @@ public List> parse(Aggregation agg) { new HashMap<>(Collections.singletonMap(agg.getName(), null))); } + // Field name mapping is not applied in returnSingleValue or returnMergeValue paths + // because they use the aggregation name (agg.getName()) as the map key, not field names. + // Only the multi-row path below uses actual field names as keys and needs mapping. if (returnSingleValue) { Object value = null; if (!isSourceEmpty(hits)) { From e8a2e3a0797a16354fa628727ee02738e002028d Mon Sep 17 00:00:00 2001 From: opensearchpplteam Date: Fri, 27 Feb 2026 21:55:23 +0000 Subject: [PATCH 4/5] Address review: assert real values in YAML REST tests for #5150 Signed-off-by: opensearchpplteam --- .../resources/rest-api-spec/test/issues/5150.yml | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5150.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5150.yml index 1db0c7753e4..50a02b482dc 100644 --- a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5150.yml +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5150.yml @@ -48,9 +48,10 @@ teardown: - match: { total: 2 } - match: { schema: [{"name": "category", "type": "string"}, {"name": "val", "type": "bigint"}] } - length: { datarows: 2 } - # Each row should have non-null val - - is_true: datarows.0.1 - - is_true: datarows.1.1 + - match: { datarows.0.0: "X" } + - match: { datarows.0.1: 100 } + - match: { datarows.1.0: "Y" } + - match: { datarows.1.1: 300 } --- "5150: Eval new field then dedup on different field retains eval values": @@ -69,6 +70,9 @@ teardown: - match: { total: 2 } - length: { datarows: 2 } - # Each row should have non-null doubled - - is_true: datarows.0.2 - - is_true: datarows.1.2 + - match: { datarows.0.0: "X" } + - match: { datarows.0.1: 100 } + - match: { datarows.0.2: 200 } + - match: { datarows.1.0: "Y" } + - match: { datarows.1.1: 300 } + - match: { datarows.1.2: 600 } From fe1766ac39334b63fc49060c0b298a9456733607 Mon Sep 17 00:00:00 2001 From: opensearchpplteam Date: Tue, 3 Mar 2026 17:28:17 +0000 Subject: [PATCH 5/5] Link known limitation comment to tracking issue #5197 Signed-off-by: opensearchpplteam --- .../sql/opensearch/request/AggregateAnalyzer.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java index f85bd134d35..beb5b9002ac 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java @@ -609,9 +609,10 @@ yield switch (functionName) { // Build field name mapping for renamed fields (e.g., rename value as val). // The top_hits response uses original OS field names, but the output schema expects // the renamed names from the project. - // Known limitation: if multiple project args reference the same original field with - // different output names (e.g., eval pay2 = salary | rename salary as pay | dedup - // dept_id), the later mapping will overwrite the earlier one in this HashMap. + // Known limitation (https://github.com/opensearch-project/sql/issues/5197): if multiple + // project args reference the same original field with different output names (e.g., + // eval pay2 = salary | rename salary as pay | dedup dept_id), the later mapping will + // overwrite the earlier one in this HashMap. Map fieldNameMapping = new HashMap<>(); for (Pair arg : args) { if (arg.getKey() instanceof RexInputRef) {