From 21560cbdda70631b1935c57c911474d869cf725a Mon Sep 17 00:00:00 2001 From: shekharrajak Date: Thu, 9 Apr 2026 23:24:42 +0530 Subject: [PATCH] fix: preserve GROUPING SET dimensions with single-value filter (#13204) --- .../druid/sql/calcite/rel/Grouping.java | 32 ++++++- .../druid/sql/calcite/CalciteQueryTest.java | 88 ++++++++++++++++++- 2 files changed, 116 insertions(+), 4 deletions(-) diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/Grouping.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/Grouping.java index 8bcf1544fc89..740085b89388 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/Grouping.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/Grouping.java @@ -228,14 +228,42 @@ public Grouping applyProject(final PlannerContext plannerContext, final Project // Remove literal dimensions that did not appear in the projection. This is useful for queries // like "SELECT COUNT(*) FROM tbl GROUP BY 'dummy'" which some tools can generate, and for which we don't // actually want to include a dimension 'dummy'. + // + // However, non-literal dimensions (column references) used in any GROUPING SET should not be dropped, + // even if they are not in the projection. This ensures correct NULL value formatting. + // See: https://github.com/apache/druid/issues/13204 final ImmutableBitSet aggregateProjectBits = RelOptUtil.InputFinder.bits(project.getProjects(), null); final int[] newDimIndexes = new int[dimensions.size()]; boolean droppedDimensions = false; + // Collect all dimension indices referenced in any non-empty subtotal, but only when the subtotals + // spec has real effect (i.e. multiple grouping sets). A plain GROUP BY with a single group produces + // subtotals = [[0, 1, ...]] which has no effect and must not prevent literal dimensions from being dropped. + // See: https://github.com/apache/druid/issues/13204 + final Set dimensionsInSubtotals = new HashSet<>(); + if (subtotals.hasEffect(dimensions.stream() + .map(DimensionExpression::toDimensionSpec) + .collect(Collectors.toList()))) { + for (IntList subtotal : subtotals.getSubtotals()) { + if (!subtotal.isEmpty()) { + for (int dimIndex : subtotal) { + dimensionsInSubtotals.add(dimIndex); + } + } + } + } + for (int i = 0; i < dimensions.size(); i++) { final DimensionExpression dimension = dimensions.get(i); - if (plannerContext.parseExpression(dimension.getDruidExpression().getExpression()).isLiteral() - && !aggregateProjectBits.get(i)) { + final boolean isLiteral = plannerContext.parseExpression( + dimension.getDruidExpression().getExpression() + ).isLiteral(); + final boolean isUsedInSubtotals = dimensionsInSubtotals.contains(i); + + // Drop if it's a literal AND not in projection AND not used in any grouping set. + // Non-literal dimensions referenced in a GROUPING SET must be preserved so that + // subtotals which omit them correctly emit null. + if (isLiteral && !aggregateProjectBits.get(i) && !isUsedInSubtotals) { droppedDimensions = true; newDimIndexes[i] = -1; } else { diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java index fcb944852453..808b06e94119 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java @@ -11661,6 +11661,11 @@ public void testGroupingSetsWithDummyDimension() "nvl(\"dim2\",'')", ColumnType.STRING ), + expressionVirtualColumn( + "v1", + "'dummy'", + ColumnType.STRING + ), expressionVirtualColumn( "v2", "timestamp_floor(\"__time\",'P1M',null,'UTC')", @@ -11670,15 +11675,16 @@ public void testGroupingSetsWithDummyDimension() .setDimensions( dimensions( new DefaultDimensionSpec("v0", "d0"), + new DefaultDimensionSpec("v1", "d1", ColumnType.STRING), new DefaultDimensionSpec("v2", "d2", ColumnType.LONG) ) ) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) .setSubtotalsSpec( ImmutableList.of( - ImmutableList.of("d0", "d2"), + ImmutableList.of("d0", "d1", "d2"), ImmutableList.of("d0"), - ImmutableList.of(), + ImmutableList.of("d1"), ImmutableList.of("d2") ) ) @@ -11958,6 +11964,84 @@ public void testGroupingSetsWithOrderByAggregatorWithLimit() ); } + @Test + public void testGroupingSetsWithSingleValueFilter() + { + msqIncompatible(); + testQuery( + "SELECT dim1, dim2, SUM(cnt)\n" + + "FROM druid.foo\n" + + "WHERE dim2 = 'a'\n" + + "GROUP BY GROUPING SETS ( (dim1, dim2), (dim1) )", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions( + dimensions( + new DefaultDimensionSpec("dim1", "d0", ColumnType.STRING), + new DefaultDimensionSpec("dim2", "d1", ColumnType.STRING) + ) + ) + .setDimFilter(equality("dim2", "a", ColumnType.STRING)) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setSubtotalsSpec( + ImmutableList.of( + ImmutableList.of("d0", "d1"), + ImmutableList.of("d0") + ) + ) + .build() + ), + ImmutableList.of( + new Object[]{"", "a", 1L}, + new Object[]{"1", "a", 1L}, + new Object[]{"", null, 1L}, + new Object[]{"1", null, 1L} + ) + ); + } + + @Test + public void testGroupingSetsWithSingleValueFilterUsingIn() + { + msqIncompatible(); + testQuery( + "SELECT dim1, dim2, SUM(cnt)\n" + + "FROM druid.foo\n" + + "WHERE dim2 IN ('a')\n" + + "GROUP BY GROUPING SETS ( (dim1, dim2), (dim1) )", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions( + dimensions( + new DefaultDimensionSpec("dim1", "d0", ColumnType.STRING), + new DefaultDimensionSpec("dim2", "d1", ColumnType.STRING) + ) + ) + .setDimFilter(equality("dim2", "a", ColumnType.STRING)) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setSubtotalsSpec( + ImmutableList.of( + ImmutableList.of("d0", "d1"), + ImmutableList.of("d0") + ) + ) + .build() + ), + ImmutableList.of( + new Object[]{"", "a", 1L}, + new Object[]{"1", "a", 1L}, + new Object[]{"", null, 1L}, + new Object[]{"1", null, 1L} + ) + ); + } + @Test public void testTimeExtractWithTooFewArguments() {