Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 30 additions & 2 deletions sql/src/main/java/org/apache/druid/sql/calcite/rel/Grouping.java
Original file line number Diff line number Diff line change
Expand Up @@ -228,14 +228,42 @@ public Grouping applyProject(final PlannerContext plannerContext, final Project
// Remove literal dimensions that did not appear in the projection. This is useful for queries
// like "SELECT COUNT(*) FROM tbl GROUP BY 'dummy'" which some tools can generate, and for which we don't
// actually want to include a dimension 'dummy'.
//
// However, non-literal dimensions (column references) used in any GROUPING SET should not be dropped,
// even if they are not in the projection. This ensures correct NULL value formatting.
// See: https://github.com/apache/druid/issues/13204
final ImmutableBitSet aggregateProjectBits = RelOptUtil.InputFinder.bits(project.getProjects(), null);
final int[] newDimIndexes = new int[dimensions.size()];
boolean droppedDimensions = false;

// Collect all dimension indices referenced in any non-empty subtotal, but only when the subtotals
// spec has real effect (i.e. multiple grouping sets). A plain GROUP BY with a single group produces
// subtotals = [[0, 1, ...]] which has no effect and must not prevent literal dimensions from being dropped.
// See: https://github.com/apache/druid/issues/13204
final Set<Integer> dimensionsInSubtotals = new HashSet<>();
if (subtotals.hasEffect(dimensions.stream()
.map(DimensionExpression::toDimensionSpec)
.collect(Collectors.toList()))) {
for (IntList subtotal : subtotals.getSubtotals()) {
if (!subtotal.isEmpty()) {
for (int dimIndex : subtotal) {
dimensionsInSubtotals.add(dimIndex);
}
}
}
}

for (int i = 0; i < dimensions.size(); i++) {
final DimensionExpression dimension = dimensions.get(i);
if (plannerContext.parseExpression(dimension.getDruidExpression().getExpression()).isLiteral()
&& !aggregateProjectBits.get(i)) {
final boolean isLiteral = plannerContext.parseExpression(
dimension.getDruidExpression().getExpression()
).isLiteral();
final boolean isUsedInSubtotals = dimensionsInSubtotals.contains(i);

// Drop if it's a literal AND not in projection AND not used in any grouping set.
// Non-literal dimensions referenced in a GROUPING SET must be preserved so that
// subtotals which omit them correctly emit null.
if (isLiteral && !aggregateProjectBits.get(i) && !isUsedInSubtotals) {
droppedDimensions = true;
newDimIndexes[i] = -1;
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11661,6 +11661,11 @@ public void testGroupingSetsWithDummyDimension()
"nvl(\"dim2\",'')",
ColumnType.STRING
),
expressionVirtualColumn(
"v1",
"'dummy'",
ColumnType.STRING
),
expressionVirtualColumn(
"v2",
"timestamp_floor(\"__time\",'P1M',null,'UTC')",
Expand All @@ -11670,15 +11675,16 @@ public void testGroupingSetsWithDummyDimension()
.setDimensions(
dimensions(
new DefaultDimensionSpec("v0", "d0"),
new DefaultDimensionSpec("v1", "d1", ColumnType.STRING),
new DefaultDimensionSpec("v2", "d2", ColumnType.LONG)
)
)
.setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt")))
.setSubtotalsSpec(
ImmutableList.of(
ImmutableList.of("d0", "d2"),
ImmutableList.of("d0", "d1", "d2"),
ImmutableList.of("d0"),
ImmutableList.of(),
ImmutableList.of("d1"),
ImmutableList.of("d2")
)
)
Expand Down Expand Up @@ -11958,6 +11964,84 @@ public void testGroupingSetsWithOrderByAggregatorWithLimit()
);
}

@Test
public void testGroupingSetsWithSingleValueFilter()
{
msqIncompatible();
testQuery(
"SELECT dim1, dim2, SUM(cnt)\n"
+ "FROM druid.foo\n"
+ "WHERE dim2 = 'a'\n"
+ "GROUP BY GROUPING SETS ( (dim1, dim2), (dim1) )",
ImmutableList.of(
GroupByQuery.builder()
.setDataSource(CalciteTests.DATASOURCE1)
.setInterval(querySegmentSpec(Filtration.eternity()))
.setGranularity(Granularities.ALL)
.setDimensions(
dimensions(
new DefaultDimensionSpec("dim1", "d0", ColumnType.STRING),
new DefaultDimensionSpec("dim2", "d1", ColumnType.STRING)
)
)
.setDimFilter(equality("dim2", "a", ColumnType.STRING))
.setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt")))
.setSubtotalsSpec(
ImmutableList.of(
ImmutableList.of("d0", "d1"),
ImmutableList.of("d0")
)
)
.build()
),
ImmutableList.of(
new Object[]{"", "a", 1L},
new Object[]{"1", "a", 1L},
new Object[]{"", null, 1L},
new Object[]{"1", null, 1L}
)
);
}

@Test
public void testGroupingSetsWithSingleValueFilterUsingIn()
{
msqIncompatible();
testQuery(
"SELECT dim1, dim2, SUM(cnt)\n"
+ "FROM druid.foo\n"
+ "WHERE dim2 IN ('a')\n"
+ "GROUP BY GROUPING SETS ( (dim1, dim2), (dim1) )",
ImmutableList.of(
GroupByQuery.builder()
.setDataSource(CalciteTests.DATASOURCE1)
.setInterval(querySegmentSpec(Filtration.eternity()))
.setGranularity(Granularities.ALL)
.setDimensions(
dimensions(
new DefaultDimensionSpec("dim1", "d0", ColumnType.STRING),
new DefaultDimensionSpec("dim2", "d1", ColumnType.STRING)
)
)
.setDimFilter(equality("dim2", "a", ColumnType.STRING))
.setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt")))
.setSubtotalsSpec(
ImmutableList.of(
ImmutableList.of("d0", "d1"),
ImmutableList.of("d0")
)
)
.build()
),
ImmutableList.of(
new Object[]{"", "a", 1L},
new Object[]{"1", "a", 1L},
new Object[]{"", null, 1L},
new Object[]{"1", null, 1L}
)
);
}

@Test
public void testTimeExtractWithTooFewArguments()
{
Expand Down
Loading