From 6f1d6b6dfb69456e71df7e4e134e95125c5a1547 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 7 Apr 2026 17:57:13 -0700 Subject: [PATCH] refactor: switch remaining AggregationTestHelper methods to use java objects instead of json strings --- ...ompressedBigDecimalGroupByQueryConfig.java | 25 +- ...edBigDecimalAggregatorGroupByTestBase.java | 21 +- ...igDecimalAggregatorTimeseriesTestBase.java | 38 +- ...sedBigDecimalMaxAggregatorGroupByTest.java | 15 +- ...BigDecimalMaxAggregatorTimeseriesTest.java | 24 +- ...sedBigDecimalMinAggregatorGroupByTest.java | 15 +- ...BigDecimalMinAggregatorTimeseriesTest.java | 24 +- ...sedBigDecimalSumAggregatorGroupByTest.java | 15 +- ...BigDecimalSumAggregatorTimeseriesTest.java | 24 +- .../resources/bd_max_test_aggregators.json | 9 - .../resources/bd_max_test_groupby_query.json | 33 - .../bd_max_test_timeseries_query.json | 25 - .../resources/bd_min_test_aggregators.json | 9 - .../resources/bd_min_test_groupby_query.json | 33 - .../bd_min_test_timeseries_query.json | 25 - .../resources/bd_sum_test_aggregators.json | 9 - .../resources/bd_sum_test_groupby_query.json | 33 - .../bd_sum_test_timeseries_query.json | 25 - .../ddsketch/DDSketchAggregatorTest.java | 74 +- .../MomentsSketchAggregatorTest.java | 100 +- .../SpectatorHistogramAggregatorTest.java | 509 +++---- .../TDigestSketchAggregatorTest.java | 115 +- .../TimestampGroupByAggregationTest.java | 48 +- .../hll/HllSketchAggregatorTest.java | 304 ++-- .../kll/KllDoublesSketchAggregatorTest.java | 335 +++-- .../kll/KllFloatsSketchAggregatorTest.java | 335 +++-- .../DoublesSketchAggregatorTest.java | 335 +++-- .../theta/SketchAggregationTest.java | 268 +++- .../SketchAggregationWithSimpleDataTest.java | 306 +++- .../oldapi/OldApiSketchAggregationTest.java | 134 +- .../ArrayOfDoublesSketchAggregationTest.java | 1328 +++++++++-------- .../empty_sketch_group_by_query.json | 20 - .../empty_sketch_test_data_aggregators.json | 9 - .../old_simple_test_data_aggregators.json | 12 - .../old_simple_test_data_group_by_query.json | 93 -- .../old_sketch_test_data_aggregators.json | 14 - .../old_sketch_test_data_group_by_query.json | 92 -- .../retention_test_data_group_by_query.json | 135 -- .../simple_test_data_aggregators.json | 12 - .../simple_test_data_group_by_query.json | 106 -- .../sketch_test_data_aggregators.json | 16 - .../sketch_test_data_group_by_query.json | 116 -- .../src/test/resources/timeseries_query.json | 92 -- .../src/test/resources/topn_query.json | 98 -- .../resources/topn_query_sketch_const.json | 104 -- .../bloom/BloomFilterGroupByQueryTest.java | 129 +- .../ApproximateHistogramAggregationTest.java | 49 +- .../FixedBucketsHistogramAggregationTest.java | 60 +- .../aggregation/AggregationTestHelper.java | 140 +- .../HyperUniquesAggregationTest.java | 71 +- ...nalizingFieldAccessPostAggregatorTest.java | 46 +- 51 files changed, 2729 insertions(+), 3278 deletions(-) delete mode 100644 extensions-contrib/compressed-bigdecimal/src/test/resources/bd_max_test_aggregators.json delete mode 100644 extensions-contrib/compressed-bigdecimal/src/test/resources/bd_max_test_groupby_query.json delete mode 100644 extensions-contrib/compressed-bigdecimal/src/test/resources/bd_max_test_timeseries_query.json delete mode 100644 extensions-contrib/compressed-bigdecimal/src/test/resources/bd_min_test_aggregators.json delete mode 100644 extensions-contrib/compressed-bigdecimal/src/test/resources/bd_min_test_groupby_query.json delete mode 100644 extensions-contrib/compressed-bigdecimal/src/test/resources/bd_min_test_timeseries_query.json delete mode 100644 extensions-contrib/compressed-bigdecimal/src/test/resources/bd_sum_test_aggregators.json delete mode 100644 extensions-contrib/compressed-bigdecimal/src/test/resources/bd_sum_test_groupby_query.json delete mode 100644 extensions-contrib/compressed-bigdecimal/src/test/resources/bd_sum_test_timeseries_query.json delete mode 100644 extensions-core/datasketches/src/test/resources/empty_sketch_group_by_query.json delete mode 100644 extensions-core/datasketches/src/test/resources/empty_sketch_test_data_aggregators.json delete mode 100644 extensions-core/datasketches/src/test/resources/oldapi/old_simple_test_data_aggregators.json delete mode 100644 extensions-core/datasketches/src/test/resources/oldapi/old_simple_test_data_group_by_query.json delete mode 100644 extensions-core/datasketches/src/test/resources/oldapi/old_sketch_test_data_aggregators.json delete mode 100644 extensions-core/datasketches/src/test/resources/oldapi/old_sketch_test_data_group_by_query.json delete mode 100644 extensions-core/datasketches/src/test/resources/retention_test_data_group_by_query.json delete mode 100644 extensions-core/datasketches/src/test/resources/simple_test_data_aggregators.json delete mode 100644 extensions-core/datasketches/src/test/resources/simple_test_data_group_by_query.json delete mode 100644 extensions-core/datasketches/src/test/resources/sketch_test_data_aggregators.json delete mode 100644 extensions-core/datasketches/src/test/resources/sketch_test_data_group_by_query.json delete mode 100644 extensions-core/datasketches/src/test/resources/timeseries_query.json delete mode 100644 extensions-core/datasketches/src/test/resources/topn_query.json delete mode 100644 extensions-core/datasketches/src/test/resources/topn_query_sketch_const.json diff --git a/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/CompressedBigDecimalGroupByQueryConfig.java b/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/CompressedBigDecimalGroupByQueryConfig.java index df5e0d2101b9..0feb3b67ea1f 100644 --- a/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/CompressedBigDecimalGroupByQueryConfig.java +++ b/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/CompressedBigDecimalGroupByQueryConfig.java @@ -19,37 +19,42 @@ package org.apache.druid.compressedbigdecimal; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.groupby.GroupByQuery; + +import java.util.List; + public class CompressedBigDecimalGroupByQueryConfig { - private final String jsonQueryFile; - private final String jsonAggregatorsFile; + private final List ingestionAggregators; + private final GroupByQuery query; private final String stringRevenue; private final String longRevenue; private final String doubleRevenue; public CompressedBigDecimalGroupByQueryConfig( - String jsonQueryFile, - String jsonAggregatorsFile, + List ingestionAggregators, + GroupByQuery query, String stringRevenue, String longRevenue, String doubleRevenue ) { - this.jsonQueryFile = jsonQueryFile; - this.jsonAggregatorsFile = jsonAggregatorsFile; + this.ingestionAggregators = ingestionAggregators; + this.query = query; this.stringRevenue = stringRevenue; this.longRevenue = longRevenue; this.doubleRevenue = doubleRevenue; } - public String getJsonQueryFile() + public List getIngestionAggregators() { - return jsonQueryFile; + return ingestionAggregators; } - public String getJsonAggregatorsFile() + public GroupByQuery getQuery() { - return jsonAggregatorsFile; + return query; } public String getStringRevenue() diff --git a/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/aggregator/CompressedBigDecimalAggregatorGroupByTestBase.java b/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/aggregator/CompressedBigDecimalAggregatorGroupByTestBase.java index 3422b8d5fd5e..63213de00c61 100644 --- a/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/aggregator/CompressedBigDecimalAggregatorGroupByTestBase.java +++ b/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/aggregator/CompressedBigDecimalAggregatorGroupByTestBase.java @@ -19,8 +19,6 @@ package org.apache.druid.compressedbigdecimal.aggregator; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.io.Resources; import org.apache.druid.compressedbigdecimal.ArrayCompressedBigDecimal; import org.apache.druid.compressedbigdecimal.CompressedBigDecimalGroupByQueryConfig; import org.apache.druid.compressedbigdecimal.CompressedBigDecimalModule; @@ -28,7 +26,6 @@ import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.query.aggregation.AggregationTestHelper; -import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.query.groupby.GroupByQueryConfig; import org.apache.druid.query.groupby.ResultRow; import org.hamcrest.collection.IsCollectionWithSize; @@ -47,7 +44,6 @@ import java.io.File; import java.io.IOException; import java.math.BigDecimal; -import java.nio.charset.StandardCharsets; import java.util.List; import java.util.Map; import java.util.TimeZone; @@ -99,32 +95,21 @@ public static void setupClass() @Test public void testIngestAndGroupByAllQuery() throws IOException, Exception { - - String groupByQueryJson = Resources.asCharSource( - this.getClass().getResource("/" + cbdGroupByQueryConfig.getJsonQueryFile()), - StandardCharsets.UTF_8 - ).read(); - Sequence seq = helper.createIndexAndRunQueryOnSegment( this.getClass().getResourceAsStream("/" + "bd_test_data.csv"), CompressedBigDecimalAggregatorTimeseriesTestBase.SCHEMA, CompressedBigDecimalAggregatorTimeseriesTestBase.FORMAT, - Resources.asCharSource( - this.getClass().getResource("/" + cbdGroupByQueryConfig.getJsonAggregatorsFile()), - StandardCharsets.UTF_8 - ).read(), + cbdGroupByQueryConfig.getIngestionAggregators(), 0, Granularities.NONE, 5, - groupByQueryJson + cbdGroupByQueryConfig.getQuery() ); List results = seq.toList(); Assert.assertThat(results, IsCollectionWithSize.hasSize(1)); ResultRow row = results.get(0); - ObjectMapper mapper = helper.getObjectMapper(); - GroupByQuery groupByQuery = mapper.readValue(groupByQueryJson, GroupByQuery.class); - MapBasedRow mapBasedRow = row.toMapBasedRow(groupByQuery); + MapBasedRow mapBasedRow = row.toMapBasedRow(cbdGroupByQueryConfig.getQuery()); Map event = mapBasedRow.getEvent(); Assert.assertEquals( new DateTime("2017-01-01T00:00:00Z", DateTimeZone.forTimeZone(TimeZone.getTimeZone("UTC"))), diff --git a/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/aggregator/CompressedBigDecimalAggregatorTimeseriesTestBase.java b/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/aggregator/CompressedBigDecimalAggregatorTimeseriesTestBase.java index cae6c3db3c9a..f2e4921a5002 100644 --- a/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/aggregator/CompressedBigDecimalAggregatorTimeseriesTestBase.java +++ b/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/aggregator/CompressedBigDecimalAggregatorTimeseriesTestBase.java @@ -20,7 +20,6 @@ package org.apache.druid.compressedbigdecimal.aggregator; import com.google.common.collect.Iterables; -import com.google.common.io.Resources; import org.apache.druid.compressedbigdecimal.ArrayCompressedBigDecimal; import org.apache.druid.compressedbigdecimal.CompressedBigDecimalModule; import org.apache.druid.data.input.ColumnsFilter; @@ -36,6 +35,8 @@ import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.query.Result; import org.apache.druid.query.aggregation.AggregationTestHelper; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.timeseries.TimeseriesQuery; import org.apache.druid.query.timeseries.TimeseriesResultValue; import org.apache.druid.testing.InitializedNullHandlingTest; import org.joda.time.DateTime; @@ -47,7 +48,6 @@ import java.io.File; import java.math.BigDecimal; -import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.List; import java.util.Map; @@ -121,8 +121,8 @@ public static void setupClass() public abstract void testIngestAndTimeseriesQuery() throws Exception; protected void testIngestAndTimeseriesQueryHelper( - String jsonAggregatorsFile, - String jsonQueryFile, + List ingestionAggregators, + TimeseriesQuery query, String expected ) throws Exception { @@ -130,17 +130,11 @@ protected void testIngestAndTimeseriesQueryHelper( this.getClass().getResourceAsStream("/" + "bd_test_data.csv"), SCHEMA, FORMAT, - Resources.asCharSource( - this.getClass().getResource("/" + jsonAggregatorsFile), - StandardCharsets.UTF_8 - ).read(), + ingestionAggregators, 0, Granularities.NONE, 5, - Resources.asCharSource( - this.getClass().getResource("/" + jsonQueryFile), - StandardCharsets.UTF_8 - ).read() + query ); TimeseriesResultValue result = ((Result) Iterables.getOnlyElement(seq.toList())).getValue(); @@ -165,8 +159,8 @@ protected void testIngestAndTimeseriesQueryHelper( public abstract void testIngestMultipleSegmentsAndTimeseriesQuery() throws Exception; protected void testIngestMultipleSegmentsAndTimeseriesQueryHelper( - String jsonAggregatorsFile, - String jsonQueryFile, + List ingestionAggregators, + TimeseriesQuery query, String expected ) throws Exception { @@ -175,10 +169,7 @@ protected void testIngestMultipleSegmentsAndTimeseriesQueryHelper( new File(this.getClass().getResource("/" + "bd_test_data.csv").getFile()), SCHEMA, FORMAT, - Resources.asCharSource( - this.getClass().getResource("/" + jsonAggregatorsFile), - StandardCharsets.UTF_8 - ).read(), + ingestionAggregators, segmentDir1, 0, Granularities.NONE, @@ -189,10 +180,7 @@ protected void testIngestMultipleSegmentsAndTimeseriesQueryHelper( new File(this.getClass().getResource("/" + "bd_test_zero_data.csv").getFile()), SCHEMA, FORMAT, - Resources.asCharSource( - this.getClass().getResource("/" + jsonAggregatorsFile), - StandardCharsets.UTF_8 - ).read(), + ingestionAggregators, segmentDir2, 0, Granularities.NONE, @@ -201,10 +189,7 @@ protected void testIngestMultipleSegmentsAndTimeseriesQueryHelper( Sequence seq = helper.runQueryOnSegments( Arrays.asList(segmentDir1, segmentDir2), - Resources.asCharSource( - this.getClass().getResource("/" + jsonQueryFile), - StandardCharsets.UTF_8 - ).read() + query ); TimeseriesResultValue result = ((Result) Iterables.getOnlyElement(seq.toList())).getValue(); @@ -221,4 +206,3 @@ protected void testIngestMultipleSegmentsAndTimeseriesQueryHelper( } } - diff --git a/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/aggregator/max/CompressedBigDecimalMaxAggregatorGroupByTest.java b/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/aggregator/max/CompressedBigDecimalMaxAggregatorGroupByTest.java index 8bb130cff8a9..e34ee644b3ae 100644 --- a/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/aggregator/max/CompressedBigDecimalMaxAggregatorGroupByTest.java +++ b/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/aggregator/max/CompressedBigDecimalMaxAggregatorGroupByTest.java @@ -21,6 +21,8 @@ import org.apache.druid.compressedbigdecimal.CompressedBigDecimalGroupByQueryConfig; import org.apache.druid.compressedbigdecimal.aggregator.CompressedBigDecimalAggregatorGroupByTestBase; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.query.groupby.GroupByQueryConfig; import org.apache.druid.query.groupby.GroupByQueryRunnerTest; import org.junit.runners.Parameterized; @@ -50,8 +52,17 @@ public static Collection constructorFeeder() { List constructors = new ArrayList<>(); CompressedBigDecimalGroupByQueryConfig cbdGroupByQueryConfig = new CompressedBigDecimalGroupByQueryConfig( - "bd_max_test_groupby_query.json", - "bd_max_test_aggregators.json", + List.of(new CompressedBigDecimalMaxAggregatorFactory("bigDecimalRevenue", "revenue", 3, 9, null)), + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval("2017-01-01T00:00:00.000Z/P1D") + .setAggregatorSpecs( + new CompressedBigDecimalMaxAggregatorFactory("cbdRevenueFromString", "revenue", 3, 9, null), + new CompressedBigDecimalMaxAggregatorFactory("cbdRevenueFromLong", "longRevenue", 3, 9, null), + new CompressedBigDecimalMaxAggregatorFactory("cbdRevenueFromDouble", "doubleRevenue", 3, 9, null) + ) + .build(), "9999999999.000000000", "9999999999.000000000", "9999999999.000000000" diff --git a/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/aggregator/max/CompressedBigDecimalMaxAggregatorTimeseriesTest.java b/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/aggregator/max/CompressedBigDecimalMaxAggregatorTimeseriesTest.java index d6a2c85dcf0a..e084dcde6a0d 100644 --- a/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/aggregator/max/CompressedBigDecimalMaxAggregatorTimeseriesTest.java +++ b/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/aggregator/max/CompressedBigDecimalMaxAggregatorTimeseriesTest.java @@ -20,15 +20,30 @@ package org.apache.druid.compressedbigdecimal.aggregator.max; import org.apache.druid.compressedbigdecimal.aggregator.CompressedBigDecimalAggregatorTimeseriesTestBase; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.query.Druids; +import org.apache.druid.query.filter.NotDimFilter; +import org.apache.druid.query.filter.SelectorDimFilter; +import org.apache.druid.query.timeseries.TimeseriesQuery; + +import java.util.List; public class CompressedBigDecimalMaxAggregatorTimeseriesTest extends CompressedBigDecimalAggregatorTimeseriesTestBase { + private static final TimeseriesQuery QUERY = Druids.newTimeseriesQueryBuilder() + .dataSource("test_datasource") + .granularity(Granularities.ALL) + .aggregators(new CompressedBigDecimalMaxAggregatorFactory("cbdStringRevenue", "revenue", 3, 9, null)) + .filters(new NotDimFilter(new SelectorDimFilter("property", "XXX", null))) + .intervals("2017-01-01T00:00:00.000Z/P1D") + .build(); + @Override public void testIngestAndTimeseriesQuery() throws Exception { testIngestAndTimeseriesQueryHelper( - "bd_max_test_aggregators.json", - "bd_max_test_timeseries_query.json", + List.of(new CompressedBigDecimalMaxAggregatorFactory("bigDecimalRevenue", "revenue", 3, 9, null)), + QUERY, "9999999999.000000000" ); } @@ -37,10 +52,9 @@ public void testIngestAndTimeseriesQuery() throws Exception public void testIngestMultipleSegmentsAndTimeseriesQuery() throws Exception { testIngestMultipleSegmentsAndTimeseriesQueryHelper( - "bd_max_test_aggregators.json", - "bd_max_test_timeseries_query.json", + List.of(new CompressedBigDecimalMaxAggregatorFactory("bigDecimalRevenue", "revenue", 3, 9, null)), + QUERY, "9999999999.000000000" ); } } - diff --git a/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/aggregator/min/CompressedBigDecimalMinAggregatorGroupByTest.java b/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/aggregator/min/CompressedBigDecimalMinAggregatorGroupByTest.java index f27c2bda6823..86d126583af0 100644 --- a/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/aggregator/min/CompressedBigDecimalMinAggregatorGroupByTest.java +++ b/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/aggregator/min/CompressedBigDecimalMinAggregatorGroupByTest.java @@ -21,6 +21,8 @@ import org.apache.druid.compressedbigdecimal.CompressedBigDecimalGroupByQueryConfig; import org.apache.druid.compressedbigdecimal.aggregator.CompressedBigDecimalAggregatorGroupByTestBase; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.query.groupby.GroupByQueryConfig; import org.apache.druid.query.groupby.GroupByQueryRunnerTest; import org.junit.runner.RunWith; @@ -52,8 +54,17 @@ public static Collection constructorFeeder() { List constructors = new ArrayList<>(); CompressedBigDecimalGroupByQueryConfig cbdGroupByQueryConfig = new CompressedBigDecimalGroupByQueryConfig( - "bd_min_test_groupby_query.json", - "bd_min_test_aggregators.json", + List.of(new CompressedBigDecimalMinAggregatorFactory("bigDecimalRevenue", "revenue", 3, 9, null)), + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval("2017-01-01T00:00:00.000Z/P1D") + .setAggregatorSpecs( + new CompressedBigDecimalMinAggregatorFactory("cbdRevenueFromString", "revenue", 3, 9, null), + new CompressedBigDecimalMinAggregatorFactory("cbdRevenueFromLong", "longRevenue", 3, 9, null), + new CompressedBigDecimalMinAggregatorFactory("cbdRevenueFromDouble", "doubleRevenue", 3, 9, null) + ) + .build(), "-1.000000000", "-1.000000000", "-1.000000000" diff --git a/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/aggregator/min/CompressedBigDecimalMinAggregatorTimeseriesTest.java b/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/aggregator/min/CompressedBigDecimalMinAggregatorTimeseriesTest.java index a38d4dc9a973..67914a113acd 100644 --- a/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/aggregator/min/CompressedBigDecimalMinAggregatorTimeseriesTest.java +++ b/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/aggregator/min/CompressedBigDecimalMinAggregatorTimeseriesTest.java @@ -20,15 +20,30 @@ package org.apache.druid.compressedbigdecimal.aggregator.min; import org.apache.druid.compressedbigdecimal.aggregator.CompressedBigDecimalAggregatorTimeseriesTestBase; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.query.Druids; +import org.apache.druid.query.filter.NotDimFilter; +import org.apache.druid.query.filter.SelectorDimFilter; +import org.apache.druid.query.timeseries.TimeseriesQuery; + +import java.util.List; public class CompressedBigDecimalMinAggregatorTimeseriesTest extends CompressedBigDecimalAggregatorTimeseriesTestBase { + private static final TimeseriesQuery QUERY = Druids.newTimeseriesQueryBuilder() + .dataSource("test_datasource") + .granularity(Granularities.ALL) + .aggregators(new CompressedBigDecimalMinAggregatorFactory("cbdStringRevenue", "revenue", 3, 9, null)) + .filters(new NotDimFilter(new SelectorDimFilter("property", "XXX", null))) + .intervals("2017-01-01T00:00:00.000Z/P1D") + .build(); + @Override public void testIngestAndTimeseriesQuery() throws Exception { testIngestAndTimeseriesQueryHelper( - "bd_min_test_aggregators.json", - "bd_min_test_timeseries_query.json", + List.of(new CompressedBigDecimalMinAggregatorFactory("bigDecimalRevenue", "revenue", 3, 9, null)), + QUERY, "-1.000000000" ); } @@ -37,10 +52,9 @@ public void testIngestAndTimeseriesQuery() throws Exception public void testIngestMultipleSegmentsAndTimeseriesQuery() throws Exception { testIngestMultipleSegmentsAndTimeseriesQueryHelper( - "bd_min_test_aggregators.json", - "bd_min_test_timeseries_query.json", + List.of(new CompressedBigDecimalMinAggregatorFactory("bigDecimalRevenue", "revenue", 3, 9, null)), + QUERY, "-1.000000000" ); } } - diff --git a/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/aggregator/sum/CompressedBigDecimalSumAggregatorGroupByTest.java b/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/aggregator/sum/CompressedBigDecimalSumAggregatorGroupByTest.java index e60cff5382be..e765497d74a8 100644 --- a/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/aggregator/sum/CompressedBigDecimalSumAggregatorGroupByTest.java +++ b/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/aggregator/sum/CompressedBigDecimalSumAggregatorGroupByTest.java @@ -21,6 +21,8 @@ import org.apache.druid.compressedbigdecimal.CompressedBigDecimalGroupByQueryConfig; import org.apache.druid.compressedbigdecimal.aggregator.CompressedBigDecimalAggregatorGroupByTestBase; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.query.groupby.GroupByQueryConfig; import org.apache.druid.query.groupby.GroupByQueryRunnerTest; import org.junit.runners.Parameterized; @@ -50,8 +52,17 @@ public static Collection constructorFeeder() { List constructors = new ArrayList<>(); CompressedBigDecimalGroupByQueryConfig cbdGroupByQueryConfig = new CompressedBigDecimalGroupByQueryConfig( - "bd_sum_test_groupby_query.json", - "bd_sum_test_aggregators.json", + List.of(new CompressedBigDecimalSumAggregatorFactory("bigDecimalRevenue", "revenue", 3, 9, null)), + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval("2017-01-01T00:00:00.000Z/P1D") + .setAggregatorSpecs( + new CompressedBigDecimalSumAggregatorFactory("cbdRevenueFromString", "revenue", 3, 9, null), + new CompressedBigDecimalSumAggregatorFactory("cbdRevenueFromLong", "longRevenue", 3, 9, null), + new CompressedBigDecimalSumAggregatorFactory("cbdRevenueFromDouble", "doubleRevenue", 3, 9, null) + ) + .build(), "15000000010.000000005", "10000000010.000000000", "15000000010.500000000" diff --git a/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/aggregator/sum/CompressedBigDecimalSumAggregatorTimeseriesTest.java b/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/aggregator/sum/CompressedBigDecimalSumAggregatorTimeseriesTest.java index 74ae24ee7e6a..29b11f70cf79 100644 --- a/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/aggregator/sum/CompressedBigDecimalSumAggregatorTimeseriesTest.java +++ b/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/aggregator/sum/CompressedBigDecimalSumAggregatorTimeseriesTest.java @@ -20,15 +20,30 @@ package org.apache.druid.compressedbigdecimal.aggregator.sum; import org.apache.druid.compressedbigdecimal.aggregator.CompressedBigDecimalAggregatorTimeseriesTestBase; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.query.Druids; +import org.apache.druid.query.filter.NotDimFilter; +import org.apache.druid.query.filter.SelectorDimFilter; +import org.apache.druid.query.timeseries.TimeseriesQuery; + +import java.util.List; public class CompressedBigDecimalSumAggregatorTimeseriesTest extends CompressedBigDecimalAggregatorTimeseriesTestBase { + private static final TimeseriesQuery QUERY = Druids.newTimeseriesQueryBuilder() + .dataSource("test_datasource") + .granularity(Granularities.ALL) + .aggregators(new CompressedBigDecimalSumAggregatorFactory("cbdStringRevenue", "revenue", 3, 9, null)) + .filters(new NotDimFilter(new SelectorDimFilter("property", "XXX", null))) + .intervals("2017-01-01T00:00:00.000Z/P1D") + .build(); + @Override public void testIngestAndTimeseriesQuery() throws Exception { testIngestAndTimeseriesQueryHelper( - "bd_sum_test_aggregators.json", - "bd_sum_test_timeseries_query.json", + List.of(new CompressedBigDecimalSumAggregatorFactory("bigDecimalRevenue", "revenue", 3, 9, null)), + QUERY, "15000000010.000000005" ); } @@ -37,10 +52,9 @@ public void testIngestAndTimeseriesQuery() throws Exception public void testIngestMultipleSegmentsAndTimeseriesQuery() throws Exception { testIngestMultipleSegmentsAndTimeseriesQueryHelper( - "bd_sum_test_aggregators.json", - "bd_sum_test_timeseries_query.json", + List.of(new CompressedBigDecimalSumAggregatorFactory("bigDecimalRevenue", "revenue", 3, 9, null)), + QUERY, "15000000010.000000005" ); } } - diff --git a/extensions-contrib/compressed-bigdecimal/src/test/resources/bd_max_test_aggregators.json b/extensions-contrib/compressed-bigdecimal/src/test/resources/bd_max_test_aggregators.json deleted file mode 100644 index b97dc7f0b5f6..000000000000 --- a/extensions-contrib/compressed-bigdecimal/src/test/resources/bd_max_test_aggregators.json +++ /dev/null @@ -1,9 +0,0 @@ -[ - { - "type": "compressedBigDecimalMax", - "name": "bigDecimalRevenue", - "fieldName": "revenue", - "scale": 9, - "size": 3 - } -] diff --git a/extensions-contrib/compressed-bigdecimal/src/test/resources/bd_max_test_groupby_query.json b/extensions-contrib/compressed-bigdecimal/src/test/resources/bd_max_test_groupby_query.json deleted file mode 100644 index a28938866ee3..000000000000 --- a/extensions-contrib/compressed-bigdecimal/src/test/resources/bd_max_test_groupby_query.json +++ /dev/null @@ -1,33 +0,0 @@ -{ - "queryType": "groupBy", - "dataSource": "test_datasource", - "granularity": "ALL", - "dimensions": [ - ], - "aggregations": [ - { - "type": "compressedBigDecimalMax", - "name": "cbdRevenueFromString", - "fieldName": "revenue", - "scale": 9, - "size": 3 - }, - { - "type": "compressedBigDecimalMax", - "name": "cbdRevenueFromLong", - "fieldName": "longRevenue", - "scale": 9, - "size": 3 - }, - { - "type": "compressedBigDecimalMax", - "name": "cbdRevenueFromDouble", - "fieldName": "doubleRevenue", - "scale": 9, - "size": 3 - } - ], - "intervals": [ - "2017-01-01T00:00:00.000Z/P1D" - ] -} diff --git a/extensions-contrib/compressed-bigdecimal/src/test/resources/bd_max_test_timeseries_query.json b/extensions-contrib/compressed-bigdecimal/src/test/resources/bd_max_test_timeseries_query.json deleted file mode 100644 index 85e563dac5a1..000000000000 --- a/extensions-contrib/compressed-bigdecimal/src/test/resources/bd_max_test_timeseries_query.json +++ /dev/null @@ -1,25 +0,0 @@ -{ - "queryType": "timeseries", - "dataSource": "test_datasource", - "granularity": "ALL", - "aggregations": [ - { - "type": "compressedBigDecimalMax", - "name": "cbdStringRevenue", - "fieldName": "revenue", - "scale": 9, - "size": 3 - } - ], - "filter": { - "type": "not", - "field": { - "type": "selector", - "dimension": "property", - "value": "XXX" - } - }, - "intervals": [ - "2017-01-01T00:00:00.000Z/P1D" - ] -} diff --git a/extensions-contrib/compressed-bigdecimal/src/test/resources/bd_min_test_aggregators.json b/extensions-contrib/compressed-bigdecimal/src/test/resources/bd_min_test_aggregators.json deleted file mode 100644 index 7618418b2345..000000000000 --- a/extensions-contrib/compressed-bigdecimal/src/test/resources/bd_min_test_aggregators.json +++ /dev/null @@ -1,9 +0,0 @@ -[ - { - "type": "compressedBigDecimalMin", - "name": "bigDecimalRevenue", - "fieldName": "revenue", - "scale": 9, - "size": 3 - } -] diff --git a/extensions-contrib/compressed-bigdecimal/src/test/resources/bd_min_test_groupby_query.json b/extensions-contrib/compressed-bigdecimal/src/test/resources/bd_min_test_groupby_query.json deleted file mode 100644 index 284c4050e531..000000000000 --- a/extensions-contrib/compressed-bigdecimal/src/test/resources/bd_min_test_groupby_query.json +++ /dev/null @@ -1,33 +0,0 @@ -{ - "queryType": "groupBy", - "dataSource": "test_datasource", - "granularity": "ALL", - "dimensions": [ - ], - "aggregations": [ - { - "type": "compressedBigDecimalMin", - "name": "cbdRevenueFromString", - "fieldName": "revenue", - "scale": 9, - "size": 3 - }, - { - "type": "compressedBigDecimalMin", - "name": "cbdRevenueFromLong", - "fieldName": "longRevenue", - "scale": 9, - "size": 3 - }, - { - "type": "compressedBigDecimalMin", - "name": "cbdRevenueFromDouble", - "fieldName": "doubleRevenue", - "scale": 9, - "size": 3 - } - ], - "intervals": [ - "2017-01-01T00:00:00.000Z/P1D" - ] -} diff --git a/extensions-contrib/compressed-bigdecimal/src/test/resources/bd_min_test_timeseries_query.json b/extensions-contrib/compressed-bigdecimal/src/test/resources/bd_min_test_timeseries_query.json deleted file mode 100644 index 5ceebe030af1..000000000000 --- a/extensions-contrib/compressed-bigdecimal/src/test/resources/bd_min_test_timeseries_query.json +++ /dev/null @@ -1,25 +0,0 @@ -{ - "queryType": "timeseries", - "dataSource": "test_datasource", - "granularity": "ALL", - "aggregations": [ - { - "type": "compressedBigDecimalMin", - "name": "cbdStringRevenue", - "fieldName": "revenue", - "scale": 9, - "size": 3 - } - ], - "filter": { - "type": "not", - "field": { - "type": "selector", - "dimension": "property", - "value": "XXX" - } - }, - "intervals": [ - "2017-01-01T00:00:00.000Z/P1D" - ] -} diff --git a/extensions-contrib/compressed-bigdecimal/src/test/resources/bd_sum_test_aggregators.json b/extensions-contrib/compressed-bigdecimal/src/test/resources/bd_sum_test_aggregators.json deleted file mode 100644 index 53b12eb83d63..000000000000 --- a/extensions-contrib/compressed-bigdecimal/src/test/resources/bd_sum_test_aggregators.json +++ /dev/null @@ -1,9 +0,0 @@ -[ - { - "type": "compressedBigDecimalSum", - "name": "bigDecimalRevenue", - "fieldName": "revenue", - "scale": 9, - "size": 3 - } -] diff --git a/extensions-contrib/compressed-bigdecimal/src/test/resources/bd_sum_test_groupby_query.json b/extensions-contrib/compressed-bigdecimal/src/test/resources/bd_sum_test_groupby_query.json deleted file mode 100644 index b580c19f439a..000000000000 --- a/extensions-contrib/compressed-bigdecimal/src/test/resources/bd_sum_test_groupby_query.json +++ /dev/null @@ -1,33 +0,0 @@ -{ - "queryType": "groupBy", - "dataSource": "test_datasource", - "granularity": "ALL", - "dimensions": [ - ], - "aggregations": [ - { - "type": "compressedBigDecimalSum", - "name": "cbdRevenueFromString", - "fieldName": "revenue", - "scale": 9, - "size": 3 - }, - { - "type": "compressedBigDecimalSum", - "name": "cbdRevenueFromLong", - "fieldName": "longRevenue", - "scale": 9, - "size": 3 - }, - { - "type": "compressedBigDecimalSum", - "name": "cbdRevenueFromDouble", - "fieldName": "doubleRevenue", - "scale": 9, - "size": 3 - } - ], - "intervals": [ - "2017-01-01T00:00:00.000Z/P1D" - ] -} diff --git a/extensions-contrib/compressed-bigdecimal/src/test/resources/bd_sum_test_timeseries_query.json b/extensions-contrib/compressed-bigdecimal/src/test/resources/bd_sum_test_timeseries_query.json deleted file mode 100644 index 44a4880f85b8..000000000000 --- a/extensions-contrib/compressed-bigdecimal/src/test/resources/bd_sum_test_timeseries_query.json +++ /dev/null @@ -1,25 +0,0 @@ -{ - "queryType": "timeseries", - "dataSource": "test_datasource", - "granularity": "ALL", - "aggregations": [ - { - "type": "compressedBigDecimalSum", - "name": "cbdStringRevenue", - "fieldName": "revenue", - "scale": 9, - "size": 3 - } - ], - "filter": { - "type": "not", - "field": { - "type": "selector", - "dimension": "property", - "value": "XXX" - } - }, - "intervals": [ - "2017-01-01T00:00:00.000Z/P1D" - ] -} diff --git a/extensions-contrib/ddsketch/src/test/java/org/apache/druid/query/aggregation/ddsketch/DDSketchAggregatorTest.java b/extensions-contrib/ddsketch/src/test/java/org/apache/druid/query/aggregation/ddsketch/DDSketchAggregatorTest.java index 240d60f199ca..651dc86d1929 100644 --- a/extensions-contrib/ddsketch/src/test/java/org/apache/druid/query/aggregation/ddsketch/DDSketchAggregatorTest.java +++ b/extensions-contrib/ddsketch/src/test/java/org/apache/druid/query/aggregation/ddsketch/DDSketchAggregatorTest.java @@ -26,10 +26,14 @@ import org.apache.druid.data.input.impl.DimensionsSpec; import org.apache.druid.data.input.impl.TimestampSpec; import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.query.aggregation.AggregationTestHelper; import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.DoubleSumAggregatorFactory; +import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator; +import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.query.groupby.GroupByQueryConfig; import org.apache.druid.query.groupby.GroupByQueryRunnerTest; import org.apache.druid.query.groupby.ResultRow; @@ -44,6 +48,7 @@ import java.io.File; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.List; @RunWith(Parameterized.class) @@ -104,29 +109,24 @@ public void buildingSketchesAtIngestionTime() throws Exception DelimitedInputFormat.forColumns( List.of("timestamp", "sequenceNumber", "product", "value") ), - "[{\"type\": \"ddSketch\", \"name\": \"sketch\", \"fieldName\": \"value\", \"relativeError\": 0.01}]", + List.of(new DDSketchAggregatorFactory("sketch", "value", 0.01, 1000)), 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"groupBy\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"dimensions\": [],", - " \"aggregations\": [", - " {\"type\": \"ddSketch\", \"name\": \"merged_sketch\", \"fieldName\": \"sketch\", " - + "\"relativeError\": " - + "0.01, \"numBins\": 10000}", - " ],", - " \"postAggregations\": [", - " {\"type\": \"quantilesFromDDSketch\", \"name\": \"quantiles\", \"fractions\": [0, 0.5, 1], " - + "\"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"merged_sketch\"}}", - " ],", - " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", - "}" - ) + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setDimensions(Collections.emptyList()) + .setAggregatorSpecs(new DDSketchAggregatorFactory("merged_sketch", "sketch", 0.01, 10000)) + .setPostAggregatorSpecs( + new DDSketchToQuantilesPostAggregator( + "quantiles", + new FieldAccessPostAggregator("merged_sketch", "merged_sketch"), + new double[]{0, 0.5, 1} + ) + ) + .setInterval(Intervals.of("2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z")) + .build() ); List results = seq.toList(); Assert.assertEquals(1, results.size()); @@ -136,7 +136,7 @@ public void buildingSketchesAtIngestionTime() throws Exception Object quantilesObject = row.get(1); // "quantiles" Assert.assertTrue(quantilesObject instanceof double[]); double[] quantiles = (double[]) quantilesObject; - + Assert.assertEquals(0.001, quantiles[0], 0.0006); // min value Assert.assertEquals(0.5, quantiles[1], 0.05); // median value Assert.assertEquals(1, quantiles[2], 0.05); // max value @@ -155,26 +155,24 @@ public void buildingSketchesAtQueryTime() throws Exception DelimitedInputFormat.forColumns( List.of("timestamp", "sequenceNumber", "product", "value") ), - "[{\"type\": \"doubleSum\", \"name\": \"value\", \"fieldName\": \"value\"}]", + List.of(new DoubleSumAggregatorFactory("value", "value")), 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"groupBy\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"dimensions\": [],", - " \"aggregations\": [", - " {\"type\": \"ddSketch\", \"name\": \"sketch\", \"fieldName\": \"value\", \"relativeError\": 0.005, \"numBins\": 2000}", - " ],", - " \"postAggregations\": [", - " {\"type\": \"quantilesFromDDSketch\", \"name\": \"quantiles\", \"fractions\": [0.99, 0.995, 0.999, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}}", - " ],", - " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", - "}" - ) + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setDimensions(Collections.emptyList()) + .setAggregatorSpecs(new DDSketchAggregatorFactory("sketch", "value", 0.005, 2000)) + .setPostAggregatorSpecs( + new DDSketchToQuantilesPostAggregator( + "quantiles", + new FieldAccessPostAggregator("sketch", "sketch"), + new double[]{0.99, 0.995, 0.999, 1} + ) + ) + .setInterval(Intervals.of("2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z")) + .build() ); List results = seq.toList(); Assert.assertEquals(1, results.size()); diff --git a/extensions-contrib/momentsketch/src/test/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentsSketchAggregatorTest.java b/extensions-contrib/momentsketch/src/test/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentsSketchAggregatorTest.java index 745abf00a242..51d08bc5d331 100644 --- a/extensions-contrib/momentsketch/src/test/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentsSketchAggregatorTest.java +++ b/extensions-contrib/momentsketch/src/test/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentsSketchAggregatorTest.java @@ -28,11 +28,15 @@ import org.apache.druid.data.input.impl.StringDimensionSchema; import org.apache.druid.data.input.impl.TimestampSpec; import org.apache.druid.initialization.DruidModule; +import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.query.aggregation.AggregationTestHelper; +import org.apache.druid.query.aggregation.DoubleSumAggregatorFactory; import org.apache.druid.query.aggregation.momentsketch.MomentSketchModule; import org.apache.druid.query.aggregation.momentsketch.MomentSketchWrapper; +import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator; +import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.query.groupby.GroupByQueryConfig; import org.apache.druid.query.groupby.GroupByQueryRunnerTest; import org.apache.druid.query.groupby.ResultRow; @@ -47,6 +51,7 @@ import java.io.File; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.List; @RunWith(Parameterized.class) @@ -91,37 +96,53 @@ public void buildingSketchesAtIngestionTime() throws Exception DelimitedInputFormat.forColumns( List.of("timestamp", "sequenceNumber", "product", "value", "valueWithNulls") ), - "[" - + "{\"type\": \"momentSketch\", \"name\": \"sketch\", \"fieldName\": \"value\", \"k\": 10, \"compress\": true}," - + "{\"type\": \"momentSketch\", \"name\": \"sketchWithNulls\", \"fieldName\": \"valueWithNulls\", \"k\": 10, \"compress\": true}" - + "]", + List.of( + new MomentSketchAggregatorFactory("sketch", "value", 10, true), + new MomentSketchAggregatorFactory("sketchWithNulls", "valueWithNulls", 10, true) + ), 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"groupBy\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"dimensions\": [],", - " \"aggregations\": [", - " {\"type\": \"momentSketchMerge\", \"name\": \"sketch\", \"fieldName\": \"sketch\", \"k\": 10, \"compress\": true},", - " {\"type\": \"momentSketchMerge\", \"name\": \"sketchWithNulls\", \"fieldName\": \"sketchWithNulls\", \"k\": 10, \"compress\": true}", - " ],", - " \"postAggregations\": [", - " {\"type\": \"momentSketchSolveQuantiles\", \"name\": \"quantiles\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"momentSketchMin\", \"name\": \"min\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"momentSketchMax\", \"name\": \"max\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"momentSketchSolveQuantiles\", \"name\": \"quantilesWithNulls\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketchWithNulls\"}},", - " {\"type\": \"momentSketchMin\", \"name\": \"minWithNulls\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketchWithNulls\"}},", - " {\"type\": \"momentSketchMax\", \"name\": \"maxWithNulls\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketchWithNulls\"}}", - " ],", - " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", - "}" - ) + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setDimensions(Collections.emptyList()) + .setAggregatorSpecs( + new MomentSketchMergeAggregatorFactory("sketch", 10, true), + new MomentSketchMergeAggregatorFactory("sketchWithNulls", 10, true) + ) + .setPostAggregatorSpecs( + new MomentSketchQuantilePostAggregator( + "quantiles", + new FieldAccessPostAggregator("sketch", "sketch"), + new double[]{0, 0.5, 1} + ), + new MomentSketchMinPostAggregator( + "min", + new FieldAccessPostAggregator("sketch", "sketch") + ), + new MomentSketchMaxPostAggregator( + "max", + new FieldAccessPostAggregator("sketch", "sketch") + ), + new MomentSketchQuantilePostAggregator( + "quantilesWithNulls", + new FieldAccessPostAggregator("sketchWithNulls", "sketchWithNulls"), + new double[]{0, 0.5, 1} + ), + new MomentSketchMinPostAggregator( + "minWithNulls", + new FieldAccessPostAggregator("sketchWithNulls", "sketchWithNulls") + ), + new MomentSketchMaxPostAggregator( + "maxWithNulls", + new FieldAccessPostAggregator("sketchWithNulls", "sketchWithNulls") + ) + ) + .setInterval(Intervals.of("2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z")) + .build() ); List results = seq.toList(); Assert.assertEquals(1, results.size()); @@ -187,24 +208,20 @@ public void buildingSketchesAtQueryTime() throws Exception DelimitedInputFormat.forColumns( List.of("timestamp", "sequenceNumber", "product", "value", "valueWithNulls") ), - "[{\"type\": \"doubleSum\", \"name\": \"value\", \"fieldName\": \"value\"}]", + List.of(new DoubleSumAggregatorFactory("value", "value")), 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"groupBy\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"dimensions\": [],", - " \"aggregations\": [", - " {\"type\": \"momentSketch\", \"name\": \"sketch\", \"fieldName\": \"value\", \"k\": 10},", - " {\"type\": \"momentSketch\", \"name\": \"sketchWithNulls\", \"fieldName\": \"valueWithNulls\", \"k\": 10}", - " ],", - " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", - "}" - ) + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setDimensions(Collections.emptyList()) + .setAggregatorSpecs( + new MomentSketchAggregatorFactory("sketch", "value", 10, null), + new MomentSketchAggregatorFactory("sketchWithNulls", "valueWithNulls", 10, null) + ) + .setInterval(Intervals.of("2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z")) + .build() ); List results = seq.toList(); @@ -220,4 +237,3 @@ public void buildingSketchesAtQueryTime() throws Exception Assert.assertEquals(377.0, sketchObjectWithNulls.getPowerSums()[0], 1e-10); } } - diff --git a/extensions-contrib/spectator-histogram/src/test/java/org/apache/druid/spectator/histogram/SpectatorHistogramAggregatorTest.java b/extensions-contrib/spectator-histogram/src/test/java/org/apache/druid/spectator/histogram/SpectatorHistogramAggregatorTest.java index 157072653896..31aff8fb272e 100644 --- a/extensions-contrib/spectator-histogram/src/test/java/org/apache/druid/spectator/histogram/SpectatorHistogramAggregatorTest.java +++ b/extensions-contrib/spectator-histogram/src/test/java/org/apache/druid/spectator/histogram/SpectatorHistogramAggregatorTest.java @@ -44,6 +44,12 @@ import org.apache.druid.query.aggregation.AggregatorUtil; import org.apache.druid.query.aggregation.CountAggregatorFactory; import org.apache.druid.query.aggregation.DoubleSumAggregatorFactory; +import org.apache.druid.query.aggregation.LongSumAggregatorFactory; +import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator; +import org.apache.druid.query.dimension.DefaultDimensionSpec; +import org.apache.druid.query.filter.AndDimFilter; +import org.apache.druid.query.filter.NotDimFilter; +import org.apache.druid.query.filter.SelectorDimFilter; import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.query.groupby.GroupByQueryConfig; import org.apache.druid.query.groupby.GroupByQueryRunnerTest; @@ -54,6 +60,7 @@ import org.apache.druid.query.metadata.metadata.ColumnAnalysis; import org.apache.druid.query.metadata.metadata.SegmentAnalysis; import org.apache.druid.query.metadata.metadata.SegmentMetadataQuery; +import org.apache.druid.query.timeseries.TimeseriesQuery; import org.apache.druid.query.timeseries.TimeseriesResultValue; import org.apache.druid.segment.IncrementalIndexSegment; import org.apache.druid.segment.IndexBuilder; @@ -169,29 +176,17 @@ public void testBuildingHistogramQueryTime() throws Exception new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), INPUT_ROW_SCHEMA, INPUT_FORMAT, - String.join( - "\n", - "[", - " {\"type\": \"longSum\", \"name\": \"cost_sum\", \"fieldName\": \"cost\"}", - "]" - ), + List.of(new LongSumAggregatorFactory("cost_sum", "cost")), 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"groupBy\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"dimensions\": [\"product\"],", - " \"aggregations\": [", - " {\"type\": \"spectatorHistogram\", \"name\": \"cost_histogram\", \"fieldName\": " - + "\"cost_sum\"}", - " ],", - " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", - "}" - ) + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setDimensions(new DefaultDimensionSpec("product", "product")) + .setAggregatorSpecs(new SpectatorHistogramAggregatorFactory("cost_histogram", "cost_sum")) + .setInterval("2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z") + .build() ); List results = seq.toList(); assertResultsMatch(results, 0, "A"); @@ -206,29 +201,16 @@ public void testBuildingAndMergingHistograms() throws Exception new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), INPUT_ROW_SCHEMA, INPUT_FORMAT, - String.join( - "\n", - "[", - " {\"type\": \"spectatorHistogram\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", - "]" - ), + List.of(new SpectatorHistogramAggregatorFactory("histogram", "cost")), 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"groupBy\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"dimenions\": [],", - " \"aggregations\": [", - " {\"type\": \"spectatorHistogram\", \"name\": \"merged_cost_histogram\", \"fieldName\": " - + "\"histogram\"}", - " ],", - " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", - "}" - ) + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setAggregatorSpecs(new SpectatorHistogramAggregatorFactory("merged_cost_histogram", "histogram")) + .setInterval("2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z") + .build() ); SpectatorHistogram expected = new SpectatorHistogram(); expected.add(PercentileBuckets.indexOf(10), 1L); @@ -245,32 +227,21 @@ public void testBuildingAndMergingHistograms() throws Exception @Test public void testBuildingAndMergingHistogramsTimeseriesQuery() throws Exception { - Object rawseq = timeSeriesHelper.createIndexAndRunQueryOnSegment( + TimeseriesQuery tsQuery = Druids.newTimeseriesQueryBuilder() + .dataSource("test_datasource") + .granularity(Granularities.ALL) + .aggregators(new SpectatorHistogramAggregatorFactory("merged_cost_histogram", "histogram")) + .intervals("2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z") + .build(); + Sequence> seq = timeSeriesHelper.createIndexAndRunQueryOnSegment( new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), INPUT_ROW_SCHEMA, INPUT_FORMAT, - String.join( - "\n", - "[", - " {\"type\": \"spectatorHistogram\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", - "]" - ), + List.of(new SpectatorHistogramAggregatorFactory("histogram", "cost")), 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"timeseries\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"aggregations\": [", - " {\"type\": \"spectatorHistogram\", \"name\": \"merged_cost_histogram\", \"fieldName\": " - + "\"histogram\"}", - " ],", - " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", - "}" - ) + tsQuery ); SpectatorHistogram expected = new SpectatorHistogram(); expected.add(PercentileBuckets.indexOf(10), 1L); @@ -279,7 +250,6 @@ public void testBuildingAndMergingHistogramsTimeseriesQuery() throws Exception expected.add(PercentileBuckets.indexOf(50), 3L); expected.add(PercentileBuckets.indexOf(20000), 1L); - Sequence> seq = (Sequence>) rawseq; List> results = seq.toList(); Assert.assertEquals(1, results.size()); SpectatorHistogram value = (SpectatorHistogram) results.get(0).getValue().getMetric("merged_cost_histogram"); @@ -293,29 +263,17 @@ public void testBuildingAndMergingGroupbyHistograms() throws Exception new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), INPUT_ROW_SCHEMA, INPUT_FORMAT, - String.join( - "\n", - "[", - " {\"type\": \"spectatorHistogram\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", - "]" - ), + List.of(new SpectatorHistogramAggregatorFactory("histogram", "cost")), 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"groupBy\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"dimensions\": [\"product\"],", - " \"aggregations\": [", - " {\"type\": \"spectatorHistogram\", \"name\": \"merged_histogram\", \"fieldName\": " - + "\"histogram\"}", - " ],", - " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", - "}" - ) + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setDimensions(new DefaultDimensionSpec("product", "product")) + .setAggregatorSpecs(new SpectatorHistogramAggregatorFactory("merged_histogram", "histogram")) + .setInterval("2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z") + .build() ); List results = seq.toList(); @@ -348,31 +306,19 @@ public void testBuildingAndCountingHistograms() throws Exception new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), INPUT_ROW_SCHEMA, INPUT_FORMAT, - String.join( - "\n", - "[", - " {\"type\": \"spectatorHistogram\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", - "]" - ), + List.of(new SpectatorHistogramAggregatorFactory("histogram", "cost")), 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"groupBy\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"dimenions\": [],", - " \"aggregations\": [", - " {\"type\": \"longSum\", \"name\": \"count_histogram\", \"fieldName\": " - + "\"histogram\"},", - " {\"type\": \"doubleSum\", \"name\": \"double_count_histogram\", \"fieldName\": " - + "\"histogram\"}", - " ],", - " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", - "}" - ) + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setAggregatorSpecs( + new LongSumAggregatorFactory("count_histogram", "histogram"), + new DoubleSumAggregatorFactory("double_count_histogram", "histogram") + ) + .setInterval("2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z") + .build() ); List results = seq.toList(); @@ -390,52 +336,23 @@ public void testBuildingAndCountingHistogramsWithNullFilter() throws Exception new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), INPUT_ROW_SCHEMA, INPUT_FORMAT, - String.join( - "\n", - "[", - " {\"type\": \"spectatorHistogram\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", - "]" - ), + List.of(new SpectatorHistogramAggregatorFactory("histogram", "cost")), 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"groupBy\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"dimenions\": [],", - " \"aggregations\": [", - " {\"type\": \"longSum\", \"name\": \"count_histogram\", \"fieldName\": " - + "\"histogram\"},", - " {\"type\": \"doubleSum\", \"name\": \"double_count_histogram\", \"fieldName\": " - + "\"histogram\"}", - " ],", - " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"],", - " \"filter\": {\n", - " \"fields\": [\n", - " {\n", - " \"field\": {\n", - " \"dimension\": \"histogram\",\n", - " \"value\": \"0\",\n", - " \"type\": \"selector\"\n", - " },\n", - " \"type\": \"not\"\n", - " },\n", - " {\n", - " \"field\": {\n", - " \"dimension\": \"histogram\",\n", - " \"value\": \"\",\n", - " \"type\": \"selector\"\n", - " },\n", - " \"type\": \"not\"\n", - " }\n", - " ],\n", - " \"type\": \"and\"\n", - " }", - "}" - ) + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setAggregatorSpecs( + new LongSumAggregatorFactory("count_histogram", "histogram"), + new DoubleSumAggregatorFactory("double_count_histogram", "histogram") + ) + .setInterval("2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z") + .setDimFilter(new AndDimFilter( + new NotDimFilter(new SelectorDimFilter("histogram", "0", null)), + new NotDimFilter(new SelectorDimFilter("histogram", "", null)) + )) + .build() ); List results = seq.toList(); @@ -453,29 +370,16 @@ public void testIngestAsHistogramDistribution() throws Exception new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), INPUT_ROW_SCHEMA, INPUT_FORMAT, - String.join( - "\n", - "[", - " {\"type\": \"spectatorHistogramDistribution\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", - "]" - ), + List.of(new SpectatorHistogramAggregatorFactory.Distribution("histogram", "cost")), 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"groupBy\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"dimenions\": [],", - " \"aggregations\": [", - " {\"type\": \"spectatorHistogram\", \"name\": \"merged_cost_histogram\", \"fieldName\": " - + "\"histogram\"}", - " ],", - " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", - "}" - ) + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setAggregatorSpecs(new SpectatorHistogramAggregatorFactory("merged_cost_histogram", "histogram")) + .setInterval("2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z") + .build() ); SpectatorHistogram expected = new SpectatorHistogram(); expected.add(PercentileBuckets.indexOf(10), 1L); @@ -496,29 +400,16 @@ public void testIngestHistogramsTimer() throws Exception new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), INPUT_ROW_SCHEMA, INPUT_FORMAT, - String.join( - "\n", - "[", - " {\"type\": \"spectatorHistogramTimer\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", - "]" - ), + List.of(new SpectatorHistogramAggregatorFactory.Timer("histogram", "cost")), 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"groupBy\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"dimenions\": [],", - " \"aggregations\": [", - " {\"type\": \"spectatorHistogram\", \"name\": \"merged_cost_histogram\", \"fieldName\": " - + "\"histogram\"}", - " ],", - " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", - "}" - ) + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setAggregatorSpecs(new SpectatorHistogramAggregatorFactory("merged_cost_histogram", "histogram")) + .setInterval("2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z") + .build() ); SpectatorHistogram expected = new SpectatorHistogram(); expected.add(PercentileBuckets.indexOf(10), 1L); @@ -535,32 +426,21 @@ public void testIngestHistogramsTimer() throws Exception @Test public void testIngestingPreaggregatedHistograms() throws Exception { - Object rawseq = timeSeriesHelper.createIndexAndRunQueryOnSegment( + TimeseriesQuery preAggTsQuery = Druids.newTimeseriesQueryBuilder() + .dataSource("test_datasource") + .granularity(Granularities.ALL) + .aggregators(new SpectatorHistogramAggregatorFactory("merged_cost_histogram", "histogram")) + .intervals("2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z") + .build(); + Sequence> seq = timeSeriesHelper.createIndexAndRunQueryOnSegment( new File(this.getClass().getClassLoader().getResource("pre_agg_data.tsv").getFile()), INPUT_ROW_SCHEMA, INPUT_FORMAT, - String.join( - "\n", - "[", - " {\"type\": \"spectatorHistogram\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", - "]" - ), + List.of(new SpectatorHistogramAggregatorFactory("histogram", "cost")), 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"timeseries\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"aggregations\": [", - " {\"type\": \"spectatorHistogram\", \"name\": \"merged_cost_histogram\", \"fieldName\": " - + "\"histogram\"}", - " ],", - " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", - "}" - ) + preAggTsQuery ); SpectatorHistogram expected = new SpectatorHistogram(); expected.add(PercentileBuckets.indexOf(10), 1L); @@ -569,7 +449,6 @@ public void testIngestingPreaggregatedHistograms() throws Exception expected.add(PercentileBuckets.indexOf(50), 3L); expected.add(PercentileBuckets.indexOf(20000), 1L); - Sequence> seq = (Sequence>) rawseq; List> results = seq.toList(); Assert.assertEquals(1, results.size()); SpectatorHistogram value = (SpectatorHistogram) results.get(0).getValue().getMetric("merged_cost_histogram"); @@ -584,17 +463,11 @@ public void testMetadataQueryTimer() throws Exception new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), INPUT_ROW_SCHEMA, INPUT_FORMAT, - String.join( - "\n", - "[", - " {\"type\": \"spectatorHistogramTimer\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", - "]" - ), + List.of(new SpectatorHistogramAggregatorFactory.Timer("histogram", "cost")), segmentDir, 0, // minTimestamp Granularities.NONE, - 10, // maxRowCount - true + 10 // maxRowCount ); ObjectMapper mapper = (ObjectMapper) TestHelper.makeJsonMapper(); @@ -636,17 +509,11 @@ public void testMetadataQueryDistribution() throws Exception new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), INPUT_ROW_SCHEMA, INPUT_FORMAT, - String.join( - "\n", - "[", - " {\"type\": \"spectatorHistogramDistribution\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", - "]" - ), + List.of(new SpectatorHistogramAggregatorFactory.Distribution("histogram", "cost")), segmentDir, 0, // minTimestamp Granularities.NONE, - 10, // maxRowCount - true + 10 // maxRowCount ); ObjectMapper mapper = (ObjectMapper) TestHelper.makeJsonMapper(); @@ -687,35 +554,28 @@ public void testPercentilePostAggregator() throws Exception new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), INPUT_ROW_SCHEMA, INPUT_FORMAT, - String.join( - "\n", - "[", - " {\"type\": \"spectatorHistogram\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", - "]" - ), + List.of(new SpectatorHistogramAggregatorFactory("histogram", "cost")), 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"groupBy\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"dimenions\": [],", - " \"aggregations\": [", - " {\"type\": \"spectatorHistogram\", \"name\": \"merged_cost_histogram\", \"fieldName\": " - + "\"histogram\"}", - " ],", - " \"postAggregations\": [", - " {\"type\": \"percentileSpectatorHistogram\", \"name\": \"percentileValue\", \"field\": {\"type\": \"fieldAccess\",\"fieldName\": \"merged_cost_histogram\"}" - + ", \"percentile\": \"50.0\"},", - " {\"type\": \"percentilesSpectatorHistogram\", \"name\": \"percentileValues\", \"field\": {\"type\": \"fieldAccess\",\"fieldName\": \"merged_cost_histogram\"}" - + ", \"percentiles\": [25.0, 50.0, 75.0, 99.0]}", - " ],", - " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", - "}" - ) + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setAggregatorSpecs(new SpectatorHistogramAggregatorFactory("merged_cost_histogram", "histogram")) + .setPostAggregatorSpecs( + new SpectatorHistogramPercentilePostAggregator( + "percentileValue", + new FieldAccessPostAggregator(null, "merged_cost_histogram"), + 50.0 + ), + new SpectatorHistogramPercentilesPostAggregator( + "percentileValues", + new FieldAccessPostAggregator(null, "merged_cost_histogram"), + new double[]{25.0, 50.0, 75.0, 99.0} + ) + ) + .setInterval("2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z") + .build() ); SpectatorHistogram expected = new SpectatorHistogram(); expected.add(PercentileBuckets.indexOf(10), 1L); @@ -806,33 +666,24 @@ public void testPercentilePostAggregatorWithNullSketch() throws Exception new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), INPUT_ROW_SCHEMA, INPUT_FORMAT, - String.join( - "\n", - "[", - " {\"type\": \"spectatorHistogram\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", - "]" - ), + List.of(new SpectatorHistogramAggregatorFactory("histogram", "cost")), 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"groupBy\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"dimensions\": [\"product\"],", - " \"aggregations\": [", - " {\"type\": \"spectatorHistogram\", \"name\": \"merged_histogram\", \"fieldName\": " - + "\"histogram\"}", - " ],", - " \"postAggregations\": [", - " {\"type\": \"percentileSpectatorHistogram\", \"name\": \"p50\", \"field\": {\"type\": \"fieldAccess\",\"fieldName\": \"merged_histogram\"}" - + ", \"percentile\": \"50.0\"}", - " ],", - " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", - "}" - ) + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setDimensions(new DefaultDimensionSpec("product", "product")) + .setAggregatorSpecs(new SpectatorHistogramAggregatorFactory("merged_histogram", "histogram")) + .setPostAggregatorSpecs( + new SpectatorHistogramPercentilePostAggregator( + "p50", + new FieldAccessPostAggregator(null, "merged_histogram"), + 50.0 + ) + ) + .setInterval("2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z") + .build() ); List results = seq.toList(); @@ -856,33 +707,24 @@ public void testPercentilesPostAggregatorWithNullSketch() throws Exception new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), INPUT_ROW_SCHEMA, INPUT_FORMAT, - String.join( - "\n", - "[", - " {\"type\": \"spectatorHistogram\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", - "]" - ), + List.of(new SpectatorHistogramAggregatorFactory("histogram", "cost")), 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"groupBy\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"dimensions\": [\"product\"],", - " \"aggregations\": [", - " {\"type\": \"spectatorHistogram\", \"name\": \"merged_histogram\", \"fieldName\": " - + "\"histogram\"}", - " ],", - " \"postAggregations\": [", - " {\"type\": \"percentilesSpectatorHistogram\", \"name\": \"percentiles\", \"field\": {\"type\": \"fieldAccess\",\"fieldName\": \"merged_histogram\"}" - + ", \"percentiles\": [25.0, 50.0, 75.0]}", - " ],", - " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", - "}" - ) + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setDimensions(new DefaultDimensionSpec("product", "product")) + .setAggregatorSpecs(new SpectatorHistogramAggregatorFactory("merged_histogram", "histogram")) + .setPostAggregatorSpecs( + new SpectatorHistogramPercentilesPostAggregator( + "percentiles", + new FieldAccessPostAggregator(null, "merged_histogram"), + new double[]{25.0, 50.0, 75.0} + ) + ) + .setInterval("2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z") + .build() ); List results = seq.toList(); @@ -909,32 +751,22 @@ public void testCountPostAggregator() throws Exception new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), INPUT_ROW_SCHEMA, INPUT_FORMAT, - String.join( - "\n", - "[", - " {\"type\": \"spectatorHistogram\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", - "]" - ), + List.of(new SpectatorHistogramAggregatorFactory("histogram", "cost")), 0, Granularities.NONE, 10, - String.join( - "\n", - "{", - " \"queryType\": \"groupBy\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"dimenions\": [],", - " \"aggregations\": [", - " {\"type\": \"spectatorHistogram\", \"name\": \"merged_cost_histogram\", \"fieldName\": " - + "\"histogram\"}", - " ],", - " \"postAggregations\": [", - " {\"type\": \"countSpectatorHistogram\", \"name\": \"count\", \"field\": {\"type\": \"fieldAccess\",\"fieldName\": \"merged_cost_histogram\"}}", - " ],", - " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", - "}" - ) + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setAggregatorSpecs(new SpectatorHistogramAggregatorFactory("merged_cost_histogram", "histogram")) + .setPostAggregatorSpecs( + new SpectatorHistogramCountPostAggregator( + "count", + new FieldAccessPostAggregator(null, "merged_cost_histogram") + ) + ) + .setInterval("2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z") + .build() ); List results = seq.toList(); @@ -950,32 +782,23 @@ public void testCountPostAggregatorWithNullSketch() throws Exception new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), INPUT_ROW_SCHEMA, INPUT_FORMAT, - String.join( - "\n", - "[", - " {\"type\": \"spectatorHistogram\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", - "]" - ), + List.of(new SpectatorHistogramAggregatorFactory("histogram", "cost")), 0, Granularities.NONE, 10, - String.join( - "\n", - "{", - " \"queryType\": \"groupBy\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"dimensions\": [\"product\"],", - " \"aggregations\": [", - " {\"type\": \"spectatorHistogram\", \"name\": \"merged_histogram\", \"fieldName\": " - + "\"histogram\"}", - " ],", - " \"postAggregations\": [", - " {\"type\": \"countSpectatorHistogram\", \"name\": \"count\", \"field\": {\"type\": \"fieldAccess\",\"fieldName\": \"merged_histogram\"}}", - " ],", - " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", - "}" - ) + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setDimensions(new DefaultDimensionSpec("product", "product")) + .setAggregatorSpecs(new SpectatorHistogramAggregatorFactory("merged_histogram", "histogram")) + .setPostAggregatorSpecs( + new SpectatorHistogramCountPostAggregator( + "count", + new FieldAccessPostAggregator(null, "merged_histogram") + ) + ) + .setInterval("2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z") + .build() ); List results = seq.toList(); diff --git a/extensions-contrib/tdigestsketch/src/test/java/org/apache/druid/query/aggregation/tdigestsketch/TDigestSketchAggregatorTest.java b/extensions-contrib/tdigestsketch/src/test/java/org/apache/druid/query/aggregation/tdigestsketch/TDigestSketchAggregatorTest.java index 24c8143404c2..43927bae9a99 100644 --- a/extensions-contrib/tdigestsketch/src/test/java/org/apache/druid/query/aggregation/tdigestsketch/TDigestSketchAggregatorTest.java +++ b/extensions-contrib/tdigestsketch/src/test/java/org/apache/druid/query/aggregation/tdigestsketch/TDigestSketchAggregatorTest.java @@ -26,10 +26,14 @@ import org.apache.druid.data.input.impl.DimensionsSpec; import org.apache.druid.data.input.impl.TimestampSpec; import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.query.aggregation.AggregationTestHelper; import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.DoubleSumAggregatorFactory; +import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator; +import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.query.groupby.GroupByQueryConfig; import org.apache.druid.query.groupby.GroupByQueryRunnerTest; import org.apache.druid.query.groupby.ResultRow; @@ -44,6 +48,7 @@ import java.io.File; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.List; @RunWith(Parameterized.class) @@ -104,29 +109,24 @@ public void buildingSketchesAtIngestionTime() throws Exception DelimitedInputFormat.forColumns( List.of("timestamp", "sequenceNumber", "product", "value") ), - "[{\"type\": \"tDigestSketch\", \"name\": \"sketch\", \"fieldName\": \"value\", \"compression\": 200}]", + List.of(new TDigestSketchAggregatorFactory("sketch", "value", 200)), 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"groupBy\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"dimensions\": [],", - " \"aggregations\": [", - " {\"type\": \"tDigestSketch\", \"name\": \"merged_sketch\", \"fieldName\": \"sketch\", " - + "\"compression\": " - + "200}", - " ],", - " \"postAggregations\": [", - " {\"type\": \"quantilesFromTDigestSketch\", \"name\": \"quantiles\", \"fractions\": [0, 0.5, 1], " - + "\"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"merged_sketch\"}}", - " ],", - " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", - "}" - ) + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setDimensions(Collections.emptyList()) + .setAggregatorSpecs(new TDigestSketchAggregatorFactory("merged_sketch", "sketch", 200)) + .setPostAggregatorSpecs( + new TDigestSketchToQuantilesPostAggregator( + "quantiles", + new FieldAccessPostAggregator("merged_sketch", "merged_sketch"), + new double[]{0, 0.5, 1} + ) + ) + .setInterval(Intervals.of("2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z")) + .build() ); List results = seq.toList(); Assert.assertEquals(1, results.size()); @@ -154,26 +154,24 @@ public void buildingSketchesAtQueryTime() throws Exception DelimitedInputFormat.forColumns( List.of("timestamp", "sequenceNumber", "product", "value") ), - "[{\"type\": \"doubleSum\", \"name\": \"value\", \"fieldName\": \"value\"}]", + List.of(new DoubleSumAggregatorFactory("value", "value")), 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"groupBy\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"dimensions\": [],", - " \"aggregations\": [", - " {\"type\": \"tDigestSketch\", \"name\": \"sketch\", \"fieldName\": \"value\", \"compression\": 200}", - " ],", - " \"postAggregations\": [", - " {\"type\": \"quantilesFromTDigestSketch\", \"name\": \"quantiles\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}}", - " ],", - " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", - "}" - ) + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setDimensions(Collections.emptyList()) + .setAggregatorSpecs(new TDigestSketchAggregatorFactory("sketch", "value", 200)) + .setPostAggregatorSpecs( + new TDigestSketchToQuantilesPostAggregator( + "quantiles", + new FieldAccessPostAggregator("sketch", "sketch"), + new double[]{0, 0.5, 1} + ) + ) + .setInterval(Intervals.of("2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z")) + .build() ); List results = seq.toList(); Assert.assertEquals(1, results.size()); @@ -202,35 +200,26 @@ public void testIngestingSketches() throws Exception DelimitedInputFormat.forColumns( List.of("timestamp", "product", "sketch") ), - String.join( - "\n", - "[", - " {\"type\": \"tDigestSketch\", \"name\": \"first_level_merge_sketch\", \"fieldName\": \"sketch\", " - + "\"compression\": " - + "200}", - "]" - ), + List.of(new TDigestSketchAggregatorFactory("first_level_merge_sketch", "sketch", 200)), 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"groupBy\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"dimensions\": [],", - " \"aggregations\": [", - " {\"type\": \"tDigestSketch\", \"name\": \"second_level_merge_sketch\", \"fieldName\": " - + "\"first_level_merge_sketch\", \"compression\": " - + "200}", - " ],", - " \"postAggregations\": [", - " {\"type\": \"quantilesFromTDigestSketch\", \"name\": \"quantiles\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"second_level_merge_sketch\"}}", - " ],", - " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", - "}" - ) + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setDimensions(Collections.emptyList()) + .setAggregatorSpecs( + new TDigestSketchAggregatorFactory("second_level_merge_sketch", "first_level_merge_sketch", 200) + ) + .setPostAggregatorSpecs( + new TDigestSketchToQuantilesPostAggregator( + "quantiles", + new FieldAccessPostAggregator("second_level_merge_sketch", "second_level_merge_sketch"), + new double[]{0, 0.5, 1} + ) + ) + .setInterval(Intervals.of("2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z")) + .build() ); List results = seq.toList(); Assert.assertEquals(1, results.size()); diff --git a/extensions-contrib/time-min-max/src/test/java/org/apache/druid/query/aggregation/TimestampGroupByAggregationTest.java b/extensions-contrib/time-min-max/src/test/java/org/apache/druid/query/aggregation/TimestampGroupByAggregationTest.java index 0b63aa41c782..8aa47963fd71 100644 --- a/extensions-contrib/time-min-max/src/test/java/org/apache/druid/query/aggregation/TimestampGroupByAggregationTest.java +++ b/extensions-contrib/time-min-max/src/test/java/org/apache/druid/query/aggregation/TimestampGroupByAggregationTest.java @@ -29,6 +29,7 @@ import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.guava.Sequence; +import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.query.groupby.GroupByQueryConfig; import org.apache.druid.query.groupby.GroupByQueryRunnerTest; @@ -128,32 +129,26 @@ public void teardown() throws IOException helper.close(); } + private AggregatorFactory makeTimestampAggregator(String name, String fieldName) + { + return "timeMin".equals(aggType) + ? new TimestampMinAggregatorFactory(name, fieldName, null) + : new TimestampMaxAggregatorFactory(name, fieldName, null); + } + @Test public void testSimpleDataIngestionAndGroupByTest() throws Exception { - String aggregator = "[\n" + - " {\n" + - " \"type\": \"" + aggType + "\",\n" + - " \"name\": \"" + aggField + "\",\n" + - " \"fieldName\": \"timestamp\"\n" + - " }\n" + - "]"; - String groupBy = "{\n" + - " \"queryType\": \"groupBy\",\n" + - " \"dataSource\": \"test_datasource\",\n" + - " \"granularity\": \"MONTH\",\n" + - " \"dimensions\": [\"product\"],\n" + - " \"aggregations\": [\n" + - " {\n" + - " \"type\": \"" + aggType + "\",\n" + - " \"name\": \"" + groupByField + "\",\n" + - " \"fieldName\": \"" + aggField + "\"\n" + - " }\n" + - " ],\n" + - " \"intervals\": [\n" + - " \"2011-01-01T00:00:00.000Z/2011-05-01T00:00:00.000Z\"\n" + - " ]\n" + - "}"; + List aggregators = List.of(makeTimestampAggregator(aggField, "timestamp")); + + GroupByQuery groupByQuery = GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.MONTH) + .setDimensions(new DefaultDimensionSpec("product", "product")) + .setAggregatorSpecs(makeTimestampAggregator(groupByField, aggField)) + .setInterval("2011-01-01T00:00:00.000Z/2011-05-01T00:00:00.000Z") + .build(); + ZipFile zip = new ZipFile(new File(this.getClass().getClassLoader().getResource("druid.sample.tsv.zip").toURI())); Sequence seq = helper.createIndexAndRunQueryOnSegment( zip.getInputStream(zip.getEntry("druid.sample.tsv")), @@ -165,15 +160,14 @@ public void testSimpleDataIngestionAndGroupByTest() throws Exception DelimitedInputFormat.forColumns( List.of("timestamp", "cat", "product", "prefer", "prefer2", "pty_country") ), - aggregator, + aggregators, 0, Granularities.MONTH, 100, - groupBy + groupByQuery ); - int groupByFieldNumber = ((GroupByQuery) helper.readQuery(groupBy)).getResultRowSignature() - .indexOf(groupByField); + int groupByFieldNumber = groupByQuery.getResultRowSignature().indexOf(groupByField); List results = seq.toList(); Assert.assertEquals(36, results.size()); diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java index c29af60ff86a..10dfa22d329c 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java @@ -19,8 +19,6 @@ package org.apache.druid.query.aggregation.datasketches.hll; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import org.apache.datasketches.hll.HllSketch; @@ -33,11 +31,13 @@ import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.StringEncoding; import org.apache.druid.java.util.common.granularity.Granularities; -import org.apache.druid.java.util.common.granularity.Granularity; import org.apache.druid.java.util.common.guava.Sequence; +import org.apache.druid.query.Druids; import org.apache.druid.query.QueryContexts; import org.apache.druid.query.Result; import org.apache.druid.query.aggregation.AggregationTestHelper; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory; import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator; import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.query.groupby.GroupByQueryConfig; @@ -45,6 +45,7 @@ import org.apache.druid.query.groupby.ResultRow; import org.apache.druid.query.groupby.epinephelinae.GroupByTestColumnSelectorFactory; import org.apache.druid.query.groupby.epinephelinae.GrouperTestUtil; +import org.apache.druid.query.timeseries.TimeseriesQuery; import org.apache.druid.query.timeseries.TimeseriesResultValue; import org.apache.druid.testing.InitializedNullHandlingTest; import org.junit.Assert; @@ -58,9 +59,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; -import java.util.Collections; import java.util.List; -import java.util.Map; @RunWith(Parameterized.class) public class HllSketchAggregatorTest extends InitializedNullHandlingTest @@ -114,11 +113,11 @@ public void ingestSketches() throws Exception new File(this.getClass().getClassLoader().getResource("hll/hll_sketches.tsv").getFile()), buildInputRowSchema(List.of("dim", "multiDim")), buildInputFormat(List.of("timestamp", "dim", "multiDim", "sketch")), - buildAggregatorJson("HLLSketchMerge", "sketch", !ROUND, stringEncoding), + buildMergeAggregatorList("sketch", !ROUND, stringEncoding), 0, // minTimestamp Granularities.NONE, 200, // maxRowCount - buildGroupByQueryJson("HLLSketchMerge", "sketch", !ROUND, stringEncoding) + buildGroupByQuery("HLLSketchMerge", "sketch", !ROUND, stringEncoding) ); List results = seq.toList(); Assert.assertEquals(1, results.size()); @@ -132,12 +131,10 @@ public void ingestSketchesTimeseries() throws Exception final File inputFile = new File(this.getClass().getClassLoader().getResource("hll/hll_sketches.tsv").getFile()); final InputRowSchema inputRowSchema = buildInputRowSchema(List.of("dim", "multiDim")); final DelimitedInputFormat inputFormat = buildInputFormat(List.of("timestamp", "dim", "multiDim", "sketch")); - final String aggregators = - buildAggregatorJson("HLLSketchMerge", "sketch", !ROUND, HllSketchAggregatorFactory.DEFAULT_STRING_ENCODING); + final List aggregators = + buildMergeAggregatorList("sketch", !ROUND, HllSketchAggregatorFactory.DEFAULT_STRING_ENCODING); final int minTimestamp = 0; - final Granularity gran = Granularities.NONE; final int maxRowCount = 10; - final String queryJson = buildTimeseriesQueryJson("HLLSketchMerge", "sketch", !ROUND); File segmentDir1 = timeseriesFolder.newFolder(); timeseriesHelper.createIndex( @@ -147,7 +144,7 @@ public void ingestSketchesTimeseries() throws Exception aggregators, segmentDir1, minTimestamp, - gran, + Granularities.NONE, maxRowCount, true ); @@ -160,16 +157,19 @@ public void ingestSketchesTimeseries() throws Exception aggregators, segmentDir2, minTimestamp, - gran, + Granularities.NONE, maxRowCount, true ); - Sequence seq = timeseriesHelper.runQueryOnSegments(Arrays.asList(segmentDir1, segmentDir2), queryJson); - List results = seq.toList(); + Sequence> seq = timeseriesHelper.runQueryOnSegments( + Arrays.asList(segmentDir1, segmentDir2), + buildTimeseriesQuery("HLLSketchMerge", "sketch", !ROUND) + ); + List> results = seq.toList(); Assert.assertEquals(1, results.size()); - Result row = results.get(0); - Assert.assertEquals(200, (double) ((TimeseriesResultValue) row.getValue()).getMetric("sketch"), 0.1); + Result row = results.get(0); + Assert.assertEquals(200, (double) row.getValue().getMetric("sketch"), 0.1); } @Test @@ -179,11 +179,11 @@ public void buildSketchesAtIngestionTime() throws Exception new File(this.getClass().getClassLoader().getResource("hll/hll_raw.tsv").getFile()), buildInputRowSchema(List.of("dim")), buildInputFormat(List.of("timestamp", "dim", "multiDim", "id")), - buildAggregatorJson("HLLSketchBuild", "id", !ROUND, stringEncoding), + buildBuildAggregatorList("id", !ROUND, stringEncoding), 0, // minTimestamp Granularities.NONE, 200, // maxRowCount - buildGroupByQueryJson("HLLSketchMerge", "sketch", !ROUND, stringEncoding) + buildGroupByQuery("HLLSketchMerge", "sketch", !ROUND, stringEncoding) ); List results = seq.toList(); Assert.assertEquals(1, results.size()); @@ -194,20 +194,20 @@ public void buildSketchesAtIngestionTime() throws Exception @Test public void buildSketchesAtIngestionTimeTimeseries() throws Exception { - Sequence seq = timeseriesHelper.createIndexAndRunQueryOnSegment( + Sequence> seq = timeseriesHelper.createIndexAndRunQueryOnSegment( new File(this.getClass().getClassLoader().getResource("hll/hll_raw.tsv").getFile()), buildInputRowSchema(List.of("dim")), buildInputFormat(List.of("timestamp", "dim", "multiDim", "id")), - buildAggregatorJson("HLLSketchBuild", "id", !ROUND, stringEncoding), + buildBuildAggregatorList("id", !ROUND, stringEncoding), 0, // minTimestamp Granularities.NONE, 200, // maxRowCount - buildTimeseriesQueryJson("HLLSketchMerge", "sketch", !ROUND) + buildTimeseriesQuery("HLLSketchMerge", "sketch", !ROUND) ); - List results = seq.toList(); + List> results = seq.toList(); Assert.assertEquals(1, results.size()); - Result row = results.get(0); - Assert.assertEquals(200, (double) ((TimeseriesResultValue) row.getValue()).getMetric("sketch"), 0.1); + Result row = results.get(0); + Assert.assertEquals(200, (double) row.getValue().getMetric("sketch"), 0.1); } @Test @@ -217,11 +217,11 @@ public void buildSketchesAtQueryTime() throws Exception new File(this.getClass().getClassLoader().getResource("hll/hll_raw.tsv").getFile()), buildInputRowSchema(List.of("dim", "multiDim", "id")), buildInputFormat(List.of("timestamp", "dim", "multiDim", "id")), - "[]", + List.of(), 0, // minTimestamp Granularities.NONE, 200, // maxRowCount - buildGroupByQueryJson("HLLSketchBuild", "id", !ROUND, stringEncoding) + buildGroupByQuery("HLLSketchBuild", "id", !ROUND, stringEncoding) ); List results = seq.toList(); Assert.assertEquals(1, results.size()); @@ -232,40 +232,35 @@ public void buildSketchesAtQueryTime() throws Exception @Test public void buildSketchesAtQueryTimeTimeseries() throws Exception { - Sequence seq = timeseriesHelper.createIndexAndRunQueryOnSegment( + Sequence> seq = timeseriesHelper.createIndexAndRunQueryOnSegment( new File(this.getClass().getClassLoader().getResource("hll/hll_raw.tsv").getFile()), buildInputRowSchema(List.of("dim", "multiDim", "id")), buildInputFormat(List.of("timestamp", "dim", "multiDim", "id")), - "[]", + List.of(), 0, // minTimestamp Granularities.NONE, 200, // maxRowCount - buildTimeseriesQueryJson("HLLSketchBuild", "id", !ROUND) + buildTimeseriesQuery("HLLSketchBuild", "id", !ROUND) ); - List results = seq.toList(); + List> results = seq.toList(); Assert.assertEquals(1, results.size()); - Result row = results.get(0); - Assert.assertEquals(200, (double) ((TimeseriesResultValue) row.getValue()).getMetric("sketch"), 0.1); + Result row = results.get(0); + Assert.assertEquals(200, (double) row.getValue().getMetric("sketch"), 0.1); } @Test public void unsuccessfulComplexTypesInHLL() throws Exception { - String metricSpec = "[{" - + "\"type\": \"hyperUnique\"," - + "\"name\": \"index_hll\"," - + "\"fieldName\": \"id\"" - + "}]"; try { Sequence seq = groupByHelper.createIndexAndRunQueryOnSegment( new File(this.getClass().getClassLoader().getResource("hll/hll_sketches.tsv").getFile()), buildInputRowSchema(List.of("dim", "multiDim", "id")), buildInputFormat(List.of("timestamp", "dim", "multiDim", "id")), - metricSpec, + List.of(new HyperUniquesAggregatorFactory("index_hll", "id")), 0, // minTimestamp Granularities.NONE, 200, // maxRowCount - buildGroupByQueryJson("HLLSketchMerge", "sketch", ROUND, stringEncoding) + buildGroupByQuery("HLLSketchMerge", "sketch", ROUND, stringEncoding) ); } catch (RuntimeException e) { @@ -281,11 +276,11 @@ public void buildSketchesAtQueryTimeMultiValue() throws Exception new File(this.getClass().getClassLoader().getResource("hll/hll_raw.tsv").getFile()), buildInputRowSchema(List.of("dim", "multiDim", "id")), buildInputFormat(List.of("timestamp", "dim", "multiDim", "id")), - "[]", + List.of(), 0, // minTimestamp Granularities.NONE, 200, // maxRowCount - buildGroupByQueryJson("HLLSketchBuild", "multiDim", !ROUND, stringEncoding) + buildGroupByQuery("HLLSketchBuild", "multiDim", !ROUND, stringEncoding) ); List results = seq.toList(); Assert.assertEquals(1, results.size()); @@ -300,11 +295,11 @@ public void roundBuildSketch() throws Exception new File(this.getClass().getClassLoader().getResource("hll/hll_raw.tsv").getFile()), buildInputRowSchema(List.of("dim", "multiDim", "id")), buildInputFormat(List.of("timestamp", "dim", "multiDim", "id")), - "[]", + List.of(), 0, // minTimestamp Granularities.NONE, 200, // maxRowCount - buildGroupByQueryJson("HLLSketchBuild", "id", ROUND, stringEncoding) + buildGroupByQuery("HLLSketchBuild", "id", ROUND, stringEncoding) ); List results = seq.toList(); Assert.assertEquals(1, results.size()); @@ -319,11 +314,11 @@ public void roundMergeSketch() throws Exception new File(this.getClass().getClassLoader().getResource("hll/hll_sketches.tsv").getFile()), buildInputRowSchema(List.of("dim", "multiDim")), buildInputFormat(List.of("timestamp", "dim", "multiDim", "sketch")), - buildAggregatorJson("HLLSketchMerge", "sketch", ROUND, stringEncoding), + buildMergeAggregatorList("sketch", ROUND, stringEncoding), 0, // minTimestamp Granularities.NONE, 200, // maxRowCount - buildGroupByQueryJson("HLLSketchMerge", "sketch", ROUND, stringEncoding) + buildGroupByQuery("HLLSketchMerge", "sketch", ROUND, stringEncoding) ); List results = seq.toList(); Assert.assertEquals(1, results.size()); @@ -338,51 +333,49 @@ public void testPostAggs() throws Exception new File(this.getClass().getClassLoader().getResource("hll/hll_sketches.tsv").getFile()), buildInputRowSchema(List.of("dim", "multiDim")), buildInputFormat(List.of("timestamp", "dim", "multiDim", "sketch")), - buildAggregatorJson("HLLSketchMerge", "sketch", ROUND, stringEncoding), + buildMergeAggregatorList("sketch", ROUND, stringEncoding), 0, // minTimestamp Granularities.NONE, 200, // maxRowCount - groupByHelper.getObjectMapper().writeValueAsString( - GroupByQuery.builder() - .setDataSource("test_datasource") - .setGranularity(Granularities.ALL) - .setInterval(Intervals.ETERNITY) - .setAggregatorSpecs( - new HllSketchMergeAggregatorFactory("sketch", "sketch", null, null, null, null, false) - ) - .setPostAggregatorSpecs( - ImmutableList.of( - new HllSketchToEstimatePostAggregator( - "estimate", - new FieldAccessPostAggregator("f1", "sketch"), - false - ), - new HllSketchToEstimateWithBoundsPostAggregator( - "estimateWithBounds", - new FieldAccessPostAggregator( - "f1", - "sketch" - ), - 2 - ), - new HllSketchToStringPostAggregator( - "summary", - new FieldAccessPostAggregator("f1", "sketch") - ), - new HllSketchUnionPostAggregator( - "union", - ImmutableList.of(new FieldAccessPostAggregator( - "f1", - "sketch" - ), new FieldAccessPostAggregator("f2", "sketch")), - null, - null + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval(Intervals.ETERNITY) + .setAggregatorSpecs( + new HllSketchMergeAggregatorFactory("sketch", "sketch", null, null, null, null, false) + ) + .setPostAggregatorSpecs( + ImmutableList.of( + new HllSketchToEstimatePostAggregator( + "estimate", + new FieldAccessPostAggregator("f1", "sketch"), + false + ), + new HllSketchToEstimateWithBoundsPostAggregator( + "estimateWithBounds", + new FieldAccessPostAggregator( + "f1", + "sketch" ), - new FieldAccessPostAggregator("fieldAccess", "sketch") - ) + 2 + ), + new HllSketchToStringPostAggregator( + "summary", + new FieldAccessPostAggregator("f1", "sketch") + ), + new HllSketchUnionPostAggregator( + "union", + ImmutableList.of(new FieldAccessPostAggregator( + "f1", + "sketch" + ), new FieldAccessPostAggregator("f2", "sketch")), + null, + null + ), + new FieldAccessPostAggregator("fieldAccess", "sketch") ) - .build() - ) + ) + .build() ); final String expectedSummary = "### HLL SKETCH SUMMARY: \n" + " Log Config K : 12\n" @@ -436,104 +429,83 @@ private static DelimitedInputFormat buildInputFormat(List columns) return new DelimitedInputFormat(columns, ",", null, null, null, 0, null); } - private static String toJson(Object object) - { - final String json; - try { - ObjectMapper objectMapper = new ObjectMapper(); - json = objectMapper.writerWithDefaultPrettyPrinter().writeValueAsString(object); - } - catch (JsonProcessingException e) { - throw new RuntimeException(e); - } - return json; - } - - private static String buildAggregatorJson( - String aggregationType, - String aggregationFieldName, - boolean aggregationRound, + private static List buildMergeAggregatorList( + String fieldName, + boolean round, StringEncoding stringEncoding ) { - Map aggregator = buildAggregatorObject( - aggregationType, - aggregationFieldName, - aggregationRound, - stringEncoding - ); - return toJson(Collections.singletonList(aggregator)); + return List.of(new HllSketchMergeAggregatorFactory("sketch", fieldName, null, "HLL_8", stringEncoding, null, round)); } - private static Map buildAggregatorObject( - String aggregationType, - String aggregationFieldName, - boolean aggregationRound, + private static List buildBuildAggregatorList( + String fieldName, + boolean round, StringEncoding stringEncoding ) { - return ImmutableMap.builder() - .put("type", aggregationType) - .put("name", "sketch") - .put("fieldName", aggregationFieldName) - .put("round", aggregationRound) - .put("tgtHllType", "HLL_8") - .put("stringEncoding", stringEncoding.toString()) - .build(); + return List.of(new HllSketchBuildAggregatorFactory("sketch", fieldName, null, "HLL_8", stringEncoding, null, round)); } - private String buildGroupByQueryJson( + private GroupByQuery buildGroupByQuery( String aggregationType, - String aggregationFieldName, - boolean aggregationRound, + String fieldName, + boolean round, StringEncoding stringEncoding ) { - Map aggregation = buildAggregatorObject( - aggregationType, - aggregationFieldName, - aggregationRound, - stringEncoding - ); - Map object = new ImmutableMap.Builder() - .put("queryType", "groupBy") - .put("dataSource", "test_dataSource") - .put("granularity", "ALL") - .put("dimensions", Collections.emptyList()) - .put("aggregations", Collections.singletonList(aggregation)) - .put( - "postAggregations", - Collections.singletonList( - ImmutableMap.of("type", "fieldAccess", "name", "sketch_raw", "fieldName", "sketch") - ) - ) - .put("intervals", Collections.singletonList("2017-01-01T00:00:00.000Z/2017-01-31T00:00:00.000Z")) - .put("context", ImmutableMap.of(QueryContexts.VECTORIZE_KEY, vectorize.toString())) - .build(); - return toJson(object); + final HllSketchAggregatorFactory agg; + if ("HLLSketchMerge".equals(aggregationType)) { + agg = new HllSketchMergeAggregatorFactory("sketch", fieldName, null, "HLL_8", stringEncoding, null, round); + } else { + agg = new HllSketchBuildAggregatorFactory("sketch", fieldName, null, "HLL_8", stringEncoding, null, round); + } + return GroupByQuery.builder() + .setDataSource("test_dataSource") + .setGranularity(Granularities.ALL) + .setInterval(Intervals.of("2017-01-01T00:00:00.000Z/2017-01-31T00:00:00.000Z")) + .setAggregatorSpecs(agg) + .setPostAggregatorSpecs( + new FieldAccessPostAggregator("sketch_raw", "sketch") + ) + .setContext(ImmutableMap.of(QueryContexts.VECTORIZE_KEY, vectorize.toString())) + .build(); } - private String buildTimeseriesQueryJson( + private TimeseriesQuery buildTimeseriesQuery( String aggregationType, - String aggregationFieldName, - boolean aggregationRound + String fieldName, + boolean round ) { - Map aggregation = buildAggregatorObject( - aggregationType, - aggregationFieldName, - aggregationRound, - HllSketchAggregatorFactory.DEFAULT_STRING_ENCODING - ); - Map object = new ImmutableMap.Builder() - .put("queryType", "timeseries") - .put("dataSource", "test_dataSource") - .put("granularity", "ALL") - .put("aggregations", Collections.singletonList(aggregation)) - .put("intervals", Collections.singletonList("2017-01-01T00:00:00.000Z/2017-01-31T00:00:00.000Z")) - .put("context", ImmutableMap.of(QueryContexts.VECTORIZE_KEY, vectorize.toString())) - .build(); - return toJson(object); + final HllSketchAggregatorFactory agg; + if ("HLLSketchMerge".equals(aggregationType)) { + agg = new HllSketchMergeAggregatorFactory( + "sketch", + fieldName, + null, + "HLL_8", + HllSketchAggregatorFactory.DEFAULT_STRING_ENCODING, + null, + round + ); + } else { + agg = new HllSketchBuildAggregatorFactory( + "sketch", + fieldName, + null, + "HLL_8", + HllSketchAggregatorFactory.DEFAULT_STRING_ENCODING, + null, + round + ); + } + return Druids.newTimeseriesQueryBuilder() + .dataSource("test_dataSource") + .granularity(Granularities.ALL) + .intervals(Intervals.of("2017-01-01T00:00:00.000Z/2017-01-31T00:00:00.000Z").toString()) + .aggregators(agg) + .context(ImmutableMap.of(QueryContexts.VECTORIZE_KEY, vectorize.toString())) + .build(); } - } diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchAggregatorTest.java index 34dee1442189..addabc196d98 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllDoublesSketchAggregatorTest.java @@ -27,14 +27,21 @@ import org.apache.druid.data.input.impl.DimensionsSpec; import org.apache.druid.data.input.impl.TimestampSpec; import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.guava.Sequence; +import org.apache.druid.query.Druids; import org.apache.druid.query.QueryContexts; +import org.apache.druid.query.Result; import org.apache.druid.query.aggregation.AggregationTestHelper; import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.DoubleSumAggregatorFactory; +import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator; +import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.query.groupby.GroupByQueryConfig; import org.apache.druid.query.groupby.GroupByQueryRunnerTest; import org.apache.druid.query.groupby.ResultRow; +import org.apache.druid.query.timeseries.TimeseriesResultValue; import org.apache.druid.testing.InitializedNullHandlingTest; import org.junit.After; import org.junit.Assert; @@ -138,34 +145,35 @@ public void ingestingSketches() throws Exception ColumnsFilter.all() ), DelimitedInputFormat.forColumns(List.of("timestamp", "product", "sketch")), - String.join( - "\n", - "[", - " {\"type\": \"KllDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"sketch\", \"k\": 200},", - " {\"type\": \"KllDoublesSketch\", \"name\": \"non_existent_sketch\", \"fieldName\": \"non_existent_sketch\", \"k\": 200}", - "]" + List.of( + new KllDoublesSketchAggregatorFactory("sketch", "sketch", 200, null), + new KllDoublesSketchAggregatorFactory("non_existent_sketch", "non_existent_sketch", 200, null) ), 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"groupBy\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"dimensions\": [],", - " \"aggregations\": [", - " {\"type\": \"KllDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"sketch\", \"k\": 200},", - " {\"type\": \"KllDoublesSketch\", \"name\": \"non_existent_sketch\", \"fieldName\": \"non_existent_sketch\", \"k\": 200}", - " ],", - " \"postAggregations\": [", - " {\"type\": \"KllDoublesSketchToQuantiles\", \"name\": \"quantiles\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"KllDoublesSketchToHistogram\", \"name\": \"histogram\", \"splitPoints\": [0.25, 0.5, 0.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}}", - " ],", - " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", - "}" - ) + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval(Intervals.of("2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z")) + .setAggregatorSpecs( + new KllDoublesSketchAggregatorFactory("sketch", "sketch", 200, null), + new KllDoublesSketchAggregatorFactory("non_existent_sketch", "non_existent_sketch", 200, null) + ) + .setPostAggregatorSpecs( + new KllDoublesSketchToQuantilesPostAggregator( + "quantiles", + new FieldAccessPostAggregator("field", "sketch"), + new double[]{0, 0.5, 1} + ), + new KllDoublesSketchToHistogramPostAggregator( + "histogram", + new FieldAccessPostAggregator("field", "sketch"), + new double[]{0.25, 0.5, 0.75}, + null + ) + ) + .build() ); List results = seq.toList(); Assert.assertEquals(1, results.size()); @@ -215,32 +223,47 @@ public void buildingSketchesAtIngestionTime() throws Exception DelimitedInputFormat.forColumns( List.of("timestamp", "sequenceNumber", "product", "value", "valueWithNulls") ), - "[{\"type\": \"KllDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"value\", \"k\": 200}," - + "{\"type\": \"KllDoublesSketch\", \"name\": \"sketchWithNulls\", \"fieldName\": \"valueWithNulls\", \"k\": 200}]", + List.of( + new KllDoublesSketchAggregatorFactory("sketch", "value", 200, null), + new KllDoublesSketchAggregatorFactory("sketchWithNulls", "valueWithNulls", 200, null) + ), 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"groupBy\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"dimensions\": [],", - " \"aggregations\": [", - " {\"type\": \"KllDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"sketch\", \"k\": 200},", - " {\"type\": \"KllDoublesSketch\", \"name\": \"sketchWithNulls\", \"fieldName\": \"sketchWithNulls\", \"k\": 200},", - " {\"type\": \"KllDoublesSketch\", \"name\": \"non_existent_sketch\", \"fieldName\": \"non_existent_sketch\", \"k\": 200}", - " ],", - " \"postAggregations\": [", - " {\"type\": \"KllDoublesSketchToQuantiles\", \"name\": \"quantiles\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"KllDoublesSketchToHistogram\", \"name\": \"histogram\", \"splitPoints\": [0.25, 0.5, 0.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"KllDoublesSketchToQuantiles\", \"name\": \"quantilesWithNulls\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketchWithNulls\"}},", - " {\"type\": \"KllDoublesSketchToHistogram\", \"name\": \"histogramWithNulls\", \"splitPoints\": [6.25, 7.5, 8.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketchWithNulls\"}}", - " ],", - " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", - "}" - ) + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval(Intervals.of("2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z")) + .setAggregatorSpecs( + new KllDoublesSketchAggregatorFactory("sketch", "sketch", 200, null), + new KllDoublesSketchAggregatorFactory("sketchWithNulls", "sketchWithNulls", 200, null), + new KllDoublesSketchAggregatorFactory("non_existent_sketch", "non_existent_sketch", 200, null) + ) + .setPostAggregatorSpecs( + new KllDoublesSketchToQuantilesPostAggregator( + "quantiles", + new FieldAccessPostAggregator("field", "sketch"), + new double[]{0, 0.5, 1} + ), + new KllDoublesSketchToHistogramPostAggregator( + "histogram", + new FieldAccessPostAggregator("field", "sketch"), + new double[]{0.25, 0.5, 0.75}, + null + ), + new KllDoublesSketchToQuantilesPostAggregator( + "quantilesWithNulls", + new FieldAccessPostAggregator("field", "sketchWithNulls"), + new double[]{0, 0.5, 1} + ), + new KllDoublesSketchToHistogramPostAggregator( + "histogramWithNulls", + new FieldAccessPostAggregator("field", "sketchWithNulls"), + new double[]{6.25, 7.5, 8.75}, + null + ) + ) + .build() ); List results = seq.toList(); Assert.assertEquals(1, results.size()); @@ -304,33 +327,56 @@ public void buildingSketchesAtQueryTime() throws Exception DelimitedInputFormat.forColumns( List.of("timestamp", "sequenceNumber", "product", "value", "valueWithNulls") ), - "[{\"type\": \"doubleSum\", \"name\": \"value\", \"fieldName\": \"value\"}," - + "{\"type\": \"doubleSum\", \"name\": \"valueWithNulls\", \"fieldName\": \"valueWithNulls\"}]", + List.of( + new DoubleSumAggregatorFactory("value", "value"), + new DoubleSumAggregatorFactory("valueWithNulls", "valueWithNulls") + ), 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"groupBy\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"dimensions\": [],", - " \"aggregations\": [", - " {\"type\": \"KllDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"value\", \"k\": 200},", - " {\"type\": \"KllDoublesSketch\", \"name\": \"sketchWithNulls\", \"fieldName\": \"valueWithNulls\", \"k\": 200}", - " ],", - " \"postAggregations\": [", - " {\"type\": \"KllDoublesSketchToQuantile\", \"name\": \"quantile\", \"fraction\": 0.5, \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"KllDoublesSketchToQuantiles\", \"name\": \"quantiles\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"KllDoublesSketchToHistogram\", \"name\": \"histogram\", \"splitPoints\": [0.25, 0.5, 0.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"KllDoublesSketchToQuantile\", \"name\": \"quantileWithNulls\", \"fraction\": 0.5, \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketchWithNulls\"}},", - " {\"type\": \"KllDoublesSketchToQuantiles\", \"name\": \"quantilesWithNulls\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketchWithNulls\"}},", - " {\"type\": \"KllDoublesSketchToHistogram\", \"name\": \"histogramWithNulls\", \"splitPoints\": [6.25, 7.5, 8.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketchWithNulls\"}}", - " ],", - " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", - "}" - ) + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval(Intervals.of("2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z")) + .setAggregatorSpecs( + new KllDoublesSketchAggregatorFactory("sketch", "value", 200, null), + new KllDoublesSketchAggregatorFactory("sketchWithNulls", "valueWithNulls", 200, null) + ) + .setPostAggregatorSpecs( + new KllDoublesSketchToQuantilePostAggregator( + "quantile", + new FieldAccessPostAggregator("field", "sketch"), + 0.5 + ), + new KllDoublesSketchToQuantilesPostAggregator( + "quantiles", + new FieldAccessPostAggregator("field", "sketch"), + new double[]{0, 0.5, 1} + ), + new KllDoublesSketchToHistogramPostAggregator( + "histogram", + new FieldAccessPostAggregator("field", "sketch"), + new double[]{0.25, 0.5, 0.75}, + null + ), + new KllDoublesSketchToQuantilePostAggregator( + "quantileWithNulls", + new FieldAccessPostAggregator("field", "sketchWithNulls"), + 0.5 + ), + new KllDoublesSketchToQuantilesPostAggregator( + "quantilesWithNulls", + new FieldAccessPostAggregator("field", "sketchWithNulls"), + new double[]{0, 0.5, 1} + ), + new KllDoublesSketchToHistogramPostAggregator( + "histogramWithNulls", + new FieldAccessPostAggregator("field", "sketchWithNulls"), + new double[]{6.25, 7.5, 8.75}, + null + ) + ) + .build() ); List results = seq.toList(); Assert.assertEquals(1, results.size()); @@ -406,28 +452,38 @@ public void queryingDataWithFieldNameValueAsFloatInsteadOfSketch() throws Except ColumnsFilter.all() ), DelimitedInputFormat.forColumns(List.of("timestamp", "sequenceNumber", "product", "value")), - "[{\"type\": \"doubleSum\", \"name\": \"value\", \"fieldName\": \"value\"}]", + List.of( + new DoubleSumAggregatorFactory("value", "value") + ), 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"groupBy\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"dimensions\": [],", - " \"aggregations\": [", - " {\"type\": \"KllDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"value\", \"k\": 200}", - " ],", - " \"postAggregations\": [", - " {\"type\": \"KllDoublesSketchToQuantile\", \"name\": \"quantile\", \"fraction\": 0.5, \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"KllDoublesSketchToQuantiles\", \"name\": \"quantiles\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"KllDoublesSketchToHistogram\", \"name\": \"histogram\", \"splitPoints\": [0.25, 0.5, 0.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}}", - " ],", - " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", - "}" - ) + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval(Intervals.of("2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z")) + .setAggregatorSpecs( + new KllDoublesSketchAggregatorFactory("sketch", "value", 200, null) + ) + .setPostAggregatorSpecs( + new KllDoublesSketchToQuantilePostAggregator( + "quantile", + new FieldAccessPostAggregator("field", "sketch"), + 0.5 + ), + new KllDoublesSketchToQuantilesPostAggregator( + "quantiles", + new FieldAccessPostAggregator("field", "sketch"), + new double[]{0, 0.5, 1} + ), + new KllDoublesSketchToHistogramPostAggregator( + "histogram", + new FieldAccessPostAggregator("field", "sketch"), + new double[]{0.25, 0.5, 0.75}, + null + ) + ) + .build() ); List results = seq.toList(); Assert.assertEquals(1, results.size()); @@ -464,7 +520,7 @@ public void queryingDataWithFieldNameValueAsFloatInsteadOfSketch() throws Except @Test public void timeSeriesQueryInputAsFloat() throws Exception { - Sequence seq = timeSeriesHelper.createIndexAndRunQueryOnSegment( + Sequence> seq = timeSeriesHelper.createIndexAndRunQueryOnSegment( new File(this.getClass().getClassLoader().getResource("kll/kll_doubles_sketch_build_data.tsv").getFile()), new InputRowSchema( new TimestampSpec("timestamp", "yyyyMMddHH", null), @@ -472,29 +528,40 @@ public void timeSeriesQueryInputAsFloat() throws Exception ColumnsFilter.all() ), DelimitedInputFormat.forColumns(List.of("timestamp", "sequenceNumber", "product", "value")), - "[{\"type\": \"doubleSum\", \"name\": \"value\", \"fieldName\": \"value\"}]", + List.of( + new DoubleSumAggregatorFactory("value", "value") + ), 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"timeseries\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"aggregations\": [", - " {\"type\": \"KllDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"value\", \"k\": 200}", - " ],", - " \"postAggregations\": [", - " {\"type\": \"KllDoublesSketchToQuantile\", \"name\": \"quantile1\", \"fraction\": 0.5, \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"KllDoublesSketchToQuantiles\", \"name\": \"quantiles1\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"KllDoublesSketchToHistogram\", \"name\": \"histogram1\", \"splitPoints\": [0.25, 0.5, 0.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}}", - " ],", - " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", - "}" - ) + Druids.newTimeseriesQueryBuilder() + .dataSource("test_datasource") + .granularity(Granularities.ALL) + .intervals(Intervals.of("2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z").toString()) + .aggregators( + new KllDoublesSketchAggregatorFactory("sketch", "value", 200, null) + ) + .postAggregators( + new KllDoublesSketchToQuantilePostAggregator( + "quantile1", + new FieldAccessPostAggregator("field", "sketch"), + 0.5 + ), + new KllDoublesSketchToQuantilesPostAggregator( + "quantiles1", + new FieldAccessPostAggregator("field", "sketch"), + new double[]{0, 0.5, 1} + ), + new KllDoublesSketchToHistogramPostAggregator( + "histogram1", + new FieldAccessPostAggregator("field", "sketch"), + new double[]{0.25, 0.5, 0.75}, + null + ) + ) + .build() ); - List results = seq.toList(); + List> results = seq.toList(); Assert.assertEquals(1, results.size()); } @@ -509,28 +576,38 @@ public void testSuccessWhenMaxStreamLengthHit() throws Exception ColumnsFilter.all() ), DelimitedInputFormat.forColumns(List.of("timestamp", "sequenceNumber", "product", "value")), - "[{\"type\": \"doubleSum\", \"name\": \"value\", \"fieldName\": \"value\"}]", + List.of( + new DoubleSumAggregatorFactory("value", "value") + ), 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"groupBy\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"dimensions\": [],", - " \"aggregations\": [", - " {\"type\": \"KllDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"value\", \"k\": 200, \"maxStreamLength\": 10}", - " ],", - " \"postAggregations\": [", - " {\"type\": \"KllDoublesSketchToQuantile\", \"name\": \"quantile\", \"fraction\": 0.5, \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"KllDoublesSketchToQuantiles\", \"name\": \"quantiles\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"KllDoublesSketchToHistogram\", \"name\": \"histogram\", \"splitPoints\": [0.25, 0.5, 0.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}}", - " ],", - " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", - "}" - ) + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval(Intervals.of("2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z")) + .setAggregatorSpecs( + new KllDoublesSketchAggregatorFactory("sketch", "value", 200, 10L) + ) + .setPostAggregatorSpecs( + new KllDoublesSketchToQuantilePostAggregator( + "quantile", + new FieldAccessPostAggregator("field", "sketch"), + 0.5 + ), + new KllDoublesSketchToQuantilesPostAggregator( + "quantiles", + new FieldAccessPostAggregator("field", "sketch"), + new double[]{0, 0.5, 1} + ), + new KllDoublesSketchToHistogramPostAggregator( + "histogram", + new FieldAccessPostAggregator("field", "sketch"), + new double[]{0.25, 0.5, 0.75}, + null + ) + ) + .build() ); seq.toList(); } diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchAggregatorTest.java index 8040f2e50838..e255ba91bc79 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/kll/KllFloatsSketchAggregatorTest.java @@ -27,14 +27,21 @@ import org.apache.druid.data.input.impl.DimensionsSpec; import org.apache.druid.data.input.impl.TimestampSpec; import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.guava.Sequence; +import org.apache.druid.query.Druids; import org.apache.druid.query.QueryContexts; +import org.apache.druid.query.Result; import org.apache.druid.query.aggregation.AggregationTestHelper; import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.DoubleSumAggregatorFactory; +import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator; +import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.query.groupby.GroupByQueryConfig; import org.apache.druid.query.groupby.GroupByQueryRunnerTest; import org.apache.druid.query.groupby.ResultRow; +import org.apache.druid.query.timeseries.TimeseriesResultValue; import org.apache.druid.testing.InitializedNullHandlingTest; import org.junit.After; import org.junit.Assert; @@ -138,34 +145,35 @@ public void ingestingSketches() throws Exception ColumnsFilter.all() ), DelimitedInputFormat.forColumns(List.of("timestamp", "product", "sketch")), - String.join( - "\n", - "[", - " {\"type\": \"KllFloatsSketch\", \"name\": \"sketch\", \"fieldName\": \"sketch\", \"k\": 200},", - " {\"type\": \"KllFloatsSketch\", \"name\": \"non_existent_sketch\", \"fieldName\": \"non_existent_sketch\", \"k\": 200}", - "]" + List.of( + new KllFloatsSketchAggregatorFactory("sketch", "sketch", 200, null), + new KllFloatsSketchAggregatorFactory("non_existent_sketch", "non_existent_sketch", 200, null) ), 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"groupBy\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"dimensions\": [],", - " \"aggregations\": [", - " {\"type\": \"KllFloatsSketch\", \"name\": \"sketch\", \"fieldName\": \"sketch\", \"k\": 200},", - " {\"type\": \"KllFloatsSketch\", \"name\": \"non_existent_sketch\", \"fieldName\": \"non_existent_sketch\", \"k\": 200}", - " ],", - " \"postAggregations\": [", - " {\"type\": \"KllFloatsSketchToQuantiles\", \"name\": \"quantiles\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"KllFloatsSketchToHistogram\", \"name\": \"histogram\", \"splitPoints\": [0.25, 0.5, 0.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}}", - " ],", - " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", - "}" - ) + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval(Intervals.of("2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z")) + .setAggregatorSpecs( + new KllFloatsSketchAggregatorFactory("sketch", "sketch", 200, null), + new KllFloatsSketchAggregatorFactory("non_existent_sketch", "non_existent_sketch", 200, null) + ) + .setPostAggregatorSpecs( + new KllFloatsSketchToQuantilesPostAggregator( + "quantiles", + new FieldAccessPostAggregator("field", "sketch"), + new double[]{0, 0.5, 1} + ), + new KllFloatsSketchToHistogramPostAggregator( + "histogram", + new FieldAccessPostAggregator("field", "sketch"), + new float[]{0.25f, 0.5f, 0.75f}, + null + ) + ) + .build() ); List results = seq.toList(); Assert.assertEquals(1, results.size()); @@ -215,32 +223,47 @@ public void buildingSketchesAtIngestionTime() throws Exception DelimitedInputFormat.forColumns( List.of("timestamp", "sequenceNumber", "product", "value", "valueWithNulls") ), - "[{\"type\": \"KllFloatsSketch\", \"name\": \"sketch\", \"fieldName\": \"value\", \"k\": 200}," - + "{\"type\": \"KllFloatsSketch\", \"name\": \"sketchWithNulls\", \"fieldName\": \"valueWithNulls\", \"k\": 200}]", + List.of( + new KllFloatsSketchAggregatorFactory("sketch", "value", 200, null), + new KllFloatsSketchAggregatorFactory("sketchWithNulls", "valueWithNulls", 200, null) + ), 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"groupBy\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"dimensions\": [],", - " \"aggregations\": [", - " {\"type\": \"KllFloatsSketch\", \"name\": \"sketch\", \"fieldName\": \"sketch\", \"k\": 200},", - " {\"type\": \"KllFloatsSketch\", \"name\": \"sketchWithNulls\", \"fieldName\": \"sketchWithNulls\", \"k\": 200},", - " {\"type\": \"KllFloatsSketch\", \"name\": \"non_existent_sketch\", \"fieldName\": \"non_existent_sketch\", \"k\": 200}", - " ],", - " \"postAggregations\": [", - " {\"type\": \"KllFloatsSketchToQuantiles\", \"name\": \"quantiles\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"KllFloatsSketchToHistogram\", \"name\": \"histogram\", \"splitPoints\": [0.25, 0.5, 0.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"KllFloatsSketchToQuantiles\", \"name\": \"quantilesWithNulls\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketchWithNulls\"}},", - " {\"type\": \"KllFloatsSketchToHistogram\", \"name\": \"histogramWithNulls\", \"splitPoints\": [6.25, 7.5, 8.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketchWithNulls\"}}", - " ],", - " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", - "}" - ) + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval(Intervals.of("2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z")) + .setAggregatorSpecs( + new KllFloatsSketchAggregatorFactory("sketch", "sketch", 200, null), + new KllFloatsSketchAggregatorFactory("sketchWithNulls", "sketchWithNulls", 200, null), + new KllFloatsSketchAggregatorFactory("non_existent_sketch", "non_existent_sketch", 200, null) + ) + .setPostAggregatorSpecs( + new KllFloatsSketchToQuantilesPostAggregator( + "quantiles", + new FieldAccessPostAggregator("field", "sketch"), + new double[]{0, 0.5, 1} + ), + new KllFloatsSketchToHistogramPostAggregator( + "histogram", + new FieldAccessPostAggregator("field", "sketch"), + new float[]{0.25f, 0.5f, 0.75f}, + null + ), + new KllFloatsSketchToQuantilesPostAggregator( + "quantilesWithNulls", + new FieldAccessPostAggregator("field", "sketchWithNulls"), + new double[]{0, 0.5, 1} + ), + new KllFloatsSketchToHistogramPostAggregator( + "histogramWithNulls", + new FieldAccessPostAggregator("field", "sketchWithNulls"), + new float[]{6.25f, 7.5f, 8.75f}, + null + ) + ) + .build() ); List results = seq.toList(); Assert.assertEquals(1, results.size()); @@ -304,33 +327,56 @@ public void buildingSketchesAtQueryTime() throws Exception DelimitedInputFormat.forColumns( List.of("timestamp", "sequenceNumber", "product", "value", "valueWithNulls") ), - "[{\"type\": \"doubleSum\", \"name\": \"value\", \"fieldName\": \"value\"}," - + "{\"type\": \"doubleSum\", \"name\": \"valueWithNulls\", \"fieldName\": \"valueWithNulls\"}]", + List.of( + new DoubleSumAggregatorFactory("value", "value"), + new DoubleSumAggregatorFactory("valueWithNulls", "valueWithNulls") + ), 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"groupBy\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"dimensions\": [],", - " \"aggregations\": [", - " {\"type\": \"KllFloatsSketch\", \"name\": \"sketch\", \"fieldName\": \"value\", \"k\": 200},", - " {\"type\": \"KllFloatsSketch\", \"name\": \"sketchWithNulls\", \"fieldName\": \"valueWithNulls\", \"k\": 200}", - " ],", - " \"postAggregations\": [", - " {\"type\": \"KllFloatsSketchToQuantile\", \"name\": \"quantile\", \"fraction\": 0.5, \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"KllFloatsSketchToQuantiles\", \"name\": \"quantiles\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"KllFloatsSketchToHistogram\", \"name\": \"histogram\", \"splitPoints\": [0.25, 0.5, 0.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"KllFloatsSketchToQuantile\", \"name\": \"quantileWithNulls\", \"fraction\": 0.5, \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketchWithNulls\"}},", - " {\"type\": \"KllFloatsSketchToQuantiles\", \"name\": \"quantilesWithNulls\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketchWithNulls\"}},", - " {\"type\": \"KllFloatsSketchToHistogram\", \"name\": \"histogramWithNulls\", \"splitPoints\": [6.25, 7.5, 8.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketchWithNulls\"}}", - " ],", - " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", - "}" - ) + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval(Intervals.of("2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z")) + .setAggregatorSpecs( + new KllFloatsSketchAggregatorFactory("sketch", "value", 200, null), + new KllFloatsSketchAggregatorFactory("sketchWithNulls", "valueWithNulls", 200, null) + ) + .setPostAggregatorSpecs( + new KllFloatsSketchToQuantilePostAggregator( + "quantile", + new FieldAccessPostAggregator("field", "sketch"), + 0.5 + ), + new KllFloatsSketchToQuantilesPostAggregator( + "quantiles", + new FieldAccessPostAggregator("field", "sketch"), + new double[]{0, 0.5, 1} + ), + new KllFloatsSketchToHistogramPostAggregator( + "histogram", + new FieldAccessPostAggregator("field", "sketch"), + new float[]{0.25f, 0.5f, 0.75f}, + null + ), + new KllFloatsSketchToQuantilePostAggregator( + "quantileWithNulls", + new FieldAccessPostAggregator("field", "sketchWithNulls"), + 0.5 + ), + new KllFloatsSketchToQuantilesPostAggregator( + "quantilesWithNulls", + new FieldAccessPostAggregator("field", "sketchWithNulls"), + new double[]{0, 0.5, 1} + ), + new KllFloatsSketchToHistogramPostAggregator( + "histogramWithNulls", + new FieldAccessPostAggregator("field", "sketchWithNulls"), + new float[]{6.25f, 7.5f, 8.75f}, + null + ) + ) + .build() ); List results = seq.toList(); Assert.assertEquals(1, results.size()); @@ -406,28 +452,38 @@ public void queryingDataWithFieldNameValueAsFloatInsteadOfSketch() throws Except ColumnsFilter.all() ), DelimitedInputFormat.forColumns(List.of("timestamp", "sequenceNumber", "product", "value")), - "[{\"type\": \"doubleSum\", \"name\": \"value\", \"fieldName\": \"value\"}]", + List.of( + new DoubleSumAggregatorFactory("value", "value") + ), 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"groupBy\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"dimensions\": [],", - " \"aggregations\": [", - " {\"type\": \"KllFloatsSketch\", \"name\": \"sketch\", \"fieldName\": \"value\", \"k\": 200}", - " ],", - " \"postAggregations\": [", - " {\"type\": \"KllFloatsSketchToQuantile\", \"name\": \"quantile\", \"fraction\": 0.5, \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"KllFloatsSketchToQuantiles\", \"name\": \"quantiles\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"KllFloatsSketchToHistogram\", \"name\": \"histogram\", \"splitPoints\": [0.25, 0.5, 0.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}}", - " ],", - " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", - "}" - ) + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval(Intervals.of("2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z")) + .setAggregatorSpecs( + new KllFloatsSketchAggregatorFactory("sketch", "value", 200, null) + ) + .setPostAggregatorSpecs( + new KllFloatsSketchToQuantilePostAggregator( + "quantile", + new FieldAccessPostAggregator("field", "sketch"), + 0.5 + ), + new KllFloatsSketchToQuantilesPostAggregator( + "quantiles", + new FieldAccessPostAggregator("field", "sketch"), + new double[]{0, 0.5, 1} + ), + new KllFloatsSketchToHistogramPostAggregator( + "histogram", + new FieldAccessPostAggregator("field", "sketch"), + new float[]{0.25f, 0.5f, 0.75f}, + null + ) + ) + .build() ); List results = seq.toList(); Assert.assertEquals(1, results.size()); @@ -464,7 +520,7 @@ public void queryingDataWithFieldNameValueAsFloatInsteadOfSketch() throws Except @Test public void timeSeriesQueryInputAsFloat() throws Exception { - Sequence seq = timeSeriesHelper.createIndexAndRunQueryOnSegment( + Sequence> seq = timeSeriesHelper.createIndexAndRunQueryOnSegment( new File(this.getClass().getClassLoader().getResource("kll/kll_floats_sketch_build_data.tsv").getFile()), new InputRowSchema( new TimestampSpec("timestamp", "yyyyMMddHH", null), @@ -472,29 +528,40 @@ public void timeSeriesQueryInputAsFloat() throws Exception ColumnsFilter.all() ), DelimitedInputFormat.forColumns(List.of("timestamp", "sequenceNumber", "product", "value")), - "[{\"type\": \"doubleSum\", \"name\": \"value\", \"fieldName\": \"value\"}]", + List.of( + new DoubleSumAggregatorFactory("value", "value") + ), 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"timeseries\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"aggregations\": [", - " {\"type\": \"KllFloatsSketch\", \"name\": \"sketch\", \"fieldName\": \"value\", \"k\": 200}", - " ],", - " \"postAggregations\": [", - " {\"type\": \"KllFloatsSketchToQuantile\", \"name\": \"quantile1\", \"fraction\": 0.5, \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"KllFloatsSketchToQuantiles\", \"name\": \"quantiles1\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"KllFloatsSketchToHistogram\", \"name\": \"histogram1\", \"splitPoints\": [0.25, 0.5, 0.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}}", - " ],", - " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", - "}" - ) + Druids.newTimeseriesQueryBuilder() + .dataSource("test_datasource") + .granularity(Granularities.ALL) + .intervals(Intervals.of("2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z").toString()) + .aggregators( + new KllFloatsSketchAggregatorFactory("sketch", "value", 200, null) + ) + .postAggregators( + new KllFloatsSketchToQuantilePostAggregator( + "quantile1", + new FieldAccessPostAggregator("field", "sketch"), + 0.5 + ), + new KllFloatsSketchToQuantilesPostAggregator( + "quantiles1", + new FieldAccessPostAggregator("field", "sketch"), + new double[]{0, 0.5, 1} + ), + new KllFloatsSketchToHistogramPostAggregator( + "histogram1", + new FieldAccessPostAggregator("field", "sketch"), + new float[]{0.25f, 0.5f, 0.75f}, + null + ) + ) + .build() ); - List results = seq.toList(); + List> results = seq.toList(); Assert.assertEquals(1, results.size()); } @@ -509,28 +576,38 @@ public void testSuccessWhenMaxStreamLengthHit() throws Exception ColumnsFilter.all() ), DelimitedInputFormat.forColumns(List.of("timestamp", "sequenceNumber", "product", "value")), - "[{\"type\": \"doubleSum\", \"name\": \"value\", \"fieldName\": \"value\"}]", + List.of( + new DoubleSumAggregatorFactory("value", "value") + ), 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"groupBy\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"dimensions\": [],", - " \"aggregations\": [", - " {\"type\": \"KllFloatsSketch\", \"name\": \"sketch\", \"fieldName\": \"value\", \"k\": 200, \"maxStreamLength\": 10}", - " ],", - " \"postAggregations\": [", - " {\"type\": \"KllFloatsSketchToQuantile\", \"name\": \"quantile\", \"fraction\": 0.5, \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"KllFloatsSketchToQuantiles\", \"name\": \"quantiles\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"KllFloatsSketchToHistogram\", \"name\": \"histogram\", \"splitPoints\": [0.25, 0.5, 0.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}}", - " ],", - " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", - "}" - ) + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval(Intervals.of("2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z")) + .setAggregatorSpecs( + new KllFloatsSketchAggregatorFactory("sketch", "value", 200, 10L) + ) + .setPostAggregatorSpecs( + new KllFloatsSketchToQuantilePostAggregator( + "quantile", + new FieldAccessPostAggregator("field", "sketch"), + 0.5 + ), + new KllFloatsSketchToQuantilesPostAggregator( + "quantiles", + new FieldAccessPostAggregator("field", "sketch"), + new double[]{0, 0.5, 1} + ), + new KllFloatsSketchToHistogramPostAggregator( + "histogram", + new FieldAccessPostAggregator("field", "sketch"), + new float[]{0.25f, 0.5f, 0.75f}, + null + ) + ) + .build() ); seq.toList(); } diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchAggregatorTest.java index badac2858e95..eeeb5556c913 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchAggregatorTest.java @@ -27,14 +27,21 @@ import org.apache.druid.data.input.impl.DimensionsSpec; import org.apache.druid.data.input.impl.TimestampSpec; import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.guava.Sequence; +import org.apache.druid.query.Druids; import org.apache.druid.query.QueryContexts; +import org.apache.druid.query.Result; import org.apache.druid.query.aggregation.AggregationTestHelper; import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.DoubleSumAggregatorFactory; +import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator; +import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.query.groupby.GroupByQueryConfig; import org.apache.druid.query.groupby.GroupByQueryRunnerTest; import org.apache.druid.query.groupby.ResultRow; +import org.apache.druid.query.timeseries.TimeseriesResultValue; import org.apache.druid.testing.InitializedNullHandlingTest; import org.junit.After; import org.junit.Assert; @@ -137,34 +144,35 @@ public void ingestingSketches() throws Exception ColumnsFilter.all() ), DelimitedInputFormat.forColumns(List.of("timestamp", "product", "sketch")), - String.join( - "\n", - "[", - " {\"type\": \"quantilesDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"sketch\", \"k\": 128},", - " {\"type\": \"quantilesDoublesSketch\", \"name\": \"non_existent_sketch\", \"fieldName\": \"non_existent_sketch\", \"k\": 128}", - "]" + List.of( + new DoublesSketchAggregatorFactory("sketch", "sketch", 128), + new DoublesSketchAggregatorFactory("non_existent_sketch", "non_existent_sketch", 128) ), 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"groupBy\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"dimensions\": [],", - " \"aggregations\": [", - " {\"type\": \"quantilesDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"sketch\", \"k\": 128},", - " {\"type\": \"quantilesDoublesSketch\", \"name\": \"non_existent_sketch\", \"fieldName\": \"non_existent_sketch\", \"k\": 128}", - " ],", - " \"postAggregations\": [", - " {\"type\": \"quantilesDoublesSketchToQuantiles\", \"name\": \"quantiles\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"quantilesDoublesSketchToHistogram\", \"name\": \"histogram\", \"splitPoints\": [0.25, 0.5, 0.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}}", - " ],", - " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", - "}" - ) + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval(Intervals.of("2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z")) + .setAggregatorSpecs( + new DoublesSketchAggregatorFactory("sketch", "sketch", 128), + new DoublesSketchAggregatorFactory("non_existent_sketch", "non_existent_sketch", 128) + ) + .setPostAggregatorSpecs( + new DoublesSketchToQuantilesPostAggregator( + "quantiles", + new FieldAccessPostAggregator("field", "sketch"), + new double[]{0, 0.5, 1} + ), + new DoublesSketchToHistogramPostAggregator( + "histogram", + new FieldAccessPostAggregator("field", "sketch"), + new double[]{0.25, 0.5, 0.75}, + null + ) + ) + .build() ); List results = seq.toList(); Assert.assertEquals(1, results.size()); @@ -214,32 +222,47 @@ public void buildingSketchesAtIngestionTime() throws Exception DelimitedInputFormat.forColumns( List.of("timestamp", "sequenceNumber", "product", "value", "valueWithNulls") ), - "[{\"type\": \"quantilesDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"value\", \"k\": 128}," - + "{\"type\": \"quantilesDoublesSketch\", \"name\": \"sketchWithNulls\", \"fieldName\": \"valueWithNulls\", \"k\": 128}]", + List.of( + new DoublesSketchAggregatorFactory("sketch", "value", 128), + new DoublesSketchAggregatorFactory("sketchWithNulls", "valueWithNulls", 128) + ), 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"groupBy\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"dimensions\": [],", - " \"aggregations\": [", - " {\"type\": \"quantilesDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"sketch\", \"k\": 128},", - " {\"type\": \"quantilesDoublesSketch\", \"name\": \"sketchWithNulls\", \"fieldName\": \"sketchWithNulls\", \"k\": 128},", - " {\"type\": \"quantilesDoublesSketch\", \"name\": \"non_existent_sketch\", \"fieldName\": \"non_existent_sketch\", \"k\": 128}", - " ],", - " \"postAggregations\": [", - " {\"type\": \"quantilesDoublesSketchToQuantiles\", \"name\": \"quantiles\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"quantilesDoublesSketchToHistogram\", \"name\": \"histogram\", \"splitPoints\": [0.25, 0.5, 0.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"quantilesDoublesSketchToQuantiles\", \"name\": \"quantilesWithNulls\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketchWithNulls\"}},", - " {\"type\": \"quantilesDoublesSketchToHistogram\", \"name\": \"histogramWithNulls\", \"splitPoints\": [6.25, 7.5, 8.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketchWithNulls\"}}", - " ],", - " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", - "}" - ) + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval(Intervals.of("2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z")) + .setAggregatorSpecs( + new DoublesSketchAggregatorFactory("sketch", "sketch", 128), + new DoublesSketchAggregatorFactory("sketchWithNulls", "sketchWithNulls", 128), + new DoublesSketchAggregatorFactory("non_existent_sketch", "non_existent_sketch", 128) + ) + .setPostAggregatorSpecs( + new DoublesSketchToQuantilesPostAggregator( + "quantiles", + new FieldAccessPostAggregator("field", "sketch"), + new double[]{0, 0.5, 1} + ), + new DoublesSketchToHistogramPostAggregator( + "histogram", + new FieldAccessPostAggregator("field", "sketch"), + new double[]{0.25, 0.5, 0.75}, + null + ), + new DoublesSketchToQuantilesPostAggregator( + "quantilesWithNulls", + new FieldAccessPostAggregator("field", "sketchWithNulls"), + new double[]{0, 0.5, 1} + ), + new DoublesSketchToHistogramPostAggregator( + "histogramWithNulls", + new FieldAccessPostAggregator("field", "sketchWithNulls"), + new double[]{6.25, 7.5, 8.75}, + null + ) + ) + .build() ); List results = seq.toList(); Assert.assertEquals(1, results.size()); @@ -303,33 +326,56 @@ public void buildingSketchesAtQueryTime() throws Exception DelimitedInputFormat.forColumns( List.of("timestamp", "sequenceNumber", "product", "value", "valueWithNulls") ), - "[{\"type\": \"doubleSum\", \"name\": \"value\", \"fieldName\": \"value\"}," - + "{\"type\": \"doubleSum\", \"name\": \"valueWithNulls\", \"fieldName\": \"valueWithNulls\"}]", + List.of( + new DoubleSumAggregatorFactory("value", "value"), + new DoubleSumAggregatorFactory("valueWithNulls", "valueWithNulls") + ), 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"groupBy\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"dimensions\": [],", - " \"aggregations\": [", - " {\"type\": \"quantilesDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"value\", \"k\": 128},", - " {\"type\": \"quantilesDoublesSketch\", \"name\": \"sketchWithNulls\", \"fieldName\": \"valueWithNulls\", \"k\": 128}", - " ],", - " \"postAggregations\": [", - " {\"type\": \"quantilesDoublesSketchToQuantile\", \"name\": \"quantile\", \"fraction\": 0.5, \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"quantilesDoublesSketchToQuantiles\", \"name\": \"quantiles\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"quantilesDoublesSketchToHistogram\", \"name\": \"histogram\", \"splitPoints\": [0.25, 0.5, 0.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"quantilesDoublesSketchToQuantile\", \"name\": \"quantileWithNulls\", \"fraction\": 0.5, \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketchWithNulls\"}},", - " {\"type\": \"quantilesDoublesSketchToQuantiles\", \"name\": \"quantilesWithNulls\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketchWithNulls\"}},", - " {\"type\": \"quantilesDoublesSketchToHistogram\", \"name\": \"histogramWithNulls\", \"splitPoints\": [6.25, 7.5, 8.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketchWithNulls\"}}", - " ],", - " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", - "}" - ) + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval(Intervals.of("2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z")) + .setAggregatorSpecs( + new DoublesSketchAggregatorFactory("sketch", "value", 128), + new DoublesSketchAggregatorFactory("sketchWithNulls", "valueWithNulls", 128) + ) + .setPostAggregatorSpecs( + new DoublesSketchToQuantilePostAggregator( + "quantile", + new FieldAccessPostAggregator("field", "sketch"), + 0.5 + ), + new DoublesSketchToQuantilesPostAggregator( + "quantiles", + new FieldAccessPostAggregator("field", "sketch"), + new double[]{0, 0.5, 1} + ), + new DoublesSketchToHistogramPostAggregator( + "histogram", + new FieldAccessPostAggregator("field", "sketch"), + new double[]{0.25, 0.5, 0.75}, + null + ), + new DoublesSketchToQuantilePostAggregator( + "quantileWithNulls", + new FieldAccessPostAggregator("field", "sketchWithNulls"), + 0.5 + ), + new DoublesSketchToQuantilesPostAggregator( + "quantilesWithNulls", + new FieldAccessPostAggregator("field", "sketchWithNulls"), + new double[]{0, 0.5, 1} + ), + new DoublesSketchToHistogramPostAggregator( + "histogramWithNulls", + new FieldAccessPostAggregator("field", "sketchWithNulls"), + new double[]{6.25, 7.5, 8.75}, + null + ) + ) + .build() ); List results = seq.toList(); Assert.assertEquals(1, results.size()); @@ -405,28 +451,38 @@ public void queryingDataWithFieldNameValueAsFloatInsteadOfSketch() throws Except ColumnsFilter.all() ), DelimitedInputFormat.forColumns(List.of("timestamp", "sequenceNumber", "product", "value")), - "[{\"type\": \"doubleSum\", \"name\": \"value\", \"fieldName\": \"value\"}]", + List.of( + new DoubleSumAggregatorFactory("value", "value") + ), 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"groupBy\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"dimensions\": [],", - " \"aggregations\": [", - " {\"type\": \"quantilesDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"value\", \"k\": 128}", - " ],", - " \"postAggregations\": [", - " {\"type\": \"quantilesDoublesSketchToQuantile\", \"name\": \"quantile\", \"fraction\": 0.5, \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"quantilesDoublesSketchToQuantiles\", \"name\": \"quantiles\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"quantilesDoublesSketchToHistogram\", \"name\": \"histogram\", \"splitPoints\": [0.25, 0.5, 0.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}}", - " ],", - " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", - "}" - ) + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval(Intervals.of("2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z")) + .setAggregatorSpecs( + new DoublesSketchAggregatorFactory("sketch", "value", 128) + ) + .setPostAggregatorSpecs( + new DoublesSketchToQuantilePostAggregator( + "quantile", + new FieldAccessPostAggregator("field", "sketch"), + 0.5 + ), + new DoublesSketchToQuantilesPostAggregator( + "quantiles", + new FieldAccessPostAggregator("field", "sketch"), + new double[]{0, 0.5, 1} + ), + new DoublesSketchToHistogramPostAggregator( + "histogram", + new FieldAccessPostAggregator("field", "sketch"), + new double[]{0.25, 0.5, 0.75}, + null + ) + ) + .build() ); List results = seq.toList(); Assert.assertEquals(1, results.size()); @@ -463,7 +519,7 @@ public void queryingDataWithFieldNameValueAsFloatInsteadOfSketch() throws Except @Test public void timeSeriesQueryInputAsFloat() throws Exception { - Sequence seq = timeSeriesHelper.createIndexAndRunQueryOnSegment( + Sequence> seq = timeSeriesHelper.createIndexAndRunQueryOnSegment( new File(this.getClass().getClassLoader().getResource("quantiles/doubles_build_data.tsv").getFile()), new InputRowSchema( new TimestampSpec("timestamp", "yyyyMMddHH", null), @@ -471,29 +527,40 @@ public void timeSeriesQueryInputAsFloat() throws Exception ColumnsFilter.all() ), DelimitedInputFormat.forColumns(List.of("timestamp", "sequenceNumber", "product", "value")), - "[{\"type\": \"doubleSum\", \"name\": \"value\", \"fieldName\": \"value\"}]", + List.of( + new DoubleSumAggregatorFactory("value", "value") + ), 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"timeseries\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"aggregations\": [", - " {\"type\": \"quantilesDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"value\", \"k\": 128}", - " ],", - " \"postAggregations\": [", - " {\"type\": \"quantilesDoublesSketchToQuantile\", \"name\": \"quantile1\", \"fraction\": 0.5, \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"quantilesDoublesSketchToQuantiles\", \"name\": \"quantiles1\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"quantilesDoublesSketchToHistogram\", \"name\": \"histogram1\", \"splitPoints\": [0.25, 0.5, 0.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}}", - " ],", - " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", - "}" - ) + Druids.newTimeseriesQueryBuilder() + .dataSource("test_datasource") + .granularity(Granularities.ALL) + .intervals(Intervals.of("2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z").toString()) + .aggregators( + new DoublesSketchAggregatorFactory("sketch", "value", 128) + ) + .postAggregators( + new DoublesSketchToQuantilePostAggregator( + "quantile1", + new FieldAccessPostAggregator("field", "sketch"), + 0.5 + ), + new DoublesSketchToQuantilesPostAggregator( + "quantiles1", + new FieldAccessPostAggregator("field", "sketch"), + new double[]{0, 0.5, 1} + ), + new DoublesSketchToHistogramPostAggregator( + "histogram1", + new FieldAccessPostAggregator("field", "sketch"), + new double[]{0.25, 0.5, 0.75}, + null + ) + ) + .build() ); - List results = seq.toList(); + List> results = seq.toList(); Assert.assertEquals(1, results.size()); } @@ -508,28 +575,38 @@ public void testSuccessWhenMaxStreamLengthHit() throws Exception ColumnsFilter.all() ), DelimitedInputFormat.forColumns(List.of("timestamp", "sequenceNumber", "product", "value")), - "[{\"type\": \"doubleSum\", \"name\": \"value\", \"fieldName\": \"value\"}]", + List.of( + new DoubleSumAggregatorFactory("value", "value") + ), 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"groupBy\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"dimensions\": [],", - " \"aggregations\": [", - " {\"type\": \"quantilesDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"value\", \"k\": 128, \"maxStreamLength\": 10}", - " ],", - " \"postAggregations\": [", - " {\"type\": \"quantilesDoublesSketchToQuantile\", \"name\": \"quantile\", \"fraction\": 0.5, \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"quantilesDoublesSketchToQuantiles\", \"name\": \"quantiles\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"quantilesDoublesSketchToHistogram\", \"name\": \"histogram\", \"splitPoints\": [0.25, 0.5, 0.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}}", - " ],", - " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", - "}" - ) + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval(Intervals.of("2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z")) + .setAggregatorSpecs( + new DoublesSketchAggregatorFactory("sketch", "value", 128, 10L, null) + ) + .setPostAggregatorSpecs( + new DoublesSketchToQuantilePostAggregator( + "quantile", + new FieldAccessPostAggregator("field", "sketch"), + 0.5 + ), + new DoublesSketchToQuantilesPostAggregator( + "quantiles", + new FieldAccessPostAggregator("field", "sketch"), + new double[]{0, 0.5, 1} + ), + new DoublesSketchToHistogramPostAggregator( + "histogram", + new FieldAccessPostAggregator("field", "sketch"), + new double[]{0.25, 0.5, 0.75}, + null + ) + ) + .build() ); seq.toList(); } diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java index 1ea350e656e4..96bd9601522d 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java @@ -19,11 +19,9 @@ package org.apache.druid.query.aggregation.datasketches.theta; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; -import com.google.common.io.Files; import org.apache.datasketches.common.Family; import org.apache.datasketches.theta.SetOperation; import org.apache.datasketches.theta.Sketch; @@ -37,22 +35,30 @@ import org.apache.druid.data.input.impl.DimensionsSpec; import org.apache.druid.data.input.impl.TimestampSpec; import org.apache.druid.java.util.common.DateTimes; +import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.guava.Sequence; -import org.apache.druid.query.Query; import org.apache.druid.query.QueryContexts; import org.apache.druid.query.aggregation.AggregationTestHelper; import org.apache.druid.query.aggregation.Aggregator; import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.CountAggregatorFactory; +import org.apache.druid.query.aggregation.FilteredAggregatorFactory; import org.apache.druid.query.aggregation.PostAggregator; import org.apache.druid.query.aggregation.TestObjectColumnSelector; import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator; +import org.apache.druid.query.dimension.DefaultDimensionSpec; +import org.apache.druid.query.filter.AndDimFilter; +import org.apache.druid.query.filter.IntervalDimFilter; +import org.apache.druid.query.filter.SelectorDimFilter; import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.query.groupby.GroupByQueryConfig; import org.apache.druid.query.groupby.GroupByQueryRunnerTest; import org.apache.druid.query.groupby.ResultRow; import org.apache.druid.query.groupby.epinephelinae.GroupByTestColumnSelectorFactory; import org.apache.druid.query.groupby.epinephelinae.GrouperTestUtil; +import org.apache.druid.query.groupby.orderby.DefaultLimitSpec; +import org.apache.druid.query.groupby.orderby.OrderByColumnSpec; import org.junit.After; import org.junit.Assert; import org.junit.Rule; @@ -63,7 +69,6 @@ import java.io.File; import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -115,8 +120,72 @@ public void teardown() throws IOException @Test public void testSketchDataIngestAndGpByQuery() throws Exception { - final GroupByQuery groupByQuery = - readQueryFromClasspath("sketch_test_data_group_by_query.json", helper.getObjectMapper(), vectorize); + final GroupByQuery groupByQuery = GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval("2014-10-19T00:00:00.000Z/2014-10-22T00:00:00.000Z") + .setAggregatorSpecs( + new SketchMergeAggregatorFactory("sids_sketch_count", "sids_sketch", 16384, null, null, null), + new SketchMergeAggregatorFactory("sids_sketch_count_with_err", "sids_sketch", 16384, null, null, 2), + new SketchMergeAggregatorFactory("non_existing_col_validation", "non_existing_col", 16384, null, null, null) + ) + .setPostAggregatorSpecs( + new SketchEstimatePostAggregator( + "sketchEstimatePostAgg", + new FieldAccessPostAggregator("sketchEstimatePostAgg", "sids_sketch_count"), + null + ), + new SketchEstimatePostAggregator( + "sketchEstimatePostAggWithErrorBounds", + new FieldAccessPostAggregator("sketchEstimatePostAggWithErrorBounds", "sids_sketch_count"), + 2 + ), + new SketchEstimatePostAggregator( + "sketchIntersectionPostAggEstimate", + new SketchSetPostAggregator( + "sketchIntersectionPostAgg", + "INTERSECT", + 16384, + ImmutableList.of( + new FieldAccessPostAggregator("sids_sketch_count", "sids_sketch_count"), + new FieldAccessPostAggregator("sids_sketch_count", "sids_sketch_count") + ) + ), + null + ), + new SketchEstimatePostAggregator( + "sketchAnotBPostAggEstimate", + new SketchSetPostAggregator( + "sketchAnotBUnionPostAgg", + "NOT", + null, + ImmutableList.of( + new FieldAccessPostAggregator("sids_sketch_count", "sids_sketch_count"), + new FieldAccessPostAggregator("sids_sketch_count", "sids_sketch_count") + ) + ), + null + ), + new SketchEstimatePostAggregator( + "sketchUnionPostAggEstimate", + new SketchSetPostAggregator( + "sketchUnionPostAgg", + "UNION", + 16384, + ImmutableList.of( + new FieldAccessPostAggregator("sids_sketch_count", "sids_sketch_count"), + new FieldAccessPostAggregator("sids_sketch_count", "sids_sketch_count") + ) + ), + null + ), + new SketchToStringPostAggregator( + "sketchSummary", + new FieldAccessPostAggregator("sketchSummary", "sids_sketch_count") + ) + ) + .setContext(ImmutableMap.of("vectorize", vectorize.toString())) + .build(); final Sequence seq = helper.createIndexAndRunQueryOnSegment( new File(SketchAggregationTest.class.getClassLoader().getResource("sketch_test_data.tsv").getFile()), @@ -126,7 +195,10 @@ public void testSketchDataIngestAndGpByQuery() throws Exception ColumnsFilter.all() ), DelimitedInputFormat.forColumns(List.of("timestamp", "product", "sketch")), - readFileFromClasspathAsString("sketch_test_data_aggregators.json"), + List.of( + new SketchMergeAggregatorFactory("sids_sketch", "sketch", 16384, null, true, null), + new SketchMergeAggregatorFactory("non_existing_col_validation", "non_existing_col", 16384, null, true, null) + ), 0, Granularities.NONE, 1000, @@ -180,8 +252,17 @@ public void testSketchDataIngestAndGpByQuery() throws Exception @Test public void testEmptySketchAggregateCombine() throws Exception { - final GroupByQuery groupByQuery = - readQueryFromClasspath("empty_sketch_group_by_query.json", helper.getObjectMapper(), vectorize); + final GroupByQuery groupByQuery = GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval("2019-07-14T00:00:00.000Z/2019-07-15T00:00:00.000Z") + .setDimensions(new DefaultDimensionSpec("product", "product")) + .setDimFilter(new SelectorDimFilter("product", "product_b", null)) + .setAggregatorSpecs( + new SketchMergeAggregatorFactory("sketch_count", "product_sketch", 16384, null, null, null) + ) + .setContext(ImmutableMap.of("vectorize", vectorize.toString())) + .build(); final Sequence seq = helper.createIndexAndRunQueryOnSegment( new File(SketchAggregationTest.class.getClassLoader().getResource("empty_sketch_data.tsv").getFile()), @@ -191,7 +272,7 @@ public void testEmptySketchAggregateCombine() throws Exception ColumnsFilter.all() ), DelimitedInputFormat.forColumns(List.of("timestamp", "product", "product_code", "product_sketch")), - readFileFromClasspathAsString("empty_sketch_test_data_aggregators.json"), + List.of(new SketchMergeAggregatorFactory("product_sketch", "product_sketch", 16384, null, true, null)), 0, Granularities.NONE, 5, @@ -219,8 +300,72 @@ public void testEmptySketchAggregateCombine() throws Exception @Test public void testThetaCardinalityOnSimpleColumn() throws Exception { - final GroupByQuery groupByQuery = - readQueryFromClasspath("simple_test_data_group_by_query.json", helper.getObjectMapper(), vectorize); + final GroupByQuery groupByQuery = GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval("2014-10-19T00:00:00.000Z/2014-10-22T00:00:00.000Z") + .setDimensions(new DefaultDimensionSpec("product", "product")) + .setAggregatorSpecs( + new SketchMergeAggregatorFactory("sketch_count", "pty_country", 16384, null, null, null), + new SketchMergeAggregatorFactory("non_existing_col_validation", "non_existing_col", 16384, null, null, null) + ) + .setPostAggregatorSpecs( + new SketchEstimatePostAggregator( + "sketchEstimatePostAgg", + new FieldAccessPostAggregator("sketchEstimatePostAgg", "sketch_count"), + null + ), + new SketchEstimatePostAggregator( + "sketchIntersectionPostAggEstimate", + new SketchSetPostAggregator( + "sketchIntersectionPostAgg", + "INTERSECT", + 16384, + ImmutableList.of( + new FieldAccessPostAggregator("sketch_count", "sketch_count"), + new FieldAccessPostAggregator("sketch_count", "sketch_count") + ) + ), + null + ), + new SketchEstimatePostAggregator( + "sketchAnotBPostAggEstimate", + new SketchSetPostAggregator( + "sketchAnotBUnionPostAgg", + "NOT", + 16384, + ImmutableList.of( + new FieldAccessPostAggregator("sketch_count", "sketch_count"), + new FieldAccessPostAggregator("sketch_count", "sketch_count") + ) + ), + null + ), + new SketchEstimatePostAggregator( + "sketchUnionPostAggEstimate", + new SketchSetPostAggregator( + "sketchUnionPostAgg", + "UNION", + 16384, + ImmutableList.of( + new FieldAccessPostAggregator("sketch_count", "sketch_count"), + new FieldAccessPostAggregator("sketch_count", "sketch_count") + ) + ), + null + ) + ) + .setLimitSpec( + new DefaultLimitSpec( + ImmutableList.of( + new OrderByColumnSpec("sketchEstimatePostAgg", OrderByColumnSpec.Direction.ASCENDING), + new OrderByColumnSpec("product", OrderByColumnSpec.Direction.ASCENDING) + ), + null + ) + ) + .setContext(ImmutableMap.of("vectorize", vectorize.toString())) + .build(); final Sequence seq = helper.createIndexAndRunQueryOnSegment( new File(SketchAggregationTest.class.getClassLoader().getResource("simple_test_data.tsv").getFile()), @@ -230,12 +375,7 @@ public void testThetaCardinalityOnSimpleColumn() throws Exception ColumnsFilter.all() ), DelimitedInputFormat.forColumns(List.of("timestamp", "product", "pty_country")), - "[" - + " {" - + " \"type\": \"count\"," - + " \"name\": \"count\"" - + " }" - + "]", + List.of(new CountAggregatorFactory("count")), 0, Granularities.NONE, 1000, @@ -450,8 +590,71 @@ public void testCacheKey() @Test public void testRetentionDataIngestAndGpByQuery() throws Exception { - final GroupByQuery groupByQuery = - readQueryFromClasspath("retention_test_data_group_by_query.json", helper.getObjectMapper(), vectorize); + final GroupByQuery groupByQuery = GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval("2014-10-19T00:00:00.000Z/2014-10-23T00:00:00.000Z") + .setDimensions(new DefaultDimensionSpec("product", "product")) + .setDimFilter(new SelectorDimFilter("product", "product_1", null)) + .setAggregatorSpecs( + new FilteredAggregatorFactory( + new SketchMergeAggregatorFactory("p1_unique_country_day_1", "pty_country", null, null, null, null), + new AndDimFilter(ImmutableList.of( + new SelectorDimFilter("product", "product_1", null), + new IntervalDimFilter("__time", ImmutableList.of(Intervals.of("2014-10-20T00:00:00.000Z/2014-10-21T00:00:00.000Z")), null) + )) + ), + new FilteredAggregatorFactory( + new SketchMergeAggregatorFactory("p1_unique_country_day_2", "pty_country", null, null, null, null), + new AndDimFilter(ImmutableList.of( + new SelectorDimFilter("product", "product_1", null), + new IntervalDimFilter("__time", ImmutableList.of(Intervals.of("2014-10-21T00:00:00.000Z/2014-10-22T00:00:00.000Z")), null) + )) + ), + new FilteredAggregatorFactory( + new SketchMergeAggregatorFactory("p1_unique_country_day_3", "pty_country", null, null, null, null), + new AndDimFilter(ImmutableList.of( + new SelectorDimFilter("product", "product_1", null), + new IntervalDimFilter("__time", ImmutableList.of(Intervals.of("2014-10-22T00:00:00.000Z/2014-10-23T00:00:00.000Z")), null) + )) + ), + new SketchMergeAggregatorFactory("non_existing_col_validation", "non_existing_col", 16384, null, null, null) + ) + .setPostAggregatorSpecs( + new SketchEstimatePostAggregator( + "sketchEstimatePostAgg", + new FieldAccessPostAggregator("sketchEstimatePostAgg", "p1_unique_country_day_1"), + null + ), + new SketchEstimatePostAggregator( + "sketchIntersectionPostAggEstimate1", + new SketchSetPostAggregator( + "sketchIntersectionPostAgg", + "INTERSECT", + 16384, + ImmutableList.of( + new FieldAccessPostAggregator("p1_unique_country_day_1", "p1_unique_country_day_1"), + new FieldAccessPostAggregator("p1_unique_country_day_2", "p1_unique_country_day_2") + ) + ), + null + ), + new SketchEstimatePostAggregator( + "sketchIntersectionPostAggEstimate2", + new SketchSetPostAggregator( + "sketchIntersectionPostAgg2", + "INTERSECT", + 16384, + ImmutableList.of( + new FieldAccessPostAggregator("p1_unique_country_day_1", "p1_unique_country_day_1"), + new FieldAccessPostAggregator("p1_unique_country_day_3", "p1_unique_country_day_3") + ) + ), + null + ) + ) + .setContext(ImmutableMap.of("vectorize", vectorize.toString())) + .build(); final Sequence seq = helper.createIndexAndRunQueryOnSegment( new File(this.getClass().getClassLoader().getResource("retention_test_data.tsv").getFile()), @@ -461,7 +664,10 @@ public void testRetentionDataIngestAndGpByQuery() throws Exception ColumnsFilter.all() ), DelimitedInputFormat.forColumns(List.of("timestamp", "product", "pty_country")), - readFileFromClasspathAsString("simple_test_data_aggregators.json"), + List.of( + new SketchMergeAggregatorFactory("pty_country", "pty_country", null, null, null, null), + new SketchMergeAggregatorFactory("non_existing_col_validation", "non_existing_col", null, null, null, null) + ), 0, Granularities.NONE, 5, @@ -623,24 +829,4 @@ private void assertPostAggregatorSerde(PostAggregator agg) throws Exception ); } - public static > Q readQueryFromClasspath( - final String fileName, - final ObjectMapper objectMapper, - final QueryContexts.Vectorize vectorize - ) throws IOException - { - final String queryString = readFileFromClasspathAsString(fileName); - - //noinspection unchecked - return (Q) objectMapper.readValue(queryString, Query.class) - .withOverriddenContext(ImmutableMap.of("vectorize", vectorize.toString())); - } - - public static String readFileFromClasspathAsString(String fileName) throws IOException - { - return Files.asCharSource( - new File(SketchAggregationTest.class.getClassLoader().getResource(fileName).getFile()), - StandardCharsets.UTF_8 - ).read(); - } } diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregationWithSimpleDataTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregationWithSimpleDataTest.java index bf83d3c45a94..fee143ce7f65 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregationWithSimpleDataTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregationWithSimpleDataTest.java @@ -22,7 +22,6 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; -import com.google.common.io.Files; import org.apache.druid.data.input.ColumnsFilter; import org.apache.druid.data.input.InputFormat; import org.apache.druid.data.input.InputRowSchema; @@ -33,16 +32,23 @@ import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.guava.Sequence; -import org.apache.druid.query.Query; +import org.apache.druid.query.Druids; import org.apache.druid.query.QueryContexts; import org.apache.druid.query.Result; import org.apache.druid.query.aggregation.AggregationTestHelper; +import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator; +import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.query.groupby.GroupByQueryConfig; import org.apache.druid.query.groupby.GroupByQueryRunnerTest; import org.apache.druid.query.groupby.ResultRow; +import org.apache.druid.query.groupby.orderby.DefaultLimitSpec; +import org.apache.druid.query.groupby.orderby.OrderByColumnSpec; import org.apache.druid.query.timeseries.TimeseriesResultValue; import org.apache.druid.query.topn.DimensionAndMetricValueExtractor; +import org.apache.druid.query.topn.InvertedTopNMetricSpec; +import org.apache.druid.query.topn.NumericTopNMetricSpec; +import org.apache.druid.query.topn.TopNQueryBuilder; import org.apache.druid.query.topn.TopNResultValue; import org.apache.druid.testing.InitializedNullHandlingTest; import org.junit.Assert; @@ -54,8 +60,6 @@ import org.junit.runners.Parameterized; import java.io.File; -import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collection; import java.util.List; @@ -118,7 +122,10 @@ public void setup() throws Exception new File(this.getClass().getClassLoader().getResource("simple_test_data.tsv").getFile()), schema, format, - readFileFromClasspathAsString("simple_test_data_aggregators.json"), + List.of( + new SketchMergeAggregatorFactory("pty_country", "pty_country", null, null, null, null), + new SketchMergeAggregatorFactory("non_existing_col_validation", "non_existing_col", null, null, null, null) + ), s1, 0, Granularities.NONE, @@ -130,7 +137,10 @@ public void setup() throws Exception new File(this.getClass().getClassLoader().getResource("simple_test_data.tsv").getFile()), schema, format, - readFileFromClasspathAsString("simple_test_data_aggregators.json"), + List.of( + new SketchMergeAggregatorFactory("pty_country", "pty_country", null, null, null, null), + new SketchMergeAggregatorFactory("non_existing_col_validation", "non_existing_col", null, null, null, null) + ), s2, 0, Granularities.NONE, @@ -149,11 +159,72 @@ public void testSimpleDataIngestAndGpByQuery() throws Exception tempFolder ) ) { - final GroupByQuery groupByQuery = SketchAggregationTest.readQueryFromClasspath( - "simple_test_data_group_by_query.json", - gpByQueryAggregationTestHelper.getObjectMapper(), - vectorize - ); + final GroupByQuery groupByQuery = GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval("2014-10-19T00:00:00.000Z/2014-10-22T00:00:00.000Z") + .setDimensions(new DefaultDimensionSpec("product", "product")) + .setAggregatorSpecs( + new SketchMergeAggregatorFactory("sketch_count", "pty_country", 16384, null, null, null), + new SketchMergeAggregatorFactory("non_existing_col_validation", "non_existing_col", 16384, null, null, null) + ) + .setPostAggregatorSpecs( + new SketchEstimatePostAggregator( + "sketchEstimatePostAgg", + new FieldAccessPostAggregator("sketchEstimatePostAgg", "sketch_count"), + null + ), + new SketchEstimatePostAggregator( + "sketchIntersectionPostAggEstimate", + new SketchSetPostAggregator( + "sketchIntersectionPostAgg", + "INTERSECT", + 16384, + ImmutableList.of( + new FieldAccessPostAggregator("sketch_count", "sketch_count"), + new FieldAccessPostAggregator("sketch_count", "sketch_count") + ) + ), + null + ), + new SketchEstimatePostAggregator( + "sketchAnotBPostAggEstimate", + new SketchSetPostAggregator( + "sketchAnotBUnionPostAgg", + "NOT", + 16384, + ImmutableList.of( + new FieldAccessPostAggregator("sketch_count", "sketch_count"), + new FieldAccessPostAggregator("sketch_count", "sketch_count") + ) + ), + null + ), + new SketchEstimatePostAggregator( + "sketchUnionPostAggEstimate", + new SketchSetPostAggregator( + "sketchUnionPostAgg", + "UNION", + 16384, + ImmutableList.of( + new FieldAccessPostAggregator("sketch_count", "sketch_count"), + new FieldAccessPostAggregator("sketch_count", "sketch_count") + ) + ), + null + ) + ) + .setLimitSpec( + new DefaultLimitSpec( + ImmutableList.of( + new OrderByColumnSpec("sketchEstimatePostAgg", OrderByColumnSpec.Direction.ASCENDING), + new OrderByColumnSpec("product", OrderByColumnSpec.Direction.ASCENDING) + ), + null + ) + ) + .setContext(ImmutableMap.of("vectorize", vectorize.toString())) + .build(); Sequence seq = gpByQueryAggregationTestHelper.runQueryOnSegments( ImmutableList.of(s1, s2), @@ -243,16 +314,67 @@ public void testSimpleDataIngestAndTimeseriesQuery() throws Exception tempFolder ); - Sequence seq = timeseriesQueryAggregationTestHelper.runQueryOnSegments( + Sequence> seq = timeseriesQueryAggregationTestHelper.runQueryOnSegments( ImmutableList.of(s1, s2), - (Query) SketchAggregationTest.readQueryFromClasspath( - "timeseries_query.json", - timeseriesQueryAggregationTestHelper.getObjectMapper(), - vectorize - ) + Druids.newTimeseriesQueryBuilder() + .dataSource("test_datasource") + .granularity(Granularities.ALL) + .intervals("2014-10-19T00:00:00.000Z/2014-10-22T00:00:00.000Z") + .aggregators( + new SketchMergeAggregatorFactory("sketch_count", "pty_country", 16384, null, null, null), + new SketchMergeAggregatorFactory("non_existing_col_validation", "non_existing_col", 16384, null, null, null) + ) + .postAggregators( + new SketchEstimatePostAggregator( + "sketchEstimatePostAgg", + new FieldAccessPostAggregator("sketchEstimatePostAgg", "sketch_count"), + null + ), + new SketchEstimatePostAggregator( + "sketchIntersectionPostAggEstimate", + new SketchSetPostAggregator( + "sketchIntersectionPostAgg", + "INTERSECT", + 16384, + ImmutableList.of( + new FieldAccessPostAggregator("sketch_count", "sketch_count"), + new FieldAccessPostAggregator("sketch_count", "sketch_count") + ) + ), + null + ), + new SketchEstimatePostAggregator( + "sketchAnotBPostAggEstimate", + new SketchSetPostAggregator( + "sketchAnotBUnionPostAgg", + "NOT", + 16384, + ImmutableList.of( + new FieldAccessPostAggregator("sketch_count", "sketch_count"), + new FieldAccessPostAggregator("sketch_count", "sketch_count") + ) + ), + null + ), + new SketchEstimatePostAggregator( + "sketchUnionPostAggEstimate", + new SketchSetPostAggregator( + "sketchUnionPostAgg", + "UNION", + 16384, + ImmutableList.of( + new FieldAccessPostAggregator("sketch_count", "sketch_count"), + new FieldAccessPostAggregator("sketch_count", "sketch_count") + ) + ), + null + ) + ) + .context(ImmutableMap.of("vectorize", vectorize.toString())) + .build() ); - Result result = (Result) Iterables.getOnlyElement(seq.toList()); + Result result = Iterables.getOnlyElement(seq.toList()); Assert.assertEquals(DateTimes.of("2014-10-20T00:00:00.000Z"), result.getTimestamp()); @@ -273,16 +395,70 @@ public void testSimpleDataIngestAndTopNQuery() throws Exception tempFolder ); - Sequence seq = topNQueryAggregationTestHelper.runQueryOnSegments( + Sequence> seq = topNQueryAggregationTestHelper.runQueryOnSegments( ImmutableList.of(s1, s2), - (Query) SketchAggregationTest.readQueryFromClasspath( - "topn_query.json", - topNQueryAggregationTestHelper.getObjectMapper(), - vectorize - ) + new TopNQueryBuilder() + .dataSource("test_datasource") + .granularity(Granularities.ALL) + .intervals("2014-10-19T00:00:00.000Z/2014-10-22T00:00:00.000Z") + .dimension(new DefaultDimensionSpec("product", "product")) + .metric(new InvertedTopNMetricSpec(new NumericTopNMetricSpec("sketch_count"))) + .threshold(1) + .aggregators( + new SketchMergeAggregatorFactory("sketch_count", "pty_country", 16384, null, null, null), + new SketchMergeAggregatorFactory("non_existing_col_validation", "non_existing_col", 16384, null, null, null) + ) + .postAggregators( + new SketchEstimatePostAggregator( + "sketchEstimatePostAgg", + new FieldAccessPostAggregator("sketchEstimatePostAgg", "sketch_count"), + null + ), + new SketchEstimatePostAggregator( + "sketchIntersectionPostAggEstimate", + new SketchSetPostAggregator( + "sketchIntersectionPostAgg", + "INTERSECT", + 16384, + ImmutableList.of( + new FieldAccessPostAggregator("sketch_count", "sketch_count"), + new FieldAccessPostAggregator("sketch_count", "sketch_count") + ) + ), + null + ), + new SketchEstimatePostAggregator( + "sketchAnotBPostAggEstimate", + new SketchSetPostAggregator( + "sketchAnotBUnionPostAgg", + "NOT", + 16384, + ImmutableList.of( + new FieldAccessPostAggregator("sketch_count", "sketch_count"), + new FieldAccessPostAggregator("sketch_count", "sketch_count") + ) + ), + null + ), + new SketchEstimatePostAggregator( + "sketchUnionPostAggEstimate", + new SketchSetPostAggregator( + "sketchUnionPostAgg", + "UNION", + 16384, + ImmutableList.of( + new FieldAccessPostAggregator("sketch_count", "sketch_count"), + new FieldAccessPostAggregator("sketch_count", "sketch_count") + ) + ), + null + ) + ) + .context(ImmutableMap.of("vectorize", vectorize.toString())) + .build() ); - Result result = (Result) Iterables.getOnlyElement(seq.toList()); + Result result = Iterables.getOnlyElement(seq.toList()); Assert.assertEquals(DateTimes.of("2014-10-20T00:00:00.000Z"), result.getTimestamp()); @@ -304,16 +480,75 @@ public void testTopNQueryWithSketchConstant() throws Exception tempFolder ); - Sequence seq = topNQueryAggregationTestHelper.runQueryOnSegments( + final String sketchConst = "AgMDAAAazJMCAAAAAACAPzz9j7pWTMdROWGf15uY1nI="; + Sequence> seq = topNQueryAggregationTestHelper.runQueryOnSegments( ImmutableList.of(s1, s2), - (Query) SketchAggregationTest.readQueryFromClasspath( - "topn_query_sketch_const.json", - topNQueryAggregationTestHelper.getObjectMapper(), - vectorize - ) + new TopNQueryBuilder() + .dataSource("test_datasource") + .granularity(Granularities.ALL) + .intervals("2014-10-19T00:00:00.000Z/2014-10-22T00:00:00.000Z") + .dimension(new DefaultDimensionSpec("product", "product")) + .metric(new InvertedTopNMetricSpec(new NumericTopNMetricSpec("sketch_count"))) + .threshold(3) + .aggregators( + new SketchMergeAggregatorFactory("sketch_count", "pty_country", 16384, null, null, null) + ) + .postAggregators( + new SketchEstimatePostAggregator( + "sketchEstimatePostAgg", + new FieldAccessPostAggregator("sketchEstimatePostAgg", "sketch_count"), + null + ), + new SketchEstimatePostAggregator( + "sketchEstimatePostAggForSketchConstant", + new SketchConstantPostAggregator("theta_sketch_count", sketchConst), + null + ), + new SketchEstimatePostAggregator( + "sketchIntersectionPostAggEstimate", + new SketchSetPostAggregator( + "sketchIntersectionPostAgg", + "INTERSECT", + 16384, + ImmutableList.of( + new FieldAccessPostAggregator("sketch_count", "sketch_count"), + new SketchConstantPostAggregator("theta_sketch_count", sketchConst) + ) + ), + null + ), + new SketchEstimatePostAggregator( + "sketchAnotBPostAggEstimate", + new SketchSetPostAggregator( + "sketchAnotBUnionPostAgg", + "NOT", + 16384, + ImmutableList.of( + new FieldAccessPostAggregator("sketch_count", "sketch_count"), + new SketchConstantPostAggregator("theta_sketch_count", sketchConst) + ) + ), + null + ), + new SketchEstimatePostAggregator( + "sketchUnionPostAggEstimate", + new SketchSetPostAggregator( + "sketchUnionPostAgg", + "UNION", + 16384, + ImmutableList.of( + new FieldAccessPostAggregator("sketch_count", "sketch_count"), + new SketchConstantPostAggregator("theta_sketch_count", sketchConst) + ) + ), + null + ) + ) + .context(ImmutableMap.of("vectorize", vectorize.toString())) + .build() ); - Result result = (Result) Iterables.getOnlyElement(seq.toList()); + Result result = Iterables.getOnlyElement(seq.toList()); Assert.assertEquals(DateTimes.of("2014-10-20T00:00:00.000Z"), result.getTimestamp()); @@ -345,11 +580,4 @@ public void testTopNQueryWithSketchConstant() throws Exception Assert.assertEquals("product_2", value3.getDimensionValue("product")); } - public static String readFileFromClasspathAsString(String fileName) throws IOException - { - return Files.asCharSource( - new File(SketchAggregationTest.class.getClassLoader().getResource(fileName).getFile()), - StandardCharsets.UTF_8 - ).read(); - } } diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/oldapi/OldApiSketchAggregationTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/oldapi/OldApiSketchAggregationTest.java index 45256bdb75f7..70ee94ec4475 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/oldapi/OldApiSketchAggregationTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/oldapi/OldApiSketchAggregationTest.java @@ -21,7 +21,6 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; -import com.google.common.io.Files; import org.apache.datasketches.theta.Sketches; import org.apache.datasketches.theta.UpdateSketch; import org.apache.druid.data.input.ColumnsFilter; @@ -31,9 +30,9 @@ import org.apache.druid.data.input.impl.DimensionsSpec; import org.apache.druid.data.input.impl.TimestampSpec; import org.apache.druid.java.util.common.DateTimes; +import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.guava.Sequence; -import org.apache.druid.query.Query; import org.apache.druid.query.aggregation.AggregationTestHelper; import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.aggregation.PostAggregator; @@ -56,7 +55,6 @@ import java.io.File; import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collection; import java.util.List; @@ -103,9 +101,57 @@ public void teardown() throws IOException @Test public void testSimpleDataIngestAndQuery() throws Exception { - final String groupByQueryString = readFileFromClasspathAsString("oldapi/old_simple_test_data_group_by_query.json"); - final GroupByQuery groupByQuery = (GroupByQuery) helper.getObjectMapper() - .readValue(groupByQueryString, Query.class); + final GroupByQuery groupByQuery = GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval(Intervals.of("2014-10-19T00:00:00.000Z/2014-10-22T00:00:00.000Z")) + .setAggregatorSpecs( + new OldSketchMergeAggregatorFactory("sketch_count", "pty_country", 16384, null), + new OldSketchMergeAggregatorFactory("non_existing_col_validation", "non_existing_col", 16384, null) + ) + .setPostAggregatorSpecs( + new OldSketchEstimatePostAggregator( + "sketchEstimatePostAgg", + new FieldAccessPostAggregator("field", "sketch_count") + ), + new OldSketchEstimatePostAggregator( + "sketchIntersectionPostAggEstimate", + new OldSketchSetPostAggregator( + "sketchIntersectionPostAgg", + "INTERSECT", + 16384, + Lists.newArrayList( + new FieldAccessPostAggregator("field1", "sketch_count"), + new FieldAccessPostAggregator("field2", "sketch_count") + ) + ) + ), + new OldSketchEstimatePostAggregator( + "sketchAnotBPostAggEstimate", + new OldSketchSetPostAggregator( + "sketchAnotBUnionPostAgg", + "NOT", + 16384, + Lists.newArrayList( + new FieldAccessPostAggregator("field1", "sketch_count"), + new FieldAccessPostAggregator("field2", "sketch_count") + ) + ) + ), + new OldSketchEstimatePostAggregator( + "sketchUnionPostAggEstimate", + new OldSketchSetPostAggregator( + "sketchUnionPostAgg", + "UNION", + 16384, + Lists.newArrayList( + new FieldAccessPostAggregator("field1", "sketch_count"), + new FieldAccessPostAggregator("field2", "sketch_count") + ) + ) + ) + ) + .build(); final Sequence seq = helper.createIndexAndRunQueryOnSegment( new File(this.getClass().getClassLoader().getResource("simple_test_data.tsv").getFile()), @@ -115,11 +161,14 @@ public void testSimpleDataIngestAndQuery() throws Exception ColumnsFilter.all() ), DelimitedInputFormat.forColumns(List.of("timestamp", "product", "pty_country")), - readFileFromClasspathAsString("oldapi/old_simple_test_data_aggregators.json"), + List.of( + new OldSketchBuildAggregatorFactory("pty_country", "pty_country", null), + new OldSketchBuildAggregatorFactory("non_existing_col_validation", "non_existing_col", null) + ), 0, Granularities.NONE, 1000, - groupByQueryString + groupByQuery ); List results = seq.toList(); @@ -147,9 +196,57 @@ public void testSimpleDataIngestAndQuery() throws Exception @Test public void testSketchDataIngestAndQuery() throws Exception { - final String groupByQueryString = readFileFromClasspathAsString("oldapi/old_sketch_test_data_group_by_query.json"); - final GroupByQuery groupByQuery = (GroupByQuery) helper.getObjectMapper() - .readValue(groupByQueryString, Query.class); + final GroupByQuery groupByQuery = GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval(Intervals.of("2014-10-19T00:00:00.000Z/2014-10-22T00:00:00.000Z")) + .setAggregatorSpecs( + new OldSketchMergeAggregatorFactory("sids_sketch_count", "sids_sketch", 16384, null), + new OldSketchMergeAggregatorFactory("non_existing_col_validation", "non_existing_col", 16384, null) + ) + .setPostAggregatorSpecs( + new OldSketchEstimatePostAggregator( + "sketchEstimatePostAgg", + new FieldAccessPostAggregator("field", "sids_sketch_count") + ), + new OldSketchEstimatePostAggregator( + "sketchIntersectionPostAggEstimate", + new OldSketchSetPostAggregator( + "sketchIntersectionPostAgg", + "INTERSECT", + 16384, + Lists.newArrayList( + new FieldAccessPostAggregator("field1", "sids_sketch_count"), + new FieldAccessPostAggregator("field2", "sids_sketch_count") + ) + ) + ), + new OldSketchEstimatePostAggregator( + "sketchAnotBPostAggEstimate", + new OldSketchSetPostAggregator( + "sketchAnotBUnionPostAgg", + "NOT", + null, + Lists.newArrayList( + new FieldAccessPostAggregator("field1", "sids_sketch_count"), + new FieldAccessPostAggregator("field2", "sids_sketch_count") + ) + ) + ), + new OldSketchEstimatePostAggregator( + "sketchUnionPostAggEstimate", + new OldSketchSetPostAggregator( + "sketchUnionPostAgg", + "UNION", + 16384, + Lists.newArrayList( + new FieldAccessPostAggregator("field1", "sids_sketch_count"), + new FieldAccessPostAggregator("field2", "sids_sketch_count") + ) + ) + ) + ) + .build(); final Sequence seq = helper.createIndexAndRunQueryOnSegment( new File(OldApiSketchAggregationTest.class.getClassLoader().getResource("sketch_test_data.tsv").getFile()), @@ -159,11 +256,14 @@ public void testSketchDataIngestAndQuery() throws Exception ColumnsFilter.all() ), DelimitedInputFormat.forColumns(List.of("timestamp", "product", "sketch")), - readFileFromClasspathAsString("oldapi/old_sketch_test_data_aggregators.json"), + List.of( + new OldSketchMergeAggregatorFactory("sids_sketch", "sketch", 16384, null), + new OldSketchMergeAggregatorFactory("non_existing_col_validation", "non_existing_col", 16384, null) + ), 0, Granularities.NONE, 1000, - groupByQueryString + groupByQuery ); List results = seq.toList(); @@ -284,12 +384,4 @@ private void assertPostAggregatorSerde(PostAggregator agg) throws Exception ) ); } - - public static String readFileFromClasspathAsString(String fileName) throws IOException - { - return Files.asCharSource( - new File(OldApiSketchAggregationTest.class.getClassLoader().getResource(fileName).getFile()), - StandardCharsets.UTF_8 - ).read(); - } } diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/tuple/ArrayOfDoublesSketchAggregationTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/tuple/ArrayOfDoublesSketchAggregationTest.java index 42bd8f799005..4adc2df9508c 100755 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/tuple/ArrayOfDoublesSketchAggregationTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/tuple/ArrayOfDoublesSketchAggregationTest.java @@ -19,6 +19,7 @@ package org.apache.druid.query.aggregation.datasketches.tuple; +import com.google.common.collect.ImmutableList; import org.apache.datasketches.quantiles.DoublesSketch; import org.apache.druid.data.input.ColumnsFilter; import org.apache.druid.data.input.InputRowSchema; @@ -30,12 +31,22 @@ import org.apache.druid.initialization.DruidModule; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.guava.Sequence; +import org.apache.druid.query.Druids; import org.apache.druid.query.Result; import org.apache.druid.query.aggregation.AggregationTestHelper; +import org.apache.druid.query.aggregation.CountAggregatorFactory; +import org.apache.druid.query.aggregation.DoubleSumAggregatorFactory; +import org.apache.druid.query.aggregation.FilteredAggregatorFactory; +import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator; +import org.apache.druid.query.expression.TestExprMacroTable; +import org.apache.druid.query.filter.SelectorDimFilter; +import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.query.groupby.GroupByQueryConfig; import org.apache.druid.query.groupby.GroupByQueryRunnerTest; import org.apache.druid.query.groupby.ResultRow; import org.apache.druid.query.timeseries.TimeseriesResultValue; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.virtual.ExpressionVirtualColumn; import org.apache.druid.testing.InitializedNullHandlingTest; import org.junit.After; import org.junit.Assert; @@ -95,58 +106,77 @@ public void ingestingSketches() throws Exception ColumnsFilter.all() ), DelimitedInputFormat.forColumns(List.of("timestamp", "product", "sketch")), - String.join( - "\n", - "[", - " {\"type\": \"arrayOfDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"sketch\", \"nominalEntries\": 1024},", - " {\"type\": \"arrayOfDoublesSketch\", \"name\": \"non_existing_sketch\", \"fieldName\": \"non_existing_sketch\"}", - "]" + List.of( + new ArrayOfDoublesSketchAggregatorFactory("sketch", "sketch", 1024, null, null), + new ArrayOfDoublesSketchAggregatorFactory("non_existing_sketch", "non_existing_sketch", null, null, null) ), 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"groupBy\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"dimensions\": [],", - " \"aggregations\": [", - " {\"type\": \"arrayOfDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"sketch\", \"nominalEntries\": 1024},", - " {\"type\": \"arrayOfDoublesSketch\", \"name\": \"non_existing_sketch\", \"fieldName\": \"non_existing_sketch\"}", - " ],", - " \"postAggregations\": [", - " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"estimate\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"arrayOfDoublesSketchToEstimateAndBounds\", \"name\": \"estimateAndBounds\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, \"numStdDevs\": 2},", - " {\"type\": \"arrayOfDoublesSketchToQuantilesSketch\", \"name\": \"quantiles-sketch\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"union\", \"field\": {", - " \"type\": \"arrayOfDoublesSketchSetOp\",", - " \"name\": \"union\",", - " \"operation\": \"UNION\",", - " \"nominalEntries\": 1024,", - " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", - " }},", - " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"intersection\", \"field\": {", - " \"type\": \"arrayOfDoublesSketchSetOp\",", - " \"name\": \"intersection\",", - " \"operation\": \"INTERSECT\",", - " \"nominalEntries\": 1024,", - " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", - " }},", - " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"anotb\", \"field\": {", - " \"type\": \"arrayOfDoublesSketchSetOp\",", - " \"name\": \"anotb\",", - " \"operation\": \"NOT\",", - " \"nominalEntries\": 1024,", - " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", - " }},", - " {\"type\": \"arrayOfDoublesSketchToString\", \"name\": \"summary\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"arrayOfDoublesSketchToVariances\", \"name\": \"variances\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}}", - " ],", - " \"intervals\": [\"2015-01-01T00:00:00.000Z/2015-01-31T00:00:00.000Z\"]", - "}" - ) + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval("2015-01-01T00:00:00.000Z/2015-01-31T00:00:00.000Z") + .setAggregatorSpecs( + new ArrayOfDoublesSketchAggregatorFactory("sketch", "sketch", 1024, null, null), + new ArrayOfDoublesSketchAggregatorFactory("non_existing_sketch", "non_existing_sketch", null, null, null) + ) + .setPostAggregatorSpecs( + new ArrayOfDoublesSketchToEstimatePostAggregator( + "estimate", + new FieldAccessPostAggregator("sketch", "sketch") + ), + new ArrayOfDoublesSketchToEstimateAndBoundsPostAggregator( + "estimateAndBounds", + new FieldAccessPostAggregator("sketch", "sketch"), + 2 + ), + new ArrayOfDoublesSketchToQuantilesSketchPostAggregator( + "quantiles-sketch", + new FieldAccessPostAggregator("sketch", "sketch"), + null, + null + ), + new ArrayOfDoublesSketchToEstimatePostAggregator( + "union", + new ArrayOfDoublesSketchSetOpPostAggregator( + "union", "UNION", 1024, null, + ImmutableList.of( + new FieldAccessPostAggregator("sketch", "sketch"), + new FieldAccessPostAggregator("sketch", "sketch") + ) + ) + ), + new ArrayOfDoublesSketchToEstimatePostAggregator( + "intersection", + new ArrayOfDoublesSketchSetOpPostAggregator( + "intersection", "INTERSECT", 1024, null, + ImmutableList.of( + new FieldAccessPostAggregator("sketch", "sketch"), + new FieldAccessPostAggregator("sketch", "sketch") + ) + ) + ), + new ArrayOfDoublesSketchToEstimatePostAggregator( + "anotb", + new ArrayOfDoublesSketchSetOpPostAggregator( + "anotb", "NOT", 1024, null, + ImmutableList.of( + new FieldAccessPostAggregator("sketch", "sketch"), + new FieldAccessPostAggregator("sketch", "sketch") + ) + ) + ), + new ArrayOfDoublesSketchToStringPostAggregator( + "summary", + new FieldAccessPostAggregator("sketch", "sketch") + ), + new ArrayOfDoublesSketchToVariancesPostAggregator( + "variances", + new FieldAccessPostAggregator("sketch", "sketch") + ) + ) + .build() ); List results = seq.toList(); Assert.assertEquals(1, results.size()); @@ -195,61 +225,66 @@ public void ingestingSketchesTwoValues() throws Exception DelimitedInputFormat.forColumns( List.of("timestamp", "product", "sketch") ), - String.join( - "\n", - "[", - " {\"type\": \"arrayOfDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"sketch\", \"nominalEntries\": 1024, \"numberOfValues\": 2}", - "]" + List.of( + new ArrayOfDoublesSketchAggregatorFactory("sketch", "sketch", 1024, null, 2) ), 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"groupBy\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"dimensions\": [],", - " \"aggregations\": [", - " {\"type\": \"arrayOfDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"sketch\", \"nominalEntries\": 1024, \"numberOfValues\": 2}", - " ],", - " \"postAggregations\": [", - " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"estimate\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"arrayOfDoublesSketchToQuantilesSketch\", \"name\": \"quantiles-sketch\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"union\", \"field\": {", - " \"type\": \"arrayOfDoublesSketchSetOp\",", - " \"name\": \"union\",", - " \"operation\": \"UNION\",", - " \"nominalEntries\": 1024,", - " \"numberOfValues\": 2,", - " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", - " }},", - " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"intersection\", \"field\": {", - " \"type\": \"arrayOfDoublesSketchSetOp\",", - " \"name\": \"intersection\",", - " \"operation\": \"INTERSECT\",", - " \"nominalEntries\": 1024,", - " \"numberOfValues\": 2,", - " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", - " }},", - " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"anotb\", \"field\": {", - " \"type\": \"arrayOfDoublesSketchSetOp\",", - " \"name\": \"anotb\",", - " \"operation\": \"NOT\",", - " \"nominalEntries\": 1024,", - " \"numberOfValues\": 2,", - " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", - " }},", - " {", - " \"type\": \"arrayOfDoublesSketchToMeans\",", - " \"name\": \"means\",", - " \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}", - " }", - " ],", - " \"intervals\": [\"2015-01-01T00:00:00.000Z/2015-01-31T00:00:00.000Z\"]", - "}" - ) + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval("2015-01-01T00:00:00.000Z/2015-01-31T00:00:00.000Z") + .setAggregatorSpecs( + new ArrayOfDoublesSketchAggregatorFactory("sketch", "sketch", 1024, null, 2) + ) + .setPostAggregatorSpecs( + new ArrayOfDoublesSketchToEstimatePostAggregator( + "estimate", + new FieldAccessPostAggregator("sketch", "sketch") + ), + new ArrayOfDoublesSketchToQuantilesSketchPostAggregator( + "quantiles-sketch", + new FieldAccessPostAggregator("sketch", "sketch"), + null, + null + ), + new ArrayOfDoublesSketchToEstimatePostAggregator( + "union", + new ArrayOfDoublesSketchSetOpPostAggregator( + "union", "UNION", 1024, 2, + ImmutableList.of( + new FieldAccessPostAggregator("sketch", "sketch"), + new FieldAccessPostAggregator("sketch", "sketch") + ) + ) + ), + new ArrayOfDoublesSketchToEstimatePostAggregator( + "intersection", + new ArrayOfDoublesSketchSetOpPostAggregator( + "intersection", "INTERSECT", 1024, 2, + ImmutableList.of( + new FieldAccessPostAggregator("sketch", "sketch"), + new FieldAccessPostAggregator("sketch", "sketch") + ) + ) + ), + new ArrayOfDoublesSketchToEstimatePostAggregator( + "anotb", + new ArrayOfDoublesSketchSetOpPostAggregator( + "anotb", "NOT", 1024, 2, + ImmutableList.of( + new FieldAccessPostAggregator("sketch", "sketch"), + new FieldAccessPostAggregator("sketch", "sketch") + ) + ) + ), + new ArrayOfDoublesSketchToMeansPostAggregator( + "means", + new FieldAccessPostAggregator("sketch", "sketch") + ) + ) + .build() ); List results = seq.toList(); Assert.assertEquals(1, results.size()); @@ -288,53 +323,62 @@ public void buildingSketchesAtIngestionTime() throws Exception DelimitedInputFormat.forColumns( List.of("timestamp", "product", "key", "key_num", "value") ), - String.join( - "\n", - "[", - " {\"type\": \"arrayOfDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"key\", \"metricColumns\": [\"value\"], \"nominalEntries\": 1024}", - "]" + List.of( + new ArrayOfDoublesSketchAggregatorFactory("sketch", "key", 1024, List.of("value"), null) ), 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"groupBy\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"dimensions\": [],", - " \"aggregations\": [", - " {\"type\": \"arrayOfDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"sketch\", \"size\": 1024}", - " ],", - " \"postAggregations\": [", - " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"estimate\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"arrayOfDoublesSketchToQuantilesSketch\", \"name\": \"quantiles-sketch\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"union\", \"field\": {", - " \"type\": \"arrayOfDoublesSketchSetOp\",", - " \"name\": \"union\",", - " \"operation\": \"UNION\",", - " \"nominalEntries\": 1024,", - " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", - " }},", - " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"intersection\", \"field\": {", - " \"type\": \"arrayOfDoublesSketchSetOp\",", - " \"name\": \"intersection\",", - " \"operation\": \"INTERSECT\",", - " \"nominalEntries\": 1024,", - " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", - " }},", - " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"anotb\", \"field\": {", - " \"type\": \"arrayOfDoublesSketchSetOp\",", - " \"name\": \"anotb\",", - " \"operation\": \"NOT\",", - " \"nominalEntries\": 1024,", - " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", - " }}", - " ],", - " \"intervals\": [\"2015-01-01T00:00:00.000Z/2015-01-31T00:00:00.000Z\"]", - "}" - ) + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval("2015-01-01T00:00:00.000Z/2015-01-31T00:00:00.000Z") + .setAggregatorSpecs( + new ArrayOfDoublesSketchAggregatorFactory("sketch", "sketch", null, null, null) + ) + .setPostAggregatorSpecs( + new ArrayOfDoublesSketchToEstimatePostAggregator( + "estimate", + new FieldAccessPostAggregator("sketch", "sketch") + ), + new ArrayOfDoublesSketchToQuantilesSketchPostAggregator( + "quantiles-sketch", + new FieldAccessPostAggregator("sketch", "sketch"), + null, + null + ), + new ArrayOfDoublesSketchToEstimatePostAggregator( + "union", + new ArrayOfDoublesSketchSetOpPostAggregator( + "union", "UNION", 1024, null, + ImmutableList.of( + new FieldAccessPostAggregator("sketch", "sketch"), + new FieldAccessPostAggregator("sketch", "sketch") + ) + ) + ), + new ArrayOfDoublesSketchToEstimatePostAggregator( + "intersection", + new ArrayOfDoublesSketchSetOpPostAggregator( + "intersection", "INTERSECT", 1024, null, + ImmutableList.of( + new FieldAccessPostAggregator("sketch", "sketch"), + new FieldAccessPostAggregator("sketch", "sketch") + ) + ) + ), + new ArrayOfDoublesSketchToEstimatePostAggregator( + "anotb", + new ArrayOfDoublesSketchSetOpPostAggregator( + "anotb", "NOT", 1024, null, + ImmutableList.of( + new FieldAccessPostAggregator("sketch", "sketch"), + new FieldAccessPostAggregator("sketch", "sketch") + ) + ) + ) + ) + .build() ); List results = seq.toList(); Assert.assertEquals(1, results.size()); @@ -367,61 +411,66 @@ public void buildingSketchesAtIngestionTimeTwoValues() throws Exception DelimitedInputFormat.forColumns( List.of("timestamp", "product", "key", "value1", "value2") ), - String.join( - "\n", - "[", - " {\"type\": \"arrayOfDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"key\", \"metricColumns\": [ \"value1\", \"value2\" ], \"nominalEntries\": 1024}", - "]" + List.of( + new ArrayOfDoublesSketchAggregatorFactory("sketch", "key", 1024, List.of("value1", "value2"), null) ), 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"groupBy\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"dimensions\": [],", - " \"aggregations\": [", - " {\"type\": \"arrayOfDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"sketch\", \"nominalEntries\": 1024, \"numberOfValues\": 2}", - " ],", - " \"postAggregations\": [", - " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"estimate\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"arrayOfDoublesSketchToQuantilesSketch\", \"name\": \"quantiles-sketch\", \"column\": 2, \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"union\", \"field\": {", - " \"type\": \"arrayOfDoublesSketchSetOp\",", - " \"name\": \"union\",", - " \"operation\": \"UNION\",", - " \"nominalEntries\": 1024,", - " \"numberOfValues\": 2,", - " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", - " }},", - " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"intersection\", \"field\": {", - " \"type\": \"arrayOfDoublesSketchSetOp\",", - " \"name\": \"intersection\",", - " \"operation\": \"INTERSECT\",", - " \"nominalEntries\": 1024,", - " \"numberOfValues\": 2,", - " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", - " }},", - " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"anotb\", \"field\": {", - " \"type\": \"arrayOfDoublesSketchSetOp\",", - " \"name\": \"anotb\",", - " \"operation\": \"NOT\",", - " \"nominalEntries\": 1024,", - " \"numberOfValues\": 2,", - " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", - " }},", - " {", - " \"type\": \"arrayOfDoublesSketchToMeans\",", - " \"name\": \"means\",", - " \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}", - " }", - " ],", - " \"intervals\": [\"2015-01-01T00:00:00.000Z/2015-01-31T00:00:00.000Z\"]", - "}" - ) + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval("2015-01-01T00:00:00.000Z/2015-01-31T00:00:00.000Z") + .setAggregatorSpecs( + new ArrayOfDoublesSketchAggregatorFactory("sketch", "sketch", 1024, null, 2) + ) + .setPostAggregatorSpecs( + new ArrayOfDoublesSketchToEstimatePostAggregator( + "estimate", + new FieldAccessPostAggregator("sketch", "sketch") + ), + new ArrayOfDoublesSketchToQuantilesSketchPostAggregator( + "quantiles-sketch", + new FieldAccessPostAggregator("sketch", "sketch"), + 2, + null + ), + new ArrayOfDoublesSketchToEstimatePostAggregator( + "union", + new ArrayOfDoublesSketchSetOpPostAggregator( + "union", "UNION", 1024, 2, + ImmutableList.of( + new FieldAccessPostAggregator("sketch", "sketch"), + new FieldAccessPostAggregator("sketch", "sketch") + ) + ) + ), + new ArrayOfDoublesSketchToEstimatePostAggregator( + "intersection", + new ArrayOfDoublesSketchSetOpPostAggregator( + "intersection", "INTERSECT", 1024, 2, + ImmutableList.of( + new FieldAccessPostAggregator("sketch", "sketch"), + new FieldAccessPostAggregator("sketch", "sketch") + ) + ) + ), + new ArrayOfDoublesSketchToEstimatePostAggregator( + "anotb", + new ArrayOfDoublesSketchSetOpPostAggregator( + "anotb", "NOT", 1024, 2, + ImmutableList.of( + new FieldAccessPostAggregator("sketch", "sketch"), + new FieldAccessPostAggregator("sketch", "sketch") + ) + ) + ), + new ArrayOfDoublesSketchToMeansPostAggregator( + "means", + new FieldAccessPostAggregator("sketch", "sketch") + ) + ) + .build() ); List results = seq.toList(); Assert.assertEquals(1, results.size()); @@ -467,61 +516,66 @@ public void buildingSketchesAtIngestionTimeTwoValuesAndNumericalKey() throws Exc DelimitedInputFormat.forColumns( List.of("timestamp", "product", "key", "key_num", "value1", "value2") ), - String.join( - "\n", - "[", - " {\"type\": \"arrayOfDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"key_num\", \"metricColumns\": [ \"value1\", \"value2\" ], \"nominalEntries\": 1024}", - "]" + List.of( + new ArrayOfDoublesSketchAggregatorFactory("sketch", "key_num", 1024, List.of("value1", "value2"), null) ), 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"groupBy\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"dimensions\": [],", - " \"aggregations\": [", - " {\"type\": \"arrayOfDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"sketch\", \"nominalEntries\": 1024, \"numberOfValues\": 2}", - " ],", - " \"postAggregations\": [", - " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"estimate\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"arrayOfDoublesSketchToQuantilesSketch\", \"name\": \"quantiles-sketch\", \"column\": 2, \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"union\", \"field\": {", - " \"type\": \"arrayOfDoublesSketchSetOp\",", - " \"name\": \"union\",", - " \"operation\": \"UNION\",", - " \"nominalEntries\": 1024,", - " \"numberOfValues\": 2,", - " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", - " }},", - " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"intersection\", \"field\": {", - " \"type\": \"arrayOfDoublesSketchSetOp\",", - " \"name\": \"intersection\",", - " \"operation\": \"INTERSECT\",", - " \"nominalEntries\": 1024,", - " \"numberOfValues\": 2,", - " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", - " }},", - " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"anotb\", \"field\": {", - " \"type\": \"arrayOfDoublesSketchSetOp\",", - " \"name\": \"anotb\",", - " \"operation\": \"NOT\",", - " \"nominalEntries\": 1024,", - " \"numberOfValues\": 2,", - " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", - " }},", - " {", - " \"type\": \"arrayOfDoublesSketchToMeans\",", - " \"name\": \"means\",", - " \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}", - " }", - " ],", - " \"intervals\": [\"2015-01-01T00:00:00.000Z/2015-01-31T00:00:00.000Z\"]", - "}" - ) + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval("2015-01-01T00:00:00.000Z/2015-01-31T00:00:00.000Z") + .setAggregatorSpecs( + new ArrayOfDoublesSketchAggregatorFactory("sketch", "sketch", 1024, null, 2) + ) + .setPostAggregatorSpecs( + new ArrayOfDoublesSketchToEstimatePostAggregator( + "estimate", + new FieldAccessPostAggregator("sketch", "sketch") + ), + new ArrayOfDoublesSketchToQuantilesSketchPostAggregator( + "quantiles-sketch", + new FieldAccessPostAggregator("sketch", "sketch"), + 2, + null + ), + new ArrayOfDoublesSketchToEstimatePostAggregator( + "union", + new ArrayOfDoublesSketchSetOpPostAggregator( + "union", "UNION", 1024, 2, + ImmutableList.of( + new FieldAccessPostAggregator("sketch", "sketch"), + new FieldAccessPostAggregator("sketch", "sketch") + ) + ) + ), + new ArrayOfDoublesSketchToEstimatePostAggregator( + "intersection", + new ArrayOfDoublesSketchSetOpPostAggregator( + "intersection", "INTERSECT", 1024, 2, + ImmutableList.of( + new FieldAccessPostAggregator("sketch", "sketch"), + new FieldAccessPostAggregator("sketch", "sketch") + ) + ) + ), + new ArrayOfDoublesSketchToEstimatePostAggregator( + "anotb", + new ArrayOfDoublesSketchSetOpPostAggregator( + "anotb", "NOT", 1024, 2, + ImmutableList.of( + new FieldAccessPostAggregator("sketch", "sketch"), + new FieldAccessPostAggregator("sketch", "sketch") + ) + ) + ), + new ArrayOfDoublesSketchToMeansPostAggregator( + "means", + new FieldAccessPostAggregator("sketch", "sketch") + ) + ) + .build() ); List results = seq.toList(); Assert.assertEquals(1, results.size()); @@ -564,62 +618,72 @@ public void buildingSketchesAtIngestionTimeThreeValuesAndNulls() throws Exceptio DelimitedInputFormat.forColumns( List.of("timestamp", "product", "key", "value1", "value2", "value3") ), - String.join( - "\n", - "[", - " {\"type\": \"arrayOfDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"key\", \"metricColumns\": [ \"value1\", \"value2\", \"value3\" ], \"nominalEntries\": 1024}", - "]" + List.of( + new ArrayOfDoublesSketchAggregatorFactory("sketch", "key", 1024, List.of("value1", "value2", "value3"), null) ), 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"groupBy\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"dimensions\": [],", - " \"aggregations\": [", - " {\"type\": \"arrayOfDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"sketch\", \"nominalEntries\": 1024, \"numberOfValues\": 3}", - " ],", - " \"postAggregations\": [", - " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"estimate\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"arrayOfDoublesSketchToQuantilesSketch\", \"name\": \"quantiles-sketch\", \"column\": 2, \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"union\", \"field\": {", - " \"type\": \"arrayOfDoublesSketchSetOp\",", - " \"name\": \"union\",", - " \"operation\": \"UNION\",", - " \"nominalEntries\": 1024,", - " \"numberOfValues\": 3,", - " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", - " }},", - " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"intersection\", \"field\": {", - " \"type\": \"arrayOfDoublesSketchSetOp\",", - " \"name\": \"intersection\",", - " \"operation\": \"INTERSECT\",", - " \"nominalEntries\": 1024,", - " \"numberOfValues\": 3,", - " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", - " }},", - " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"anotb\", \"field\": {", - " \"type\": \"arrayOfDoublesSketchSetOp\",", - " \"name\": \"anotb\",", - " \"operation\": \"NOT\",", - " \"nominalEntries\": 1024,", - " \"numberOfValues\": 3,", - " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", - " }},", - " {", - " \"type\": \"arrayOfDoublesSketchToMeans\",", - " \"name\": \"means\",", - " \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}", - " },", - " {\"type\": \"arrayOfDoublesSketchToQuantilesSketch\", \"name\": \"quantiles-sketch-with-nulls\", \"column\": 3, \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}}", - " ],", - " \"intervals\": [\"2015-01-01T00:00:00.000Z/2015-01-31T00:00:00.000Z\"]", - "}" - ) + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval("2015-01-01T00:00:00.000Z/2015-01-31T00:00:00.000Z") + .setAggregatorSpecs( + new ArrayOfDoublesSketchAggregatorFactory("sketch", "sketch", 1024, null, 3) + ) + .setPostAggregatorSpecs( + new ArrayOfDoublesSketchToEstimatePostAggregator( + "estimate", + new FieldAccessPostAggregator("sketch", "sketch") + ), + new ArrayOfDoublesSketchToQuantilesSketchPostAggregator( + "quantiles-sketch", + new FieldAccessPostAggregator("sketch", "sketch"), + 2, + null + ), + new ArrayOfDoublesSketchToEstimatePostAggregator( + "union", + new ArrayOfDoublesSketchSetOpPostAggregator( + "union", "UNION", 1024, 3, + ImmutableList.of( + new FieldAccessPostAggregator("sketch", "sketch"), + new FieldAccessPostAggregator("sketch", "sketch") + ) + ) + ), + new ArrayOfDoublesSketchToEstimatePostAggregator( + "intersection", + new ArrayOfDoublesSketchSetOpPostAggregator( + "intersection", "INTERSECT", 1024, 3, + ImmutableList.of( + new FieldAccessPostAggregator("sketch", "sketch"), + new FieldAccessPostAggregator("sketch", "sketch") + ) + ) + ), + new ArrayOfDoublesSketchToEstimatePostAggregator( + "anotb", + new ArrayOfDoublesSketchSetOpPostAggregator( + "anotb", "NOT", 1024, 3, + ImmutableList.of( + new FieldAccessPostAggregator("sketch", "sketch"), + new FieldAccessPostAggregator("sketch", "sketch") + ) + ) + ), + new ArrayOfDoublesSketchToMeansPostAggregator( + "means", + new FieldAccessPostAggregator("sketch", "sketch") + ), + new ArrayOfDoublesSketchToQuantilesSketchPostAggregator( + "quantiles-sketch-with-nulls", + new FieldAccessPostAggregator("sketch", "sketch"), + 3, + null + ) + ) + .build() ); List results = seq.toList(); Assert.assertEquals(1, results.size()); @@ -672,54 +736,63 @@ public void buildingSketchesAtQueryTime() throws Exception DelimitedInputFormat.forColumns( List.of("timestamp", "product", "key", "key_num", "value") ), - String.join( - "\n", - "[", - " {\"type\": \"doubleSum\", \"name\": \"value\", \"fieldName\": \"value\"}", - "]" + List.of( + new DoubleSumAggregatorFactory("value", "value") ), 0, // minTimestamp Granularities.NONE, 40, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"groupBy\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"dimensions\": [],", - " \"aggregations\": [", - " {\"type\": \"arrayOfDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"key\", \"metricColumns\": [\"value\"], \"nominalEntries\": 1024},", - " {\"type\": \"count\", \"name\":\"cnt\"}", - " ],", - " \"postAggregations\": [", - " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"estimate\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"arrayOfDoublesSketchToQuantilesSketch\", \"name\": \"quantiles-sketch\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"union\", \"field\": {", - " \"type\": \"arrayOfDoublesSketchSetOp\",", - " \"name\": \"union\",", - " \"operation\": \"UNION\",", - " \"nominalEntries\": 1024,", - " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", - " }},", - " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"intersection\", \"field\": {", - " \"type\": \"arrayOfDoublesSketchSetOp\",", - " \"name\": \"intersection\",", - " \"operation\": \"INTERSECT\",", - " \"nominalEntries\": 1024,", - " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", - " }},", - " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"anotb\", \"field\": {", - " \"type\": \"arrayOfDoublesSketchSetOp\",", - " \"name\": \"anotb\",", - " \"operation\": \"NOT\",", - " \"nominalEntries\": 1024,", - " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", - " }}", - " ],", - " \"intervals\": [\"2015-01-01T00:00:00.000Z/2015-01-31T00:00:00.000Z\"]", - "}" - ) + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval("2015-01-01T00:00:00.000Z/2015-01-31T00:00:00.000Z") + .setAggregatorSpecs( + new ArrayOfDoublesSketchAggregatorFactory("sketch", "key", 1024, List.of("value"), null), + new CountAggregatorFactory("cnt") + ) + .setPostAggregatorSpecs( + new ArrayOfDoublesSketchToEstimatePostAggregator( + "estimate", + new FieldAccessPostAggregator("sketch", "sketch") + ), + new ArrayOfDoublesSketchToQuantilesSketchPostAggregator( + "quantiles-sketch", + new FieldAccessPostAggregator("sketch", "sketch"), + null, + null + ), + new ArrayOfDoublesSketchToEstimatePostAggregator( + "union", + new ArrayOfDoublesSketchSetOpPostAggregator( + "union", "UNION", 1024, null, + ImmutableList.of( + new FieldAccessPostAggregator("sketch", "sketch"), + new FieldAccessPostAggregator("sketch", "sketch") + ) + ) + ), + new ArrayOfDoublesSketchToEstimatePostAggregator( + "intersection", + new ArrayOfDoublesSketchSetOpPostAggregator( + "intersection", "INTERSECT", 1024, null, + ImmutableList.of( + new FieldAccessPostAggregator("sketch", "sketch"), + new FieldAccessPostAggregator("sketch", "sketch") + ) + ) + ), + new ArrayOfDoublesSketchToEstimatePostAggregator( + "anotb", + new ArrayOfDoublesSketchSetOpPostAggregator( + "anotb", "NOT", 1024, null, + ImmutableList.of( + new FieldAccessPostAggregator("sketch", "sketch"), + new FieldAccessPostAggregator("sketch", "sketch") + ) + ) + ) + ) + .build() ); List results = seq.toList(); Assert.assertEquals(1, results.size()); @@ -758,54 +831,63 @@ public void buildingSketchesAtQueryTimeUseNumerical() throws Exception DelimitedInputFormat.forColumns( List.of("timestamp", "product", "key", "key_num", "value") ), - String.join( - "\n", - "[", - " {\"type\": \"doubleSum\", \"name\": \"value\", \"fieldName\": \"value\"}", - "]" + List.of( + new DoubleSumAggregatorFactory("value", "value") ), 0, // minTimestamp Granularities.NONE, 40, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"groupBy\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"dimensions\": [],", - " \"aggregations\": [", - " {\"type\": \"arrayOfDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"key_num\", \"metricColumns\": [\"value\"], \"nominalEntries\": 1024},", - " {\"type\": \"count\", \"name\":\"cnt\"}", - " ],", - " \"postAggregations\": [", - " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"estimate\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"arrayOfDoublesSketchToQuantilesSketch\", \"name\": \"quantiles-sketch\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"union\", \"field\": {", - " \"type\": \"arrayOfDoublesSketchSetOp\",", - " \"name\": \"union\",", - " \"operation\": \"UNION\",", - " \"nominalEntries\": 1024,", - " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", - " }},", - " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"intersection\", \"field\": {", - " \"type\": \"arrayOfDoublesSketchSetOp\",", - " \"name\": \"intersection\",", - " \"operation\": \"INTERSECT\",", - " \"nominalEntries\": 1024,", - " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", - " }},", - " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"anotb\", \"field\": {", - " \"type\": \"arrayOfDoublesSketchSetOp\",", - " \"name\": \"anotb\",", - " \"operation\": \"NOT\",", - " \"nominalEntries\": 1024,", - " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", - " }}", - " ],", - " \"intervals\": [\"2015-01-01T00:00:00.000Z/2015-01-31T00:00:00.000Z\"]", - "}" - ) + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval("2015-01-01T00:00:00.000Z/2015-01-31T00:00:00.000Z") + .setAggregatorSpecs( + new ArrayOfDoublesSketchAggregatorFactory("sketch", "key_num", 1024, List.of("value"), null), + new CountAggregatorFactory("cnt") + ) + .setPostAggregatorSpecs( + new ArrayOfDoublesSketchToEstimatePostAggregator( + "estimate", + new FieldAccessPostAggregator("sketch", "sketch") + ), + new ArrayOfDoublesSketchToQuantilesSketchPostAggregator( + "quantiles-sketch", + new FieldAccessPostAggregator("sketch", "sketch"), + null, + null + ), + new ArrayOfDoublesSketchToEstimatePostAggregator( + "union", + new ArrayOfDoublesSketchSetOpPostAggregator( + "union", "UNION", 1024, null, + ImmutableList.of( + new FieldAccessPostAggregator("sketch", "sketch"), + new FieldAccessPostAggregator("sketch", "sketch") + ) + ) + ), + new ArrayOfDoublesSketchToEstimatePostAggregator( + "intersection", + new ArrayOfDoublesSketchSetOpPostAggregator( + "intersection", "INTERSECT", 1024, null, + ImmutableList.of( + new FieldAccessPostAggregator("sketch", "sketch"), + new FieldAccessPostAggregator("sketch", "sketch") + ) + ) + ), + new ArrayOfDoublesSketchToEstimatePostAggregator( + "anotb", + new ArrayOfDoublesSketchSetOpPostAggregator( + "anotb", "NOT", 1024, null, + ImmutableList.of( + new FieldAccessPostAggregator("sketch", "sketch"), + new FieldAccessPostAggregator("sketch", "sketch") + ) + ) + ) + ) + .build() ); List results = seq.toList(); Assert.assertEquals(1, results.size()); @@ -844,53 +926,63 @@ public void buildingSketchesAtQueryTimeTimeseries() throws Exception DelimitedInputFormat.forColumns( List.of("timestamp", "product", "key", "key_num", "value") ), - String.join( - "\n", - "[", - " {\"type\": \"doubleSum\", \"name\": \"value\", \"fieldName\": \"value\"}", - "]" + List.of( + new DoubleSumAggregatorFactory("value", "value") ), 0, // minTimestamp Granularities.NONE, 40, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"timeseries\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"aggregations\": [", - " {\"type\": \"arrayOfDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"key\", \"metricColumns\": [\"value\"], \"nominalEntries\": 1024},", - " {\"type\": \"count\", \"name\":\"cnt\"}", - " ],", - " \"postAggregations\": [", - " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"estimate\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"arrayOfDoublesSketchToQuantilesSketch\", \"name\": \"quantiles-sketch\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"union\", \"field\": {", - " \"type\": \"arrayOfDoublesSketchSetOp\",", - " \"name\": \"union\",", - " \"operation\": \"UNION\",", - " \"nominalEntries\": 1024,", - " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", - " }},", - " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"intersection\", \"field\": {", - " \"type\": \"arrayOfDoublesSketchSetOp\",", - " \"name\": \"intersection\",", - " \"operation\": \"INTERSECT\",", - " \"nominalEntries\": 1024,", - " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", - " }},", - " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"anotb\", \"field\": {", - " \"type\": \"arrayOfDoublesSketchSetOp\",", - " \"name\": \"anotb\",", - " \"operation\": \"NOT\",", - " \"nominalEntries\": 1024,", - " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", - " }}", - " ],", - " \"intervals\": [\"2015-01-01T00:00:00.000Z/2015-01-31T00:00:00.000Z\"]", - "}" - ) + Druids.newTimeseriesQueryBuilder() + .dataSource("test_datasource") + .granularity(Granularities.ALL) + .intervals("2015-01-01T00:00:00.000Z/2015-01-31T00:00:00.000Z") + .aggregators( + new ArrayOfDoublesSketchAggregatorFactory("sketch", "key", 1024, List.of("value"), null), + new CountAggregatorFactory("cnt") + ) + .postAggregators( + new ArrayOfDoublesSketchToEstimatePostAggregator( + "estimate", + new FieldAccessPostAggregator("sketch", "sketch") + ), + new ArrayOfDoublesSketchToQuantilesSketchPostAggregator( + "quantiles-sketch", + new FieldAccessPostAggregator("sketch", "sketch"), + null, + null + ), + new ArrayOfDoublesSketchToEstimatePostAggregator( + "union", + new ArrayOfDoublesSketchSetOpPostAggregator( + "union", "UNION", 1024, null, + ImmutableList.of( + new FieldAccessPostAggregator("sketch", "sketch"), + new FieldAccessPostAggregator("sketch", "sketch") + ) + ) + ), + new ArrayOfDoublesSketchToEstimatePostAggregator( + "intersection", + new ArrayOfDoublesSketchSetOpPostAggregator( + "intersection", "INTERSECT", 1024, null, + ImmutableList.of( + new FieldAccessPostAggregator("sketch", "sketch"), + new FieldAccessPostAggregator("sketch", "sketch") + ) + ) + ), + new ArrayOfDoublesSketchToEstimatePostAggregator( + "anotb", + new ArrayOfDoublesSketchSetOpPostAggregator( + "anotb", "NOT", 1024, null, + ImmutableList.of( + new FieldAccessPostAggregator("sketch", "sketch"), + new FieldAccessPostAggregator("sketch", "sketch") + ) + ) + ) + ) + .build() ); List> results = seq.toList(); Assert.assertEquals(1, results.size()); @@ -929,53 +1021,63 @@ public void buildingSketchesAtQueryTimeUsingNumericalTimeseries() throws Excepti DelimitedInputFormat.forColumns( List.of("timestamp", "product", "key", "key_num", "value") ), - String.join( - "\n", - "[", - " {\"type\": \"doubleSum\", \"name\": \"value\", \"fieldName\": \"value\"}", - "]" + List.of( + new DoubleSumAggregatorFactory("value", "value") ), 0, // minTimestamp Granularities.NONE, 40, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"timeseries\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"aggregations\": [", - " {\"type\": \"arrayOfDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"key_num\", \"metricColumns\": [\"value\"], \"nominalEntries\": 1024},", - " {\"type\": \"count\", \"name\":\"cnt\"}", - " ],", - " \"postAggregations\": [", - " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"estimate\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"arrayOfDoublesSketchToQuantilesSketch\", \"name\": \"quantiles-sketch\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"union\", \"field\": {", - " \"type\": \"arrayOfDoublesSketchSetOp\",", - " \"name\": \"union\",", - " \"operation\": \"UNION\",", - " \"nominalEntries\": 1024,", - " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", - " }},", - " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"intersection\", \"field\": {", - " \"type\": \"arrayOfDoublesSketchSetOp\",", - " \"name\": \"intersection\",", - " \"operation\": \"INTERSECT\",", - " \"nominalEntries\": 1024,", - " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", - " }},", - " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"anotb\", \"field\": {", - " \"type\": \"arrayOfDoublesSketchSetOp\",", - " \"name\": \"anotb\",", - " \"operation\": \"NOT\",", - " \"nominalEntries\": 1024,", - " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", - " }}", - " ],", - " \"intervals\": [\"2015-01-01T00:00:00.000Z/2015-01-31T00:00:00.000Z\"]", - "}" - ) + Druids.newTimeseriesQueryBuilder() + .dataSource("test_datasource") + .granularity(Granularities.ALL) + .intervals("2015-01-01T00:00:00.000Z/2015-01-31T00:00:00.000Z") + .aggregators( + new ArrayOfDoublesSketchAggregatorFactory("sketch", "key_num", 1024, List.of("value"), null), + new CountAggregatorFactory("cnt") + ) + .postAggregators( + new ArrayOfDoublesSketchToEstimatePostAggregator( + "estimate", + new FieldAccessPostAggregator("sketch", "sketch") + ), + new ArrayOfDoublesSketchToQuantilesSketchPostAggregator( + "quantiles-sketch", + new FieldAccessPostAggregator("sketch", "sketch"), + null, + null + ), + new ArrayOfDoublesSketchToEstimatePostAggregator( + "union", + new ArrayOfDoublesSketchSetOpPostAggregator( + "union", "UNION", 1024, null, + ImmutableList.of( + new FieldAccessPostAggregator("sketch", "sketch"), + new FieldAccessPostAggregator("sketch", "sketch") + ) + ) + ), + new ArrayOfDoublesSketchToEstimatePostAggregator( + "intersection", + new ArrayOfDoublesSketchSetOpPostAggregator( + "intersection", "INTERSECT", 1024, null, + ImmutableList.of( + new FieldAccessPostAggregator("sketch", "sketch"), + new FieldAccessPostAggregator("sketch", "sketch") + ) + ) + ), + new ArrayOfDoublesSketchToEstimatePostAggregator( + "anotb", + new ArrayOfDoublesSketchSetOpPostAggregator( + "anotb", "NOT", 1024, null, + ImmutableList.of( + new FieldAccessPostAggregator("sketch", "sketch"), + new FieldAccessPostAggregator("sketch", "sketch") + ) + ) + ) + ) + .build() ); List> results = seq.toList(); Assert.assertEquals(1, results.size()); @@ -1010,45 +1112,40 @@ public void buildingSketchesAtQueryTimeTwoBucketsTest() throws Exception DelimitedInputFormat.forColumns( List.of("timestamp", "label", "userid", "parameter") ), - String.join( - "\n", - "[", - " {\"type\": \"doubleSum\", \"name\": \"parameter\", \"fieldName\": \"parameter\"}", - "]" + List.of( + new DoubleSumAggregatorFactory("parameter", "parameter") ), 0, // minTimestamp Granularities.NONE, 2000, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"groupBy\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"dimensions\": [],", - " \"aggregations\": [", - " {", - " \"type\": \"filtered\",", - " \"filter\": {\"type\": \"selector\", \"dimension\": \"label\", \"value\": \"test\"},", - " \"aggregator\": {\"type\": \"arrayOfDoublesSketch\", \"name\": \"sketch-test\", \"fieldName\": \"userid\", \"metricColumns\": [\"parameter\"]}", - " },", - " {", - " \"type\": \"filtered\",", - " \"filter\": {\"type\": \"selector\", \"dimension\": \"label\", \"value\": \"control\"},", - " \"aggregator\": {\"type\": \"arrayOfDoublesSketch\", \"name\": \"sketch-control\", \"fieldName\": \"userid\", \"metricColumns\": [\"parameter\"]}", - " }", - " ],", - " \"postAggregations\": [", - " {\"type\": \"arrayOfDoublesSketchTTest\",", - " \"name\": \"p-value\", \"fields\": [", - " {\"type\": \"fieldAccess\", \"fieldName\": \"sketch-test\"},", - " {\"type\": \"fieldAccess\", \"fieldName\": \"sketch-control\"}", - " ]", - " }", - " ],", - " \"intervals\": [\"2017-01-01T00:00:00.000Z/2017-01-31T00:00:00.000Z\"]", - "}" - ) + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval("2017-01-01T00:00:00.000Z/2017-01-31T00:00:00.000Z") + .setAggregatorSpecs( + new FilteredAggregatorFactory( + new ArrayOfDoublesSketchAggregatorFactory( + "sketch-test", "userid", null, List.of("parameter"), null + ), + new SelectorDimFilter("label", "test", null) + ), + new FilteredAggregatorFactory( + new ArrayOfDoublesSketchAggregatorFactory( + "sketch-control", "userid", null, List.of("parameter"), null + ), + new SelectorDimFilter("label", "control", null) + ) + ) + .setPostAggregatorSpecs( + new ArrayOfDoublesSketchTTestPostAggregator( + "p-value", + ImmutableList.of( + new FieldAccessPostAggregator("sketch-test", "sketch-test"), + new FieldAccessPostAggregator("sketch-control", "sketch-control") + ) + ) + ) + .build() ); List results = seq.toList(); Assert.assertEquals(1, results.size()); @@ -1080,68 +1177,94 @@ public void buildingSketchesAtQueryTimeWithNullsTest() throws Exception DelimitedInputFormat.forColumns( List.of("timestamp", "product", "key", "value1", "value2", "value3") ), - String.join( - "\n", - "[", - " {\"type\": \"doubleSum\", \"name\": \"value1\", \"fieldName\": \"value1\"},", - " {\"type\": \"doubleSum\", \"name\": \"value2\", \"fieldName\": \"value2\"},", - " {\"type\": \"doubleSum\", \"name\": \"value3\", \"fieldName\": \"value3\"}", - "]" + List.of( + new DoubleSumAggregatorFactory("value1", "value1"), + new DoubleSumAggregatorFactory("value2", "value2"), + new DoubleSumAggregatorFactory("value3", "value3") ), 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"groupBy\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"dimensions\": [],", - " \"virtualColumns\": [{\"type\": \"expression\",\"name\": \"nonulls3\",\"expression\": \"nvl(value3, 0.0)\",\"outputType\": \"DOUBLE\"}],", - " \"aggregations\": [", - " {\"type\": \"arrayOfDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"key\", \"metricColumns\": [ \"value1\", \"value2\", \"value3\" ], \"nominalEntries\": 1024},", - " {\"type\": \"arrayOfDoublesSketch\", \"name\": \"sketchNoNulls\", \"fieldName\": \"key\", \"metricColumns\": [ \"value1\", \"value2\", \"nonulls3\" ], \"nominalEntries\": 1024}", - " ],", - " \"postAggregations\": [", - " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"estimate\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"estimateNoNulls\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketchNoNulls\"}},", - " {\"type\": \"arrayOfDoublesSketchToQuantilesSketch\", \"name\": \"quantiles-sketch\", \"column\": 2, \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"union\", \"field\": {", - " \"type\": \"arrayOfDoublesSketchSetOp\",", - " \"name\": \"union\",", - " \"operation\": \"UNION\",", - " \"nominalEntries\": 1024,", - " \"numberOfValues\": 3,", - " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", - " }},", - " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"intersection\", \"field\": {", - " \"type\": \"arrayOfDoublesSketchSetOp\",", - " \"name\": \"intersection\",", - " \"operation\": \"INTERSECT\",", - " \"nominalEntries\": 1024,", - " \"numberOfValues\": 3,", - " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", - " }},", - " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"anotb\", \"field\": {", - " \"type\": \"arrayOfDoublesSketchSetOp\",", - " \"name\": \"anotb\",", - " \"operation\": \"NOT\",", - " \"nominalEntries\": 1024,", - " \"numberOfValues\": 3,", - " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", - " }},", - " {", - " \"type\": \"arrayOfDoublesSketchToMeans\",", - " \"name\": \"means\",", - " \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}", - " },", - " {\"type\": \"arrayOfDoublesSketchToQuantilesSketch\", \"name\": \"quantiles-sketch-with-nulls\", \"column\": 3, \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"arrayOfDoublesSketchToQuantilesSketch\", \"name\": \"quantiles-sketch-with-no-nulls\", \"column\": 3, \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketchNoNulls\"}}", - " ],", - " \"intervals\": [\"2015-01-01T00:00:00.000Z/2015-01-31T00:00:00.000Z\"]", - "}" - ) + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval("2015-01-01T00:00:00.000Z/2015-01-31T00:00:00.000Z") + .setVirtualColumns( + new ExpressionVirtualColumn( + "nonulls3", "nvl(value3, 0.0)", ColumnType.DOUBLE, TestExprMacroTable.INSTANCE + ) + ) + .setAggregatorSpecs( + new ArrayOfDoublesSketchAggregatorFactory( + "sketch", "key", 1024, List.of("value1", "value2", "value3"), null + ), + new ArrayOfDoublesSketchAggregatorFactory( + "sketchNoNulls", "key", 1024, List.of("value1", "value2", "nonulls3"), null + ) + ) + .setPostAggregatorSpecs( + new ArrayOfDoublesSketchToEstimatePostAggregator( + "estimate", + new FieldAccessPostAggregator("sketch", "sketch") + ), + new ArrayOfDoublesSketchToEstimatePostAggregator( + "estimateNoNulls", + new FieldAccessPostAggregator("sketchNoNulls", "sketchNoNulls") + ), + new ArrayOfDoublesSketchToQuantilesSketchPostAggregator( + "quantiles-sketch", + new FieldAccessPostAggregator("sketch", "sketch"), + 2, + null + ), + new ArrayOfDoublesSketchToEstimatePostAggregator( + "union", + new ArrayOfDoublesSketchSetOpPostAggregator( + "union", "UNION", 1024, 3, + ImmutableList.of( + new FieldAccessPostAggregator("sketch", "sketch"), + new FieldAccessPostAggregator("sketch", "sketch") + ) + ) + ), + new ArrayOfDoublesSketchToEstimatePostAggregator( + "intersection", + new ArrayOfDoublesSketchSetOpPostAggregator( + "intersection", "INTERSECT", 1024, 3, + ImmutableList.of( + new FieldAccessPostAggregator("sketch", "sketch"), + new FieldAccessPostAggregator("sketch", "sketch") + ) + ) + ), + new ArrayOfDoublesSketchToEstimatePostAggregator( + "anotb", + new ArrayOfDoublesSketchSetOpPostAggregator( + "anotb", "NOT", 1024, 3, + ImmutableList.of( + new FieldAccessPostAggregator("sketch", "sketch"), + new FieldAccessPostAggregator("sketch", "sketch") + ) + ) + ), + new ArrayOfDoublesSketchToMeansPostAggregator( + "means", + new FieldAccessPostAggregator("sketch", "sketch") + ), + new ArrayOfDoublesSketchToQuantilesSketchPostAggregator( + "quantiles-sketch-with-nulls", + new FieldAccessPostAggregator("sketch", "sketch"), + 3, + null + ), + new ArrayOfDoublesSketchToQuantilesSketchPostAggregator( + "quantiles-sketch-with-no-nulls", + new FieldAccessPostAggregator("sketchNoNulls", "sketchNoNulls"), + 3, + null + ) + ) + .build() ); List results = seq.toList(); Assert.assertEquals(1, results.size()); @@ -1190,6 +1313,8 @@ public void buildingSketchesAtQueryTimeWithNullsTest() throws Exception @Test public void testConstantAndBase64WithEstimateSumPostAgg() throws Exception { + final String externalSketchBase64 = + "AQEJAwgCzJP/////////fwIAAAAAAAAAbakWvEpmYR4+utyjb2+2IAAAAAAAAPA/AAAAAAAAAEAAAAAAAADwPwAAAAAAAABA"; Sequence seq = helper.createIndexAndRunQueryOnSegment( new File(this.getClass().getClassLoader().getResource("tuple/array_of_doubles_sketch_data_two_values.tsv") .getFile()), @@ -1201,47 +1326,50 @@ public void testConstantAndBase64WithEstimateSumPostAgg() throws Exception DelimitedInputFormat.forColumns( List.of("timestamp", "product", "sketch") ), - String.join( - "\n", - "[", - " {\"type\": \"arrayOfDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"sketch\", \"nominalEntries\": 1024, \"numberOfValues\": 2}", - "]" + List.of( + new ArrayOfDoublesSketchAggregatorFactory("sketch", "sketch", 1024, null, 2) ), 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join( - "\n", - "{", - " \"queryType\": \"groupBy\",", - " \"dataSource\": \"test_datasource\",", - " \"granularity\": \"ALL\",", - " \"dimensions\": [],", - " \"aggregations\": [", - " {\"type\": \"arrayOfDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"sketch\", \"nominalEntries\": 1024, \"numberOfValues\": 2}", - " ],", - " \"postAggregations\": [", - " {\"type\": \"arrayOfDoublesSketchToMetricsSumEstimate\", \"name\": \"estimateSum\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", - " {\"type\": \"arrayOfDoublesSketchToMetricsSumEstimate\", \"name\": \"intersection\", \"field\": {", - " \"type\": \"arrayOfDoublesSketchSetOp\",", - " \"name\": \"intersection\",", - " \"operation\": \"INTERSECT\",", - " \"nominalEntries\": 1024,", - " \"numberOfValues\": 2,", - " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"arrayOfDoublesSketchConstant\", \"name\": \"external_sketch\", \"value\": \"AQEJAwgCzJP/////////fwIAAAAAAAAAbakWvEpmYR4+utyjb2+2IAAAAAAAAPA/AAAAAAAAAEAAAAAAAADwPwAAAAAAAABA\"}]", - " }},", - " {\"type\": \"arrayOfDoublesSketchToBase64String\", \"name\": \"intersectionString\", \"field\": {", - " \"type\": \"arrayOfDoublesSketchSetOp\",", - " \"name\": \"intersection\",", - " \"operation\": \"INTERSECT\",", - " \"nominalEntries\": 1024,", - " \"numberOfValues\": 2,", - " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"arrayOfDoublesSketchConstant\", \"name\": \"external_sketch\", \"value\": \"AQEJAwgCzJP/////////fwIAAAAAAAAAbakWvEpmYR4+utyjb2+2IAAAAAAAAPA/AAAAAAAAAEAAAAAAAADwPwAAAAAAAABA\"}]", - " }}", - " ],", - " \"intervals\": [\"2015-01-01T00:00:00.000Z/2015-01-31T00:00:00.000Z\"]", - "}" - ) + GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval("2015-01-01T00:00:00.000Z/2015-01-31T00:00:00.000Z") + .setAggregatorSpecs( + new ArrayOfDoublesSketchAggregatorFactory("sketch", "sketch", 1024, null, 2) + ) + .setPostAggregatorSpecs( + new ArrayOfDoublesSketchToMetricsSumEstimatePostAggregator( + "estimateSum", + new FieldAccessPostAggregator("sketch", "sketch") + ), + new ArrayOfDoublesSketchToMetricsSumEstimatePostAggregator( + "intersection", + new ArrayOfDoublesSketchSetOpPostAggregator( + "intersection", "INTERSECT", 1024, 2, + ImmutableList.of( + new FieldAccessPostAggregator("sketch", "sketch"), + new ArrayOfDoublesSketchConstantPostAggregator( + "external_sketch", externalSketchBase64 + ) + ) + ) + ), + new ArrayOfDoublesSketchToBase64StringPostAggregator( + "intersectionString", + new ArrayOfDoublesSketchSetOpPostAggregator( + "intersection", "INTERSECT", 1024, 2, + ImmutableList.of( + new FieldAccessPostAggregator("sketch", "sketch"), + new ArrayOfDoublesSketchConstantPostAggregator( + "external_sketch", externalSketchBase64 + ) + ) + ) + ) + ) + .build() ); List results = seq.toList(); Assert.assertEquals(1, results.size()); diff --git a/extensions-core/datasketches/src/test/resources/empty_sketch_group_by_query.json b/extensions-core/datasketches/src/test/resources/empty_sketch_group_by_query.json deleted file mode 100644 index 7d2622a7763b..000000000000 --- a/extensions-core/datasketches/src/test/resources/empty_sketch_group_by_query.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "queryType": "groupBy", - "dataSource": "test_datasource", - "granularity":"ALL", - "dimensions": ["product"], - "filter" : { - "type" : "selector", "dimension" : "product", "value" : "product_b" - }, - "aggregations": [ - { - "type": "thetaSketch", - "name": "sketch_count", - "fieldName": "product_sketch", - "size": 16384 - } - ], - "intervals": [ - "2019-07-14T00:00:00.000Z/2019-07-15T00:00:00.000Z" - ] -} diff --git a/extensions-core/datasketches/src/test/resources/empty_sketch_test_data_aggregators.json b/extensions-core/datasketches/src/test/resources/empty_sketch_test_data_aggregators.json deleted file mode 100644 index 1d0980183637..000000000000 --- a/extensions-core/datasketches/src/test/resources/empty_sketch_test_data_aggregators.json +++ /dev/null @@ -1,9 +0,0 @@ -[ - { - "type": "thetaSketch", - "name": "product_sketch", - "fieldName": "product_sketch", - "isInputThetaSketch": true, - "size": 16384 - } -] diff --git a/extensions-core/datasketches/src/test/resources/oldapi/old_simple_test_data_aggregators.json b/extensions-core/datasketches/src/test/resources/oldapi/old_simple_test_data_aggregators.json deleted file mode 100644 index fbefe20e2d24..000000000000 --- a/extensions-core/datasketches/src/test/resources/oldapi/old_simple_test_data_aggregators.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - { - "type": "sketchBuild", - "name": "pty_country", - "fieldName": "pty_country" - }, - { - "type": "sketchBuild", - "name": "non_existing_col_validation", - "fieldName": "non_existing_col" - } -] diff --git a/extensions-core/datasketches/src/test/resources/oldapi/old_simple_test_data_group_by_query.json b/extensions-core/datasketches/src/test/resources/oldapi/old_simple_test_data_group_by_query.json deleted file mode 100644 index 362652cac339..000000000000 --- a/extensions-core/datasketches/src/test/resources/oldapi/old_simple_test_data_group_by_query.json +++ /dev/null @@ -1,93 +0,0 @@ -{ - "queryType": "groupBy", - "dataSource": "test_datasource", - "granularity": "ALL", - "dimensions": [], - "aggregations": [ - { - "type": "sketchMerge", - "name": "sketch_count", - "fieldName": "pty_country", - "size": 16384 - }, - { - "type": "sketchMerge", - "name": "non_existing_col_validation", - "fieldName": "non_existing_col", - "size": 16384 - } - ], - "postAggregations": [ - { - "type": "sketchEstimate", - "name": "sketchEstimatePostAgg", - "field": { - "type": "fieldAccess", - "fieldName": "sketch_count" - } - }, - { - "type": "sketchEstimate", - "name": "sketchIntersectionPostAggEstimate", - "field": { - "type": "sketchSetOper", - "name": "sketchIntersectionPostAgg", - "func": "INTERSECT", - "size": 16384, - "fields": [ - { - "type": "fieldAccess", - "fieldName": "sketch_count" - }, - { - "type": "fieldAccess", - "fieldName": "sketch_count" - } - ] - } - }, - { - "type": "sketchEstimate", - "name": "sketchAnotBPostAggEstimate", - "field": { - "type": "sketchSetOper", - "name": "sketchAnotBUnionPostAgg", - "func": "NOT", - "size": 16384, - "fields": [ - { - "type": "fieldAccess", - "fieldName": "sketch_count" - }, - { - "type": "fieldAccess", - "fieldName": "sketch_count" - } - ] - } - }, - { - "type": "sketchEstimate", - "name": "sketchUnionPostAggEstimate", - "field": { - "type": "sketchSetOper", - "name": "sketchUnionPostAgg", - "func": "UNION", - "size": 16384, - "fields": [ - { - "type": "fieldAccess", - "fieldName": "sketch_count" - }, - { - "type": "fieldAccess", - "fieldName": "sketch_count" - } - ] - } - } - ], - "intervals": [ - "2014-10-19T00:00:00.000Z/2014-10-22T00:00:00.000Z" - ] -} diff --git a/extensions-core/datasketches/src/test/resources/oldapi/old_sketch_test_data_aggregators.json b/extensions-core/datasketches/src/test/resources/oldapi/old_sketch_test_data_aggregators.json deleted file mode 100644 index 7cd10e17678d..000000000000 --- a/extensions-core/datasketches/src/test/resources/oldapi/old_sketch_test_data_aggregators.json +++ /dev/null @@ -1,14 +0,0 @@ -[ - { - "type": "sketchMerge", - "name": "sids_sketch", - "fieldName": "sketch", - "size": 16384 - }, - { - "type": "sketchMerge", - "name": "non_existing_col_validation", - "fieldName": "non_existing_col", - "size": 16384 - } -] diff --git a/extensions-core/datasketches/src/test/resources/oldapi/old_sketch_test_data_group_by_query.json b/extensions-core/datasketches/src/test/resources/oldapi/old_sketch_test_data_group_by_query.json deleted file mode 100644 index 027e0a610cb6..000000000000 --- a/extensions-core/datasketches/src/test/resources/oldapi/old_sketch_test_data_group_by_query.json +++ /dev/null @@ -1,92 +0,0 @@ -{ - "queryType": "groupBy", - "dataSource": "test_datasource", - "granularity": "ALL", - "dimensions": [], - "aggregations": [ - { - "type": "sketchMerge", - "name": "sids_sketch_count", - "fieldName": "sids_sketch", - "size": 16384 - }, - { - "type": "sketchMerge", - "name": "non_existing_col_validation", - "fieldName": "non_existing_col", - "size": 16384 - } - ], - "postAggregations": [ - { - "type": "sketchEstimate", - "name": "sketchEstimatePostAgg", - "field": { - "type": "fieldAccess", - "fieldName": "sids_sketch_count" - } - }, - { - "type": "sketchEstimate", - "name": "sketchIntersectionPostAggEstimate", - "field": { - "type": "sketchSetOper", - "name": "sketchIntersectionPostAgg", - "func": "INTERSECT", - "size": 16384, - "fields": [ - { - "type": "fieldAccess", - "fieldName": "sids_sketch_count" - }, - { - "type": "fieldAccess", - "fieldName": "sids_sketch_count" - } - ] - } - }, - { - "type": "sketchEstimate", - "name": "sketchAnotBPostAggEstimate", - "field": { - "type": "sketchSetOper", - "name": "sketchAnotBUnionPostAgg", - "func": "NOT", - "fields": [ - { - "type": "fieldAccess", - "fieldName": "sids_sketch_count" - }, - { - "type": "fieldAccess", - "fieldName": "sids_sketch_count" - } - ] - } - }, - { - "type": "sketchEstimate", - "name": "sketchUnionPostAggEstimate", - "field": { - "type": "sketchSetOper", - "name": "sketchUnionPostAgg", - "func": "UNION", - "size": 16384, - "fields": [ - { - "type": "fieldAccess", - "fieldName": "sids_sketch_count" - }, - { - "type": "fieldAccess", - "fieldName": "sids_sketch_count" - } - ] - } - } - ], - "intervals": [ - "2014-10-19T00:00:00.000Z/2014-10-22T00:00:00.000Z" - ] -} diff --git a/extensions-core/datasketches/src/test/resources/retention_test_data_group_by_query.json b/extensions-core/datasketches/src/test/resources/retention_test_data_group_by_query.json deleted file mode 100644 index 18c792e52e9e..000000000000 --- a/extensions-core/datasketches/src/test/resources/retention_test_data_group_by_query.json +++ /dev/null @@ -1,135 +0,0 @@ -{ - "queryType": "groupBy", - "dataSource": "test_datasource", - "granularity":"ALL", - "dimensions": ["product"], - "filter": { - "type": "selector", - "dimension": "product", - "value": "product_1" - }, - "aggregations": [ - { - "type" : "filtered", - "filter" : { - "type" : "and", - "fields" : [ - { - "type" : "selector", - "dimension" : "product", - "value" : "product_1" - }, - { - "type" : "interval", - "dimension" : "__time", - "intervals" : ["2014-10-20T00:00:00.000Z/2014-10-21T00:00:00.000Z"] - } - ] - }, - "aggregator" : { - "type": "thetaSketch", "name": "p1_unique_country_day_1", "fieldName": "pty_country" - } - }, - { - "type" : "filtered", - "filter" : { - "type" : "and", - "fields" : [ - { - "type" : "selector", - "dimension" : "product", - "value" : "product_1" - }, - { - "type" : "interval", - "dimension" : "__time", - "intervals" : ["2014-10-21T00:00:00.000Z/2014-10-22T00:00:00.000Z"] - } - ] - }, - "aggregator" : { - "type": "thetaSketch", "name": "p1_unique_country_day_2", "fieldName": "pty_country" - } - }, - { - "type" : "filtered", - "filter" : { - "type" : "and", - "fields" : [ - { - "type" : "selector", - "dimension" : "product", - "value" : "product_1" - }, - { - "type" : "interval", - "dimension" : "__time", - "intervals" : ["2014-10-22T00:00:00.000Z/2014-10-23T00:00:00.000Z"] - } - ] - }, - "aggregator" : { - "type": "thetaSketch", "name": "p1_unique_country_day_3", "fieldName": "pty_country" - } - }, - { - "type": "thetaSketch", - "name": "non_existing_col_validation", - "fieldName": "non_existing_col", - "size": 16384 - } - ], - "postAggregations": [ - { - "type": "thetaSketchEstimate", - "name": "sketchEstimatePostAgg", - "field": { - "type": "fieldAccess", - "fieldName": "p1_unique_country_day_1" - } - }, - { - "type": "thetaSketchEstimate", - "name": "sketchIntersectionPostAggEstimate1", - "field": { - "type": "thetaSketchSetOp", - "name": "sketchIntersectionPostAgg", - "func": "INTERSECT", - "size": 16384, - "fields": [ - { - "type": "fieldAccess", - "fieldName": "p1_unique_country_day_1" - }, - { - "type": "fieldAccess", - "fieldName": "p1_unique_country_day_2" - } - ] - } - }, - { - "type": "thetaSketchEstimate", - "name": "sketchIntersectionPostAggEstimate2", - "field": { - "type": "thetaSketchSetOp", - "name": "sketchIntersectionPostAgg2", - "func": "INTERSECT", - "size": 16384, - "fields": [ - { - "type": "fieldAccess", - "fieldName": "p1_unique_country_day_1" - }, - { - "type": "fieldAccess", - "fieldName": "p1_unique_country_day_3" - } - ] - } - } - ], - "intervals": [ - "2014-10-19T00:00:00.000Z/2014-10-23T00:00:00.000Z" - ] -} diff --git a/extensions-core/datasketches/src/test/resources/simple_test_data_aggregators.json b/extensions-core/datasketches/src/test/resources/simple_test_data_aggregators.json deleted file mode 100644 index c98b97187871..000000000000 --- a/extensions-core/datasketches/src/test/resources/simple_test_data_aggregators.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - { - "type": "thetaSketch", - "name": "pty_country", - "fieldName": "pty_country" - }, - { - "type": "thetaSketch", - "name": "non_existing_col_validation", - "fieldName": "non_existing_col" - } -] diff --git a/extensions-core/datasketches/src/test/resources/simple_test_data_group_by_query.json b/extensions-core/datasketches/src/test/resources/simple_test_data_group_by_query.json deleted file mode 100644 index 01ee218e8672..000000000000 --- a/extensions-core/datasketches/src/test/resources/simple_test_data_group_by_query.json +++ /dev/null @@ -1,106 +0,0 @@ -{ - "queryType": "groupBy", - "dataSource": "test_datasource", - "granularity":"ALL", - "dimensions": ["product"], - "aggregations": [ - { - "type": "thetaSketch", - "name": "sketch_count", - "fieldName": "pty_country", - "size": 16384 - }, - { - "type": "thetaSketch", - "name": "non_existing_col_validation", - "fieldName": "non_existing_col", - "size": 16384 - } - ], - "postAggregations": [ - { - "type": "thetaSketchEstimate", - "name": "sketchEstimatePostAgg", - "field": { - "type": "fieldAccess", - "fieldName": "sketch_count" - } - }, - { - "type": "thetaSketchEstimate", - "name": "sketchIntersectionPostAggEstimate", - "field": { - "type": "thetaSketchSetOp", - "name": "sketchIntersectionPostAgg", - "func": "INTERSECT", - "size": 16384, - "fields": [ - { - "type": "fieldAccess", - "fieldName": "sketch_count" - }, - { - "type": "fieldAccess", - "fieldName": "sketch_count" - } - ] - } - }, - { - "type": "thetaSketchEstimate", - "name": "sketchAnotBPostAggEstimate", - "field": { - "type": "thetaSketchSetOp", - "name": "sketchAnotBUnionPostAgg", - "func": "NOT", - "size": 16384, - "fields": [ - { - "type": "fieldAccess", - "fieldName": "sketch_count" - }, - { - "type": "fieldAccess", - "fieldName": "sketch_count" - } - ] - } - }, - { - "type": "thetaSketchEstimate", - "name": "sketchUnionPostAggEstimate", - "field": { - "type": "thetaSketchSetOp", - "name": "sketchUnionPostAgg", - "func": "UNION", - "size": 16384, - "fields": [ - { - "type": "fieldAccess", - "fieldName": "sketch_count" - }, - { - "type": "fieldAccess", - "fieldName": "sketch_count" - } - ] - } - } - ], - "intervals": [ - "2014-10-19T00:00:00.000Z/2014-10-22T00:00:00.000Z" - ], - "limitSpec": { - "type": "default", - "columns": [ - { - "dimension": "sketchEstimatePostAgg", - "direction": "ASC" - }, - { - "dimension": "product", - "direction": "ASC" - } - ] - } -} diff --git a/extensions-core/datasketches/src/test/resources/sketch_test_data_aggregators.json b/extensions-core/datasketches/src/test/resources/sketch_test_data_aggregators.json deleted file mode 100644 index 05cd0baa59bb..000000000000 --- a/extensions-core/datasketches/src/test/resources/sketch_test_data_aggregators.json +++ /dev/null @@ -1,16 +0,0 @@ -[ - { - "type": "thetaSketch", - "name": "sids_sketch", - "fieldName": "sketch", - "isInputThetaSketch": true, - "size": 16384 - }, - { - "type": "thetaSketch", - "name": "non_existing_col_validation", - "fieldName": "non_existing_col", - "isInputThetaSketch": true, - "size": 16384 - } -] diff --git a/extensions-core/datasketches/src/test/resources/sketch_test_data_group_by_query.json b/extensions-core/datasketches/src/test/resources/sketch_test_data_group_by_query.json deleted file mode 100644 index 56dd9fd8fc59..000000000000 --- a/extensions-core/datasketches/src/test/resources/sketch_test_data_group_by_query.json +++ /dev/null @@ -1,116 +0,0 @@ -{ - "queryType": "groupBy", - "dataSource": "test_datasource", - "granularity": "ALL", - "dimensions": [], - "aggregations": [ - { - "type": "thetaSketch", - "name": "sids_sketch_count", - "fieldName": "sids_sketch", - "size": 16384 - }, - { - "type": "thetaSketch", - "name": "sids_sketch_count_with_err", - "fieldName": "sids_sketch", - "size": 16384, - "errorBoundsStdDev": 2 - }, - { - "type": "thetaSketch", - "name": "non_existing_col_validation", - "fieldName": "non_existing_col", - "size": 16384 - } - ], - "postAggregations": [ - { - "type": "thetaSketchEstimate", - "name": "sketchEstimatePostAgg", - "field": { - "type": "fieldAccess", - "fieldName": "sids_sketch_count" - } - }, - { - "type": "thetaSketchEstimate", - "name": "sketchEstimatePostAggWithErrorBounds", - "errorBoundsStdDev": 2, - "field": { - "type": "fieldAccess", - "fieldName": "sids_sketch_count" - } - }, - { - "type": "thetaSketchEstimate", - "name": "sketchIntersectionPostAggEstimate", - "field": { - "type": "thetaSketchSetOp", - "name": "sketchIntersectionPostAgg", - "func": "INTERSECT", - "size": 16384, - "fields": [ - { - "type": "fieldAccess", - "fieldName": "sids_sketch_count" - }, - { - "type": "fieldAccess", - "fieldName": "sids_sketch_count" - } - ] - } - }, - { - "type": "thetaSketchEstimate", - "name": "sketchAnotBPostAggEstimate", - "field": { - "type": "thetaSketchSetOp", - "name": "sketchAnotBUnionPostAgg", - "func": "NOT", - "fields": [ - { - "type": "fieldAccess", - "fieldName": "sids_sketch_count" - }, - { - "type": "fieldAccess", - "fieldName": "sids_sketch_count" - } - ] - } - }, - { - "type": "thetaSketchEstimate", - "name": "sketchUnionPostAggEstimate", - "field": { - "type": "thetaSketchSetOp", - "name": "sketchUnionPostAgg", - "func": "UNION", - "size": 16384, - "fields": [ - { - "type": "fieldAccess", - "fieldName": "sids_sketch_count" - }, - { - "type": "fieldAccess", - "fieldName": "sids_sketch_count" - } - ] - } - }, - { - "type": "thetaSketchToString", - "name": "sketchSummary", - "field": { - "type": "fieldAccess", - "fieldName": "sids_sketch_count" - } - } - ], - "intervals": [ - "2014-10-19T00:00:00.000Z/2014-10-22T00:00:00.000Z" - ] -} diff --git a/extensions-core/datasketches/src/test/resources/timeseries_query.json b/extensions-core/datasketches/src/test/resources/timeseries_query.json deleted file mode 100644 index 439d7585e9a3..000000000000 --- a/extensions-core/datasketches/src/test/resources/timeseries_query.json +++ /dev/null @@ -1,92 +0,0 @@ -{ - "queryType": "timeseries", - "dataSource": "test_datasource", - "granularity":"ALL", - "aggregations": [ - { - "type": "thetaSketch", - "name": "sketch_count", - "fieldName": "pty_country", - "size": 16384 - }, - { - "type": "thetaSketch", - "name": "non_existing_col_validation", - "fieldName": "non_existing_col", - "size": 16384 - } - ], - "postAggregations": [ - { - "type": "thetaSketchEstimate", - "name": "sketchEstimatePostAgg", - "field": { - "type": "fieldAccess", - "fieldName": "sketch_count" - } - }, - { - "type": "thetaSketchEstimate", - "name": "sketchIntersectionPostAggEstimate", - "field": { - "type": "thetaSketchSetOp", - "name": "sketchIntersectionPostAgg", - "func": "INTERSECT", - "size": 16384, - "fields": [ - { - "type": "fieldAccess", - "fieldName": "sketch_count" - }, - { - "type": "fieldAccess", - "fieldName": "sketch_count" - } - ] - } - }, - { - "type": "thetaSketchEstimate", - "name": "sketchAnotBPostAggEstimate", - "field": { - "type": "thetaSketchSetOp", - "name": "sketchAnotBUnionPostAgg", - "func": "NOT", - "size": 16384, - "fields": [ - { - "type": "fieldAccess", - "fieldName": "sketch_count" - }, - { - "type": "fieldAccess", - "fieldName": "sketch_count" - } - ] - } - }, - { - "type": "thetaSketchEstimate", - "name": "sketchUnionPostAggEstimate", - "field": { - "type": "thetaSketchSetOp", - "name": "sketchUnionPostAgg", - "func": "UNION", - "size": 16384, - "fields": [ - { - "type": "fieldAccess", - "fieldName": "sketch_count" - }, - { - "type": "fieldAccess", - "fieldName": "sketch_count" - } - ] - } - } - ], - "intervals": [ - "2014-10-19T00:00:00.000Z/2014-10-22T00:00:00.000Z" - ] -} diff --git a/extensions-core/datasketches/src/test/resources/topn_query.json b/extensions-core/datasketches/src/test/resources/topn_query.json deleted file mode 100644 index 3701b1f5b29c..000000000000 --- a/extensions-core/datasketches/src/test/resources/topn_query.json +++ /dev/null @@ -1,98 +0,0 @@ -{ - "queryType": "topN", - "dataSource": "test_datasource", - "granularity":"ALL", - "metric": { - "type": "inverted", - "metric": "sketch_count" - }, - "dimension": "product", - "threshold": 1, - "aggregations": [ - { - "type": "thetaSketch", - "name": "sketch_count", - "fieldName": "pty_country", - "size": 16384 - }, - { - "type": "thetaSketch", - "name": "non_existing_col_validation", - "fieldName": "non_existing_col", - "size": 16384 - } - ], - "postAggregations": [ - { - "type": "thetaSketchEstimate", - "name": "sketchEstimatePostAgg", - "field": { - "type": "fieldAccess", - "fieldName": "sketch_count" - } - }, - { - "type": "thetaSketchEstimate", - "name": "sketchIntersectionPostAggEstimate", - "field": { - "type": "thetaSketchSetOp", - "name": "sketchIntersectionPostAgg", - "func": "INTERSECT", - "size": 16384, - "fields": [ - { - "type": "fieldAccess", - "fieldName": "sketch_count" - }, - { - "type": "fieldAccess", - "fieldName": "sketch_count" - } - ] - } - }, - { - "type": "thetaSketchEstimate", - "name": "sketchAnotBPostAggEstimate", - "field": { - "type": "thetaSketchSetOp", - "name": "sketchAnotBUnionPostAgg", - "func": "NOT", - "size": 16384, - "fields": [ - { - "type": "fieldAccess", - "fieldName": "sketch_count" - }, - { - "type": "fieldAccess", - "fieldName": "sketch_count" - } - ] - } - }, - { - "type": "thetaSketchEstimate", - "name": "sketchUnionPostAggEstimate", - "field": { - "type": "thetaSketchSetOp", - "name": "sketchUnionPostAgg", - "func": "UNION", - "size": 16384, - "fields": [ - { - "type": "fieldAccess", - "fieldName": "sketch_count" - }, - { - "type": "fieldAccess", - "fieldName": "sketch_count" - } - ] - } - } - ], - "intervals": [ - "2014-10-19T00:00:00.000Z/2014-10-22T00:00:00.000Z" - ] -} diff --git a/extensions-core/datasketches/src/test/resources/topn_query_sketch_const.json b/extensions-core/datasketches/src/test/resources/topn_query_sketch_const.json deleted file mode 100644 index 3dc47dca86c3..000000000000 --- a/extensions-core/datasketches/src/test/resources/topn_query_sketch_const.json +++ /dev/null @@ -1,104 +0,0 @@ -{ - "queryType": "topN", - "dataSource": "test_datasource", - "granularity":"ALL", - "metric": { - "type": "inverted", - "metric": "sketch_count" - }, - "dimension": "product", - "threshold": 3, - "aggregations": [ - { - "type": "thetaSketch", - "name": "sketch_count", - "fieldName": "pty_country", - "size": 16384 - } - ], - "postAggregations": [ - { - "type": "thetaSketchEstimate", - "name": "sketchEstimatePostAgg", - "field": { - "type": "fieldAccess", - "fieldName": "sketch_count" - } - }, - { - "type": "thetaSketchEstimate", - "name": "sketchEstimatePostAggForSketchConstant", - "field": { - "type": "thetaSketchConstant", - "name": "theta_sketch_count", - "value": "AgMDAAAazJMCAAAAAACAPzz9j7pWTMdROWGf15uY1nI=" - } - }, - { - "type": "thetaSketchEstimate", - "name": "sketchIntersectionPostAggEstimate", - "field": { - "type": "thetaSketchSetOp", - "name": "sketchIntersectionPostAgg", - "func": "INTERSECT", - "size": 16384, - "fields": [ - { - "type": "fieldAccess", - "fieldName": "sketch_count" - }, - { - "type": "thetaSketchConstant", - "name": "theta_sketch_count", - "value": "AgMDAAAazJMCAAAAAACAPzz9j7pWTMdROWGf15uY1nI=" - } - ] - } - }, - { - "type": "thetaSketchEstimate", - "name": "sketchAnotBPostAggEstimate", - "field": { - "type": "thetaSketchSetOp", - "name": "sketchAnotBUnionPostAgg", - "func": "NOT", - "size": 16384, - "fields": [ - { - "type": "fieldAccess", - "fieldName": "sketch_count" - }, - { - "type": "thetaSketchConstant", - "name": "theta_sketch_count", - "value": "AgMDAAAazJMCAAAAAACAPzz9j7pWTMdROWGf15uY1nI=" - } - ] - } - }, - { - "type": "thetaSketchEstimate", - "name": "sketchUnionPostAggEstimate", - "field": { - "type": "thetaSketchSetOp", - "name": "sketchUnionPostAgg", - "func": "UNION", - "size": 16384, - "fields": [ - { - "type": "fieldAccess", - "fieldName": "sketch_count" - }, - { - "type": "thetaSketchConstant", - "name": "theta_sketch_count", - "value": "AgMDAAAazJMCAAAAAACAPzz9j7pWTMdROWGf15uY1nI=" - } - ] - } - } - ], - "intervals": [ - "2014-10-19T00:00:00.000Z/2014-10-22T00:00:00.000Z" - ] -} diff --git a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/BloomFilterGroupByQueryTest.java b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/BloomFilterGroupByQueryTest.java index 1aa124219e07..286a010b9408 100644 --- a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/BloomFilterGroupByQueryTest.java +++ b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/BloomFilterGroupByQueryTest.java @@ -34,7 +34,12 @@ import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.query.aggregation.AggregationTestHelper; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.CountAggregatorFactory; +import org.apache.druid.query.aggregation.LongSumAggregatorFactory; +import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.filter.BloomKFilter; +import org.apache.druid.query.filter.SelectorDimFilter; import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.query.groupby.GroupByQueryConfig; import org.apache.druid.query.groupby.GroupByQueryRunnerTest; @@ -101,21 +106,18 @@ public void teardown() throws IOException @Test public void testQuery() throws Exception { - String query = "{" - + "\"queryType\": \"groupBy\"," - + "\"dataSource\": \"test_datasource\"," - + "\"granularity\": \"ALL\"," - + "\"dimensions\": []," - + "\"filter\":{ \"type\":\"selector\", \"dimension\":\"market\", \"value\":\"upfront\"}," - + "\"aggregations\": [" - + " { \"type\": \"bloom\", \"name\": \"blooming_quality\", \"field\": \"quality\" }" - + "]," - + "\"intervals\": [ \"1970/2050\" ]" - + "}"; + GroupByQuery query = GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval("1970/2050") + .setDimFilter(new SelectorDimFilter("market", "upfront", null)) + .setAggregatorSpecs( + new BloomFilterAggregatorFactory("blooming_quality", new DefaultDimensionSpec("quality", "quality"), null) + ) + .build(); MapBasedRow row = ingestAndQuery(query); - BloomKFilter filter = BloomKFilter.deserialize((ByteBuffer) row.getRaw("blooming_quality")); Assert.assertTrue(filter.testString("mezzanine")); Assert.assertTrue(filter.testString("premium")); @@ -125,64 +127,53 @@ public void testQuery() throws Exception @Test public void testNestedQuery() throws Exception { - String query = "{" - + "\"queryType\": \"groupBy\"," - + "\"dataSource\": {" - + "\"type\": \"query\"," - + "\"query\": {" - + "\"queryType\":\"groupBy\"," - + "\"dataSource\": \"test_datasource\"," - + "\"intervals\": [ \"1970/2050\" ]," - + "\"granularity\":\"ALL\"," - + "\"dimensions\":[]," - + "\"aggregations\": [{ \"type\":\"longSum\", \"name\":\"innerSum\", \"fieldName\":\"count\"}]" - + "}" - + "}," - + "\"granularity\": \"ALL\"," - + "\"dimensions\": []," - + "\"aggregations\": [" - + " { \"type\": \"bloom\", \"name\": \"bloom\", \"field\": \"innerSum\" }" - + "]," - + "\"intervals\": [ \"1970/2050\" ]" - + "}"; + GroupByQuery innerQuery = GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval("1970/2050") + .setAggregatorSpecs(new LongSumAggregatorFactory("innerSum", "count")) + .build(); + + GroupByQuery query = GroupByQuery.builder() + .setDataSource(innerQuery) + .setGranularity(Granularities.ALL) + .setInterval("1970/2050") + .setAggregatorSpecs( + new BloomFilterAggregatorFactory("bloom", new DefaultDimensionSpec("innerSum", "innerSum"), null) + ) + .build(); MapBasedRow row = ingestAndQuery(query); - BloomKFilter filter = BloomKFilter.deserialize((ByteBuffer) row.getRaw("bloom")); Assert.assertTrue(filter.testLong(13L)); Assert.assertFalse(filter.testLong(5L)); } - @Test public void testNestedQueryComplex() throws Exception { - String query = "{" - + "\"queryType\": \"groupBy\"," - + "\"dataSource\": {" - + "\"type\": \"query\"," - + "\"query\": {" - + "\"queryType\":\"groupBy\"," - + "\"dataSource\": \"test_datasource\"," - + "\"intervals\": [ \"1970/2050\" ]," - + "\"granularity\":\"ALL\"," - + "\"dimensions\":[]," - + "\"filter\":{ \"type\":\"selector\", \"dimension\":\"market\", \"value\":\"upfront\"}," - + "\"aggregations\": [{ \"type\":\"bloom\", \"name\":\"innerBloom\", \"field\":\"quality\"}]" - + "}" - + "}," - + "\"granularity\": \"ALL\"," - + "\"dimensions\": []," - + "\"aggregations\": [" - + " { \"type\": \"bloom\", \"name\": \"innerBloom\", \"field\": \"innerBloom\" }" - + "]," - + "\"intervals\": [ \"1970/2050\" ]" - + "}"; + GroupByQuery innerQuery = GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval("1970/2050") + .setDimFilter(new SelectorDimFilter("market", "upfront", null)) + .setAggregatorSpecs( + new BloomFilterAggregatorFactory("innerBloom", new DefaultDimensionSpec("quality", "quality"), null) + ) + .build(); + + GroupByQuery query = GroupByQuery.builder() + .setDataSource(innerQuery) + .setGranularity(Granularities.ALL) + .setInterval("1970/2050") + .setAggregatorSpecs( + new BloomFilterAggregatorFactory("innerBloom", new DefaultDimensionSpec("innerBloom", "innerBloom"), null) + ) + .build(); MapBasedRow row = ingestAndQuery(query); - BloomKFilter filter = BloomKFilter.deserialize((ByteBuffer) row.getRaw("innerBloom")); Assert.assertTrue(filter.testString("mezzanine")); Assert.assertTrue(filter.testString("premium")); @@ -192,17 +183,15 @@ public void testNestedQueryComplex() throws Exception @Test public void testQueryFakeDimension() throws Exception { - String query = "{" - + "\"queryType\": \"groupBy\"," - + "\"dataSource\": \"test_datasource\"," - + "\"granularity\": \"ALL\"," - + "\"dimensions\": []," - + "\"filter\":{ \"type\":\"selector\", \"dimension\":\"market\", \"value\":\"upfront\"}," - + "\"aggregations\": [" - + " { \"type\": \"bloom\", \"name\": \"blooming_quality\", \"field\": \"nope\" }" - + "]," - + "\"intervals\": [ \"1970/2050\" ]" - + "}"; + GroupByQuery query = GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval("1970/2050") + .setDimFilter(new SelectorDimFilter("market", "upfront", null)) + .setAggregatorSpecs( + new BloomFilterAggregatorFactory("blooming_quality", new DefaultDimensionSpec("nope", "nope"), null) + ) + .build(); MapBasedRow row = ingestAndQuery(query); @@ -217,9 +206,9 @@ public void testQueryFakeDimension() throws Exception Assert.assertEquals(empty, serialized); } - private MapBasedRow ingestAndQuery(String query) throws Exception + private MapBasedRow ingestAndQuery(GroupByQuery query) throws Exception { - String metricSpec = "[{ \"type\": \"count\", \"name\": \"count\"}]"; + List metricSpec = List.of(new CountAggregatorFactory("count")); Sequence seq = helper.createIndexAndRunQueryOnSegment( this.getClass().getClassLoader().getResourceAsStream("sample.data.tsv"), @@ -239,6 +228,6 @@ private MapBasedRow ingestAndQuery(String query) throws Exception ); List results = seq.toList(); - return results.get(0).toMapBasedRow((GroupByQuery) helper.readQuery(query)); + return results.get(0).toMapBasedRow(query); } } diff --git a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramAggregationTest.java b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramAggregationTest.java index 024036fd6f8a..fcef2f45a38d 100644 --- a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramAggregationTest.java +++ b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/ApproximateHistogramAggregationTest.java @@ -29,6 +29,7 @@ import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.query.aggregation.AggregationTestHelper; +import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.query.groupby.GroupByQueryConfig; import org.apache.druid.query.groupby.GroupByQueryRunnerTest; @@ -111,33 +112,29 @@ public void testIngestWithNullsIgnoredAndQuery() throws Exception private MapBasedRow ingestAndQuery(boolean ignoreNulls) throws Exception { - String ingestionAgg = ignoreNulls ? "approxHistogramFold" : "approxHistogram"; + AggregatorFactory ingestionAgg = ignoreNulls + ? new ApproximateHistogramFoldingAggregatorFactory("index_ah", "index", null, null, null, null, null) + : new ApproximateHistogramAggregatorFactory("index_ah", "index", null, null, null, null, null); - String metricSpec = "[{" - + "\"type\": \"" + ingestionAgg + "\"," - + "\"name\": \"index_ah\"," - + "\"fieldName\": \"index\"" - + "}]"; + List metricSpec = List.of(ingestionAgg); - String query = "{" - + "\"queryType\": \"groupBy\"," - + "\"dataSource\": \"test_datasource\"," - + "\"granularity\": \"ALL\"," - + "\"dimensions\": []," - + "\"aggregations\": [" - + " { \"type\": \"approxHistogramFold\", \"name\": \"index_ah\", \"fieldName\": \"index_ah\" }" - + "]," - + "\"postAggregations\": [" - + " { \"type\": \"min\", \"name\": \"index_min\", \"fieldName\": \"index_ah\"}," - + " { \"type\": \"max\", \"name\": \"index_max\", \"fieldName\": \"index_ah\"}," - + " { \"type\": \"quantile\", \"name\": \"index_quantile\", \"fieldName\": \"index_ah\", \"probability\" : 0.99 }," - + " { \"type\": \"quantiles\", \"name\": \"index_quantiles\", \"fieldName\": \"index_ah\", \"probabilities\" : [0.2, 0.7] }," - + " { \"type\": \"buckets\", \"name\": \"index_buckets\", \"fieldName\": \"index_ah\", \"bucketSize\" : 2.0, \"offset\": 4.0 }," - + " { \"type\": \"customBuckets\", \"name\": \"index_custom\", \"fieldName\": \"index_ah\", \"breaks\" : [50.0, 100.0] }," - + " { \"type\": \"equalBuckets\", \"name\": \"index_equal\", \"fieldName\": \"index_ah\", \"numBuckets\" : 3 }" - + "]," - + "\"intervals\": [ \"1970/2050\" ]" - + "}"; + GroupByQuery query = GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval("1970/2050") + .setAggregatorSpecs( + new ApproximateHistogramFoldingAggregatorFactory("index_ah", "index_ah", null, null, null, null, null) + ) + .setPostAggregatorSpecs( + new MinPostAggregator("index_min", "index_ah"), + new MaxPostAggregator("index_max", "index_ah"), + new QuantilePostAggregator("index_quantile", "index_ah", 0.99f), + new QuantilesPostAggregator("index_quantiles", "index_ah", new float[]{0.2f, 0.7f}), + new BucketsPostAggregator("index_buckets", "index_ah", 2.0f, 4.0f), + new CustomBucketsPostAggregator("index_custom", "index_ah", new float[]{50.0f, 100.0f}), + new EqualBucketsPostAggregator("index_equal", "index_ah", 3) + ) + .build(); Sequence seq = helper.createIndexAndRunQueryOnSegment( this.getClass().getClassLoader().getResourceAsStream("sample.data.tsv"), @@ -156,6 +153,6 @@ private MapBasedRow ingestAndQuery(boolean ignoreNulls) throws Exception query ); - return seq.toList().get(0).toMapBasedRow((GroupByQuery) helper.readQuery(query)); + return seq.toList().get(0).toMapBasedRow(query); } } diff --git a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramAggregationTest.java b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramAggregationTest.java index 949437f09162..a7650d6196b8 100644 --- a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramAggregationTest.java +++ b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/FixedBucketsHistogramAggregationTest.java @@ -29,6 +29,7 @@ import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.query.aggregation.AggregationTestHelper; +import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.query.groupby.GroupByQueryConfig; import org.apache.druid.query.groupby.GroupByQueryRunnerTest; @@ -128,42 +129,29 @@ public void testAggregateCombinerReset() throws Exception private MapBasedRow ingestAndQuery(InputStream inputDataStream) throws Exception { - String ingestionAgg = FixedBucketsHistogramAggregator.TYPE_NAME; - - String metricSpec = "[{" - + "\"type\": \"" + ingestionAgg + "\"," - + "\"name\": \"index_fbh\"," - + "\"numBuckets\": 200," - + "\"lowerLimit\": 0," - + "\"upperLimit\": 200," - + "\"outlierHandlingMode\": \"overflow\"," - + "\"fieldName\": \"index\"" - + "}]"; + List metricSpec = List.of( + new FixedBucketsHistogramAggregatorFactory( + "index_fbh", "index", 200, 0, 200, + FixedBucketsHistogram.OutlierHandlingMode.OVERFLOW, null + ) + ); - String query = "{" - + "\"queryType\": \"groupBy\"," - + "\"dataSource\": \"test_datasource\"," - + "\"granularity\": \"ALL\"," - + "\"dimensions\": []," - + "\"aggregations\": [" - + " {" - + " \"type\": \"fixedBucketsHistogram\"," - + " \"name\": \"index_fbh\"," - + " \"fieldName\": \"index_fbh\"," - + " \"numBuckets\": 200," - + " \"lowerLimit\": 0," - + " \"upperLimit\": 200," - + " \"outlierHandlingMode\": \"overflow\"," - + " \"finalizeAsBase64Binary\": true" - + " }" - + "]," - + "\"postAggregations\": [" - + " { \"type\": \"min\", \"name\": \"index_min\", \"fieldName\": \"index_fbh\"}," - + " { \"type\": \"max\", \"name\": \"index_max\", \"fieldName\": \"index_fbh\"}," - + " { \"type\": \"quantile\", \"name\": \"index_quantile\", \"fieldName\": \"index_fbh\", \"probability\" : 0.99 }" - + "]," - + "\"intervals\": [ \"1970/2050\" ]" - + "}"; + GroupByQuery query = GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval("1970/2050") + .setAggregatorSpecs( + new FixedBucketsHistogramAggregatorFactory( + "index_fbh", "index_fbh", 200, 0, 200, + FixedBucketsHistogram.OutlierHandlingMode.OVERFLOW, true + ) + ) + .setPostAggregatorSpecs( + new MinPostAggregator("index_min", "index_fbh"), + new MaxPostAggregator("index_max", "index_fbh"), + new QuantilePostAggregator("index_quantile", "index_fbh", 0.99f) + ) + .build(); Sequence seq = helper.createIndexAndRunQueryOnSegment( inputDataStream, @@ -182,6 +170,6 @@ private MapBasedRow ingestAndQuery(InputStream inputDataStream) throws Exception query ); - return seq.toList().get(0).toMapBasedRow((GroupByQuery) helper.readQuery(query)); + return seq.toList().get(0).toMapBasedRow(query); } } diff --git a/processing/src/test/java/org/apache/druid/query/aggregation/AggregationTestHelper.java b/processing/src/test/java/org/apache/druid/query/aggregation/AggregationTestHelper.java index 42211803101a..0c3c846f4872 100644 --- a/processing/src/test/java/org/apache/druid/query/aggregation/AggregationTestHelper.java +++ b/processing/src/test/java/org/apache/druid/query/aggregation/AggregationTestHelper.java @@ -22,7 +22,6 @@ import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.core.JsonToken; import com.fasterxml.jackson.core.ObjectCodec; -import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.Module; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Function; @@ -105,10 +104,10 @@ import java.util.stream.Collectors; /** - * This class provides general utility to test any druid aggregation implementation given raw data, - * parser spec, aggregator specs and a group-by query. - * It allows you to create index from raw data, run a group by query on it which simulates query processing inside - * of a druid cluster exercising most of the features from aggregation and returns the results that you could verify. + * General utility for testing Druid aggregation implementations. Given raw data, an input schema, + * ingestion aggregator specs, and a query, it creates a segment from the raw data and runs the + * query against it, simulating query processing inside a Druid cluster and returning results for + * verification. */ public class AggregationTestHelper implements Closeable { @@ -328,23 +327,7 @@ public Sequence createIndexAndRunQueryOnSegment( File inputDataFile, InputRowSchema inputSchema, InputFormat inputFormat, - String aggregators, - long minTimestamp, - Granularity gran, - int maxRowCount, - String queryJson - ) throws Exception - { - File segmentDir = tempFolder.newFolder(); - createIndex(inputDataFile, inputSchema, inputFormat, aggregators, segmentDir, minTimestamp, gran, maxRowCount, true); - return runQueryOnSegments(Collections.singletonList(segmentDir), queryJson); - } - - public Sequence createIndexAndRunQueryOnSegment( - File inputDataFile, - InputRowSchema inputSchema, - InputFormat inputFormat, - String aggregators, + List aggregators, long minTimestamp, Granularity gran, int maxRowCount, @@ -352,103 +335,31 @@ public Sequence createIndexAndRunQueryOnSegment( ) throws Exception { File segmentDir = tempFolder.newFolder(); - createIndex( - inputDataFile, - inputSchema, - inputFormat, - aggregators, - segmentDir, - minTimestamp, - gran, - maxRowCount, - true - ); + createIndex(inputDataFile, inputSchema, inputFormat, aggregators, segmentDir, minTimestamp, gran, maxRowCount); return runQueryOnSegments(Collections.singletonList(segmentDir), query); } - public Sequence createIndexAndRunQueryOnSegment( - File inputDataFile, - InputRowSchema inputSchema, - InputFormat inputFormat, - String aggregators, - long minTimestamp, - Granularity gran, - int maxRowCount, - boolean rollup, - String queryJson - ) throws Exception - { - File segmentDir = tempFolder.newFolder(); - createIndex( - inputDataFile, - inputSchema, - inputFormat, - aggregators, - segmentDir, - minTimestamp, - gran, - maxRowCount, - rollup - ); - return runQueryOnSegments(Collections.singletonList(segmentDir), queryJson); - } - - public Sequence createIndexAndRunQueryOnSegment( - InputStream inputDataStream, - InputRowSchema inputSchema, - InputFormat inputFormat, - String aggregators, - long minTimestamp, - Granularity gran, - int maxRowCount, - String queryJson - ) throws Exception - { - return createIndexAndRunQueryOnSegment( - inputDataStream, - inputSchema, - inputFormat, - aggregators, - minTimestamp, - gran, - maxRowCount, - true, - queryJson - ); - } - public Sequence createIndexAndRunQueryOnSegment( InputStream inputDataStream, InputRowSchema inputSchema, InputFormat inputFormat, - String aggregators, + List aggregators, long minTimestamp, Granularity gran, int maxRowCount, - boolean rollup, - String queryJson + Query query ) throws Exception { File segmentDir = tempFolder.newFolder(); - createIndex( - inputDataStream, - inputSchema, - inputFormat, - aggregators, - segmentDir, - minTimestamp, - gran, - maxRowCount, - rollup - ); - return runQueryOnSegments(Collections.singletonList(segmentDir), queryJson); + createIndex(inputDataStream, inputSchema, inputFormat, aggregators, segmentDir, minTimestamp, gran, maxRowCount, true); + return runQueryOnSegments(Collections.singletonList(segmentDir), query); } public void createIndex( File inputDataFile, InputRowSchema inputSchema, InputFormat inputFormat, - String aggregators, + List aggregators, File outDir, long minTimestamp, Granularity gran, @@ -472,7 +383,7 @@ public void createIndex( File inputDataFile, InputRowSchema inputSchema, InputFormat inputFormat, - String aggregators, + List aggregators, File outDir, long minTimestamp, Granularity gran, @@ -497,7 +408,7 @@ public void createIndex( InputStream inputDataStream, InputRowSchema inputSchema, InputFormat inputFormat, - String aggregators, + List aggregators, File outDir, long minTimestamp, Granularity gran, @@ -525,11 +436,7 @@ public InputStream open() } }; InputEntityReader reader = inputFormat.createReader(inputSchema, streamEntity, tempFolder.newFolder()); - List aggregatorSpecs = mapper.readValue( - aggregators, - new TypeReference<>() {} - ); - AggregatorFactory[] metrics = aggregatorSpecs.toArray(new AggregatorFactory[0]); + AggregatorFactory[] metrics = aggregators.toArray(new AggregatorFactory[0]); index = new OnheapIncrementalIndex.Builder() .setIndexSchema( new IncrementalIndexSchema.Builder() @@ -593,16 +500,6 @@ public InputStream open() } } - public Query readQuery(final String queryJson) - { - try { - return mapper.readValue(queryJson, Query.class); - } - catch (IOException e) { - throw new RuntimeException(e); - } - } - public Segment persistIncrementalIndex( IncrementalIndex index, File outDir @@ -618,11 +515,6 @@ public Segment persistIncrementalIndex( //Simulates running group-by query on individual segments as historicals would do, json serialize the results //from each segment, later deserialize and merge and finally return the results - public Sequence runQueryOnSegments(final List segmentDirs, final String queryJson) - { - return runQueryOnSegments(segmentDirs, readQuery(queryJson).withOverriddenContext(queryContext)); - } - public Sequence runQueryOnSegments(final List segmentDirs, final Query query) { final List segments = Lists.transform( @@ -643,7 +535,7 @@ public Segment apply(File segmentDir) ); try { - return runQueryOnSegmentsObjs(segments, query); + return runQueryOnSegmentsObjs(segments, query.withOverriddenContext(queryContext)); } finally { for (Segment segment : segments) { @@ -691,7 +583,7 @@ public QueryRunner apply(final Segment segment) return baseRunner.run(QueryPlus.wrap(GroupByQueryRunnerTestHelper.populateResourceId(query))); } - public QueryRunner makeStringSerdeQueryRunner( + private QueryRunner makeStringSerdeQueryRunner( final ObjectMapper mapper, final QueryToolChest toolChest, final QueryRunner baseRunner diff --git a/processing/src/test/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregationTest.java b/processing/src/test/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregationTest.java index eeb538c0aef7..c5b343247499 100644 --- a/processing/src/test/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregationTest.java +++ b/processing/src/test/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregationTest.java @@ -28,6 +28,8 @@ import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.query.aggregation.AggregationTestHelper; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.query.groupby.GroupByQueryConfig; import org.apache.druid.query.groupby.GroupByQueryRunnerTest; import org.apache.druid.query.groupby.ResultRow; @@ -78,25 +80,21 @@ public void testIngestAndQuery() throws Exception ) ) { - String metricSpec = "[{" - + "\"type\": \"hyperUnique\"," - + "\"name\": \"index_hll\"," - + "\"fieldName\": \"market\"" - + "}]"; - - String query = "{" - + "\"queryType\": \"groupBy\"," - + "\"dataSource\": \"test_datasource\"," - + "\"granularity\": \"ALL\"," - + "\"dimensions\": []," - + "\"aggregations\": [" - + " { \"type\": \"hyperUnique\", \"name\": \"index_hll\", \"fieldName\": \"index_hll\" }" - + "]," - + "\"postAggregations\": [" - + " { \"type\": \"hyperUniqueCardinality\", \"name\": \"index_unique_count\", \"fieldName\": \"index_hll\" }" - + "]," - + "\"intervals\": [ \"1970/2050\" ]" - + "}"; + List metricSpec = List.of( + new HyperUniquesAggregatorFactory("index_hll", "market") + ); + + GroupByQuery query = GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval("1970/2050") + .setAggregatorSpecs( + new HyperUniquesAggregatorFactory("index_hll", "index_hll") + ) + .setPostAggregatorSpecs( + new HyperUniqueFinalizingPostAggregator("index_unique_count", "index_hll") + ) + .build(); Sequence seq = helper.createIndexAndRunQueryOnSegment( new File(this.getClass().getClassLoader().getResource("druid.sample.tsv").getFile()), @@ -132,26 +130,21 @@ public void testIngestAndQueryPrecomputedHll() throws Exception ) ) { - String metricSpec = "[{" - + "\"type\": \"hyperUnique\"," - + "\"name\": \"index_hll\"," - + "\"fieldName\": \"preComputedHll\"," - + "\"isInputHyperUnique\": true" - + "}]"; - - String query = "{" - + "\"queryType\": \"groupBy\"," - + "\"dataSource\": \"test_datasource\"," - + "\"granularity\": \"ALL\"," - + "\"dimensions\": []," - + "\"aggregations\": [" - + " { \"type\": \"hyperUnique\", \"name\": \"index_hll\", \"fieldName\": \"index_hll\" }" - + "]," - + "\"postAggregations\": [" - + " { \"type\": \"hyperUniqueCardinality\", \"name\": \"index_unique_count\", \"fieldName\": \"index_hll\" }" - + "]," - + "\"intervals\": [ \"1970/2050\" ]" - + "}"; + List metricSpec = List.of( + new HyperUniquesAggregatorFactory("index_hll", "preComputedHll", true, false) + ); + + GroupByQuery query = GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval("1970/2050") + .setAggregatorSpecs( + new HyperUniquesAggregatorFactory("index_hll", "index_hll") + ) + .setPostAggregatorSpecs( + new HyperUniqueFinalizingPostAggregator("index_unique_count", "index_hll") + ) + .build(); Sequence seq = helper.createIndexAndRunQueryOnSegment( new File(this.getClass().getClassLoader().getResource("druid.hll.sample.tsv").getFile()), diff --git a/processing/src/test/java/org/apache/druid/query/aggregation/post/FinalizingFieldAccessPostAggregatorTest.java b/processing/src/test/java/org/apache/druid/query/aggregation/post/FinalizingFieldAccessPostAggregatorTest.java index c8ad450fe93b..2b0bc7a120c2 100644 --- a/processing/src/test/java/org/apache/druid/query/aggregation/post/FinalizingFieldAccessPostAggregatorTest.java +++ b/processing/src/test/java/org/apache/druid/query/aggregation/post/FinalizingFieldAccessPostAggregatorTest.java @@ -40,6 +40,8 @@ import org.apache.druid.query.aggregation.CountAggregatorFactory; import org.apache.druid.query.aggregation.PostAggregator; import org.apache.druid.query.aggregation.firstlast.first.StringFirstAggregatorFactory; +import org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory; +import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.query.groupby.GroupByQueryRunnerTest; import org.apache.druid.query.groupby.ResultRow; import org.apache.druid.query.timeseries.TimeseriesQuery; @@ -213,26 +215,30 @@ public void testIngestAndQueryWithArithmeticPostAggregator() throws Exception ) ) { - String metricSpec = "[{\"type\": \"hyperUnique\", \"name\": \"hll_market\", \"fieldName\": \"market\"}," - + "{\"type\": \"hyperUnique\", \"name\": \"hll_quality\", \"fieldName\": \"quality\"}]"; - - String query = "{" - + "\"queryType\": \"groupBy\"," - + "\"dataSource\": \"test_datasource\"," - + "\"granularity\": \"ALL\"," - + "\"dimensions\": []," - + "\"aggregations\": [" - + " { \"type\": \"hyperUnique\", \"name\": \"hll_market\", \"fieldName\": \"hll_market\" }," - + " { \"type\": \"hyperUnique\", \"name\": \"hll_quality\", \"fieldName\": \"hll_quality\" }" - + "]," - + "\"postAggregations\": [" - + " { \"type\": \"arithmetic\", \"name\": \"uniq_add\", \"fn\": \"+\", \"fields\":[" - + " { \"type\": \"finalizingFieldAccess\", \"name\": \"uniq_market\", \"fieldName\": \"hll_market\" }," - + " { \"type\": \"finalizingFieldAccess\", \"name\": \"uniq_quality\", \"fieldName\": \"hll_quality\" }]" - + " }" - + "]," - + "\"intervals\": [ \"1970/2050\" ]" - + "}"; + List metricSpec = List.of( + new HyperUniquesAggregatorFactory("hll_market", "market"), + new HyperUniquesAggregatorFactory("hll_quality", "quality") + ); + + GroupByQuery query = GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval("1970/2050") + .setAggregatorSpecs( + new HyperUniquesAggregatorFactory("hll_market", "hll_market"), + new HyperUniquesAggregatorFactory("hll_quality", "hll_quality") + ) + .setPostAggregatorSpecs( + new ArithmeticPostAggregator( + "uniq_add", + "+", + List.of( + new FinalizingFieldAccessPostAggregator("uniq_market", "hll_market"), + new FinalizingFieldAccessPostAggregator("uniq_quality", "hll_quality") + ) + ) + ) + .build(); Sequence seq = helper.createIndexAndRunQueryOnSegment( new File(this.getClass().getClassLoader().getResource("druid.sample.tsv").getFile()),