diff --git a/extensions-contrib/rabbit-stream-indexing-service/src/test/java/org/apache/druid/indexing/rabbitstream/RabbitStreamIndexTaskTuningConfigTest.java b/extensions-contrib/rabbit-stream-indexing-service/src/test/java/org/apache/druid/indexing/rabbitstream/RabbitStreamIndexTaskTuningConfigTest.java index 8eee07a747be..19930d1f9d54 100644 --- a/extensions-contrib/rabbit-stream-indexing-service/src/test/java/org/apache/druid/indexing/rabbitstream/RabbitStreamIndexTaskTuningConfigTest.java +++ b/extensions-contrib/rabbit-stream-indexing-service/src/test/java/org/apache/druid/indexing/rabbitstream/RabbitStreamIndexTaskTuningConfigTest.java @@ -163,6 +163,7 @@ public void testtoString() throws Exception "longEncoding=null, " + "complexMetricCompression=null, " + "autoColumnFormatSpec=null, " + + "stringColumnFormatSpec=null, " + "jsonCompression=null, " + "segmentLoader=null" + "}, " + diff --git a/processing/src/main/java/org/apache/druid/data/input/impl/StringDimensionSchema.java b/processing/src/main/java/org/apache/druid/data/input/impl/StringDimensionSchema.java index 018d9ca5c35e..35dbad9a2d11 100644 --- a/processing/src/main/java/org/apache/druid/data/input/impl/StringDimensionSchema.java +++ b/processing/src/main/java/org/apache/druid/data/input/impl/StringDimensionSchema.java @@ -23,37 +23,21 @@ import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonProperty; -import org.apache.druid.error.DruidException; -import org.apache.druid.guice.BuiltInTypesModule; import org.apache.druid.segment.DimensionHandler; +import org.apache.druid.segment.IndexSpec; +import org.apache.druid.segment.StringColumnFormatSpec; import org.apache.druid.segment.StringDimensionHandler; import org.apache.druid.segment.column.ColumnType; import javax.annotation.Nullable; +import java.util.Objects; public class StringDimensionSchema extends DimensionSchema { private static final boolean DEFAULT_CREATE_BITMAP_INDEX = true; @Nullable - public static Integer getDefaultMaxStringLength() - { - return BuiltInTypesModule.getMaxStringLength(); - } - - @Nullable - private static Integer validateMaxStringLength(String name, @Nullable Integer maxStringLength) - { - if (maxStringLength != null && maxStringLength < 0) { - throw DruidException.forPersona(DruidException.Persona.USER) - .ofCategory(DruidException.Category.INVALID_INPUT) - .build("maxStringLength for column [%s] must be >= 0, got [%s]", name, maxStringLength); - } - return maxStringLength != null ? maxStringLength : getDefaultMaxStringLength(); - } - - @Nullable - private final Integer maxStringLength; + private final StringColumnFormatSpec columnFormatSpec; @JsonCreator public static StringDimensionSchema create(String name) @@ -66,11 +50,11 @@ public StringDimensionSchema( @JsonProperty("name") String name, @JsonProperty("multiValueHandling") MultiValueHandling multiValueHandling, @JsonProperty("createBitmapIndex") Boolean createBitmapIndex, - @JsonProperty("maxStringLength") @Nullable Integer maxStringLength + @JsonProperty("columnFormatSpec") @Nullable StringColumnFormatSpec columnFormatSpec ) { super(name, multiValueHandling, createBitmapIndex == null ? DEFAULT_CREATE_BITMAP_INDEX : createBitmapIndex); - this.maxStringLength = validateMaxStringLength(name, maxStringLength); + this.columnFormatSpec = columnFormatSpec; } public StringDimensionSchema( @@ -87,12 +71,29 @@ public StringDimensionSchema(String name) this(name, null, DEFAULT_CREATE_BITMAP_INDEX, null); } + @Nullable @JsonProperty @JsonInclude(JsonInclude.Include.NON_NULL) - @Nullable - public Integer getMaxStringLength() + public StringColumnFormatSpec getColumnFormatSpec() { - return maxStringLength; + return columnFormatSpec; + } + + @Override + public DimensionSchema getEffectiveSchema(IndexSpec indexSpec) + { + // If there's no per-column or job-level string format config, nothing to resolve + if (columnFormatSpec == null && indexSpec.getStringColumnFormatSpec() == null) { + return this; + } + StringColumnFormatSpec effective = + StringColumnFormatSpec.getEffectiveFormatSpec(columnFormatSpec, indexSpec); + return new StringDimensionSchema( + getName(), + getMultiValueHandling(), + hasBitmapIndex(), + effective + ); } @Override @@ -117,6 +118,29 @@ public boolean canBeMultiValued() @Override public DimensionHandler getDimensionHandler() { + Integer maxStringLength = columnFormatSpec != null ? columnFormatSpec.getMaxStringLength() : null; return new StringDimensionHandler(getName(), getMultiValueHandling(), hasBitmapIndex(), false, maxStringLength); } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + if (!super.equals(o)) { + return false; + } + StringDimensionSchema that = (StringDimensionSchema) o; + return Objects.equals(columnFormatSpec, that.columnFormatSpec); + } + + @Override + public int hashCode() + { + return Objects.hash(super.hashCode(), columnFormatSpec); + } } diff --git a/processing/src/main/java/org/apache/druid/segment/IndexSpec.java b/processing/src/main/java/org/apache/druid/segment/IndexSpec.java index c6f2a59e0ec4..470ba256deb3 100644 --- a/processing/src/main/java/org/apache/druid/segment/IndexSpec.java +++ b/processing/src/main/java/org/apache/druid/segment/IndexSpec.java @@ -75,6 +75,8 @@ public static Builder builder() @Nullable private final NestedCommonFormatColumnFormatSpec autoColumnFormatSpec; @Nullable + private final StringColumnFormatSpec stringColumnFormatSpec; + @Nullable private final CompressionStrategy metadataCompression; /** @@ -109,6 +111,8 @@ public static Builder builder() * used to load the written segment * @param autoColumnFormatSpec specify the default {@link NestedCommonFormatColumnFormatSpec} to use for json and * auto columns. Defaults to null upon calling {@link #getEffectiveSpec()}. + * @param stringColumnFormatSpec specify the default {@link StringColumnFormatSpec} to use for string columns. + * Defaults to null upon calling {@link #getEffectiveSpec()}. */ @JsonCreator public IndexSpec( @@ -121,7 +125,8 @@ public IndexSpec( @JsonProperty("complexMetricCompression") @Nullable CompressionStrategy complexMetricCompression, @Deprecated @JsonProperty("jsonCompression") @Nullable CompressionStrategy jsonCompression, @JsonProperty("segmentLoader") @Nullable SegmentizerFactory segmentLoader, - @JsonProperty("autoColumnFormatSpec") @Nullable NestedCommonFormatColumnFormatSpec autoColumnFormatSpec + @JsonProperty("autoColumnFormatSpec") @Nullable NestedCommonFormatColumnFormatSpec autoColumnFormatSpec, + @JsonProperty("stringColumnFormatSpec") @Nullable StringColumnFormatSpec stringColumnFormatSpec ) { this.bitmapSerdeFactory = bitmapSerdeFactory; @@ -134,6 +139,7 @@ public IndexSpec( this.jsonCompression = jsonCompression; this.segmentLoader = segmentLoader; this.autoColumnFormatSpec = autoColumnFormatSpec; + this.stringColumnFormatSpec = stringColumnFormatSpec; } @JsonProperty("bitmap") @@ -212,6 +218,14 @@ public NestedCommonFormatColumnFormatSpec getAutoColumnFormatSpec() return autoColumnFormatSpec; } + @JsonProperty + @JsonInclude(JsonInclude.Include.NON_NULL) + @Nullable + public StringColumnFormatSpec getStringColumnFormatSpec() + { + return stringColumnFormatSpec; + } + /** * Populate all null fields of {@link IndexSpec}, first from {@link #getDefault()} and finally falling back to hard * coded defaults if no overrides are defined. @@ -298,6 +312,16 @@ public IndexSpec getEffectiveSpec() ); } + if (stringColumnFormatSpec != null) { + bob.withStringColumnFormatSpec( + StringColumnFormatSpec.getEffectiveFormatSpec(stringColumnFormatSpec, this) + ); + } else if (defaultSpec.stringColumnFormatSpec != null) { + bob.withStringColumnFormatSpec( + StringColumnFormatSpec.getEffectiveFormatSpec(defaultSpec.stringColumnFormatSpec, this) + ); + } + return bob.build(); } @@ -320,7 +344,8 @@ public boolean equals(Object o) Objects.equals(complexMetricCompression, indexSpec.complexMetricCompression) && Objects.equals(jsonCompression, indexSpec.jsonCompression) && Objects.equals(segmentLoader, indexSpec.segmentLoader) && - Objects.equals(autoColumnFormatSpec, indexSpec.autoColumnFormatSpec); + Objects.equals(autoColumnFormatSpec, indexSpec.autoColumnFormatSpec) && + Objects.equals(stringColumnFormatSpec, indexSpec.stringColumnFormatSpec); } @Override @@ -336,7 +361,8 @@ public int hashCode() complexMetricCompression, jsonCompression, segmentLoader, - autoColumnFormatSpec + autoColumnFormatSpec, + stringColumnFormatSpec ); } @@ -352,6 +378,7 @@ public String toString() ", longEncoding=" + longEncoding + ", complexMetricCompression=" + complexMetricCompression + ", autoColumnFormatSpec=" + autoColumnFormatSpec + + ", stringColumnFormatSpec=" + stringColumnFormatSpec + ", jsonCompression=" + jsonCompression + ", segmentLoader=" + segmentLoader + '}'; @@ -379,6 +406,8 @@ public static class Builder private SegmentizerFactory segmentLoader; @Nullable private NestedCommonFormatColumnFormatSpec autoColumnFormatSpec; + @Nullable + private StringColumnFormatSpec stringColumnFormatSpec; public Builder withBitmapSerdeFactory(@Nullable BitmapSerdeFactory bitmapSerdeFactory) { @@ -441,6 +470,12 @@ public Builder withAutoColumnFormatSpec(@Nullable NestedCommonFormatColumnFormat return this; } + public Builder withStringColumnFormatSpec(@Nullable StringColumnFormatSpec stringColumnFormatSpec) + { + this.stringColumnFormatSpec = stringColumnFormatSpec; + return this; + } + public IndexSpec build() { return new IndexSpec( @@ -453,7 +488,8 @@ public IndexSpec build() complexMetricCompression, jsonCompression, segmentLoader, - autoColumnFormatSpec + autoColumnFormatSpec, + stringColumnFormatSpec ); } } diff --git a/processing/src/main/java/org/apache/druid/segment/StringColumnFormatSpec.java b/processing/src/main/java/org/apache/druid/segment/StringColumnFormatSpec.java new file mode 100644 index 000000000000..6f872ca49836 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/StringColumnFormatSpec.java @@ -0,0 +1,212 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.druid.data.input.impl.DimensionSchema; +import org.apache.druid.error.DruidException; +import org.apache.druid.segment.column.StringBitmapIndexType; + +import javax.annotation.Nullable; +import java.util.Objects; + +public class StringColumnFormatSpec +{ + private static final StringColumnFormatSpec DEFAULT = + builder() + .setIndexType(StringBitmapIndexType.DictionaryEncodedValueIndex.INSTANCE) + .setMultiValueHandling(DimensionSchema.MultiValueHandling.SORTED_ARRAY) + .build(); + + public static Builder builder() + { + return new Builder(); + } + + public static Builder builder(StringColumnFormatSpec spec) + { + return new Builder(spec); + } + + public static StringColumnFormatSpec getEffectiveFormatSpec( + @Nullable StringColumnFormatSpec columnFormatSpec, + IndexSpec indexSpec + ) + { + final Builder builder = columnFormatSpec == null ? builder() : builder(columnFormatSpec); + + final StringColumnFormatSpec defaultSpec; + if (indexSpec.getStringColumnFormatSpec() != null) { + defaultSpec = indexSpec.getStringColumnFormatSpec(); + } else { + defaultSpec = DEFAULT; + } + + if (builder.indexType == null) { + if (defaultSpec.getIndexType() != null) { + builder.setIndexType(defaultSpec.getIndexType()); + } else { + builder.setIndexType(DEFAULT.getIndexType()); + } + } + + if (builder.multiValueHandling == null) { + if (defaultSpec.getMultiValueHandling() != null) { + builder.setMultiValueHandling(defaultSpec.getMultiValueHandling()); + } else { + builder.setMultiValueHandling(DEFAULT.getMultiValueHandling()); + } + } + + if (builder.maxStringLength == null) { + // No DEFAULT fallback needed: null means "no truncation" + builder.setMaxStringLength(defaultSpec.getMaxStringLength()); + } + + return builder.build(); + } + + @Nullable + private final StringBitmapIndexType indexType; + + @Nullable + private final DimensionSchema.MultiValueHandling multiValueHandling; + + @Nullable + private final Integer maxStringLength; + + @JsonCreator + public StringColumnFormatSpec( + @JsonProperty("indexType") @Nullable StringBitmapIndexType indexType, + @JsonProperty("multiValueHandling") @Nullable DimensionSchema.MultiValueHandling multiValueHandling, + @JsonProperty("maxStringLength") @Nullable Integer maxStringLength + ) + { + if (maxStringLength != null && maxStringLength < 0) { + throw DruidException.forPersona(DruidException.Persona.USER) + .ofCategory(DruidException.Category.INVALID_INPUT) + .build("maxStringLength must be >= 0, got [%s]", maxStringLength); + } + this.indexType = indexType; + this.multiValueHandling = multiValueHandling; + this.maxStringLength = maxStringLength; + } + + @Nullable + @JsonProperty + @JsonInclude(JsonInclude.Include.NON_NULL) + public StringBitmapIndexType getIndexType() + { + return indexType; + } + + @Nullable + @JsonProperty + @JsonInclude(JsonInclude.Include.NON_NULL) + public DimensionSchema.MultiValueHandling getMultiValueHandling() + { + return multiValueHandling; + } + + @Nullable + @JsonProperty + @JsonInclude(JsonInclude.Include.NON_NULL) + public Integer getMaxStringLength() + { + return maxStringLength; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + StringColumnFormatSpec that = (StringColumnFormatSpec) o; + return Objects.equals(indexType, that.indexType) + && multiValueHandling == that.multiValueHandling + && Objects.equals(maxStringLength, that.maxStringLength); + } + + @Override + public int hashCode() + { + return Objects.hash(indexType, multiValueHandling, maxStringLength); + } + + @Override + public String toString() + { + return "StringColumnFormatSpec{" + + "indexType=" + indexType + + ", multiValueHandling=" + multiValueHandling + + ", maxStringLength=" + maxStringLength + + '}'; + } + + public static class Builder + { + @Nullable + private StringBitmapIndexType indexType; + @Nullable + private DimensionSchema.MultiValueHandling multiValueHandling; + @Nullable + private Integer maxStringLength; + + public Builder() + { + } + + public Builder(StringColumnFormatSpec spec) + { + this.indexType = spec.indexType; + this.multiValueHandling = spec.multiValueHandling; + this.maxStringLength = spec.maxStringLength; + } + + public Builder setIndexType(@Nullable StringBitmapIndexType indexType) + { + this.indexType = indexType; + return this; + } + + public Builder setMultiValueHandling(@Nullable DimensionSchema.MultiValueHandling multiValueHandling) + { + this.multiValueHandling = multiValueHandling; + return this; + } + + public Builder setMaxStringLength(@Nullable Integer maxStringLength) + { + this.maxStringLength = maxStringLength; + return this; + } + + public StringColumnFormatSpec build() + { + return new StringColumnFormatSpec(indexType, multiValueHandling, maxStringLength); + } + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java index 0d23fe24aa77..af5fa34f2107 100644 --- a/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java +++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java @@ -115,7 +115,7 @@ public StringDimensionHandler( boolean hasSpatialIndexes ) { - this(dimensionName, multiValueHandling, hasBitmapIndexes, hasSpatialIndexes, StringDimensionSchema.getDefaultMaxStringLength()); + this(dimensionName, multiValueHandling, hasBitmapIndexes, hasSpatialIndexes, null); } public StringDimensionHandler( diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java index 88f60ee8042d..4856a532a7a8 100644 --- a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java @@ -24,7 +24,6 @@ import org.apache.druid.collections.bitmap.BitmapFactory; import org.apache.druid.collections.bitmap.MutableBitmap; import org.apache.druid.data.input.impl.DimensionSchema.MultiValueHandling; -import org.apache.druid.data.input.impl.StringDimensionSchema; import org.apache.druid.error.DruidException; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.StringUtils; @@ -68,7 +67,7 @@ public StringDimensionIndexer( boolean hasSpatialIndexes ) { - this(multiValueHandling, hasBitmapIndexes, hasSpatialIndexes, StringDimensionSchema.getDefaultMaxStringLength()); + this(multiValueHandling, hasBitmapIndexes, hasSpatialIndexes, null); } public StringDimensionIndexer( diff --git a/processing/src/main/java/org/apache/druid/segment/column/StringBitmapIndexType.java b/processing/src/main/java/org/apache/druid/segment/column/StringBitmapIndexType.java new file mode 100644 index 000000000000..463b174edc89 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/column/StringBitmapIndexType.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.column; + +import com.fasterxml.jackson.annotation.JsonSubTypes; +import com.fasterxml.jackson.annotation.JsonTypeInfo; + +import java.util.Objects; + +@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type") +@JsonSubTypes({ + @JsonSubTypes.Type(value = StringBitmapIndexType.DictionaryEncodedValueIndex.class, name = StringBitmapIndexType.TYPE_DICTIONARY), + @JsonSubTypes.Type(value = StringBitmapIndexType.NoIndex.class, name = StringBitmapIndexType.TYPE_NONE) +}) +public abstract class StringBitmapIndexType +{ + protected static final String TYPE_DICTIONARY = "dictionaryEncodedValueIndex"; + protected static final String TYPE_NONE = "none"; + + public abstract boolean hasBitmapIndex(); + + public static class DictionaryEncodedValueIndex extends StringBitmapIndexType + { + public static final DictionaryEncodedValueIndex INSTANCE = new DictionaryEncodedValueIndex(); + + @Override + public boolean hasBitmapIndex() + { + return true; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + return o != null && getClass() == o.getClass(); + } + + @Override + public int hashCode() + { + return Objects.hashCode(getClass()); + } + + @Override + public String toString() + { + return TYPE_DICTIONARY; + } + } + + public static class NoIndex extends StringBitmapIndexType + { + public static final NoIndex INSTANCE = new NoIndex(); + + @Override + public boolean hasBitmapIndex() + { + return false; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + return o != null && getClass() == o.getClass(); + } + + @Override + public int hashCode() + { + return Objects.hashCode(getClass()); + } + + @Override + public String toString() + { + return TYPE_NONE; + } + } +} diff --git a/processing/src/test/java/org/apache/druid/data/input/impl/StringDimensionSchemaTest.java b/processing/src/test/java/org/apache/druid/data/input/impl/StringDimensionSchemaTest.java index dbee07bddb81..11178080964e 100644 --- a/processing/src/test/java/org/apache/druid/data/input/impl/StringDimensionSchemaTest.java +++ b/processing/src/test/java/org/apache/druid/data/input/impl/StringDimensionSchemaTest.java @@ -23,9 +23,11 @@ import com.fasterxml.jackson.databind.AnnotationIntrospector; import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.druid.data.input.impl.DimensionSchema.MultiValueHandling; -import org.apache.druid.error.DruidException; import org.apache.druid.guice.DruidSecondaryModule; import org.apache.druid.guice.GuiceAnnotationIntrospector; +import org.apache.druid.segment.IndexSpec; +import org.apache.druid.segment.StringColumnFormatSpec; +import org.apache.druid.segment.column.StringBitmapIndexType; import org.junit.Assert; import org.junit.Test; @@ -55,21 +57,121 @@ public void testDeserializeFromJson() throws JsonProcessingException final String json = "{\n" + " \"name\" : \"dim\",\n" + " \"multiValueHandling\" : \"SORTED_SET\",\n" - + " \"createBitmapIndex\" : false,\n" - + " \"maxStringLength\" : 200\n" + + " \"createBitmapIndex\" : false\n" + "}"; final StringDimensionSchema schema = (StringDimensionSchema) jsonMapper.readValue(json, DimensionSchema.class); Assert.assertEquals(new StringDimensionSchema("dim", MultiValueHandling.SORTED_SET, false), schema); - Assert.assertEquals(Integer.valueOf(200), schema.getMaxStringLength()); } @Test - public void testInvalidMaxStringLength() + public void testDeserializeFromJsonWithColumnFormatSpec() throws JsonProcessingException + { + final String json = "{\n" + + " \"name\" : \"dim\",\n" + + " \"multiValueHandling\" : \"SORTED_SET\",\n" + + " \"createBitmapIndex\" : false,\n" + + " \"columnFormatSpec\" : { \"maxStringLength\" : 200 }\n" + + "}"; + final StringDimensionSchema schema = (StringDimensionSchema) jsonMapper.readValue(json, DimensionSchema.class); + final StringColumnFormatSpec expectedSpec = StringColumnFormatSpec.builder() + .setMaxStringLength(200) + .build(); + Assert.assertEquals( + new StringDimensionSchema("dim", MultiValueHandling.SORTED_SET, false, expectedSpec), + schema + ); + Assert.assertEquals(Integer.valueOf(200), schema.getColumnFormatSpec().getMaxStringLength()); + } + + @Test + public void testGetEffectiveSchemaResolvesMaxStringLengthFromIndexSpec() + { + final StringDimensionSchema schema = new StringDimensionSchema("dim"); + final IndexSpec indexSpec = IndexSpec.builder() + .withStringColumnFormatSpec( + StringColumnFormatSpec.builder().setMaxStringLength(50).build() + ) + .build(); + + final StringDimensionSchema effective = (StringDimensionSchema) schema.getEffectiveSchema(indexSpec); + + Assert.assertEquals(Integer.valueOf(50), effective.getColumnFormatSpec().getMaxStringLength()); + Assert.assertEquals("dim", effective.getName()); + } + + @Test + public void testGetEffectiveSchemaPreservesPerColumnMaxStringLength() + { + final StringColumnFormatSpec columnSpec = StringColumnFormatSpec.builder() + .setMaxStringLength(20) + .build(); + final StringDimensionSchema schema = new StringDimensionSchema("dim", null, true, columnSpec); + final IndexSpec indexSpec = IndexSpec.builder() + .withStringColumnFormatSpec( + StringColumnFormatSpec.builder().setMaxStringLength(50).build() + ) + .build(); + + final StringDimensionSchema effective = (StringDimensionSchema) schema.getEffectiveSchema(indexSpec); + + // Per-column maxStringLength=20 should not be overridden by job level 50 + Assert.assertEquals(Integer.valueOf(20), effective.getColumnFormatSpec().getMaxStringLength()); + } + + @Test + public void testGetEffectiveSchemaPreservesCreateBitmapIndex() + { + final StringDimensionSchema schema = new StringDimensionSchema("dim", null, false); + final IndexSpec indexSpec = IndexSpec.builder().build(); + + final StringDimensionSchema effective = (StringDimensionSchema) schema.getEffectiveSchema(indexSpec); + + Assert.assertFalse(effective.hasBitmapIndex()); + } + + @Test + public void testGetEffectiveSchemaPreservesMultiValueHandling() + { + final StringDimensionSchema schema = new StringDimensionSchema("dim", MultiValueHandling.ARRAY, true); + final IndexSpec indexSpec = IndexSpec.builder().build(); + + final StringDimensionSchema effective = (StringDimensionSchema) schema.getEffectiveSchema(indexSpec); + + // multiValueHandling=ARRAY must not be overridden by the DEFAULT (SORTED_ARRAY) + Assert.assertEquals(MultiValueHandling.ARRAY, effective.getMultiValueHandling()); + } + + @Test + public void testGetEffectiveSchemaNoChangeWithoutStringColumnFormatSpec() { - final Exception exception = Assert.assertThrows( - DruidException.class, - () -> new StringDimensionSchema("dim", null, true, -1) + final StringDimensionSchema schema = new StringDimensionSchema("dim"); + final IndexSpec indexSpec = IndexSpec.builder().build(); + + final StringDimensionSchema effective = (StringDimensionSchema) schema.getEffectiveSchema(indexSpec); + + // With no stringColumnFormatSpec, should return same object + Assert.assertSame(schema, effective); + Assert.assertEquals(schema.hasBitmapIndex(), effective.hasBitmapIndex()); + Assert.assertEquals(schema.getMultiValueHandling(), effective.getMultiValueHandling()); + } + + @Test + public void testGetEffectiveSchemaResolvesIndexTypeFromIndexSpec() + { + final StringDimensionSchema schema = new StringDimensionSchema("dim"); + final IndexSpec indexSpec = IndexSpec.builder() + .withStringColumnFormatSpec( + StringColumnFormatSpec.builder() + .setIndexType(StringBitmapIndexType.NoIndex.INSTANCE) + .build() + ) + .build(); + + final StringDimensionSchema effective = (StringDimensionSchema) schema.getEffectiveSchema(indexSpec); + + Assert.assertEquals( + StringBitmapIndexType.NoIndex.INSTANCE, + effective.getColumnFormatSpec().getIndexType() ); - Assert.assertTrue(exception.getMessage().contains("maxStringLength for column [dim] must be >= 0")); } } diff --git a/processing/src/test/java/org/apache/druid/segment/IndexSpecTest.java b/processing/src/test/java/org/apache/druid/segment/IndexSpecTest.java index 663f1b80a245..973aedb938e7 100644 --- a/processing/src/test/java/org/apache/druid/segment/IndexSpecTest.java +++ b/processing/src/test/java/org/apache/druid/segment/IndexSpecTest.java @@ -22,6 +22,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import nl.jqno.equalsverifier.EqualsVerifier; import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.segment.column.StringBitmapIndexType; import org.apache.druid.segment.data.CompressionFactory; import org.apache.druid.segment.data.CompressionFactory.LongEncodingStrategy; import org.apache.druid.segment.data.CompressionStrategy; @@ -69,11 +70,29 @@ public void testDefaults() Assert.assertEquals(LongEncodingStrategy.LONGS, spec.getLongEncoding()); } + @Test + public void testSerdeWithStringColumnFormatSpec() throws Exception + { + final ObjectMapper objectMapper = new DefaultObjectMapper(); + final String json = "{ \"stringColumnFormatSpec\" : { \"maxStringLength\" : 50 } }"; + + final IndexSpec spec = objectMapper.readValue(json, IndexSpec.class); + Assert.assertNotNull(spec.getStringColumnFormatSpec()); + Assert.assertEquals(Integer.valueOf(50), spec.getStringColumnFormatSpec().getMaxStringLength()); + + Assert.assertEquals(spec, objectMapper.readValue(objectMapper.writeValueAsBytes(spec), IndexSpec.class)); + } + @Test public void testEquals() { EqualsVerifier.forClass(IndexSpec.class) .usingGetClass() + .withPrefabValues( + StringBitmapIndexType.class, + StringBitmapIndexType.DictionaryEncodedValueIndex.INSTANCE, + StringBitmapIndexType.NoIndex.INSTANCE + ) .verify(); } } diff --git a/processing/src/test/java/org/apache/druid/segment/StringColumnFormatSpecTest.java b/processing/src/test/java/org/apache/druid/segment/StringColumnFormatSpecTest.java new file mode 100644 index 000000000000..f2d7f80f9f48 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/segment/StringColumnFormatSpecTest.java @@ -0,0 +1,155 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment; + +import com.fasterxml.jackson.core.JsonProcessingException; +import nl.jqno.equalsverifier.EqualsVerifier; +import org.apache.druid.data.input.impl.DimensionSchema.MultiValueHandling; +import org.apache.druid.segment.column.StringBitmapIndexType; +import org.junit.Assert; +import org.junit.Test; + +public class StringColumnFormatSpecTest +{ + @Test + public void testSerde() throws JsonProcessingException + { + StringColumnFormatSpec spec = StringColumnFormatSpec.builder() + .setIndexType(StringBitmapIndexType.NoIndex.INSTANCE) + .setMultiValueHandling(MultiValueHandling.SORTED_SET) + .setMaxStringLength(100) + .build(); + + StringColumnFormatSpec roundTripped = TestHelper.JSON_MAPPER.readValue( + TestHelper.JSON_MAPPER.writeValueAsString(spec), + StringColumnFormatSpec.class + ); + Assert.assertEquals(spec, roundTripped); + } + + @Test + public void testSerdeNullFields() throws JsonProcessingException + { + StringColumnFormatSpec spec = StringColumnFormatSpec.builder().build(); + + String json = TestHelper.JSON_MAPPER.writeValueAsString(spec); + Assert.assertEquals("{}", json); + + StringColumnFormatSpec roundTripped = TestHelper.JSON_MAPPER.readValue(json, StringColumnFormatSpec.class); + Assert.assertEquals(spec, roundTripped); + } + + @Test + public void testGetEffectiveFormatSpecDefaults() + { + StringColumnFormatSpec effective = StringColumnFormatSpec.getEffectiveFormatSpec( + null, + IndexSpec.builder().build() + ); + + Assert.assertEquals(StringBitmapIndexType.DictionaryEncodedValueIndex.INSTANCE, effective.getIndexType()); + Assert.assertEquals(MultiValueHandling.SORTED_ARRAY, effective.getMultiValueHandling()); + Assert.assertNull(effective.getMaxStringLength()); + } + + @Test + public void testGetEffectiveFormatSpecJobLevelOverride() + { + IndexSpec indexSpec = IndexSpec.builder() + .withStringColumnFormatSpec( + StringColumnFormatSpec.builder() + .setMaxStringLength(50) + .build() + ) + .build(); + + StringColumnFormatSpec effective = StringColumnFormatSpec.getEffectiveFormatSpec(null, indexSpec); + + Assert.assertEquals(StringBitmapIndexType.DictionaryEncodedValueIndex.INSTANCE, effective.getIndexType()); + Assert.assertEquals(MultiValueHandling.SORTED_ARRAY, effective.getMultiValueHandling()); + Assert.assertEquals(Integer.valueOf(50), effective.getMaxStringLength()); + } + + @Test + public void testGetEffectiveFormatSpecColumnOverridesJobLevel() + { + StringColumnFormatSpec columnSpec = StringColumnFormatSpec.builder() + .setMaxStringLength(20) + .build(); + + IndexSpec indexSpec = IndexSpec.builder() + .withStringColumnFormatSpec( + StringColumnFormatSpec.builder() + .setMaxStringLength(50) + .build() + ) + .build(); + + StringColumnFormatSpec effective = StringColumnFormatSpec.getEffectiveFormatSpec(columnSpec, indexSpec); + + Assert.assertEquals(Integer.valueOf(20), effective.getMaxStringLength()); + } + + @Test + public void testGetEffectiveFormatSpecColumnFallsBackToJobLevel() + { + StringColumnFormatSpec columnSpec = StringColumnFormatSpec.builder() + .setIndexType(StringBitmapIndexType.NoIndex.INSTANCE) + .build(); + + IndexSpec indexSpec = IndexSpec.builder() + .withStringColumnFormatSpec( + StringColumnFormatSpec.builder() + .setMaxStringLength(50) + .setMultiValueHandling(MultiValueHandling.ARRAY) + .build() + ) + .build(); + + StringColumnFormatSpec effective = StringColumnFormatSpec.getEffectiveFormatSpec(columnSpec, indexSpec); + + Assert.assertEquals(StringBitmapIndexType.NoIndex.INSTANCE, effective.getIndexType()); + Assert.assertEquals(MultiValueHandling.ARRAY, effective.getMultiValueHandling()); + Assert.assertEquals(Integer.valueOf(50), effective.getMaxStringLength()); + } + + @Test + public void testInvalidMaxStringLength() + { + final Exception exception = Assert.assertThrows( + Exception.class, + () -> StringColumnFormatSpec.builder().setMaxStringLength(-1).build() + ); + Assert.assertTrue(exception.getMessage().contains("maxStringLength must be >= 0")); + } + + @Test + public void testEqualsAndHashCode() + { + EqualsVerifier.forClass(StringColumnFormatSpec.class) + .usingGetClass() + .withPrefabValues( + StringBitmapIndexType.class, + StringBitmapIndexType.DictionaryEncodedValueIndex.INSTANCE, + StringBitmapIndexType.NoIndex.INSTANCE + ) + .verify(); + } +} diff --git a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/BatchAppenderator.java b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/BatchAppenderator.java index fb3688f5fa18..453b7ade29e2 100644 --- a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/BatchAppenderator.java +++ b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/BatchAppenderator.java @@ -81,6 +81,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.Iterator; import java.util.List; @@ -466,7 +467,9 @@ private Sink getOrCreateSink(final SegmentIdWithShardSpec identifier) identifier.getVersion(), tuningConfig.getAppendableIndexSpec(), tuningConfig.getMaxRowsInMemory(), - maxBytesTuningConfig + maxBytesTuningConfig, + tuningConfig.getIndexSpec(), + Collections.emptyList() ); bytesCurrentlyInMemory += calculateSinkMemoryInUsed(); sinks.put(identifier, retVal); @@ -1068,6 +1071,7 @@ private Sink getSinkForIdentifierPath(SegmentIdWithShardSpec identifier, File id tuningConfig.getAppendableIndexSpec(), tuningConfig.getMaxRowsInMemory(), maxBytesTuningConfig, + tuningConfig.getIndexSpec(), hydrants ); retVal.finishWriting(); // this sink is not writable diff --git a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderator.java b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderator.java index 2835a7752c75..650e8eec25ab 100644 --- a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderator.java +++ b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderator.java @@ -516,7 +516,9 @@ private Sink getOrCreateSink(final SegmentIdWithShardSpec identifier) identifier.getVersion(), tuningConfig.getAppendableIndexSpec(), tuningConfig.getMaxRowsInMemory(), - maxBytesTuningConfig + maxBytesTuningConfig, + tuningConfig.getIndexSpec(), + Collections.emptyList() ); bytesCurrentlyInMemory.addAndGet(calculateSinkMemoryInUsed(retVal)); @@ -1412,6 +1414,7 @@ private Object bootstrapSinksFromDisk() tuningConfig.getAppendableIndexSpec(), tuningConfig.getMaxRowsInMemory(), maxBytesTuningConfig, + tuningConfig.getIndexSpec(), hydrants ); rowsSoFar += currSink.getNumRows(); diff --git a/server/src/main/java/org/apache/druid/segment/realtime/sink/Sink.java b/server/src/main/java/org/apache/druid/segment/realtime/sink/Sink.java index 87b2ab9cc52b..422a913d1b9e 100644 --- a/server/src/main/java/org/apache/druid/segment/realtime/sink/Sink.java +++ b/server/src/main/java/org/apache/druid/segment/realtime/sink/Sink.java @@ -26,12 +26,15 @@ import com.google.common.collect.Lists; import com.google.errorprone.annotations.concurrent.GuardedBy; import org.apache.druid.data.input.InputRow; +import org.apache.druid.data.input.impl.DimensionSchema; +import org.apache.druid.data.input.impl.DimensionsSpec; import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.query.Query; import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.segment.CursorFactory; +import org.apache.druid.segment.IndexSpec; import org.apache.druid.segment.QueryableIndex; import org.apache.druid.segment.Segment; import org.apache.druid.segment.SegmentMapFunction; @@ -64,6 +67,7 @@ import java.util.Optional; import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; public class Sink implements Iterable, Overshadowable { @@ -81,6 +85,7 @@ public class Sink implements Iterable, Overshadowable private final AppendableIndexSpec appendableIndexSpec; private final int maxRowsInMemory; private final long maxBytesInMemory; + private final IndexSpec indexSpec; private final CopyOnWriteArrayList hydrants = new CopyOnWriteArrayList<>(); private final LinkedHashSet dimOrder = new LinkedHashSet<>(); @@ -115,6 +120,7 @@ public Sink( appendableIndexSpec, maxRowsInMemory, maxBytesInMemory, + null, Collections.emptyList() ); } @@ -127,6 +133,7 @@ public Sink( AppendableIndexSpec appendableIndexSpec, int maxRowsInMemory, long maxBytesInMemory, + @Nullable IndexSpec indexSpec, List hydrants ) { @@ -137,6 +144,7 @@ public Sink( this.appendableIndexSpec = appendableIndexSpec; this.maxRowsInMemory = maxRowsInMemory; this.maxBytesInMemory = maxBytesInMemory; + this.indexSpec = (indexSpec != null ? indexSpec : IndexSpec.getDefault()).getEffectiveSpec(); int maxCount = -1; for (int i = 0; i < hydrants.size(); ++i) { @@ -306,11 +314,14 @@ public List acquireSegmentReferences( private FireHydrant makeNewCurrIndex(long minTimestamp, DataSchema schema) { + // Resolve effective dimension schemas up front because column indexers need the fully resolved + // StringColumnFormatSpec at construction time. + final DimensionsSpec dimensionsSpec = resolveEffectiveDimensionsSpec(schema.getDimensionsSpec()); final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder() .withMinTimestamp(minTimestamp) .withTimestampSpec(schema.getTimestampSpec()) .withQueryGranularity(schema.getGranularitySpec().getQueryGranularity()) - .withDimensionsSpec(schema.getDimensionsSpec()) + .withDimensionsSpec(dimensionsSpec) .withMetrics(schema.getAggregators()) .withRollup(schema.getGranularitySpec().isRollup()) .withProjections(schema.getProjections()) @@ -386,6 +397,17 @@ private FireHydrant makeNewCurrIndex(long minTimestamp, DataSchema schema) return old; } + private DimensionsSpec resolveEffectiveDimensionsSpec(DimensionsSpec dimensionsSpec) + { + final List effectiveDimensions = dimensionsSpec.getDimensions() + .stream() + .map(dim -> dim.getEffectiveSchema(indexSpec)) + .collect(Collectors.toList()); + return DimensionsSpec.builder(dimensionsSpec) + .setDimensions(effectiveDimensions) + .build(); + } + /** * Merge the column from the index with the existing columns. */ diff --git a/server/src/test/java/org/apache/druid/server/compaction/CompactionStatusTest.java b/server/src/test/java/org/apache/druid/server/compaction/CompactionStatusTest.java index cdabf99198b2..04cee38e87cb 100644 --- a/server/src/test/java/org/apache/druid/server/compaction/CompactionStatusTest.java +++ b/server/src/test/java/org/apache/druid/server/compaction/CompactionStatusTest.java @@ -301,12 +301,12 @@ public void testStatusOnIndexSpecMismatch() + " metadataCompression=none," + " dimensionCompression=lz4, stringDictionaryEncoding=Utf8{}," + " metricCompression=lz4, longEncoding=longs, complexMetricCompression=null," - + " autoColumnFormatSpec=null, jsonCompression=null, segmentLoader=null}], " + + " autoColumnFormatSpec=null, stringColumnFormatSpec=null, jsonCompression=null, segmentLoader=null}], " + "current[IndexSpec{bitmapSerdeFactory=RoaringBitmapSerdeFactory{}," + " metadataCompression=none," + " dimensionCompression=zstd, stringDictionaryEncoding=Utf8{}," + " metricCompression=lz4, longEncoding=longs, complexMetricCompression=null," - + " autoColumnFormatSpec=null, jsonCompression=null, segmentLoader=null}]" + + " autoColumnFormatSpec=null, stringColumnFormatSpec=null, jsonCompression=null, segmentLoader=null}]" ); }