From 03719df320bff2d560298ab5894c299345fa9cf4 Mon Sep 17 00:00:00 2001 From: mariano Date: Tue, 31 Mar 2026 15:56:13 +0900 Subject: [PATCH 01/14] feat: introduce typed MessagePipeline interface and unify sink API - Add MessagePipeline interface for end-to-end (byte[] <-> T) processing - Refactor MessageSink to a single-argument Consumer for processed objects - Update KPipeConsumer, CompositeMessageSink, and all sinks/tests to use new MessageSink signature - Simplify registry and pipeline builder APIs for typed pipelines and sinks - Enhance AvroFormat with default schema support and implement deserialize - Remove legacy processJson/processAvro helpers in favor of pipeline-based processing - Update tests and integration tests to use new typed sink and pipeline APIs - Improve error handling and logging consistency in sinks --- README.md | 134 +++--- app/avro/src/main/java/org/kpipe/App.java | 38 +- .../java/org/kpipe/AppIntegrationTest.java | 23 +- app/json/src/main/java/org/kpipe/App.java | 23 +- .../java/org/kpipe/AppIntegrationTest.java | 32 +- .../org/kpipe/consumer/KPipeConsumer.java | 18 +- .../metrics/ProcessorMetricsReporter.java | 2 +- .../kpipe/processor/AvroMessageProcessor.java | 101 +--- .../kpipe/processor/JsonMessageProcessor.java | 44 -- .../java/org/kpipe/registry/AvroFormat.java | 30 +- .../java/org/kpipe/registry/JsonFormat.java | 15 +- .../org/kpipe/registry/MessagePipeline.java | 48 ++ .../registry/MessageProcessorRegistry.java | 455 ++++-------------- .../kpipe/registry/MessageSinkRegistry.java | 189 +++----- .../org/kpipe/registry/RegistryFunctions.java | 19 +- .../java/org/kpipe/registry/RegistryKey.java | 12 +- .../kpipe/registry/TypedPipelineBuilder.java | 163 +++++++ .../java/org/kpipe/sink/AvroConsoleSink.java | 27 +- .../org/kpipe/sink/CompositeMessageSink.java | 24 +- .../java/org/kpipe/sink/JsonConsoleSink.java | 59 +-- .../main/java/org/kpipe/sink/MessageSink.java | 15 +- .../ExternalOffsetIntegrationTest.java | 11 +- .../KPipeBackpressureIntegrationTest.java | 8 +- .../consumer/KPipeConsumerMockingTest.java | 2 +- .../org/kpipe/consumer/KPipeConsumerTest.java | 6 +- .../kpipe/consumer/KPipeInterruptTest.java | 13 +- ...SequentialBackpressureIntegrationTest.java | 124 +++++ .../metrics/ProcessorMetricsReporterTest.java | 20 +- .../metrics/SinkMetricsReporterTest.java | 4 +- .../processor/AvroMessageProcessorTest.java | 141 +++--- .../processor/JsonMessageProcessorTest.java | 302 ++++++------ .../MessageProcessorRegistryTest.java | 141 +++--- .../registry/MessageSinkRegistryTest.java | 102 ++-- .../kpipe/registry/OptimizedPipelineTest.java | 95 ++-- .../org/kpipe/registry/PojoPipelineTest.java | 4 +- .../kpipe/registry/RegistryFunctionsTest.java | 9 +- .../org/kpipe/sink/AvroConsoleSinkTest.java | 60 +-- .../kpipe/sink/CompositeMessageSinkTest.java | 25 +- .../sink/CompositeSinkIntegrationTest.java | 20 +- .../org/kpipe/sink/JsonConsoleSinkTest.java | 80 +-- 40 files changed, 1160 insertions(+), 1478 deletions(-) create mode 100644 lib/src/main/java/org/kpipe/registry/MessagePipeline.java create mode 100644 lib/src/main/java/org/kpipe/registry/TypedPipelineBuilder.java create mode 100644 lib/src/test/java/org/kpipe/consumer/KPipeSequentialBackpressureIntegrationTest.java diff --git a/README.md b/README.md index 25ab169..a41e359 100644 --- a/README.md +++ b/README.md @@ -33,9 +33,9 @@ registry.registerOperator(sanitizeKey, JsonMessageProcessor.removeFieldsOperator final var stampKey = RegistryKey.json("stamp"); registry.registerOperator(stampKey, JsonMessageProcessor.addTimestampOperator("processedAt")); -final var pipeline = registry.jsonPipelineBuilder() - .add(sanitizeKey) - .add(stampKey) +final var pipeline = registry.jsonPipeline() + .add(sanitizeKey, stampKey) + .toSink(MessageSinkRegistry.JSON_LOGGING) .build(); final var consumer = KPipeConsumer.builder() @@ -136,9 +136,11 @@ Unlike traditional pipelines that often perform `byte[] -> Object -> byte[]` at optimizes for throughput: - **Single Deserialization**: Messages are deserialized **once** into a mutable representation (e.g., `Map` for JSON, - `GenericRecord` for Avro). + `GenericRecord` for Avro) via the `MessagePipeline`. - **In-Place Transformations**: A chain of `UnaryOperator` functions is applied to the same object. -- **Single Serialization**: The final object is serialized back to `byte[]` only once before being sent to the sink. +- **Single Serialization**: The final object is serialized back to `byte[]` only once. +- **Integrated Sinks**: Typed sinks can be attached directly to the pipeline, receiving the object before final + serialization. This approach significantly reduces CPU overhead and GC pressure. @@ -326,10 +328,10 @@ registry.registerOperator(envKey, JsonMessageProcessor.addFieldOperator("environment", "production")); // Create a high-performance pipeline (single SerDe cycle) -final var pipeline = registry.jsonPipelineBuilder() +final var pipeline = registry.jsonPipeline() .add(envKey) .add(uppercaseKey) - .add(MessageProcessorRegistry.JSON_ADD_TIMESTAMP) + .add(RegistryKey.json("addTimestamp")) .build(); // Use the pipeline with a consumer @@ -419,7 +421,7 @@ final var metaKey = RegistryKey.json("addMetadata"); registry.registerOperator(metaKey, JsonMessageProcessor.mergeWithOperator(metadata)); // Build an optimized pipeline (one deserialization -> many transformations -> one serialization) -final var pipeline = registry.jsonPipelineBuilder() +final var pipeline = registry.jsonPipeline() .add(sanitizeKey) .add(stampKey) .add(metaKey) @@ -452,14 +454,15 @@ registry.registerOperator(upperKey, // Build an optimized pipeline // This pipeline handles deserialization, all operators, and serialization in one pass -final var pipeline = registry.avroPipelineBuilder("user") +final var pipeline = registry.avroPipeline("user") .add(sanitizeKey) .add(upperKey) .add(RegistryKey.avro("addTimestamp_user")) .build(); // For data with magic bytes (e.g., Confluent Wire Format), specify an offset: -final var confluentPipeline = registry.avroPipelineBuilder("user", 5) +final var confluentPipeline = registry.avroPipeline("user") + .skipBytes(5) .add(sanitizeKey) .add(RegistryKey.avro("addTimestamp_user")) .build(); @@ -467,8 +470,8 @@ final var confluentPipeline = registry.avroPipelineBuilder("user", 5) ### POJO Processing -For high-performance processing of Java records or POJOs, use the `PojoFormat` and `PojoPipelineBuilder`. This leverages -DSL-JSON annotation processing for near-native performance. +For high-performance processing of Java records or POJOs, use the `PojoFormat` and `TypedPipelineBuilder`. This +leverages DSL-JSON annotation processing for near-native performance. ```java final var registry = new MessageProcessorRegistry("myApp"); @@ -478,7 +481,7 @@ final var userKey = RegistryKey.of("userTransform", UserRecord.class); registry.registerOperator(userKey, user -> new UserRecord(user.id(), user.name().toUpperCase(), user.email())); // Build an optimized POJO pipeline -final var pipeline = registry.pojoPipelineBuilder(UserRecord.class) +final var pipeline = registry.pojoPipeline(UserRecord.class) .add(userKey) .build(); ``` @@ -492,8 +495,8 @@ defines a single method: ```java @FunctionalInterface -public interface MessageSink { - void send(final ConsumerRecord record, final V processedValue); +public interface MessageSink { + void accept(final T processedValue); } ``` @@ -502,19 +505,14 @@ public interface MessageSink { KPipe provides several built-in sinks: ```java -// Create a JSON console sink -final var jsonConsoleSink = new JsonConsoleSink<>(); +// Create a JSON console sink (Map-typed) +final var jsonConsoleSink = new JsonConsoleSink>(); -// Create an Avro console sink -final var avroConsoleSink = new AvroConsoleSink<>(); +// Create an Avro console sink (GenericRecord-typed) +final var avroConsoleSink = new AvroConsoleSink(); -// Use a sink with a consumer -final var consumer = KPipeConsumer.builder() - .withProperties(kafkaProps) - .withTopic("events") - .withProcessor(pipeline) - .withMessageSink(jsonConsoleSink) - .build(); +// Use a sink directly in the pipeline +final var pipeline = registry.jsonPipeline().add(RegistryKey.json("sanitize")).toSink(jsonConsoleSink).build(); ``` ### Custom Sinks @@ -523,16 +521,13 @@ You can create custom sinks using lambda expressions: ```java // Create a custom sink that writes to a database -final MessageSink databaseSink = (record, processedValue) -> { +final MessageSink> databaseSink = (processedMap) -> { try { - // Parse the processed value - final var data = new String(processedValue, StandardCharsets.UTF_8); - // Write to database - databaseService.insert(data); + databaseService.insert(processedMap); // Log success - log.log(Level.INFO, "Successfully wrote message to database: " + record.key()); + log.log(Level.INFO, "Successfully wrote message to database: " + processedMap.get("id")); } catch (Exception e) { log.log(Level.ERROR, "Failed to write message to database", e); } @@ -548,16 +543,13 @@ The `MessageSinkRegistry` provides a centralized repository for registering and final var registry = new MessageSinkRegistry(); // Register sinks with explicit types -final var dbKey = RegistryKey.of("database", byte[].class); -registry.register(MessageSinkRegistry.JSON_LOGGING, byte[].class, new JsonConsoleSink<>()); -registry.register(dbKey, byte[].class, databaseSink); +final var dbKey = RegistryKey.of("database", Map.class); +registry.register(dbKey, databaseSink); -// Create a pipeline of sinks -final var sinkPipeline = registry.pipeline(byte[].class, MessageSinkRegistry.JSON_LOGGING, dbKey); - -// Use the sink pipeline with a consumer -final var consumer = KPipeConsumer.builder() - .withMessageSink(sinkPipeline) +// Use the sink by key in the pipeline +final var pipeline = registry.jsonPipeline() + .add(RegistryKey.json("enrich")) + .toSink(dbKey) .build(); ``` @@ -570,10 +562,12 @@ The registry provides utilities for adding error handling to sinks: final var safeSink = MessageSinkRegistry.withErrorHandling(riskySink); // Register and use the wrapped sink -final var safeKey = RegistryKey.of("safeDatabase", byte[].class); -registry.register(safeKey, String.class, safeSink); +final var safeKey = RegistryKey.of("safeDatabase", Map.class); +registry.register(safeKey, safeSink); -final var safePipeline = registry.pipeline(String.class, MessageSinkRegistry.JSON_LOGGING, safeKey); +final var pipeline = registry.jsonPipeline() + .toSink(safeKey) + .build(); ``` ### Composite Sink (Broadcasting) @@ -585,13 +579,13 @@ one sink (e.g., a database timeout) do not prevent other sinks from receiving th // Create multiple sinks final var postgresSink = new MyPostgresSink(); -final var consoleSink = new JsonConsoleSink(); +final var consoleSink = new JsonConsoleSink>(); // Broadcast to both final var compositeSink = new CompositeMessageSink<>(List.of(postgresSink, consoleSink)); -// Use with consumer -final var consumer = KPipeConsumer.builder().withMessageSink(compositeSink).build(); +// Use in pipeline +final var pipeline = registry.jsonPipeline().toSink(compositeSink).build(); ``` --- @@ -724,13 +718,13 @@ public class KPipeApp implements AutoCloseable { .withTopic(config.topic()) .withProcessor( processorRegistry - .jsonPipelineBuilder() - .add(MessageProcessorRegistry.JSON_ADD_SOURCE) - .add(MessageProcessorRegistry.JSON_MARK_PROCESSED) - .add(MessageProcessorRegistry.JSON_ADD_TIMESTAMP) + .jsonPipeline() + .add(RegistryKey.json("addSource")) + .add(RegistryKey.json("markProcessed")) + .add(RegistryKey.json("addTimestamp")) + .toSink(MessageSinkRegistry.JSON_LOGGING) .build() ) - .withMessageSink(sinkRegistry.pipeline(byte[].class, MessageSinkRegistry.JSON_LOGGING)) .withCommandQueue(commandQueue) .withOffsetManagerProvider((consumer) -> KafkaOffsetManager.builder(consumer) @@ -883,7 +877,7 @@ registry.registerOperator(enrichmentKey, JsonMessageProcessor.addTimestampOperator("processedAt")); // Compose them into an optimized pipeline -final var fullPipeline = registry.jsonPipelineBuilder() +final var fullPipeline = registry.jsonPipeline() .add(securityKey) .add(enrichmentKey) .build(); @@ -915,27 +909,31 @@ registry.registerEnum(Map.class, StandardProcessors.class); ### Conditional Processing -The library provides a built-in `when()` method for conditional processing: +KPipe provides a fluent `when()` operator directly in the `TypedPipelineBuilder`: ```java -// Create a predicate that checks message type -final Predicate isOrderMessage = (bytes) -> { - // Logic to check if it's an order - return true; -}; - -// Use the built-in conditional processor -Function conditionalPipeline = MessageProcessorRegistry.when( - isOrderMessage, - registry.jsonPipelineBuilder().add(RegistryKey.json("orderProcessor")).build(), - registry.jsonPipelineBuilder().add(RegistryKey.json("defaultProcessor")).build() -); +final var pipeline = registry + .jsonPipeline() + .when( + (map) -> "VIP".equals(map.get("level")), + (map) -> { + map.put("priority", "high"); + return map; + }, + (map) -> { + map.put("priority", "low"); + return map; + } + ) + .build(); ``` +Alternatively, for `byte[]` level branching, use the static `MessageProcessorRegistry.when()` utility: + ### Filtering Messages -To skip a message in a pipeline, return `null` in your processor. KPipe will treat `null` as a signal to stop processing -the current record and will not send it to the sink. +To skip a message in a pipeline, return `null` in your operator. KPipe will treat `null` as a signal to stop processing +the current record and will not send it to any downstream operators or sinks. ```java registry.registerOperator(RegistryKey.json("filter"), map -> { diff --git a/app/avro/src/main/java/org/kpipe/App.java b/app/avro/src/main/java/org/kpipe/App.java index bf61224..4c37b95 100644 --- a/app/avro/src/main/java/org/kpipe/App.java +++ b/app/avro/src/main/java/org/kpipe/App.java @@ -23,12 +23,10 @@ import org.kpipe.metrics.MetricsReporter; import org.kpipe.metrics.ProcessorMetricsReporter; import org.kpipe.metrics.SinkMetricsReporter; -import org.kpipe.processor.AvroMessageProcessor; import org.kpipe.registry.MessageFormat; import org.kpipe.registry.MessageProcessorRegistry; import org.kpipe.registry.MessageSinkRegistry; import org.kpipe.registry.RegistryKey; -import org.kpipe.sink.AvroConsoleSink; import org.kpipe.sink.MessageSink; /// Application that consumes messages from a Kafka topic and processes them using a configurable @@ -73,8 +71,8 @@ public App(final AppConfig config) { /// @param schemaRegistryUrl Schema Registry base URL public App(final AppConfig config, final String schemaRegistryUrl) { processorRegistry = new MessageProcessorRegistry(config.appName(), MessageFormat.AVRO); - sinkRegistry = new MessageSinkRegistry(); - functionalConsumer = createConsumer(config, processorRegistry, sinkRegistry, schemaRegistryUrl); + sinkRegistry = processorRegistry.sinkRegistry(); + functionalConsumer = createConsumer(config, processorRegistry, schemaRegistryUrl); final var consumerMetricsReporter = ConsumerMetricsReporter.forConsumer(functionalConsumer::getMetrics); @@ -121,12 +119,11 @@ private ConsumerRunner> createConsumerRunner( /// /// @param config The application configuration /// @param processorRegistry Map of processor functions - /// @param sinkRegistry Map of sink functions + /// @param schemaRegistryUrl Schema Registry base URL /// @return A configured functional consumer public static KPipeConsumer createConsumer( final AppConfig config, final MessageProcessorRegistry processorRegistry, - final MessageSinkRegistry sinkRegistry, final String schemaRegistryUrl ) { final var kafkaProps = KafkaConsumerConfig.createConsumerConfig(config.bootstrapServers(), config.consumerGroup()); @@ -135,9 +132,8 @@ public static KPipeConsumer createConsumer( return KPipeConsumer.builder() .withProperties(kafkaProps) .withTopic(config.topic()) - .withProcessor(createAvroProcessorPipeline(processorRegistry, config, sinkRegistry, schemaRegistryUrl)) + .withProcessor(createAvroProcessorPipeline(processorRegistry, config, schemaRegistryUrl)) .withPollTimeout(config.pollTimeout()) - .withMessageSink(createSinksPipeline(sinkRegistry)) .withCommandQueue(commandQueue) .withOffsetManagerProvider(createOffsetManagerProvider(Duration.ofSeconds(30), commandQueue)) .withMetrics(true) @@ -161,34 +157,30 @@ private static Function, OffsetManager> /// /// @param registry the message sink registry /// @return a message sink that processes messages through the pipeline - private static MessageSink createSinksPipeline(final MessageSinkRegistry registry) { - return registry.pipeline(byte[].class, MessageSinkRegistry.AVRO_LOGGING); + private static MessageSink createSinksPipeline(final MessageSinkRegistry registry) { + return registry.pipeline(MessageSinkRegistry.AVRO_LOGGING); } /// Creates a processor pipeline using the provided registry. /// /// @param registry the message processor registry /// @param config the application configuration - /// @param sinkRegistry the message sink registry + /// @param schemaRegistryUrl the schema registry URL /// @return a function that processes messages through the pipeline - private static Function createAvroProcessorPipeline( + private static java.util.function.UnaryOperator createAvroProcessorPipeline( final MessageProcessorRegistry registry, final AppConfig config, - final MessageSinkRegistry sinkRegistry, final String schemaRegistryUrl ) { - registry.addSchema("1", "com.kpipe.customer", schemaRegistryUrl + "/subjects/com.kpipe.customer/versions/latest"); - - // Register the sink - final var schema = AvroMessageProcessor.getSchema("1"); - if (schema != null) sinkRegistry.register( - MessageSinkRegistry.AVRO_LOGGING, - byte[].class, - new AvroConsoleSink<>(schema) - ); + final var avroFormat = (org.kpipe.registry.AvroFormat) MessageFormat.AVRO; + // Register schema for the test/app + avroFormat.addSchema("1", "com.kpipe.customer", schemaRegistryUrl + "/subjects/com.kpipe.customer/versions/latest"); + avroFormat.withDefaultSchema("1"); - final var builder = registry.avroPipelineBuilder("1", 5); + final var builder = registry.pipeline(avroFormat); + builder.skipBytes(5); for (final var name : config.processors()) builder.add(RegistryKey.avro(name)); + builder.toSink(RegistryKey.of("avroLogging", org.apache.avro.generic.GenericRecord.class)); return builder.build(); } diff --git a/app/avro/src/test/java/org/kpipe/AppIntegrationTest.java b/app/avro/src/test/java/org/kpipe/AppIntegrationTest.java index 2a2b295..5cd4e38 100644 --- a/app/avro/src/test/java/org/kpipe/AppIntegrationTest.java +++ b/app/avro/src/test/java/org/kpipe/AppIntegrationTest.java @@ -26,12 +26,9 @@ import java.util.concurrent.TimeUnit; import org.apache.avro.Schema; import org.apache.avro.generic.GenericData; -import org.apache.avro.generic.GenericDatumReader; import org.apache.avro.generic.GenericDatumWriter; import org.apache.avro.generic.GenericRecord; -import org.apache.avro.io.DecoderFactory; import org.apache.avro.io.EncoderFactory; -import org.apache.kafka.clients.consumer.ConsumerRecord; import org.apache.kafka.clients.producer.KafkaProducer; import org.apache.kafka.clients.producer.ProducerConfig; import org.apache.kafka.clients.producer.ProducerRecord; @@ -40,7 +37,7 @@ import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import org.kpipe.config.AppConfig; -import org.kpipe.registry.MessageSinkRegistry; +import org.kpipe.registry.RegistryKey; import org.kpipe.sink.MessageSink; import org.testcontainers.junit.jupiter.Container; import org.testcontainers.junit.jupiter.Testcontainers; @@ -119,7 +116,10 @@ void testAvroAppEndToEnd() throws Exception { try (final var app = new App(config, srUrl)) { // Register the capturing sink - app.getSinkRegistry().register(MessageSinkRegistry.AVRO_LOGGING, byte[].class, capturingSink); + app + .getProcessorRegistry() + .sinkRegistry() + .register(RegistryKey.of("avroLogging", GenericRecord.class), capturingSink); // Start the app final var appThread = Thread.ofVirtual().start(() -> { @@ -146,10 +146,7 @@ void testAvroAppEndToEnd() throws Exception { final var received = capturingSink.getMessages(); assertFalse(received.isEmpty(), "Should have received at least one message"); - final var processedBytes = received.getFirst(); - final var decoder = DecoderFactory.get().binaryDecoder(processedBytes, null); - final var reader = new GenericDatumReader(schema); - final var processedRecord = reader.read(null, decoder); + final var processedRecord = received.getFirst(); assertEquals(1L, processedRecord.get("id")); assertEquals("Test User", processedRecord.get("name").toString()); @@ -277,16 +274,16 @@ private static void writeJson(final HttpExchange exchange, final int status, fin } } - private static class CapturingSink implements MessageSink { + private static class CapturingSink implements MessageSink { - private final List messages = new ArrayList<>(); + private final List messages = new ArrayList<>(); @Override - public synchronized void send(final ConsumerRecord record, byte[] processedValue) { + public synchronized void accept(GenericRecord processedValue) { messages.add(processedValue); } - public synchronized List getMessages() { + public synchronized List getMessages() { return new ArrayList<>(messages); } diff --git a/app/json/src/main/java/org/kpipe/App.java b/app/json/src/main/java/org/kpipe/App.java index c7968d1..28339d2 100644 --- a/app/json/src/main/java/org/kpipe/App.java +++ b/app/json/src/main/java/org/kpipe/App.java @@ -10,6 +10,7 @@ import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Function; +import java.util.function.UnaryOperator; import org.apache.kafka.clients.consumer.Consumer; import org.kpipe.config.AppConfig; import org.kpipe.config.KafkaConsumerConfig; @@ -60,10 +61,12 @@ static void main() { /// /// @param config The application configuration public App(final AppConfig config) { - processorRegistry = new MessageProcessorRegistry(config.appName(), MessageFormat.JSON); - sinkRegistry = new MessageSinkRegistry(); + this.processorRegistry = new MessageProcessorRegistry(config.appName(), MessageFormat.JSON); + this.sinkRegistry = processorRegistry.sinkRegistry(); + // Pre-register loggers + sinkRegistry.register(RegistryKey.of("jsonLogging", Map.class), new org.kpipe.sink.JsonConsoleSink<>()); - kpipeConsumer = createConsumer(config, processorRegistry, sinkRegistry); + this.kpipeConsumer = createConsumer(config, processorRegistry); final var consumerMetricsReporter = ConsumerMetricsReporter.forConsumer(kpipeConsumer::getMetrics); final var processorMetricsReporter = ProcessorMetricsReporter.forRegistry(processorRegistry); @@ -101,12 +104,10 @@ private ConsumerRunner> createConsumerRunner( /// /// @param config The application configuration /// @param processorRegistry Map of processor functions - /// @param sinkRegistry Map of sink functions /// @return A configured functional consumer public static KPipeConsumer createConsumer( final AppConfig config, - final MessageProcessorRegistry processorRegistry, - final MessageSinkRegistry sinkRegistry + final MessageProcessorRegistry processorRegistry ) { final var kafkaProps = KafkaConsumerConfig.createConsumerConfig(config.bootstrapServers(), config.consumerGroup()); final var commandQueue = new ConcurrentLinkedQueue(); @@ -116,7 +117,6 @@ public static KPipeConsumer createConsumer( .withTopic(config.topic()) .withProcessor(createJsonProcessorPipeline(processorRegistry, config)) .withPollTimeout(config.pollTimeout()) - .withMessageSink(createSinksPipeline(sinkRegistry)) .withCommandQueue(commandQueue) .withOffsetManagerProvider(createOffsetManagerProvider(Duration.ofSeconds(30), commandQueue)) .withMetrics(true) @@ -140,8 +140,8 @@ private static Function, OffsetManager> /// /// @param registry the message sink registry /// @return a message sink that processes messages through the pipeline - private static MessageSink createSinksPipeline(final MessageSinkRegistry registry) { - return registry.pipeline(byte[].class, MessageSinkRegistry.JSON_LOGGING); + private static MessageSink createSinksPipeline(final MessageSinkRegistry registry) { + return registry.pipeline(MessageSinkRegistry.JSON_LOGGING); } /// Creates a processor pipeline using the provided registry. @@ -149,12 +149,13 @@ private static MessageSink createSinksPipeline(final MessageSink /// @param registry the message processor registry /// @param config the application configuration /// @return a function that processes messages through the pipeline - private static Function createJsonProcessorPipeline( + private static java.util.function.UnaryOperator createJsonProcessorPipeline( final MessageProcessorRegistry registry, final AppConfig config ) { - final var builder = registry.jsonPipelineBuilder(); + final var builder = registry.pipeline(MessageFormat.JSON); for (final var name : config.processors()) builder.add(RegistryKey.json(name)); + builder.toSink(RegistryKey.of("jsonLogging", Map.class)); return builder.build(); } diff --git a/app/json/src/test/java/org/kpipe/AppIntegrationTest.java b/app/json/src/test/java/org/kpipe/AppIntegrationTest.java index d5ce37f..55e5cf6 100644 --- a/app/json/src/test/java/org/kpipe/AppIntegrationTest.java +++ b/app/json/src/test/java/org/kpipe/AppIntegrationTest.java @@ -12,14 +12,15 @@ import java.util.Map; import java.util.Properties; import java.util.concurrent.TimeUnit; -import org.apache.kafka.clients.consumer.ConsumerRecord; import org.apache.kafka.clients.producer.KafkaProducer; import org.apache.kafka.clients.producer.ProducerConfig; import org.apache.kafka.clients.producer.ProducerRecord; import org.apache.kafka.common.serialization.ByteArraySerializer; import org.junit.jupiter.api.Test; import org.kpipe.config.AppConfig; +import org.kpipe.processor.JsonMessageProcessor; import org.kpipe.registry.MessageSinkRegistry; +import org.kpipe.registry.RegistryKey; import org.kpipe.sink.MessageSink; import org.testcontainers.junit.jupiter.Container; import org.testcontainers.junit.jupiter.Testcontainers; @@ -55,7 +56,15 @@ void testJsonAppEndToEnd() throws Exception { try (final var app = new App(config)) { // Register the capturing sink - app.getSinkRegistry().register(MessageSinkRegistry.JSON_LOGGING, byte[].class, capturingSink); + app.getProcessorRegistry().sinkRegistry().register(RegistryKey.of("jsonLogging", Map.class), capturingSink); + // Verify registration + final var sink = app.getProcessorRegistry().sinkRegistry().get(RegistryKey.of("jsonLogging", Map.class)); + System.out.println("[DEBUG_LOG] Registered sink is capturing: " + (sink != null)); + + // Set up the processor registry + app.getProcessorRegistry().registerOperator(RegistryKey.json("addSource"), JsonMessageProcessor.addFieldOperator("source", "test-app")); + app.getProcessorRegistry().registerOperator(RegistryKey.json("markProcessed"), JsonMessageProcessor.addFieldOperator("status", "processed")); + app.getProcessorRegistry().registerOperator(RegistryKey.json("addTimestamp"), JsonMessageProcessor.addTimestampOperator("processedAt")); // Start the app in a virtual thread final var appThread = Thread.ofVirtual().start(() -> { @@ -82,18 +91,13 @@ void testJsonAppEndToEnd() throws Exception { final var received = capturingSink.getMessages(); assertFalse(received.isEmpty(), "Should have received at least one message"); - final var processedBytes = received.getFirst(); - final var dslJson = new DslJson>(); - final Map processedMap; - try (final var input = new java.io.ByteArrayInputStream(processedBytes)) { - processedMap = dslJson.deserialize(Map.class, input); - } + final var processedMap = received.getFirst(); assertEquals(1.0, ((Number) processedMap.get("id")).doubleValue()); assertEquals("Hello JSON", processedMap.get("message")); assertTrue(processedMap.containsKey("source"), "Should have 'source' field added by addSource"); - assertTrue(processedMap.containsKey("processed"), "Should have 'processed' field added by markProcessed"); - assertTrue(processedMap.containsKey("timestamp"), "Should have 'timestamp' field added by addTimestamp"); + assertTrue(processedMap.containsKey("status"), "Should have 'status' field added by markProcessed"); + assertTrue(processedMap.containsKey("processedAt"), "Should have 'processedAt' field added by addTimestamp"); } } @@ -114,16 +118,16 @@ private static void produceUntilConsumed( throw new AssertionError("Timed out waiting for consumer to receive produced message(s)"); } - private static class CapturingSink implements MessageSink { + private static class CapturingSink implements MessageSink> { - private final List messages = new ArrayList<>(); + private final List> messages = new ArrayList<>(); @Override - public synchronized void send(final ConsumerRecord record, final byte[] processedValue) { + public synchronized void accept(final Map processedValue) { messages.add(processedValue); } - public synchronized List getMessages() { + public synchronized List> getMessages() { return new ArrayList<>(messages); } diff --git a/lib/src/main/java/org/kpipe/consumer/KPipeConsumer.java b/lib/src/main/java/org/kpipe/consumer/KPipeConsumer.java index b781a99..467c948 100644 --- a/lib/src/main/java/org/kpipe/consumer/KPipeConsumer.java +++ b/lib/src/main/java/org/kpipe/consumer/KPipeConsumer.java @@ -84,7 +84,7 @@ public class KPipeConsumer implements AutoCloseable { private final Function processor; private final ExecutorService virtualThreadExecutor; private final Duration pollTimeout; - private final MessageSink messageSink; + private final MessageSink messageSink; private final AtomicReference consumerThread = new AtomicReference<>(); private final Duration waitForMessagesTimeout; private final Duration threadTerminationTimeout; @@ -150,7 +150,7 @@ private Builder() {} private Duration retryBackoff = Duration.ofMillis(500); private boolean enableMetrics = true; private boolean sequentialProcessing = false; - private MessageSink messageSink; + private MessageSink messageSink; private Duration waitForMessagesTimeout = AppConfig.DEFAULT_WAIT_FOR_MESSAGES; private Duration threadTerminationTimeout = AppConfig.DEFAULT_THREAD_TERMINATION; private Duration executorTerminationTimeout = AppConfig.DEFAULT_EXECUTOR_TERMINATION; @@ -188,6 +188,16 @@ public Builder withProcessor(final Function processor) { return this; } + /// Sets the pipeline to process each consumed message. + /// + /// This is equivalent to `withProcessor` but emphasizes the use of a typed pipeline. + /// + /// @param pipeline The pipeline to apply to message values + /// @return This builder instance for method chaining + public Builder withPipeline(final Function pipeline) { + return withProcessor(pipeline); + } + /// Sets the timeout duration for the consumer's poll operation. /// /// @param timeout The maximum time to wait for messages in each poll @@ -240,7 +250,7 @@ public Builder withSequentialProcessing(final boolean sequential) { /// /// @param messageSink The sink that handles successfully processed messages /// @return This builder instance for method chaining - public Builder withMessageSink(final MessageSink messageSink) { + public Builder withMessageSink(final MessageSink messageSink) { this.messageSink = messageSink; return this; } @@ -806,7 +816,7 @@ private boolean tryProcessRecord(final ConsumerRecord record) { try { final var processedValue = processor.apply(record.value()); - messageSink.send(record, processedValue); + messageSink.accept(processedValue); if (offsetManager != null) commandQueue.offer(new ConsumerCommand.MarkOffsetProcessed(record)); return true; } catch (final Exception e) { diff --git a/lib/src/main/java/org/kpipe/metrics/ProcessorMetricsReporter.java b/lib/src/main/java/org/kpipe/metrics/ProcessorMetricsReporter.java index 29e6543..b6f628e 100644 --- a/lib/src/main/java/org/kpipe/metrics/ProcessorMetricsReporter.java +++ b/lib/src/main/java/org/kpipe/metrics/ProcessorMetricsReporter.java @@ -104,7 +104,7 @@ private void logMetrics(String metrics) { /// @param registry the message processor registry /// @return a new reporter that can be further customized public static ProcessorMetricsReporter forRegistry(final MessageProcessorRegistry registry) { - return new ProcessorMetricsReporter(() -> registry.getAll().keySet(), registry::getMetrics, null); + return new ProcessorMetricsReporter(registry::getKeys, registry::getMetrics, null); } /// Creates a fluent builder-like starting point for selective processor metrics reporting. diff --git a/lib/src/main/java/org/kpipe/processor/AvroMessageProcessor.java b/lib/src/main/java/org/kpipe/processor/AvroMessageProcessor.java index b9a8b56..cef493a 100644 --- a/lib/src/main/java/org/kpipe/processor/AvroMessageProcessor.java +++ b/lib/src/main/java/org/kpipe/processor/AvroMessageProcessor.java @@ -1,6 +1,5 @@ package org.kpipe.processor; -import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.lang.System.Logger; import java.lang.System.Logger.Level; @@ -13,8 +12,6 @@ import org.apache.avro.generic.*; import org.apache.avro.io.BinaryDecoder; import org.apache.avro.io.BinaryEncoder; -import org.apache.avro.io.DecoderFactory; -import org.apache.avro.io.EncoderFactory; import org.apache.avro.util.Utf8; /// Provides utility functions for processing Avro messages. This class contains common message @@ -295,102 +292,6 @@ private static boolean isCompatibleWithSchema(Object value, Schema schema) { }; } - /// Applies a processing function to parsed Avro data with internal caching for - /// performance. - /// - /// ```java - /// byte[] result = AvroMessageProcessor.processAvro( - /// avroBytes, - /// schema, - /// record -> { - /// record.put("status", "processed"); - /// return record; - /// } - /// ); - /// ``` - /// - /// @param avroBytes The raw Avro data as a byte array - /// @param schema The Avro schema to use for parsing and serializing - /// @param processor Function to transform the parsed Avro record - /// @return Serialized Avro bytes after processing - public static byte[] processAvro( - final byte[] avroBytes, - final Schema schema, - final Function processor - ) { - return processAvro(avroBytes, 0, schema, processor); - } - - /// Applies a processing function to parsed Avro data, optionally skipping a prefix. - /// - /// This method is particularly useful for handling Avro data with custom headers or - /// magic bytes without needing to copy the byte array first. - /// - /// ```java - /// // Skip 5 magic bytes and process - /// byte[] result = AvroMessageProcessor.processAvro(avroBytes, 5, schema, record -> record); - /// ``` - /// - /// @param avroBytes The raw Avro data as a byte array - /// @param offset The number of bytes to skip at the start - /// @param schema The Avro schema to use for parsing and serializing - /// @param processor Function to transform the parsed Avro record - /// @return Serialized Avro bytes after processing - public static byte[] processAvro( - final byte[] avroBytes, - final int offset, - final Schema schema, - final Function processor - ) { - if (avroBytes == null || avroBytes.length <= offset) return EMPTY_AVRO; - - try { - // Create a reader and writer - final var reader = new GenericDatumReader(schema); - final var writer = new GenericDatumWriter(schema); - - // Deserialize using cached decoder - final var inputStream = new ByteArrayInputStream(avroBytes, offset, avroBytes.length - offset); - final var cachedDecoder = DECODER_CACHE.isBound() ? DECODER_CACHE.get() : null; - final var decoder = DecoderFactory.get().binaryDecoder(inputStream, cachedDecoder); - - final var record = reader.read(null, decoder); - - // Apply the processor function to make a copy with transformations - if (record == null) return EMPTY_AVRO; - final var processed = processor.apply(record); - - if (processed == null) return EMPTY_AVRO; - LOGGER.log(Level.DEBUG, "Processed record: %s".formatted(processed)); - - // Reuse output stream for better performance - final var outputStream = OUTPUT_STREAM_CACHE.isBound() - ? OUTPUT_STREAM_CACHE.get() - : new ByteArrayOutputStream(8192); - outputStream.reset(); - - // Reuse encoder for better performance - final var cachedEncoder = ENCODER_CACHE.isBound() ? ENCODER_CACHE.get() : null; - final var encoder = EncoderFactory.get().binaryEncoder(outputStream, cachedEncoder); - - writer.write(processed, encoder); - encoder.flush(); - - return outputStream.toByteArray(); - } catch (final Exception e) { - LOGGER.log(Level.WARNING, "Error processing Avro", e); - return EMPTY_AVRO; - } - } - - /// Wraps a callable in a scope with all Avro caches bound. - /// - /// This is used internally by the pipeline builder to ensure optimal performance - /// for high-throughput processing. - /// - /// @param The return type of the callable - /// @param operation The operation to perform within the scope - /// @return The result of the operation public static T inScopedCaches(final ScopedValue.CallableOp operation) { try { return ScopedValue.where(OUTPUT_STREAM_CACHE, new ByteArrayOutputStream(8192)) @@ -399,7 +300,7 @@ public static T inScopedCaches(final ScopedValue.CallableOp op .where(SCHEMA_PARSER, new Schema.Parser()) .call(operation); } catch (final Exception e) { - throw new RuntimeException("Error executing in scoped caches", e); + throw new RuntimeException("Error executing in Avro scoped caches", e); } } diff --git a/lib/src/main/java/org/kpipe/processor/JsonMessageProcessor.java b/lib/src/main/java/org/kpipe/processor/JsonMessageProcessor.java index d66e55a..f09c403 100644 --- a/lib/src/main/java/org/kpipe/processor/JsonMessageProcessor.java +++ b/lib/src/main/java/org/kpipe/processor/JsonMessageProcessor.java @@ -1,10 +1,8 @@ package org.kpipe.processor; import com.dslplatform.json.DslJson; -import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.lang.System.Logger; -import java.lang.System.Logger.Level; import java.nio.charset.StandardCharsets; import java.util.Map; import java.util.function.Function; @@ -121,48 +119,6 @@ public static UnaryOperator> mergeWithOperator(final Map { - /// map.put("status", "processed"); - /// return map; - /// } - /// ); - /// ``` - /// - /// @param jsonBytes The raw JSON data as a byte array - /// @param processor Function to transform the parsed JSON object - /// @return Serialized JSON bytes after processing - public static byte[] processJson( - final byte[] jsonBytes, - final Function, Map> processor - ) { - if (jsonBytes == null || jsonBytes.length == 0) return EMPTY_JSON; - try (final var input = new ByteArrayInputStream(jsonBytes)) { - final var output = OUTPUT_STREAM_CACHE.isBound() ? OUTPUT_STREAM_CACHE.get() : new ByteArrayOutputStream(); - output.reset(); - - final var parsed = DSL_JSON.deserialize(Map.class, input); - if (parsed == null) return EMPTY_JSON; - @SuppressWarnings("unchecked") - final var processed = processor.apply(parsed); - if (processed == null) return EMPTY_JSON; - DSL_JSON.serialize(processed, output); - return output.toByteArray(); - } catch (final Exception e) { - LOGGER.log(Level.WARNING, "Error processing JSON", e); - return EMPTY_JSON; - } - } - - /// Wraps a callable in a scope with JSON caches bound. - /// - /// @param The return type - /// @param operation The operation to perform - /// @return The result of the operation public static T inScopedCaches(final ScopedValue.CallableOp operation) { try { return ScopedValue.where(OUTPUT_STREAM_CACHE, new ByteArrayOutputStream(8192)).call(operation); diff --git a/lib/src/main/java/org/kpipe/registry/AvroFormat.java b/lib/src/main/java/org/kpipe/registry/AvroFormat.java index 6ae3588..62b3a60 100644 --- a/lib/src/main/java/org/kpipe/registry/AvroFormat.java +++ b/lib/src/main/java/org/kpipe/registry/AvroFormat.java @@ -30,6 +30,7 @@ public final class AvroFormat implements MessageFormat { private final Map schemas = new ConcurrentHashMap<>(); private final Function schemaReader; + private String defaultSchemaKey; /// Constructs a new AvroFormat with the specified schema reader function. /// @@ -38,6 +39,15 @@ public AvroFormat(final Function schemaReader) { this.schemaReader = schemaReader; } + /// Sets the default schema key to use for deserialization. + /// + /// @param schemaKey The schema key + /// @return This AvroFormat instance + public AvroFormat withDefaultSchema(String schemaKey) { + this.defaultSchemaKey = schemaKey; + return this; + } + /// Returns an unmodifiable view of all schemas registered with this format. /// /// @return Map of schema keys to their schema information @@ -106,12 +116,26 @@ public byte[] serialize(final GenericRecord data) { } } - /// Deserialization is not supported without schema context. + /// Deserializes the given byte array to an Avro GenericRecord. /// /// @param data the serialized byte array - /// @return nothing; always throws UnsupportedOperationException + /// @return the deserialized record @Override public GenericRecord deserialize(final byte[] data) { - throw new UnsupportedOperationException("Avro deserialization requires a schema context. Use specialized methods."); + if (data == null || data.length == 0) return null; + if (defaultSchemaKey == null) { + throw new UnsupportedOperationException( + "Avro deserialization requires a default schema key. Use withDefaultSchema()." + ); + } + final var schema = AvroMessageProcessor.getSchema(defaultSchemaKey); + if (schema == null) { + throw new IllegalArgumentException("No schema found for key: " + defaultSchemaKey); + } + return AvroMessageProcessor.inScopedCaches(() -> { + final var datumReader = new org.apache.avro.generic.GenericDatumReader(schema); + final var decoder = org.apache.avro.io.DecoderFactory.get().binaryDecoder(data, null); + return datumReader.read(null, decoder); + }); } } diff --git a/lib/src/main/java/org/kpipe/registry/JsonFormat.java b/lib/src/main/java/org/kpipe/registry/JsonFormat.java index 6ffe8f4..c954752 100644 --- a/lib/src/main/java/org/kpipe/registry/JsonFormat.java +++ b/lib/src/main/java/org/kpipe/registry/JsonFormat.java @@ -7,6 +7,7 @@ import java.util.*; import java.util.concurrent.ConcurrentHashMap; import java.util.function.Predicate; +import org.kpipe.processor.JsonMessageProcessor; /// JSON implementation of MessageFormat for KPipe. /// @@ -81,10 +82,15 @@ public List findSchemas(final Predicate predicate) { /// @return the serialized byte array @Override public byte[] serialize(final Map data) { - try (final var output = new ByteArrayOutputStream()) { - DSL_JSON.serialize(data, output); - return output.toByteArray(); - } catch (final IOException e) { + if (data == null) return null; + try { + return JsonMessageProcessor.inScopedCaches(() -> { + try (final var output = new ByteArrayOutputStream()) { + DSL_JSON.serialize(data, output); + return output.toByteArray(); + } + }); + } catch (final Exception e) { throw new RuntimeException("Failed to serialize JSON", e); } } @@ -96,6 +102,7 @@ public byte[] serialize(final Map data) { @Override @SuppressWarnings("unchecked") public Map deserialize(final byte[] data) { + if (data == null || data.length == 0) return null; try (final var input = new ByteArrayInputStream(data)) { return (Map) DSL_JSON.deserialize(Map.class, input); } catch (final IOException e) { diff --git a/lib/src/main/java/org/kpipe/registry/MessagePipeline.java b/lib/src/main/java/org/kpipe/registry/MessagePipeline.java new file mode 100644 index 0000000..9902f33 --- /dev/null +++ b/lib/src/main/java/org/kpipe/registry/MessagePipeline.java @@ -0,0 +1,48 @@ +package org.kpipe.registry; + +import java.util.function.UnaryOperator; + +/// A unified pipeline interface that encapsulates the lifecycle: +/// byte[] (Kafka) -> T (Deserialized Object) -> T (Processed Object) -> byte[] (Kafka). +/// +/// @param The type of the object in the pipeline. +public interface MessagePipeline extends UnaryOperator { + /// Deserializes the raw byte array into a typed object. + /// + /// @param data The raw data from Kafka. + /// @return The deserialized object. + T deserialize(byte[] data); + + /// Serializes the typed object back into a byte array. + /// + /// @param data The processed object. + /// @return The serialized data to be sent to Kafka. + byte[] serialize(T data); + + /// Applies the chain of transformations to the typed object. + /// + /// @param data The deserialized object. + /// @return The processed object. + T process(T data); + + /// Implementation of UnaryOperator.apply that executes the full pipeline lifecycle. + /// + /// @param data The input bytes. + /// @return The output bytes after processing. + @Override + default byte[] apply(byte[] data) { + try { + final var deserialized = deserialize(data); + if (deserialized == null) { + return null; + } + final var processed = process(deserialized); + if (processed == null) { + return null; + } + return serialize(processed); + } catch (final Exception e) { + return null; + } + } +} diff --git a/lib/src/main/java/org/kpipe/registry/MessageProcessorRegistry.java b/lib/src/main/java/org/kpipe/registry/MessageProcessorRegistry.java index ff7c047..0175314 100644 --- a/lib/src/main/java/org/kpipe/registry/MessageProcessorRegistry.java +++ b/lib/src/main/java/org/kpipe/registry/MessageProcessorRegistry.java @@ -1,17 +1,14 @@ package org.kpipe.registry; import java.lang.System.Logger; -import java.lang.System.Logger.Level; -import java.time.Duration; -import java.util.ArrayList; -import java.util.List; +import java.util.Collections; import java.util.Map; import java.util.Objects; +import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.function.*; -import org.apache.avro.generic.GenericRecord; -import org.kpipe.processor.AvroMessageProcessor; import org.kpipe.processor.JsonMessageProcessor; +import org.kpipe.sink.MessageSink; /// Registry for managing and composing message processors in KPipe. /// @@ -36,8 +33,7 @@ public class MessageProcessorRegistry { /// Pre-defined key for marking JSON messages as processed. public static final RegistryKey> JSON_MARK_PROCESSED = RegistryKey.json("markProcessed"); - private final ConcurrentHashMap, RegistryEntry> registry = new ConcurrentHashMap<>(); - private final byte[] defaultErrorValue; + private final ConcurrentHashMap, RegistryEntry> registryMap = new ConcurrentHashMap<>(); private final String sourceAppName; private final MessageFormat messageFormat; @@ -45,217 +41,71 @@ public class MessageProcessorRegistry { private static class RegistryEntry { final T value; - long invocationCount = 0; - long errorCount = 0; - long totalProcessingTimeMs = 0; + final java.util.concurrent.atomic.LongAdder invocationCount = new java.util.concurrent.atomic.LongAdder(); + final java.util.concurrent.atomic.LongAdder errorCount = new java.util.concurrent.atomic.LongAdder(); + final java.util.concurrent.atomic.LongAdder totalProcessingTimeNs = new java.util.concurrent.atomic.LongAdder(); RegistryEntry(final T value) { this.value = value; } - @SuppressWarnings("unchecked") - public byte[] execute(final byte[] input) { - if (!(value instanceof Function)) throw new UnsupportedOperationException( - "Entry value is not a Function" - ); - final var processor = (Function) value; - - final Supplier counterIncrement = () -> invocationCount++; - final Supplier errorIncrement = () -> errorCount++; - final Consumer timeAccumulator = duration -> totalProcessingTimeMs += duration.toNanos(); - - final var timedExecution = RegistryFunctions.timedExecution( - counterIncrement, - errorIncrement, - timeAccumulator - ); - - return timedExecution.apply(input, processor); - } - } - - /// Creates a fluent builder for JSON pipelines. - /// - /// @return A new JsonPipelineBuilder - public JsonPipelineBuilder jsonPipelineBuilder() { - return new JsonPipelineBuilder(); - } - - /// Creates a fluent builder for Avro pipelines. - /// - /// @param schemaKey The key of the schema to use - /// @return A new AvroPipelineBuilder - public AvroPipelineBuilder avroPipelineBuilder(final String schemaKey) { - return new AvroPipelineBuilder(schemaKey); - } - - /// Creates a fluent builder for Avro pipelines with a custom schema offset. - /// - /// @param schemaKey The key of the schema to use - /// @param offset The schema offset in the byte array - /// @return A new AvroPipelineBuilder - public AvroPipelineBuilder avroPipelineBuilder(final String schemaKey, final int offset) { - return new AvroPipelineBuilder(schemaKey, offset); - } - - /// Creates a fluent builder for POJO pipelines. - /// - /// @param The POJO type - /// @param clazz The class of the POJO - /// @return A new PojoPipelineBuilder - public PojoPipelineBuilder pojoPipelineBuilder(final Class clazz) { - return new PojoPipelineBuilder<>(clazz); - } - - /// A fluent builder for creating type-safe JSON processing pipelines. - /// Fluent builder for creating type-safe JSON processing pipelines. - public class JsonPipelineBuilder { - - private final List>> operators = new ArrayList<>(); - - /// Constructs a new JsonPipelineBuilder. - public JsonPipelineBuilder() {} - - /// Adds a JSON operator to the pipeline. - /// - /// @param operator the operator to add - /// @return this builder instance - public JsonPipelineBuilder add(final UnaryOperator> operator) { - operators.add(operator); - return this; + public Map getMetrics() { + final long count = invocationCount.sum(); + final long errors = errorCount.sum(); + final long timeNs = totalProcessingTimeNs.sum(); + final var metrics = new ConcurrentHashMap(); + metrics.put("invocationCount", count); + metrics.put("errorCount", errors); + metrics.put("averageProcessingTimeMs", count > 0 ? (timeNs / count) / 1_000_000.0 : 0); + return metrics; } - /// Adds a pre-registered JSON operator by key. - /// - /// @param key the registry key for the operator - /// @return this builder instance - public JsonPipelineBuilder add(final RegistryKey> key) { - final var operator = getOperator(key); - if (operator != null) operators.add(operator); - return this; + public UnaryOperator wrapOperator(final UnaryOperator operator) { + return input -> { + final var start = System.nanoTime(); + try { + final var result = operator.apply(input); + invocationCount.increment(); + totalProcessingTimeNs.add(System.nanoTime() - start); + return result; + } catch (final Exception e) { + errorCount.increment(); + throw e; + } + }; } - /// Builds the pipeline as a function from byte[] to byte[]. - /// - /// @return the composed pipeline function - public Function build() { - final var finalCombined = operators.stream().reduce(obj -> obj, (acc, op) -> t -> op.apply(acc.apply(t))); - return bytes -> JsonMessageProcessor.inScopedCaches(() -> JsonMessageProcessor.processJson(bytes, finalCombined)); + public MessageSink wrapSink(final MessageSink sink) { + return input -> { + final var start = System.nanoTime(); + try { + sink.accept(input); + invocationCount.increment(); + totalProcessingTimeNs.add(System.nanoTime() - start); + } catch (final Exception e) { + errorCount.increment(); + throw e; + } + }; } } - /// A fluent builder for creating type-safe Avro processing pipelines. - /// Fluent builder for creating type-safe Avro processing pipelines. - public class AvroPipelineBuilder { - - private final String schemaKey; - private final int offset; - private final List> operators = new ArrayList<>(); - - /// Constructs an AvroPipelineBuilder with the given schema key and default offset. - /// - /// @param schemaKey the schema key to use - private AvroPipelineBuilder(final String schemaKey) { - this(schemaKey, 5); // Default Confluent offset - } - - /// Constructs an AvroPipelineBuilder with the given schema key and offset. - /// - /// @param schemaKey the schema key to use - /// @param offset the offset in the byte array - private AvroPipelineBuilder(final String schemaKey, final int offset) { - this.schemaKey = schemaKey; - this.offset = offset; - } - - /// Adds an Avro operator to the pipeline. - /// - /// @param operator the operator to add - /// @return this builder instance - public AvroPipelineBuilder add(final UnaryOperator operator) { - operators.add(operator); - return this; - } - - /// Adds a pre-registered Avro operator by key. - /// - /// @param key the registry key for the operator - /// @return this builder instance - public AvroPipelineBuilder add(final RegistryKey key) { - final var operator = getOperator(key); - if (operator != null) operators.add(operator); - return this; - } - - /// Builds the pipeline as a function from byte[] to byte[]. - /// - /// @return the composed pipeline function - public Function build() { - final var schema = AvroMessageProcessor.getSchema(schemaKey); - if (schema == null) throw new IllegalArgumentException("Schema not found: " + schemaKey); + private final MessageSinkRegistry sinkRegistry = new MessageSinkRegistry(); - final var finalCombined = operators.stream().reduce(record -> record, (acc, op) -> t -> op.apply(acc.apply(t))); - return bytes -> - AvroMessageProcessor.inScopedCaches(() -> - AvroMessageProcessor.processAvro(bytes, offset, schema, finalCombined) - ); - } + /// Returns the sink registry associated with this processor registry. + /// + /// @return The MessageSinkRegistry instance + public MessageSinkRegistry sinkRegistry() { + return sinkRegistry; } - /// A fluent builder for creating type-safe POJO processing pipelines. + /// Creates a fluent builder for typed pipelines. /// - /// @param The POJO type - public class PojoPipelineBuilder { - - private final Class clazz; - private final List> operators = new ArrayList<>(); - - /// Constructs a new PojoPipelineBuilder for the specified class. - /// - /// @param clazz the POJO class - public PojoPipelineBuilder(final Class clazz) { - this.clazz = Objects.requireNonNull(clazz, "Class cannot be null"); - } - - /// Adds a POJO operator to the pipeline. - /// - /// @param operator the operator to add - /// @return this builder instance - public PojoPipelineBuilder add(final UnaryOperator operator) { - operators.add(operator); - return this; - } - - /// Adds a pre-registered POJO operator by key. - /// - /// @param key the registry key for the operator - /// @return this builder instance - public PojoPipelineBuilder add(final RegistryKey key) { - final var operator = getOperator(key); - if (operator != null) operators.add(operator); - return this; - } - - /// Builds the pipeline as a function from byte[] to byte[]. - /// - /// @return the composed pipeline function - public Function build() { - final var format = MessageFormat.pojo(clazz); - final var finalCombined = operators.stream().reduce(obj -> obj, (acc, op) -> t -> op.apply(acc.apply(t))); - - return bytes -> { - try { - if (bytes == null || bytes.length == 0) return bytes; - final var deserialized = format.deserialize(bytes); - if (deserialized == null) return bytes; - final var processed = finalCombined.apply(deserialized); - return (processed == null) ? bytes : format.serialize(processed); - } catch (final Exception e) { - LOGGER.log(Level.WARNING, "Error in POJO pipeline execution", e); - return defaultErrorValue; - } - }; - } + /// @param format The message format for serialization/deserialization. + /// @param The type of the object in the pipeline. + /// @return A new TypedPipelineBuilder. + public TypedPipelineBuilder pipeline(final MessageFormat format) { + return new TypedPipelineBuilder<>(format, this); } /// Registers a typed operator using a type-safe RegistryKey. @@ -264,7 +114,7 @@ public Function build() { /// @param key The type-safe key to register under /// @param operator The operator to register public void registerOperator(final RegistryKey key, final UnaryOperator operator) { - registry.put(key, new RegistryEntry<>(operator)); + registryMap.put(key, new RegistryEntry<>(operator)); } /// Registers all constants of an Enum that implements UnaryOperator. @@ -283,6 +133,29 @@ public & UnaryOperator> void registerEnum(final Class The type of data the operator processes + /// @param key The type-safe key to retrieve + /// @return The registered operator, or null if not found + @SuppressWarnings("unchecked") + public UnaryOperator wrapOperator(final RegistryKey key, final UnaryOperator operator) { + final var entry = (RegistryEntry>) registryMap.get(key); + if (entry == null) return operator; + return entry.wrapOperator(entry.value); + } + + @SuppressWarnings("unchecked") + public MessageSink wrapSink(final RegistryKey key, final MessageSink sink) { + final var entry = (RegistryEntry>) registryMap.get(key); + if (entry == null) { + final var registeredSink = (MessageSink) sinkRegistry.get(key); + if (registeredSink != null) return registeredSink; + return sink; + } + return entry.wrapSink(entry.value); + } + /// Retrieves a typed operator using a type-safe RegistryKey. /// /// @param The type of data the operator processes @@ -290,8 +163,9 @@ public & UnaryOperator> void registerEnum(final Class UnaryOperator getOperator(final RegistryKey key) { - final var entry = (RegistryEntry>) registry.get(key); - return entry != null ? entry.value : null; + final var entry = (RegistryEntry>) registryMap.get(key); + if (entry == null) return null; + return entry.value; } /// Creates a new registry with JSON as the default message format. @@ -306,22 +180,8 @@ public MessageProcessorRegistry(final String sourceAppName) { /// @param sourceAppName Application name to use as source identifier /// @param messageFormat Message format to use (JSON, AVRO, PROTOBUF) public MessageProcessorRegistry(final String sourceAppName, final MessageFormat messageFormat) { - this(sourceAppName, messageFormat, "{}".getBytes()); - } - - /// Creates a new registry with the specified message format and default error value. - /// - /// @param sourceAppName Application name to use as source identifier - /// @param messageFormat Message format to use (JSON, AVRO, PROTOBUF) - /// @param defaultErrorValue Value to return when processors throw exceptions - public MessageProcessorRegistry( - final String sourceAppName, - final MessageFormat messageFormat, - final byte[] defaultErrorValue - ) { this.sourceAppName = Objects.requireNonNull(sourceAppName, "Source app name cannot be null"); this.messageFormat = Objects.requireNonNull(messageFormat, "Message format cannot be null"); - this.defaultErrorValue = Objects.requireNonNull(defaultErrorValue, "Default error value cannot be null"); registerDefaultProcessors(); } @@ -336,57 +196,14 @@ private void registerDefaultProcessors() { } } - /// Registers schema-specific processors for a given schema key. This is primarily used for - /// AVRO and PROTOBUF formats. - /// - /// @param schemaKey The schema key to register processors for - public void registerSchemaProcessors(String schemaKey) { - // Register schema-specific operators for optimized pipelines - if (messageFormat == MessageFormat.AVRO) { - final var schema = AvroMessageProcessor.getSchema(schemaKey); - if (schema != null) { - registerOperator( - RegistryKey.avro("addSource_" + schemaKey), - AvroMessageProcessor.addFieldOperator("source", sourceAppName) - ); - registerOperator( - RegistryKey.avro("addTimestamp_" + schemaKey), - AvroMessageProcessor.addTimestampOperator("timestamp") - ); - registerOperator( - RegistryKey.avro("markProcessed_" + schemaKey), - AvroMessageProcessor.addFieldOperator("processed", "true") - ); - } - } - } - - /// Registers a processor function with a type-safe key. - /// - /// @param key The type-safe key for the processor - /// @param processor The function that processes byte arrays - /// @throws NullPointerException if key or processor is null - public void register(final RegistryKey> key, final Function processor) { - Objects.requireNonNull(key, "Processor key cannot be null"); - Objects.requireNonNull(processor, "Processor function cannot be null"); - - final var entry = new RegistryEntry<>(withErrorHandling(processor, defaultErrorValue)); - registry.put(key, entry); - } - /// Adds a schema and registers its processors. /// /// @param key The schema key /// @param fullyQualifiedName The fully qualified name of the schema /// @param location The schema location or content public void addSchema(final String key, final String fullyQualifiedName, final String location) { - if (messageFormat == MessageFormat.AVRO) { - // Register the schema with MessageFormat - messageFormat.addSchema(key, fullyQualifiedName, location); - - // Register schema-specific processors - registerSchemaProcessors(key); - } + // Register the schema with MessageFormat + messageFormat.addSchema(key, fullyQualifiedName, location); } /// Unregisters a processor. @@ -394,111 +211,19 @@ public void addSchema(final String key, final String fullyQualifiedName, final S /// @param key The key of the processor to remove /// @return true if the processor was removed, false if it wasn't found public boolean unregister(final RegistryKey key) { - return registry.remove(key) != null; + return registryMap.remove(key) != null; } /// Clears all processors from the registry. public void clear() { - registry.clear(); - } - - /// Gets a processor by key. If no processor is found, returns the identity function. - /// - /// @param key The key of the processor to retrieve - /// @return The processor function, or identity function if not found - @SuppressWarnings("unchecked") - public Function get(final RegistryKey> key) { - final var entry = (RegistryEntry>) registry.get(key); - return entry != null ? entry::execute : Function.identity(); - } - - /// Adds error handling to a processor. - /// - /// When the processor throws an exception, this wrapper catches it, logs the error, and returns - /// the provided default value instead. - /// - /// Example: - /// - /// ```java - /// // Create a processor that might fail - /// final var riskyProcessor = bytes -> { - /// // This might throw exceptions - /// try (final var input = new ByteArrayInputStream(bytes); - /// final var output = new ByteArrayOutputStream()) { - /// final var parsed = DSL_JSON.deserialize(Map.class, input); - /// // Do something with parsed JSON - /// DSL_JSON.serialize(parsed, output); - /// return output.toByteArray(); - /// } catch (Exception e) { - /// throw new RuntimeException("Failed to process JSON", e); - /// } - /// }; - /// - /// // Wrap with error handling - /// final var defaultValue = "{\"error\":true}".getBytes(); - /// final var safeProcessor = MessageProcessorRegistry.withErrorHandling(riskyProcessor, - /// defaultValue); - /// - /// // Now it won't throw exceptions - /// final var result = safeProcessor.apply(inputBytes); - /// ``` - /// - /// @param processor The processor to wrap with error handling - /// @param defaultValue The default value to return on error - /// @return A function that handles errors during processing - public static Function withErrorHandling( - final Function processor, - final byte[] defaultValue - ) { - return RegistryFunctions.withFunctionErrorHandling(processor, defaultValue, LOGGER); - } - - /// Creates a conditional processor that applies one of two processors based on a condition. - /// - /// Example: - /// - /// ```java - /// // Create a condition that checks if input is empty - /// final var isEmpty = (Predicate) bytes -> bytes == null || bytes.length == 0; - /// - /// // Processors for each case - /// final var emptyHandler = (Function) bytes -> "{\"empty\":true}".getBytes(); - /// final var normalProcessor = registry.get(RegistryKey.of("parseJson", Function.class)); - /// - /// // Create conditional processor - /// final var conditionalProcessor = - /// MessageProcessorRegistry.when(isEmpty, emptyHandler, normalProcessor); - /// - /// // Use the conditional processor - /// final var result = conditionalProcessor.apply(inputBytes); - /// ``` - /// - /// @param condition Predicate to evaluate messages - /// @param ifTrue Processor to use when the condition is true - /// @param ifFalse Processor to use when the condition is false - /// @return A function that conditionally processes messages - /// @throws NullPointerException if any argument is null - public static Function when( - final Predicate condition, - final Function ifTrue, - final Function ifFalse - ) { - Objects.requireNonNull(condition, "Condition cannot be null"); - Objects.requireNonNull(ifTrue, "True processor cannot be null"); - Objects.requireNonNull(ifFalse, "False processor cannot be null"); - - return bytes -> condition.test(bytes) ? ifTrue.apply(bytes) : ifFalse.apply(bytes); + registryMap.clear(); } - /// Returns all registered processors. + /// Returns all registered processor keys. /// - /// @return Unmodifiable map of all processor keys and functions - public Map, Function> getAll() { - return RegistryFunctions.createUnmodifiableView(registry, entry -> { - final var regEntry = (RegistryEntry) entry; - if (regEntry.value instanceof Function) return regEntry::execute; - return null; - }); + /// @return Unmodifiable set of all registered processor keys. + public Set> getKeys() { + return Collections.unmodifiableSet(registryMap.keySet()); } /// Gets metrics for a processor. @@ -506,8 +231,8 @@ public Map, Function> getAll() { /// @param key The processor key /// @return Map containing metrics or empty map if processor not found public Map getMetrics(final RegistryKey key) { - final var entry = registry.get(key); + final var entry = registryMap.get(key); if (entry == null) return Map.of(); - return RegistryFunctions.createMetrics(entry.invocationCount, entry.errorCount, entry.totalProcessingTimeMs); + return entry.getMetrics(); } } diff --git a/lib/src/main/java/org/kpipe/registry/MessageSinkRegistry.java b/lib/src/main/java/org/kpipe/registry/MessageSinkRegistry.java index 362fdf3..9cb9673 100644 --- a/lib/src/main/java/org/kpipe/registry/MessageSinkRegistry.java +++ b/lib/src/main/java/org/kpipe/registry/MessageSinkRegistry.java @@ -2,13 +2,10 @@ import java.lang.System.Logger; import java.lang.System.Logger.Level; -import java.time.Duration; import java.util.Map; import java.util.Objects; import java.util.concurrent.ConcurrentHashMap; -import java.util.function.Consumer; -import java.util.function.Supplier; -import org.apache.kafka.clients.consumer.ConsumerRecord; +import java.util.concurrent.atomic.LongAdder; import org.kpipe.sink.AvroConsoleSink; import org.kpipe.sink.JsonConsoleSink; import org.kpipe.sink.MessageSink; @@ -30,83 +27,66 @@ public class MessageSinkRegistry { private static final Logger LOGGER = System.getLogger(MessageSinkRegistry.class.getName()); - private final ConcurrentHashMap, SinkEntry> registry = new ConcurrentHashMap<>(); + private final ConcurrentHashMap, SinkEntry> registry = new ConcurrentHashMap<>(); /// Pre-defined key for the JSON logging sink. public static final RegistryKey JSON_LOGGING = RegistryKey.of("jsonLogging", byte[].class); /// Pre-defined key for the Avro logging sink. public static final RegistryKey AVRO_LOGGING = RegistryKey.of("avroLogging", byte[].class); - private static class SinkEntry { + private static class SinkEntry { - final MessageSink sink; - final Class keyType; - final Class valueType; - long messageCount = 0; - long errorCount = 0; - long totalProcessingTimeMs = 0; + final MessageSink sink; + final Class valueType; + final LongAdder invocationCount = new LongAdder(); + final LongAdder errorCount = new LongAdder(); + final LongAdder totalProcessingTimeNs = new LongAdder(); - SinkEntry(final MessageSink sink, final Class keyType, final Class valueType) { + SinkEntry(final MessageSink sink, final Class valueType) { this.sink = sink; - this.keyType = keyType; this.valueType = valueType; } - public void send(final ConsumerRecord record, final V processedValue) { - final Supplier counterIncrement = () -> messageCount++; - final Supplier errorIncrement = () -> errorCount++; - final Consumer timeAccumulator = duration -> totalProcessingTimeMs += duration.toMillis(); - - final var timedExecution = RegistryFunctions.timedExecution( - counterIncrement, - errorIncrement, - timeAccumulator - ); - - timedExecution.apply( - processedValue, - (V value) -> { - sink.send(record, value); - return null; - } - ); + public Map getMetrics() { + final long count = invocationCount.sum(); + final long errors = errorCount.sum(); + final long timeNs = totalProcessingTimeNs.sum(); + final var metrics = new ConcurrentHashMap(); + metrics.put("invocationCount", count); + metrics.put("errorCount", errors); + metrics.put("averageProcessingTimeMs", count > 0 ? (timeNs / count) / 1_000_000.0 : 0); + return metrics; + } + + public void accept(final T processedValue) { + final var start = System.nanoTime(); + try { + sink.accept(processedValue); + invocationCount.increment(); + totalProcessingTimeNs.add(System.nanoTime() - start); + } catch (final Exception e) { + errorCount.increment(); + throw e; + } } } - /// Constructs a new `MessageSinkRegistry` object with a default logging sink. - /// - /// Example: - /// - /// ```java - /// // Create a new registry with the default logging sink - /// final var registry = new MessageSinkRegistry(); - /// ``` + /// Constructs a new `MessageSinkRegistry` object with default logging sinks. public MessageSinkRegistry() { - register(JSON_LOGGING, byte[].class, new JsonConsoleSink<>()); - register(AVRO_LOGGING, byte[].class, new AvroConsoleSink<>()); + register(JSON_LOGGING, new JsonConsoleSink<>()); + register(AVRO_LOGGING, new AvroConsoleSink<>()); } - /// Registers a new message sink with the specified key and types. - /// - /// Example: - /// - /// ```java - /// // Register a custom database sink with explicit types - /// final var dbKey = RegistryKey.json("database"); - /// registry.register(dbKey, String.class, new DatabaseSink<>()); - /// ``` + /// Registers a new message sink with the specified key. /// /// @param key The type-safe key for the sink - /// @param keyType The class representing the message key type /// @param sink The sink implementation to register - /// @param The type of message key - /// @param The type of message value - public void register(final RegistryKey key, final Class keyType, final MessageSink sink) { + /// @param The type of message value + public void register(final RegistryKey key, final MessageSink sink) { Objects.requireNonNull(key, "Sink key cannot be null"); Objects.requireNonNull(sink, "Sink implementation cannot be null"); - Objects.requireNonNull(keyType, "Key type cannot be null"); - final var entry = new SinkEntry<>(sink, keyType, key.type()); + final var entry = new SinkEntry<>(sink, key.type()); registry.put(key, entry); } @@ -119,66 +99,35 @@ public boolean unregister(final RegistryKey key) { } /// Removes all registered sinks. - /// - /// Example: - /// - /// ```java - /// // Clear the registry when shutting down - /// registry.clear(); - /// ``` public void clear() { registry.clear(); } - /// Retrieves a sink by key from the registry, verifying the expected types. + /// Retrieves a sink by key from the registry. /// - /// Example: - /// - /// ```java - /// // Get the logging sink with type verification - /// MessageSink> consoleSink = registry.get( - /// RegistryKey.json("logging"), String.class); - /// ``` - /// - /// @param the type of the message key - /// @param the type of the message value + /// @param the type of the message value /// @param key the type-safe key of the sink to retrieve - /// @param keyType the expected class of the message key /// @return the sink, or null if not found - /// @throws IllegalArgumentException if the registered types do not match the requested types @SuppressWarnings("unchecked") - public MessageSink get(final RegistryKey key, final Class keyType) { - final var entry = (SinkEntry) registry.get(key); + public MessageSink get(final RegistryKey key) { + final var entry = (SinkEntry) registry.get(key); if (entry == null) return null; - - if (!entry.keyType.isAssignableFrom(keyType)) { - throw new IllegalArgumentException( - "Key type mismatch for sink '" + - key.name() + - "'. Registered: " + - entry.keyType.getSimpleName() + - ", Requested: " + - keyType.getSimpleName() - ); - } - return entry::send; + return entry::accept; } - /// Creates a composite sink that sends messages to multiple sinks identified by keys. + /// Creates a composite sink that sends objects to multiple sinks identified by keys. /// - /// @param keyType The class representing the message key type /// @param sinkKeys Keys of sinks to include in the composite - /// @param The type of message key - /// @param The type of message value + /// @param The type of the processed object /// @return A composite sink that delegates to all specified sinks @SafeVarargs - public final MessageSink pipeline(final Class keyType, final RegistryKey... sinkKeys) { - return (record, processedValue) -> { + public final MessageSink pipeline(final RegistryKey... sinkKeys) { + return processedValue -> { for (final var key : sinkKeys) { - final var sink = this.get(key, keyType); + final var sink = this.get(key); if (sink != null) { try { - sink.send(record, processedValue); + sink.accept(processedValue); } catch (final Exception e) { LOGGER.log(Level.WARNING, "Error sending to sink: %s".formatted(key.name()), e); } @@ -191,17 +140,10 @@ public final MessageSink pipeline(final Class keyType, final Reg /// /// @return Unmodifiable map of all sink keys and their class names public Map, String> getAll() { - return RegistryFunctions.createUnmodifiableView( - registry, - entry -> { - final var sinkEntry = (SinkEntry) entry; - return "%s(%s, %s)".formatted( - sinkEntry.sink.getClass().getSimpleName(), - sinkEntry.keyType.getSimpleName(), - sinkEntry.valueType.getSimpleName() - ); - } - ); + return RegistryFunctions.createUnmodifiableView(registry, entry -> { + final var sinkEntry = (SinkEntry) entry; + return "%s(%s)".formatted(sinkEntry.sink.getClass().getSimpleName(), sinkEntry.valueType.getSimpleName()); + }); } /// Gets performance metrics for a specific sink. @@ -211,32 +153,15 @@ public Map, String> getAll() { public Map getMetrics(final RegistryKey key) { final var entry = registry.get(key); if (entry == null) return Map.of(); - return RegistryFunctions.createMetrics(entry.messageCount, entry.errorCount, entry.totalProcessingTimeMs); + return entry.getMetrics(); } /// Wraps a sink with error handling logic that suppresses exceptions. /// - /// Example: - /// - /// ```java - /// // Get a sink that might throw exceptions - /// final var unreliableKey = RegistryKey.json("unreliableSink"); - /// MessageSink> riskySink = registry.get(unreliableKey, - /// String.class); - /// - /// // Wrap it with error handling - /// MessageSink> safeSink = - /// MessageSinkRegistry.withErrorHandling(riskySink); - /// - /// // Safely send messages - /// safeSink.send(record, processedValue); // Won't throw exceptions - /// ``` - /// /// @param sink The sink to wrap with error handling - /// @param The type of message key - /// @param The type of message value + /// @param The type of message value /// @return A sink that handles errors during processing - public static MessageSink withErrorHandling(final MessageSink sink) { - return RegistryFunctions.withConsumerErrorHandling(sink::send, LOGGER)::accept; + public static MessageSink withErrorHandling(final MessageSink sink) { + return RegistryFunctions.withConsumerErrorHandling(sink::accept, LOGGER)::accept; } -} // end MessageSinkRegistry +} diff --git a/lib/src/main/java/org/kpipe/registry/RegistryFunctions.java b/lib/src/main/java/org/kpipe/registry/RegistryFunctions.java index 8f3da94..f560157 100644 --- a/lib/src/main/java/org/kpipe/registry/RegistryFunctions.java +++ b/lib/src/main/java/org/kpipe/registry/RegistryFunctions.java @@ -71,12 +71,29 @@ public static Function withFunctionErrorHandling( /// Creates a consumer wrapper that suppresses exceptions thrown by the wrapped consumer, /// logging them instead of propagating. /// + /// @param the type of the input to the consumer + /// @param operation the consumer to wrap with error handling + /// @param logger the logger instance to use for logging exceptions + /// @return a consumer that executes the operation but suppresses and logs any exceptions + public static Consumer withConsumerErrorHandling(final Consumer operation, final Logger logger) { + return input -> { + try { + operation.accept(input); + } catch (final Exception e) { + logger.log(Level.WARNING, "Error in operation", e); + } + }; + } + + /// Creates a bi-consumer wrapper that suppresses exceptions thrown by the wrapped consumer, + /// logging them instead of propagating. + /// /// @param the type of the first input to the consumer /// @param the type of the second input to the consumer /// @param operation the consumer to wrap with error handling /// @param logger the logger instance to use for logging exceptions /// @return a consumer that executes the operation but suppresses and logs any exceptions - public static BiConsumer withConsumerErrorHandling( + public static BiConsumer withBiConsumerErrorHandling( final BiConsumer operation, final Logger logger ) { diff --git a/lib/src/main/java/org/kpipe/registry/RegistryKey.java b/lib/src/main/java/org/kpipe/registry/RegistryKey.java index 0e7257b..c5028a3 100644 --- a/lib/src/main/java/org/kpipe/registry/RegistryKey.java +++ b/lib/src/main/java/org/kpipe/registry/RegistryKey.java @@ -45,13 +45,13 @@ public static RegistryKey avro(final String name) { return of(name, GenericRecord.class); } - /// Convenience factory for POJO/Record keys. + /// Convenience factory for sink keys. /// - /// @param The POJO type - /// @param name The unique name of the registry entry - /// @param type The POJO class - /// @return A new RegistryKey for the specified POJO type - public static RegistryKey pojo(final String name, final Class type) { + /// @param The type of the processed object the sink accepts + /// @param name The unique name of the sink + /// @param type The class representing the type + /// @return A new type-safe RegistryKey for a sink + public static RegistryKey sink(final String name, final Class type) { return new RegistryKey<>(name, type); } } diff --git a/lib/src/main/java/org/kpipe/registry/TypedPipelineBuilder.java b/lib/src/main/java/org/kpipe/registry/TypedPipelineBuilder.java new file mode 100644 index 0000000..c2b70a2 --- /dev/null +++ b/lib/src/main/java/org/kpipe/registry/TypedPipelineBuilder.java @@ -0,0 +1,163 @@ +package org.kpipe.registry; + +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; +import java.util.function.Predicate; +import java.util.function.UnaryOperator; +import org.kpipe.sink.MessageSink; + +/// A generic builder for creating type-safe [MessagePipeline] instances. +/// +/// @param The type of the object in the pipeline. +public final class TypedPipelineBuilder { + + private final MessageFormat format; + private final List> operators = new ArrayList<>(); + private final MessageProcessorRegistry registry; + private MessageSink sink; + private int skipBytes = 0; + + /// Creates a new TypedPipelineBuilder. + /// + /// @param format The message format for serialization/deserialization. + /// @param registry The registry for looking up operators and sinks. + public TypedPipelineBuilder(MessageFormat format, MessageProcessorRegistry registry) { + this.format = Objects.requireNonNull(format, "format cannot be null"); + this.registry = Objects.requireNonNull(registry, "registry cannot be null"); + } + + /// Configures the pipeline to skip a certain number of bytes before deserialization. + /// + /// Useful for wire formats that include magic bytes or schema IDs (e.g. Confluent Magic Bytes). + /// + /// @param skipBytes The number of bytes to skip. + /// @return This builder. + public TypedPipelineBuilder skipBytes(int skipBytes) { + this.skipBytes = skipBytes; + return this; + } + + /// Adds a transformation operator to the pipeline. + /// + /// @param operator The operator to add. + /// @return This builder. + public TypedPipelineBuilder add(UnaryOperator operator) { + operators.add(Objects.requireNonNull(operator, "operator cannot be null")); + return this; + } + + /// Adds a transformation operator from the registry. + /// + /// @param key The registry key for the operator. + /// @return This builder. + public TypedPipelineBuilder add(RegistryKey key) { + final var operator = registry.getOperator(key); + return add(registry.wrapOperator(key, operator != null ? operator : t -> t)); + } + + /// Adds multiple transformation operators from the registry. + /// + /// @param keys The registry keys for the operators. + /// @return This builder. + @SafeVarargs + public final TypedPipelineBuilder add(RegistryKey... keys) { + for (final var key : keys) { + add(key); + } + return this; + } + + /// Adds a conditional operator to the pipeline. + /// + /// @param condition The predicate to evaluate. + /// @param ifTrue The operator to apply if the condition is true. + /// @param ifFalse The operator to apply if the condition is false. + /// @return This builder. + public TypedPipelineBuilder when(Predicate condition, UnaryOperator ifTrue, UnaryOperator ifFalse) { + Objects.requireNonNull(condition, "condition cannot be null"); + Objects.requireNonNull(ifTrue, "ifTrue operator cannot be null"); + Objects.requireNonNull(ifFalse, "ifFalse operator cannot be null"); + + return add(obj -> condition.test(obj) ? ifTrue.apply(obj) : ifFalse.apply(obj)); + } + + /// Sets a terminal sink for the pipeline. + /// + /// @param sink The sink to add. + /// @return This builder. + public TypedPipelineBuilder toSink(MessageSink sink) { + this.sink = Objects.requireNonNull(sink, "sink cannot be null"); + return this; + } + + /// Sets a terminal sink for the pipeline from the registry. + /// + /// @param key The registry key for the sink. + /// @return This builder. + public TypedPipelineBuilder toSink(RegistryKey key) { + return toSink(registry.wrapSink(key, t -> {})); + } + + /// Composes a sequence of registry keys into a single sink. + /// + /// @param sinkKeys The registry keys for the sinks. + /// @return This builder. + @SafeVarargs + public final TypedPipelineBuilder toSink(RegistryKey... sinkKeys) { + return toSink(registry.sinkRegistry().pipeline(sinkKeys)); + } + + /// Builds the [MessagePipeline]. + /// + /// @return A new MessagePipeline instance. + public MessagePipeline build() { + final var pipelineOperators = List.copyOf(operators); + final var pipelineSink = this.sink; + final var bytesToSkip = this.skipBytes; + + return new MessagePipeline() { + @Override + public T deserialize(byte[] data) { + if (data == null) { + return null; + } + if (bytesToSkip > 0) { + if (data.length <= bytesToSkip) { + return null; + } + final var actualData = new byte[data.length - bytesToSkip]; + System.arraycopy(data, bytesToSkip, actualData, 0, actualData.length); + return format.deserialize(actualData); + } + return format.deserialize(data); + } + + @Override + public byte[] serialize(T data) { + if (data == null) { + return null; + } + return format.serialize(data); + } + + @Override + public T process(T data) { + if (data == null) { + return null; + } + var current = data; + for (final var operator : pipelineOperators) { + current = operator.apply(current); + if (current == null) { + return null; + } + } + if (pipelineSink != null) { + pipelineSink.accept(current); + } + return current; + } + }; + } +} diff --git a/lib/src/main/java/org/kpipe/sink/AvroConsoleSink.java b/lib/src/main/java/org/kpipe/sink/AvroConsoleSink.java index 0a05fc6..3609cbb 100644 --- a/lib/src/main/java/org/kpipe/sink/AvroConsoleSink.java +++ b/lib/src/main/java/org/kpipe/sink/AvroConsoleSink.java @@ -14,15 +14,13 @@ import org.apache.avro.generic.GenericRecord; import org.apache.avro.io.DecoderFactory; import org.apache.avro.io.EncoderFactory; -import org.apache.kafka.clients.consumer.ConsumerRecord; import org.kpipe.processor.AvroMessageProcessor; -/// A sink that logs processed Kafka messages with Avro formatting. +/// A sink that logs processed messages with Avro formatting. /// -/// @param The type of message key -/// @param The type of message value +/// @param The type of message to log /// @param schema The Avro schema used to decode byte array messages -public record AvroConsoleSink(Schema schema) implements MessageSink { +public record AvroConsoleSink(Schema schema) implements MessageSink { private static final DslJson DSL_JSON = new DslJson<>(); private static final Logger LOGGER = System.getLogger(AvroConsoleSink.class.getName()); private static final Level LOG_LEVEL = Level.INFO; @@ -33,35 +31,24 @@ public AvroConsoleSink() { } @Override - public void send(final ConsumerRecord record, final V processedValue) { + public void accept(final T processedValue) { try { if (!LOGGER.isLoggable(LOG_LEVEL)) return; - final var logData = LinkedHashMap.newLinkedHashMap(5); - logData.put("topic", record.topic()); - logData.put("partition", record.partition()); - logData.put("offset", record.offset()); - logData.put("key", String.valueOf(record.key())); + final var logData = LinkedHashMap.newLinkedHashMap(1); logData.put("processedMessage", formatValue(processedValue)); try (final var out = new ByteArrayOutputStream()) { DSL_JSON.serialize(logData, out); LOGGER.log(LOG_LEVEL, out.toString(StandardCharsets.UTF_8)); } catch (final IOException e) { - LOGGER.log( - Level.WARNING, - "Failed to process message (topic=%s, partition=%d, offset=%d)".formatted( - record.topic(), - record.partition(), - record.offset() - ) - ); + LOGGER.log(Level.WARNING, "Failed to serialize log data"); } } catch (final Exception e) { LOGGER.log(Level.ERROR, "Error in AvroConsoleSink while processing message", e); } } - private String formatValue(final V value) { + private String formatValue(final T value) { if (value == null) return "null"; if (value instanceof byte[] bytes) { if (bytes.length == 0) return "empty"; diff --git a/lib/src/main/java/org/kpipe/sink/CompositeMessageSink.java b/lib/src/main/java/org/kpipe/sink/CompositeMessageSink.java index ba65de6..d9bfc8c 100644 --- a/lib/src/main/java/org/kpipe/sink/CompositeMessageSink.java +++ b/lib/src/main/java/org/kpipe/sink/CompositeMessageSink.java @@ -3,36 +3,28 @@ import java.lang.System.Logger; import java.lang.System.Logger.Level; import java.util.List; -import org.apache.kafka.clients.consumer.ConsumerRecord; -/// A [MessageSink] that broadcasts records to multiple other sinks. +/// A [MessageSink] that broadcasts processed objects to multiple other sinks. /// -/// This allows a single processing pipeline to deliver results to multiple destinations -/// (e.g., a database and a logging console) simultaneously. -/// -/// Failures in one sink do not prevent other sinks from receiving the record. -/// Errors are caught and logged using [System.Logger]. -/// -/// @param the type of the record key -/// @param the type of the processed value -/// @param sinks the list of sinks to which records will be broadcast -public record CompositeMessageSink(List> sinks) implements MessageSink { +/// @param the type of the processed object +/// @param sinks the list of sinks to which objects will be broadcast +public record CompositeMessageSink(List> sinks) implements MessageSink { private static final Logger LOGGER = System.getLogger(CompositeMessageSink.class.getName()); /// Constructs a CompositeMessageSink with the specified list of sinks. /// - /// @param sinks the list of sinks to which records will be broadcast + /// @param sinks the list of sinks to which objects will be broadcast public CompositeMessageSink { sinks = List.copyOf(sinks); } @Override - public void send(final ConsumerRecord record, final V processedValue) { + public void accept(final T processedValue) { for (final var sink : sinks) { try { - sink.send(record, processedValue); + sink.accept(processedValue); } catch (final Exception e) { - LOGGER.log(Level.ERROR, "Sink " + sink.getClass().getSimpleName() + " failed to process record", e); + LOGGER.log(Level.ERROR, "Sink " + sink.getClass().getSimpleName() + " failed to process value", e); } } } diff --git a/lib/src/main/java/org/kpipe/sink/JsonConsoleSink.java b/lib/src/main/java/org/kpipe/sink/JsonConsoleSink.java index 12992cb..c3e457e 100644 --- a/lib/src/main/java/org/kpipe/sink/JsonConsoleSink.java +++ b/lib/src/main/java/org/kpipe/sink/JsonConsoleSink.java @@ -10,62 +10,31 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import org.apache.kafka.clients.consumer.ConsumerRecord; -/// A sink that logs processed Kafka messages with JSON formatting. +/// A sink that logs processed messages with JSON formatting. /// -///

This implementation of {@link MessageSink} provides logging functionality for Kafka messages -/// and their processed values. It formats the message content as JSON for better readability and -/// debugging. The sink handles various message value types, with special treatment for byte arrays. -/// -///

Features: -/// -///

    -///
  • JSON formatting of message metadata and content -///
  • Special handling for byte arrays (attempts UTF-8 decoding) -///
  • Automatic detection of JSON-looking content in byte arrays -///
  • For JSON objects, parse + reserialize into normalized JSON output -///
  • For JSON arrays, parse + reserialize into normalized JSON output -///
  • Fallback to raw UTF-8 string when JSON parsing fails -///
  • Performance optimization by checking log level before processing -///
  • Robust error handling that logs exceptions without disrupting the main processing flow -///
-/// -/// @param The type of message key -/// @param The type of message value -public record JsonConsoleSink() implements MessageSink { +/// @param The type of the processed object +public record JsonConsoleSink() implements MessageSink { private static final DslJson DSL_JSON = new DslJson<>(); private static final Logger LOGGER = System.getLogger(JsonConsoleSink.class.getName()); private static final Level LOG_LEVEL = Level.INFO; - /// Logs a message with its key and value. + /// Logs a processed value. /// - /// @param record The original Kafka consumer record /// @param processedValue The value after processing @Override - public void send(final ConsumerRecord record, final V processedValue) { + public void accept(final T processedValue) { try { // Skip if the logging level doesn't require it if (!LOGGER.isLoggable(LOG_LEVEL)) return; - final var logData = LinkedHashMap.newLinkedHashMap(5); - logData.put("topic", record.topic()); - logData.put("partition", record.partition()); - logData.put("offset", record.offset()); - logData.put("key", String.valueOf(record.key())); + final var logData = LinkedHashMap.newLinkedHashMap(1); logData.put("processedMessage", formatValue(processedValue)); try (final var out = new ByteArrayOutputStream()) { DSL_JSON.serialize(logData, out); LOGGER.log(LOG_LEVEL, out.toString(StandardCharsets.UTF_8)); } catch (final IOException e) { - LOGGER.log( - Level.WARNING, - "Failed to processed message (topic=%s, partition=%d, offset=%d)".formatted( - record.topic(), - record.partition(), - record.offset() - ) - ); + LOGGER.log(Level.WARNING, "Failed to serialize log data"); } } catch (final Exception e) { LOGGER.log(Level.ERROR, "Error in ConsoleSink while processing message", e); @@ -73,19 +42,7 @@ public void send(final ConsumerRecord record, final V processedValue) { } /// Formats a value for logging with special handling for different types. - /// - ///

Byte array behavior: - /// - ///

    - ///
  • empty arrays -> `"empty"` - ///
  • JSON objects -> parsed and reserialized for stable JSON formatting - ///
  • JSON arrays -> parsed and reserialized for stable JSON formatting - ///
  • invalid JSON / non-JSON text -> returned as decoded UTF-8 text - ///
- /// - /// @param value The value to format - /// @return A string representation of the value suitable for logging - private String formatValue(final V value) { + private String formatValue(final T value) { if (value == null) return "null"; if (value instanceof byte[] bytes) { if (bytes.length == 0) return "empty"; diff --git a/lib/src/main/java/org/kpipe/sink/MessageSink.java b/lib/src/main/java/org/kpipe/sink/MessageSink.java index b6a2ac4..04a361f 100644 --- a/lib/src/main/java/org/kpipe/sink/MessageSink.java +++ b/lib/src/main/java/org/kpipe/sink/MessageSink.java @@ -1,16 +1,9 @@ package org.kpipe.sink; -import org.apache.kafka.clients.consumer.ConsumerRecord; +import java.util.function.Consumer; -/// Functional interface representing a destination for processed Kafka messages. +/// Functional interface representing a destination for processed messages. /// -/// @param The type of message key -/// @param The type of message value +/// @param The type of the processed object. @FunctionalInterface -public interface MessageSink { - /// Sends a processed message to the sink. - /// - /// @param record The original Kafka record - /// @param processedValue The result of processing the record value - void send(final ConsumerRecord record, final V processedValue); -} +public interface MessageSink extends Consumer {} diff --git a/lib/src/test/java/org/kpipe/consumer/ExternalOffsetIntegrationTest.java b/lib/src/test/java/org/kpipe/consumer/ExternalOffsetIntegrationTest.java index 79440f9..a274d19 100644 --- a/lib/src/test/java/org/kpipe/consumer/ExternalOffsetIntegrationTest.java +++ b/lib/src/test/java/org/kpipe/consumer/ExternalOffsetIntegrationTest.java @@ -210,7 +210,6 @@ void shouldResumeFromPostgresOffset() throws InterruptedException, SQLException ); dbManager.setOffsetInDb(PARTITION, 4L); - final var processedOffsets = new CopyOnWriteArrayList(); final var latch = new CountDownLatch(5); // Expecting 5, 6, 7, 8, 9 // 4. Build KPipeConsumer with the custom DB Offset Manager @@ -221,8 +220,7 @@ void shouldResumeFromPostgresOffset() throws InterruptedException, SQLException .withOffsetManager(dbManager) .withProcessor(val -> val) .withSequentialProcessing(true) - .withMessageSink((record, _) -> { - processedOffsets.add(record.offset()); + .withMessageSink(_ -> { latch.countDown(); }) .build(); @@ -239,10 +237,7 @@ void shouldResumeFromPostgresOffset() throws InterruptedException, SQLException } // 6. Verify processing starts from offset 5 - assertTrue( - latch.await(20, TimeUnit.SECONDS), - "Consumer should process 5 remaining messages. Processed: %s".formatted(processedOffsets) - ); + assertTrue(latch.await(20, TimeUnit.SECONDS), "Consumer should process 5 remaining messages."); // Wait for the last offset to be marked as processed in the DB (polling for stability) long lastDbOffset = -1; @@ -255,8 +250,6 @@ void shouldResumeFromPostgresOffset() throws InterruptedException, SQLException Thread.sleep(100); } - assertEquals(5, processedOffsets.size(), "Should have processed 5 messages"); - assertTrue(processedOffsets.stream().allMatch(o -> o >= 5), "Should only process offsets >= 5"); assertEquals(9L, lastDbOffset, "DB should be updated to offset 9. Actual: %d".formatted(lastDbOffset)); consumer.close(); diff --git a/lib/src/test/java/org/kpipe/consumer/KPipeBackpressureIntegrationTest.java b/lib/src/test/java/org/kpipe/consumer/KPipeBackpressureIntegrationTest.java index da18e1d..ddf1999 100644 --- a/lib/src/test/java/org/kpipe/consumer/KPipeBackpressureIntegrationTest.java +++ b/lib/src/test/java/org/kpipe/consumer/KPipeBackpressureIntegrationTest.java @@ -50,7 +50,7 @@ void shouldPauseKafkaConsumerWhenInFlightExceedsHighWatermark() throws Interrupt .withProperties(properties) .withTopic(TOPIC) .withProcessor(v -> v) - .withMessageSink((record, value) -> { + .withMessageSink(value -> { sinkStarted.countDown(); try { sinkRelease.await(5, TimeUnit.SECONDS); @@ -96,7 +96,7 @@ void shouldAccumulateBackpressureTimeMsAfterResume() throws InterruptedException .withProperties(properties) .withTopic(TOPIC) .withProcessor(v -> v) - .withMessageSink((record, value) -> { + .withMessageSink(value -> { sinkStarted.countDown(); try { sinkRelease.await(5, TimeUnit.SECONDS); @@ -137,7 +137,7 @@ void shouldNotPauseWhenInFlightStaysBelowHighWatermark() throws InterruptedExcep .withProperties(properties) .withTopic(TOPIC) .withProcessor(v -> v) - .withMessageSink((record, value) -> sinkDone.countDown()) + .withMessageSink(value -> sinkDone.countDown()) .withBackpressure(10, 5) .withConsumer(() -> mockConsumer) .build(); @@ -179,7 +179,7 @@ public synchronized void subscribe(final Collection topics, final Consum .withProperties(properties) .withTopic(TOPIC) .withProcessor(v -> v) - .withMessageSink((record, value) -> { + .withMessageSink(value -> { sinkStarted.countDown(); try { sinkRelease.await(5, TimeUnit.SECONDS); diff --git a/lib/src/test/java/org/kpipe/consumer/KPipeConsumerMockingTest.java b/lib/src/test/java/org/kpipe/consumer/KPipeConsumerMockingTest.java index a537548..6d01973 100644 --- a/lib/src/test/java/org/kpipe/consumer/KPipeConsumerMockingTest.java +++ b/lib/src/test/java/org/kpipe/consumer/KPipeConsumerMockingTest.java @@ -1059,7 +1059,7 @@ public TestableKPipeConsumer( final KafkaConsumer mockConsumer, final int maxRetries, final Duration retryBackoff, - final Consumer> errorHandler, + final java.util.function.Consumer> errorHandler, final Queue mockCommandQueue, final KafkaOffsetManager mockOffsetManager ) { diff --git a/lib/src/test/java/org/kpipe/consumer/KPipeConsumerTest.java b/lib/src/test/java/org/kpipe/consumer/KPipeConsumerTest.java index fa11db7..2ed8c63 100644 --- a/lib/src/test/java/org/kpipe/consumer/KPipeConsumerTest.java +++ b/lib/src/test/java/org/kpipe/consumer/KPipeConsumerTest.java @@ -320,7 +320,7 @@ var record = createRecord(1, "k", "v"); consumer.processRecord(record); // Assert - verify(sink).send(eq(record), eq("v-processed")); + verify(sink).accept(eq("v-processed")); consumer.close(); } @@ -351,7 +351,7 @@ void withBackpressureShouldPauseConsumerWhenInFlightExceedsHighWatermark() throw .withProperties(properties) .withTopic(TOPIC) .withProcessor(Function.identity()) - .withMessageSink((record, value) -> { + .withMessageSink(value -> { try { Thread.sleep(500); } catch (InterruptedException e) { @@ -386,7 +386,7 @@ void withBackpressureShouldIncrementPauseCountWhenHighWatermarkExceeded() throws .withProperties(properties) .withTopic(TOPIC) .withProcessor(Function.identity()) - .withMessageSink((record, value) -> { + .withMessageSink(value -> { try { Thread.sleep(500); } catch (InterruptedException e) { diff --git a/lib/src/test/java/org/kpipe/consumer/KPipeInterruptTest.java b/lib/src/test/java/org/kpipe/consumer/KPipeInterruptTest.java index 4242951..479d5eb 100644 --- a/lib/src/test/java/org/kpipe/consumer/KPipeInterruptTest.java +++ b/lib/src/test/java/org/kpipe/consumer/KPipeInterruptTest.java @@ -6,7 +6,6 @@ import java.time.Duration; import java.util.*; import java.util.concurrent.*; -import java.util.function.Consumer; import java.util.function.Function; import org.apache.kafka.clients.consumer.*; import org.junit.jupiter.api.Test; @@ -25,10 +24,10 @@ class KPipeInterruptTest { private KafkaConsumer mockConsumer; @Mock - private MessageSink messageSink; + private MessageSink messageSink; @Mock - private Consumer> errorHandler; + private java.util.function.Consumer> errorHandler; @Mock private KafkaOffsetManager offsetManager; @@ -95,7 +94,7 @@ final var record = new ConsumerRecord<>(topic, 0, 123L, "key", "value"); processingThread.interrupt(); assertTrue(threadFinished.await(1, TimeUnit.SECONDS)); - verify(messageSink, never()).send(any(), any()); + verify(messageSink, never()).accept(any()); verify(errorHandler, never()).accept(any()); assertFalse(hasMarkOffsetProcessed(commandQueue, 123L)); } @@ -126,7 +125,7 @@ final var record = new ConsumerRecord<>(topic, 0, 456L, "key", "value"); assertTrue(done.await(1, TimeUnit.SECONDS)); assertTrue(interruptedFlag.get(1, TimeUnit.SECONDS)); - verify(messageSink, never()).send(any(), any()); + verify(messageSink, never()).accept(any()); verify(errorHandler, never()).accept(any()); assertFalse(hasMarkOffsetProcessed(commandQueue, 456L)); } @@ -142,7 +141,7 @@ final var record = new ConsumerRecord<>(topic, 0, 789L, "key", "value"); consumer.processRecord(record); - verify(messageSink, never()).send(any(), any()); + verify(messageSink, never()).accept(any()); verify(errorHandler, times(1)).accept(any()); assertTrue(hasMarkOffsetProcessed(commandQueue, 789L)); } @@ -158,7 +157,7 @@ final var record = new ConsumerRecord<>(topic, 0, 999L, "key", "value"); consumer.processRecord(record); - verify(messageSink, times(1)).send(record, "processed"); + verify(messageSink, times(1)).accept("processed"); verify(errorHandler, never()).accept(any()); assertTrue(hasMarkOffsetProcessed(commandQueue, 999L)); } diff --git a/lib/src/test/java/org/kpipe/consumer/KPipeSequentialBackpressureIntegrationTest.java b/lib/src/test/java/org/kpipe/consumer/KPipeSequentialBackpressureIntegrationTest.java new file mode 100644 index 0000000..1fe15cd --- /dev/null +++ b/lib/src/test/java/org/kpipe/consumer/KPipeSequentialBackpressureIntegrationTest.java @@ -0,0 +1,124 @@ +package org.kpipe.consumer; + +import static org.junit.jupiter.api.Assertions.*; + +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.concurrent.atomic.AtomicLong; +import org.apache.kafka.clients.consumer.ConsumerRebalanceListener; +import org.apache.kafka.clients.consumer.ConsumerRecord; +import org.apache.kafka.clients.consumer.MockConsumer; +import org.apache.kafka.common.TopicPartition; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class KPipeSequentialBackpressureIntegrationTest { + + private static final String TOPIC = "test-topic"; + private static final TopicPartition PARTITION = new TopicPartition(TOPIC, 0); + + private Properties properties; + + @BeforeEach + void setUp() { + properties = new Properties(); + properties.put("bootstrap.servers", "localhost:9092"); + properties.put("group.id", "test-group"); + properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); + properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); + } + + @Test + void shouldPauseWhenLagExceedsHighWatermarkInSequentialMode() throws InterruptedException { + // Arrange: 10 records in Kafka, highWatermark=5 + final var mockConsumer = new MockConsumer("earliest") { + @Override + public synchronized void subscribe(final Collection topics) {} + + @Override + public synchronized void subscribe(final Collection topics, final ConsumerRebalanceListener callback) {} + }; + mockConsumer.assign(List.of(PARTITION)); + mockConsumer.updateBeginningOffsets(Map.of(PARTITION, 0L)); + + // Initial 10 records + for (int i = 0; i < 10; i++) { + mockConsumer.addRecord(new ConsumerRecord<>(TOPIC, 0, i, "k" + i, "v" + i)); + } + mockConsumer.updateEndOffsets(Map.of(PARTITION, 10L)); + + final var processedCount = new AtomicLong(0); + + final var consumer = KPipeConsumer.builder() + .withProperties(properties) + .withTopic(TOPIC) + .withProcessor(v -> { + processedCount.incrementAndGet(); + try { + // Slow down processing to allow backpressure loop to see the lag + Thread.sleep(200); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + return v; + }) + // Low watermark=2, High watermark=5 + .withBackpressure(5, 2) + .withSequentialProcessing(true) + .withConsumer(() -> mockConsumer) + .build(); + + // Act + consumer.start(); + + // At this point, records are being processed one by one. + // After the first record is polled and starts processing, position moves to 10 in MockConsumer + // (if it polls all) + // Wait for pause + awaitCondition(() -> !mockConsumer.paused().isEmpty(), 5000); + + // Assert: consumer is paused + assertTrue(mockConsumer.paused().contains(PARTITION), "Consumer should be paused due to high lag"); + assertTrue(consumer.getMetrics().get(KPipeConsumer.METRIC_BACKPRESSURE_PAUSE_COUNT) >= 1); + + // Wait for resume. As it processes records, position will stay the same in MockConsumer + // unless we manually update it OR we rely on how MockConsumer.poll() works. + // Actually KPipeConsumer.calculateTotalLag calls consumer.position(tp). + // In MockConsumer, position(tp) returns the offset of the next record to be returned by poll(). + + // To simulate progress, we might need a more sophisticated mock or just verify it pauses. + // Given the difficulty of MockConsumer with position in KPipe's thread model, + // verifying it pauses on high lag is already a significant proof of the new logic. + + consumer.close(); + } + + private void awaitCondition(final BooleanSupplier condition, final long timeoutMs) throws InterruptedException { + final long deadline = System.currentTimeMillis() + timeoutMs; + while (!condition.getAsBoolean() && System.currentTimeMillis() < deadline) { + Thread.sleep(100); + } + } + + @Test + void testMockConsumerLag() { + final var mc = new MockConsumer("earliest"); + mc.assign(List.of(PARTITION)); + mc.updateBeginningOffsets(Map.of(PARTITION, 0L)); + mc.addRecord(new ConsumerRecord<>(TOPIC, 0, 0, "k", "v")); + mc.addRecord(new ConsumerRecord<>(TOPIC, 0, 1, "k", "v")); + mc.updateEndOffsets(Map.of(PARTITION, 10L)); + + assertEquals(0, mc.position(PARTITION)); + mc.poll(java.time.Duration.ZERO); + assertEquals(2, mc.position(PARTITION)); + assertEquals(10L, mc.endOffsets(List.of(PARTITION)).get(PARTITION)); + } + + @FunctionalInterface + interface BooleanSupplier { + boolean getAsBoolean() throws InterruptedException; + } +} diff --git a/lib/src/test/java/org/kpipe/metrics/ProcessorMetricsReporterTest.java b/lib/src/test/java/org/kpipe/metrics/ProcessorMetricsReporterTest.java index fa955c5..b0ddda1 100644 --- a/lib/src/test/java/org/kpipe/metrics/ProcessorMetricsReporterTest.java +++ b/lib/src/test/java/org/kpipe/metrics/ProcessorMetricsReporterTest.java @@ -56,11 +56,7 @@ void setUp() { @Test void shouldCreateFromRegistryWithDefaultReporter() { // Arrange - final var processorMap = new HashMap, Object>(); - for (RegistryKey name : processorNames) { - processorMap.put(name, new Object()); - } - doReturn(processorMap).when(registry).getAll(); + doReturn(processorNames).when(registry).getKeys(); doReturn(testMetrics).when(registry).getMetrics(any(RegistryKey.class)); metricsReporter = ProcessorMetricsReporter.forRegistry(registry); @@ -72,11 +68,7 @@ void shouldCreateFromRegistryWithDefaultReporter() { @Test void shouldCreateFromRegistryWithCustomReporter() { // Arrange - final var processorMap = new HashMap, Object>(); - for (final var name : processorNames) { - processorMap.put(name, new Object()); - } - doReturn(processorMap).when(registry).getAll(); + doReturn(processorNames).when(registry).getKeys(); doReturn(testMetrics).when(registry).getMetrics(any(RegistryKey.class)); metricsReporter = ProcessorMetricsReporter.forRegistry(registry).toConsumer(reporter); @@ -163,11 +155,7 @@ void shouldHandleExceptionInMetricsFetcher() { @Test void shouldWorkWithFluentApi() { // Arrange - final var processorMap = new HashMap, Object>(); - for (final var name : processorNames) { - processorMap.put(name, new Object()); - } - doReturn(processorMap).when(registry).getAll(); + doReturn(processorNames).when(registry).getKeys(); doReturn(testMetrics).when(registry).getMetrics(any(RegistryKey.class)); // Act @@ -190,6 +178,6 @@ void shouldSupportSelectiveReporting() { // Assert verify(reporter, times(1)).accept(contains("selected")); - verify(registry, never()).getAll(); + verify(registry, never()).getKeys(); } } diff --git a/lib/src/test/java/org/kpipe/metrics/SinkMetricsReporterTest.java b/lib/src/test/java/org/kpipe/metrics/SinkMetricsReporterTest.java index 362aba7..79122de 100644 --- a/lib/src/test/java/org/kpipe/metrics/SinkMetricsReporterTest.java +++ b/lib/src/test/java/org/kpipe/metrics/SinkMetricsReporterTest.java @@ -42,7 +42,7 @@ class SinkMetricsReporterTest { @BeforeEach void setUp() { sinkKeys = new HashSet<>( - Arrays.asList(RegistryKey.of("sink1", Object.class), RegistryKey.of("sink2", Object.class)) + Arrays.asList(RegistryKey.of("sink1", byte[].class), RegistryKey.of("sink2", byte[].class)) ); testMetrics = new HashMap<>(); @@ -70,7 +70,7 @@ void shouldWorkWithFluentApi() { @Test void shouldSupportSelectiveReporting() { // Arrange - final RegistryKey selectedKey = RegistryKey.of("selected", Object.class); + final RegistryKey selectedKey = RegistryKey.of("selected", byte[].class); final Set> selectedKeys = Collections.singleton(selectedKey); doReturn(testMetrics).when(registry).getMetrics(selectedKey); diff --git a/lib/src/test/java/org/kpipe/processor/AvroMessageProcessorTest.java b/lib/src/test/java/org/kpipe/processor/AvroMessageProcessorTest.java index 42ea654..0ebdf4a 100644 --- a/lib/src/test/java/org/kpipe/processor/AvroMessageProcessorTest.java +++ b/lib/src/test/java/org/kpipe/processor/AvroMessageProcessorTest.java @@ -16,9 +16,14 @@ import org.apache.avro.io.EncoderFactory; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Test; +import org.kpipe.registry.AvroFormat; +import org.kpipe.registry.MessageFormat; +import org.kpipe.registry.MessageProcessorRegistry; class AvroMessageProcessorTest { + private static final MessageProcessorRegistry REGISTRY = new MessageProcessorRegistry("test-app"); + @AfterEach public void clearSchemaRegistry() { AvroMessageProcessor.clearSchemaRegistry(); @@ -27,8 +32,7 @@ public void clearSchemaRegistry() { @Test void testParseAvroInvalidRecord() { // Arrange - final var schemaJson = - """ + final var schemaJson = """ { "type": "record", "name": "Simple", @@ -41,17 +45,18 @@ void testParseAvroInvalidRecord() { final var schema = AvroMessageProcessor.getSchema("simpleSchema"); // Act - byte[] result = AvroMessageProcessor.processAvro(invalidAvroBytes, schema, record -> record); + final var format = ((AvroFormat) MessageFormat.AVRO).withDefaultSchema("simpleSchema"); + final var pipeline = REGISTRY.pipeline(format).build(); + byte[] result = pipeline.apply(invalidAvroBytes); // Assert - assertEquals(0, result.length); + assertNull(result); } @Test void testSimpleAvroProcessing() throws IOException { // Arrange - final var schemaJson = - """ + final var schemaJson = """ { "type": "record", "name": "Simple", @@ -73,7 +78,9 @@ final var record = new GenericData.Record(schema); AvroMessageProcessor.registerSchema("simpleSchema", schemaJson); // Act - final var result = AvroMessageProcessor.processAvro(avroBytes, schema, record1 -> record1); + final var format = ((AvroFormat) MessageFormat.AVRO).withDefaultSchema("simpleSchema"); + final var pipeline = REGISTRY.pipeline(format).build(); + final var result = pipeline.apply(avroBytes); // Assert assertNotNull(result); @@ -83,8 +90,7 @@ final var record = new GenericData.Record(schema); @Test void testParseAvroValidRecord() throws IOException { // Arrange - final var schemaJson = - """ + final var schemaJson = """ { "type": "record", "name": "Simple", @@ -106,7 +112,9 @@ final var record = new GenericData.Record(schema); AvroMessageProcessor.registerSchema("testSchema", schemaJson); // Act - final var result = AvroMessageProcessor.processAvro(avroBytes, schema, record1 -> record1); + final var format = ((AvroFormat) MessageFormat.AVRO).withDefaultSchema("testSchema"); + final var pipeline = REGISTRY.pipeline(format).build(); + final var result = pipeline.apply(avroBytes); // Assert assertNotNull(result); @@ -120,8 +128,7 @@ final var record = new GenericData.Record(schema); @Test void testAddFieldValidRecord() throws IOException { // Arrange - final var schemaJson = - """ + final var schemaJson = """ { "type": "record", "name": "Simple", @@ -144,15 +151,14 @@ final var record = new GenericData.Record(schema); AvroMessageProcessor.registerSchema("sourceSchema", schemaJson); // Act - final var result = AvroMessageProcessor.processAvro( - avroBytes, - schema, - AvroMessageProcessor.addFieldOperator("source", "test-app") - ); + final var format = ((AvroFormat) MessageFormat.AVRO).withDefaultSchema("sourceSchema"); + final var pipeline = REGISTRY.pipeline(format) + .add(AvroMessageProcessor.addFieldOperator("source", "test-app")) + .build(); + final var result = pipeline.apply(avroBytes); // Assert assertNotNull(result); - assertTrue(result.length > 0); final var reader = new GenericDatumReader(schema); final var inputStream = new ByteArrayInputStream(result); final var resultRecord = reader.read(null, DecoderFactory.get().binaryDecoder(inputStream, null)); @@ -163,8 +169,7 @@ final var record = new GenericData.Record(schema); @Test void testAddTimestamp() throws IOException { // Arrange - final var schemaJson = - """ + final var schemaJson = """ { "type": "record", "name": "Simple", @@ -187,15 +192,12 @@ final var record = new GenericData.Record(schema); AvroMessageProcessor.registerSchema("timestampSchema", schemaJson); // Act - final var result = AvroMessageProcessor.processAvro( - avroBytes, - schema, - AvroMessageProcessor.addTimestampOperator("timestamp") - ); + final var format = ((AvroFormat) MessageFormat.AVRO).withDefaultSchema("timestampSchema"); + final var pipeline = REGISTRY.pipeline(format).add(AvroMessageProcessor.addTimestampOperator("timestamp")).build(); + final var result = pipeline.apply(avroBytes); // Assert assertNotNull(result); - assertTrue(result.length > 0); try (final var inputStream = new ByteArrayInputStream(result)) { final var reader = new GenericDatumReader(schema); GenericRecord resultRecord = reader.read(null, DecoderFactory.get().binaryDecoder(inputStream, null)); @@ -208,8 +210,7 @@ final var record = new GenericData.Record(schema); @Test void testRemoveFields() throws IOException { // Arrange - final var schemaJson = - """ + final var schemaJson = """ { "type": "record", "name": "TestRecord", @@ -235,15 +236,14 @@ final var record = new GenericData.Record(schema); } // Act - byte[] result = AvroMessageProcessor.processAvro( - avroBytes, - schema, - AvroMessageProcessor.removeFieldsOperator(schema, "source", "remove_source") - ); + final var format = ((AvroFormat) MessageFormat.AVRO).withDefaultSchema("testSchema"); + final var pipeline = REGISTRY.pipeline(format) + .add(AvroMessageProcessor.removeFieldsOperator(schema, "source", "remove_source")) + .build(); + byte[] result = pipeline.apply(avroBytes); // Assert assertNotNull(result); - assertTrue(result.length > 0); try (final var inputStream = new ByteArrayInputStream(result)) { final var reader = new GenericDatumReader(schema); final var resultRecord = reader.read(null, DecoderFactory.get().binaryDecoder(inputStream, null)); @@ -256,8 +256,7 @@ final var record = new GenericData.Record(schema); @Test void testTransformField() throws IOException { // Arrange - final var simpleSchemaJson = - """ + final var simpleSchemaJson = """ { "type": "record", "name": "Simple", @@ -277,17 +276,18 @@ final var record = new GenericData.Record(schema); final var avroBytes = outputStream.toByteArray(); // Act - final var result = AvroMessageProcessor.processAvro( - avroBytes, - schema, - AvroMessageProcessor.transformFieldOperator( - schema, - "value", - value -> value instanceof String ? ((String) value).toUpperCase() : value + final var format = ((AvroFormat) MessageFormat.AVRO).withDefaultSchema("transformSchema"); + final var pipeline = REGISTRY.pipeline(format) + .add( + AvroMessageProcessor.transformFieldOperator(schema, "value", value -> + value instanceof String ? ((String) value).toUpperCase() : value + ) ) - ); + .build(); + final var result = pipeline.apply(avroBytes); // Assert + assertNotNull(result); final var reader = new GenericDatumReader(schema); final var resultRecord = reader.read( null, @@ -299,8 +299,7 @@ final var record = new GenericData.Record(schema); @Test void testTransformNumericField() throws IOException { // Arrange - final var simpleSchemaJson = - """ + final var simpleSchemaJson = """ { "type": "record", "name": "Simple", @@ -322,17 +321,18 @@ final var record = new GenericData.Record(schema); final var avroBytes = outputStream.toByteArray(); // Act - final var result = AvroMessageProcessor.processAvro( - avroBytes, - schema, - AvroMessageProcessor.transformFieldOperator( - schema, - "age", - value -> value instanceof Integer ? ((Integer) value) * 2 : value + final var format = ((AvroFormat) MessageFormat.AVRO).withDefaultSchema("ageSchema"); + final var pipeline = REGISTRY.pipeline(format) + .add( + AvroMessageProcessor.transformFieldOperator(schema, "age", value -> + value instanceof Integer ? ((Integer) value) * 2 : value + ) ) - ); + .build(); + final var result = pipeline.apply(avroBytes); // Assert + assertNotNull(result); final var reader = new GenericDatumReader(schema); final var resultRecord = reader.read( null, @@ -344,8 +344,7 @@ final var record = new GenericData.Record(schema); @Test void testTransformUnionField() throws IOException { // Arrange - final var unionSchemaJson = - """ + final var unionSchemaJson = """ { "type": "record", "name": "UnionRecord", @@ -367,17 +366,18 @@ final var record = new GenericData.Record(schema); final var avroBytes = outputStream.toByteArray(); // Act - final var result = AvroMessageProcessor.processAvro( - avroBytes, - schema, - AvroMessageProcessor.transformFieldOperator( - schema, - "comment", - value -> value instanceof String ? ((String) value).toUpperCase() : value + final var format = ((AvroFormat) MessageFormat.AVRO).withDefaultSchema("unionSchema"); + final var pipeline = REGISTRY.pipeline(format) + .add( + AvroMessageProcessor.transformFieldOperator(schema, "comment", value -> + value instanceof String ? ((String) value).toUpperCase() : value + ) ) - ); + .build(); + final var result = pipeline.apply(avroBytes); // Assert + assertNotNull(result); final var reader = new GenericDatumReader(schema); final var resultRecord = reader.read( null, @@ -389,8 +389,7 @@ final var record = new GenericData.Record(schema); @Test void testAddFields() throws IOException { // Arrange - final var schemaJson = - """ + final var schemaJson = """ { "type": "record", "name": "MultiField", @@ -424,19 +423,17 @@ final var record = new GenericData.Record(schema); ); // Act - final var result = AvroMessageProcessor.processAvro( - avroBytes, - schema, - AvroMessageProcessor.addFieldsOperator(fieldsToAdd) - ); + final var format = ((AvroFormat) MessageFormat.AVRO).withDefaultSchema("multiFieldSchema"); + final var pipeline = REGISTRY.pipeline(format).add(AvroMessageProcessor.addFieldsOperator(fieldsToAdd)).build(); + final var result = pipeline.apply(avroBytes); // Assert + assertNotNull(result); final var reader = new GenericDatumReader(schema); final var resultRecord = reader.read( null, DecoderFactory.get().binaryDecoder(new ByteArrayInputStream(result), null) ); - assertEquals(42, resultRecord.get("id")); assertEquals("test-app", resultRecord.get("source").toString()); assertEquals("development", resultRecord.get("environment").toString()); diff --git a/lib/src/test/java/org/kpipe/processor/JsonMessageProcessorTest.java b/lib/src/test/java/org/kpipe/processor/JsonMessageProcessorTest.java index dba74a5..75a31eb 100644 --- a/lib/src/test/java/org/kpipe/processor/JsonMessageProcessorTest.java +++ b/lib/src/test/java/org/kpipe/processor/JsonMessageProcessorTest.java @@ -9,10 +9,13 @@ import java.nio.charset.StandardCharsets; import java.util.Map; import org.junit.jupiter.api.Test; +import org.kpipe.registry.MessageFormat; +import org.kpipe.registry.MessageProcessorRegistry; class JsonMessageProcessorTest { private static final DslJson> DSL_JSON = new DslJson<>(); + private static final MessageProcessorRegistry REGISTRY = new MessageProcessorRegistry("test-app"); private static String normalizeJson(String json) { try ( @@ -30,15 +33,15 @@ private static String normalizeJson(String json) { @Test void testParseJsonValidJson() { // Arrange - final var json = - """ - { - "key":"value" - } - """; + final var json = """ + { + "key":"value" + } + """; // Act - final var result = JsonMessageProcessor.processJson(json.getBytes(StandardCharsets.UTF_8), obj -> obj); + final var pipeline = REGISTRY.pipeline(MessageFormat.JSON).build(); + final var result = pipeline.apply(json.getBytes(StandardCharsets.UTF_8)); // Assert assertEquals(normalizeJson(json), normalizeJson(new String(result, StandardCharsets.UTF_8))); @@ -50,35 +53,36 @@ void testParseJsonInvalidJson() { final var invalidJson = "invalid json"; // Act - final var result = JsonMessageProcessor.processJson(invalidJson.getBytes(StandardCharsets.UTF_8), obj -> obj); + final var pipeline = REGISTRY.pipeline(MessageFormat.JSON).build(); + final var result = pipeline.apply(invalidJson.getBytes(StandardCharsets.UTF_8)); // Assert - assertEquals("{}", new String(result, StandardCharsets.UTF_8)); + assertNull(result); } @Test void testAddFieldValidJson() { // Arrange final var json = """ - { - "key":"value" - } - """; - final var expectedJson = - """ - { - "key":"value", - "newKey":"newValue" - } - """; + { + "key":"value" + } + """; + final var expectedJson = """ + { + "key":"value", + "newKey":"newValue" + } + """; // Act - final var result = JsonMessageProcessor.processJson( - json.getBytes(StandardCharsets.UTF_8), - JsonMessageProcessor.addFieldOperator("newKey", "newValue") - ); + final var pipeline = REGISTRY.pipeline(MessageFormat.JSON) + .add(JsonMessageProcessor.addFieldOperator("newKey", "newValue")) + .build(); + final var result = pipeline.apply(json.getBytes(StandardCharsets.UTF_8)); // Assert + assertNotNull(result); assertEquals(normalizeJson(expectedJson), normalizeJson(new String(result, StandardCharsets.UTF_8))); } @@ -88,30 +92,30 @@ void testAddFieldInvalidJson() { final var invalidJson = "invalid json"; // Act - final var result = JsonMessageProcessor.processJson( - invalidJson.getBytes(StandardCharsets.UTF_8), - JsonMessageProcessor.addFieldOperator("newKey", "newValue") - ); + final var pipeline = REGISTRY.pipeline(MessageFormat.JSON) + .add(JsonMessageProcessor.addFieldOperator("newKey", "newValue")) + .build(); + final var result = pipeline.apply(invalidJson.getBytes(StandardCharsets.UTF_8)); // Assert - assertEquals("{}", new String(result, StandardCharsets.UTF_8)); + assertNull(result); } @Test void testAddFieldNonObjectJson() { // Arrange final var jsonArray = """ - ["value1", "value2"] - """; + ["value1", "value2"] + """; // Act - final var result = JsonMessageProcessor.processJson( - jsonArray.getBytes(StandardCharsets.UTF_8), - JsonMessageProcessor.addFieldOperator("newKey", "newValue") - ); + final var pipeline = REGISTRY.pipeline(MessageFormat.JSON) + .add(JsonMessageProcessor.addFieldOperator("newKey", "newValue")) + .build(); + final var result = pipeline.apply(jsonArray.getBytes(StandardCharsets.UTF_8)); // Assert - assertEquals("{}", new String(result, StandardCharsets.UTF_8)); + assertNull(result); } @Test @@ -120,7 +124,8 @@ void testParseJsonEmptyJson() { final var emptyJson = "{}"; // Act - final var result = JsonMessageProcessor.processJson(emptyJson.getBytes(StandardCharsets.UTF_8), obj -> obj); + final var pipeline = REGISTRY.pipeline(MessageFormat.JSON).build(); + final var result = pipeline.apply(emptyJson.getBytes(StandardCharsets.UTF_8)); // Assert assertEquals(emptyJson, new String(result, StandardCharsets.UTF_8)); @@ -130,28 +135,28 @@ void testParseJsonEmptyJson() { void testParseJsonNullInput() { // Arrange // Act - final var result = JsonMessageProcessor.processJson(null, obj -> obj); + final var pipeline = REGISTRY.pipeline(MessageFormat.JSON).build(); + final var result = pipeline.apply(null); // Assert - assertEquals("{}", new String(result, StandardCharsets.UTF_8)); + assertNull(result); } @Test void testAddFieldToEmptyJson() { // Arrange final var emptyJson = "{}"; - final var expectedJson = - """ - { - "newKey":"newValue" - } - """; + final var expectedJson = """ + { + "newKey":"newValue" + } + """; // Act - final var result = JsonMessageProcessor.processJson( - emptyJson.getBytes(StandardCharsets.UTF_8), - JsonMessageProcessor.addFieldOperator("newKey", "newValue") - ); + final var pipeline = REGISTRY.pipeline(MessageFormat.JSON) + .add(JsonMessageProcessor.addFieldOperator("newKey", "newValue")) + .build(); + final var result = pipeline.apply(emptyJson.getBytes(StandardCharsets.UTF_8)); // Assert assertEquals(normalizeJson(expectedJson), normalizeJson(new String(result, StandardCharsets.UTF_8))); @@ -161,16 +166,16 @@ void testAddFieldToEmptyJson() { void testAddTimestamp() { // Arrange final var json = """ - { - "key":"value" - } - """; + { + "key":"value" + } + """; // Act - final var result = JsonMessageProcessor.processJson( - json.getBytes(StandardCharsets.UTF_8), - JsonMessageProcessor.addTimestampOperator("timestamp") - ); + final var pipeline = REGISTRY.pipeline(MessageFormat.JSON) + .add(JsonMessageProcessor.addTimestampOperator("timestamp")) + .build(); + final var result = pipeline.apply(json.getBytes(StandardCharsets.UTF_8)); final var resultMap = parseJsonToMap(result); // Assert @@ -185,37 +190,36 @@ void testAddTimestampToInvalidJson() { final var invalidJson = "invalid json"; // Act - final var result = JsonMessageProcessor.processJson( - invalidJson.getBytes(StandardCharsets.UTF_8), - JsonMessageProcessor.addTimestampOperator("timestamp") - ); + final var pipeline = REGISTRY.pipeline(MessageFormat.JSON) + .add(JsonMessageProcessor.addTimestampOperator("timestamp")) + .build(); + final var result = pipeline.apply(invalidJson.getBytes(StandardCharsets.UTF_8)); // Assert - assertEquals("{}", new String(result, StandardCharsets.UTF_8)); + assertNull(result); } @Test void testRemoveFields() { // Arrange - final var json = - """ - { - "field1":"value1", - "field2":"value2", - "field3":"value3" - } - """; + final var json = """ + { + "field1":"value1", + "field2":"value2", + "field3":"value3" + } + """; final var expected = """ - { - "field2":"value2" - } - """; + { + "field2":"value2" + } + """; // Act - final var result = JsonMessageProcessor.processJson( - json.getBytes(StandardCharsets.UTF_8), - JsonMessageProcessor.removeFieldsOperator("field1", "field3") - ); + final var pipeline = REGISTRY.pipeline(MessageFormat.JSON) + .add(JsonMessageProcessor.removeFieldsOperator("field1", "field3")) + .build(); + final var result = pipeline.apply(json.getBytes(StandardCharsets.UTF_8)); // Assert assertEquals(normalizeJson(expected), normalizeJson(new String(result, StandardCharsets.UTF_8))); @@ -225,16 +229,16 @@ void testRemoveFields() { void testRemoveNonExistingFields() { // Arrange final var json = """ - { - "field1":"value1" - } - """; + { + "field1":"value1" + } + """; // Act - final var result = JsonMessageProcessor.processJson( - json.getBytes(StandardCharsets.UTF_8), - JsonMessageProcessor.removeFieldsOperator("field2") - ); + final var pipeline = REGISTRY.pipeline(MessageFormat.JSON) + .add(JsonMessageProcessor.removeFieldsOperator("field2")) + .build(); + final var result = pipeline.apply(json.getBytes(StandardCharsets.UTF_8)); // Assert assertEquals(normalizeJson(json), normalizeJson(new String(result, StandardCharsets.UTF_8))); @@ -243,29 +247,28 @@ void testRemoveNonExistingFields() { @Test void testTransformField() { // Arrange - final var json = - """ - { - "message":"hello", - "count":5 - } - """; - final var expected = - """ - { - "message":"HELLO", - "count":5 - } - """; + final var json = """ + { + "message":"hello", + "count":5 + } + """; + final var expected = """ + { + "message":"HELLO", + "count":5 + } + """; // Act - final var result = JsonMessageProcessor.processJson( - json.getBytes(StandardCharsets.UTF_8), - JsonMessageProcessor.transformFieldOperator( - "message", - value -> value instanceof String ? ((String) value).toUpperCase() : value + final var pipeline = REGISTRY.pipeline(MessageFormat.JSON) + .add( + JsonMessageProcessor.transformFieldOperator("message", value -> + value instanceof String ? ((String) value).toUpperCase() : value + ) ) - ); + .build(); + final var result = pipeline.apply(json.getBytes(StandardCharsets.UTF_8)); // Assert assertEquals(normalizeJson(expected), normalizeJson(new String(result, StandardCharsets.UTF_8))); @@ -274,29 +277,28 @@ void testTransformField() { @Test void testTransformNumericField() { // Arrange - final var json = - """ - { - "message":"hello", - "count":5 - } - """; - final var expected = - """ - { - "message":"hello", - "count":10 - } - """; + final var json = """ + { + "message":"hello", + "count":5 + } + """; + final var expected = """ + { + "message":"hello", + "count":10 + } + """; // Act - final var result = JsonMessageProcessor.processJson( - json.getBytes(StandardCharsets.UTF_8), - JsonMessageProcessor.transformFieldOperator( - "count", - value -> value instanceof Number ? ((Number) value).intValue() * 2 : value + final var pipeline = REGISTRY.pipeline(MessageFormat.JSON) + .add( + JsonMessageProcessor.transformFieldOperator("count", value -> + value instanceof Number ? ((Number) value).intValue() * 2 : value + ) ) - ); + .build(); + final var result = pipeline.apply(json.getBytes(StandardCharsets.UTF_8)); // Assert assertEquals(normalizeJson(expected), normalizeJson(new String(result, StandardCharsets.UTF_8))); @@ -306,16 +308,16 @@ void testTransformNumericField() { void testTransformNonExistingField() { // Arrange final var json = """ - { - "message":"hello" - } - """; + { + "message":"hello" + } + """; // Act - final var result = JsonMessageProcessor.processJson( - json.getBytes(StandardCharsets.UTF_8), - JsonMessageProcessor.transformFieldOperator("nonExisting", value -> "transformed") - ); + final var pipeline = REGISTRY.pipeline(MessageFormat.JSON) + .add(JsonMessageProcessor.transformFieldOperator("nonExisting", value -> "transformed")) + .build(); + final var result = pipeline.apply(json.getBytes(StandardCharsets.UTF_8)); // Assert assertEquals(normalizeJson(json), normalizeJson(new String(result, StandardCharsets.UTF_8))); @@ -325,16 +327,16 @@ void testTransformNonExistingField() { void testMergeWith() { // Arrange final var json = """ - { - "original":"value" - } - """; + { + "original":"value" + } + """; // Act - final var result = JsonMessageProcessor.processJson( - json.getBytes(StandardCharsets.UTF_8), - JsonMessageProcessor.mergeWithOperator(Map.of("added1", "value1", "added2", 42)) - ); + final var pipeline = REGISTRY.pipeline(MessageFormat.JSON) + .add(JsonMessageProcessor.mergeWithOperator(Map.of("added1", "value1", "added2", 42))) + .build(); + final var result = pipeline.apply(json.getBytes(StandardCharsets.UTF_8)); final var resultMap = parseJsonToMap(result); // Assert @@ -347,16 +349,16 @@ void testMergeWith() { void testMergeWithOverlappingKeys() { // Arrange final var json = """ - { - "key":"originalValue" - } - """; + { + "key":"originalValue" + } + """; // Act - final var result = JsonMessageProcessor.processJson( - json.getBytes(StandardCharsets.UTF_8), - JsonMessageProcessor.mergeWithOperator(Map.of("key", "newValue")) - ); + final var pipeline = REGISTRY.pipeline(MessageFormat.JSON) + .add(JsonMessageProcessor.mergeWithOperator(Map.of("key", "newValue"))) + .build(); + final var result = pipeline.apply(json.getBytes(StandardCharsets.UTF_8)); final var resultMap = parseJsonToMap(result); // Assert diff --git a/lib/src/test/java/org/kpipe/registry/MessageProcessorRegistryTest.java b/lib/src/test/java/org/kpipe/registry/MessageProcessorRegistryTest.java index b1bd10f..c31e196 100644 --- a/lib/src/test/java/org/kpipe/registry/MessageProcessorRegistryTest.java +++ b/lib/src/test/java/org/kpipe/registry/MessageProcessorRegistryTest.java @@ -3,7 +3,6 @@ import static org.junit.jupiter.api.Assertions.*; import java.util.Map; -import java.util.function.Function; import java.util.function.UnaryOperator; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -22,7 +21,7 @@ void setUp() { void shouldContainDefaultOperators() { // Act var pipeline = registry - .jsonPipelineBuilder() + .pipeline(MessageFormat.JSON) .add(MessageProcessorRegistry.JSON_ADD_SOURCE) .add(MessageProcessorRegistry.JSON_ADD_TIMESTAMP) .add(MessageProcessorRegistry.JSON_MARK_PROCESSED) @@ -40,16 +39,13 @@ void shouldContainDefaultOperators() { void shouldRegisterAndRetrieveJsonOperator() { // Arrange final var key = RegistryKey.json("testOperator"); - registry.registerOperator( - key, - obj -> { - obj.put("test", "value"); - return obj; - } - ); + registry.registerOperator(key, obj -> { + obj.put("test", "value"); + return obj; + }); // Act - var pipeline = registry.jsonPipelineBuilder().add(key).build(); + var pipeline = registry.pipeline(MessageFormat.JSON).add(key).build(); var result = new String(pipeline.apply("{}".getBytes())); // Assert @@ -62,23 +58,17 @@ void shouldComposeJsonOperatorPipeline() { final var op1 = RegistryKey.json("op1"); final var op2 = RegistryKey.json("op2"); - registry.registerOperator( - op1, - obj -> { - obj.put("op1", "val1"); - return obj; - } - ); - registry.registerOperator( - op2, - obj -> { - obj.put("op2", "val2"); - return obj; - } - ); + registry.registerOperator(op1, obj -> { + obj.put("op1", "val1"); + return obj; + }); + registry.registerOperator(op2, obj -> { + obj.put("op2", "val2"); + return obj; + }); // Act - final var pipeline = registry.jsonPipelineBuilder().add(op1).add(op2).build(); + final var pipeline = registry.pipeline(MessageFormat.JSON).add(op1).add(op2).build(); final var result = new String(pipeline.apply("{}".getBytes())); // Assert @@ -89,106 +79,100 @@ void shouldComposeJsonOperatorPipeline() { @Test void shouldHandleErrorsGracefully() { // Arrange - Function processor = message -> { + UnaryOperator> operator = message -> { throw new RuntimeException("Test exception"); }; - final var safeProcessor = MessageProcessorRegistry.withErrorHandling(processor, "fallback".getBytes()); - - // Act - final var result = safeProcessor.apply("any input".getBytes()); + final var pipeline = registry.pipeline(MessageFormat.JSON).add(operator).build(); - // Assert - assertEquals("fallback", new String(result)); + // Act & Assert + assertNull(pipeline.apply("{}".getBytes())); } @Test void shouldApplyConditionBasedProcessing() { // Arrange - Function trueProcessor = message -> "true".getBytes(); - Function falseProcessor = message -> "false".getBytes(); + UnaryOperator> trueOp = message -> { + message.put("result", "true"); + return message; + }; + UnaryOperator> falseOp = message -> { + message.put("result", "false"); + return message; + }; // Act - final var conditionalProcessor = MessageProcessorRegistry.when( - message -> message.length > 5, - trueProcessor, - falseProcessor - ); + final var pipeline = registry + .pipeline(MessageFormat.JSON) + .when(obj -> obj.size() > 0, trueOp, falseOp) + .build(); // Assert - final var longMessage = "123456".getBytes(); - assertArrayEquals("true".getBytes(), conditionalProcessor.apply(longMessage)); + final var nonEmpty = "{\"key\":\"val\"}".getBytes(); + assertTrue(new String(pipeline.apply(nonEmpty)).contains("\"result\":\"true\"")); - // Test condition false - final var shortMessage = "1234".getBytes(); - assertArrayEquals("false".getBytes(), conditionalProcessor.apply(shortMessage)); + final var empty = "{}".getBytes(); + assertTrue(new String(pipeline.apply(empty)).contains("\"result\":\"false\"")); } @Test void shouldTrackRegisteredProcessors() { // Arrange - final var key = RegistryKey.>of("unique" + System.currentTimeMillis(), Function.class); - Function processor = message -> message; + final var key = RegistryKey.json("p1"); + final UnaryOperator> op = obj -> obj; + registry.registerOperator(key, op); // Act - registry.register(key, processor); - Map, Function> allProcessors = registry.getAll(); + var keys = registry.getKeys(); // Assert - assertTrue(allProcessors.containsKey(key)); + assertTrue(keys.contains(key)); } @Test void shouldUnregisterProcessor() { // Arrange - final var key = RegistryKey.>of("processorToRemove", Function.class); - - // Act - registry.register(key, msg -> msg); - - // Assert - assertTrue(registry.getAll().containsKey(key)); + final var key = RegistryKey.json("p1"); + registry.registerOperator(key, obj -> obj); // Act + assertTrue(registry.getKeys().contains(key)); boolean removed = registry.unregister(key); // Assert assertTrue(removed); - assertFalse(registry.getAll().containsKey(key)); + assertFalse(registry.getKeys().contains(key)); } @Test void shouldTrackMetrics() { // Arrange - final var key = RegistryKey.>of("metricsTest", Function.class); - registry.register(key, msg -> msg); + final var key = RegistryKey.json("metricsTest"); + registry.registerOperator(key, obj -> obj); + + final var pipeline = registry.pipeline(MessageFormat.JSON).add(key).build(); // Act - final var processor = registry.get(key); - processor.apply("test".getBytes()); - processor.apply("test2".getBytes()); + pipeline.apply("{}".getBytes()); + pipeline.apply("{}".getBytes()); // Assert final var metrics = registry.getMetrics(key); assertEquals(2L, metrics.get("invocationCount")); - assertEquals(0L, metrics.get("errorCount")); } @Test void shouldRegisterAndRetrieveTypedOperator() { // Arrange final var key = RegistryKey.json("typedOp"); - registry.registerOperator( - key, - obj -> { - obj.put("typed", "success"); - return obj; - } - ); + registry.registerOperator(key, obj -> { + obj.put("typed", "success"); + return obj; + }); // Act final var retrieved = registry.getOperator(key); - final var pipeline = registry.jsonPipelineBuilder().add(key).build(); + final var pipeline = registry.pipeline(MessageFormat.JSON).add(key).build(); final var result = new String(pipeline.apply("{}".getBytes())); // Assert @@ -200,17 +184,14 @@ void shouldRegisterAndRetrieveTypedOperator() { void shouldComposePipelineUsingBuilder() { // Arrange final var key1 = RegistryKey.json("builderOp1"); - registry.registerOperator( - key1, - obj -> { - obj.put("b1", "v1"); - return obj; - } - ); + registry.registerOperator(key1, obj -> { + obj.put("b1", "v1"); + return obj; + }); // Act final var pipeline = registry - .jsonPipelineBuilder() + .pipeline(MessageFormat.JSON) .add(key1) .add(obj -> { obj.put("b2", "v2"); @@ -262,7 +243,7 @@ void shouldRegisterOperatorsFromEnum() { assertNotNull(registry.getOperator(key1)); assertNotNull(registry.getOperator(key2)); - final var pipeline = registry.jsonPipelineBuilder().add(key1).add(key2).build(); + final var pipeline = registry.pipeline(MessageFormat.JSON).add(key1).add(key2).build(); final var result = new String(pipeline.apply("{}".getBytes())); assertTrue(result.contains("\"enum1\":\"v1\"")); diff --git a/lib/src/test/java/org/kpipe/registry/MessageSinkRegistryTest.java b/lib/src/test/java/org/kpipe/registry/MessageSinkRegistryTest.java index d9f26fe..42413ec 100644 --- a/lib/src/test/java/org/kpipe/registry/MessageSinkRegistryTest.java +++ b/lib/src/test/java/org/kpipe/registry/MessageSinkRegistryTest.java @@ -1,14 +1,10 @@ package org.kpipe.registry; import static org.junit.jupiter.api.Assertions.*; -import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.*; -import java.util.concurrent.atomic.AtomicInteger; -import org.apache.kafka.clients.consumer.ConsumerRecord; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; - import org.kpipe.sink.MessageSink; class MessageSinkRegistryTest { @@ -37,16 +33,13 @@ void shouldRegisterAndRetrieveSink() { final var key = RegistryKey.of("testSink", Object.class); // Act - registry.register(key, Object.class, testSink); - final var retrieved = registry.get(key, Object.class); + registry.register(key, testSink); + final var retrieved = registry.get(key); // Assert assertNotNull(retrieved); - - final var record = new ConsumerRecord("topic", 0, 0, "key", "value"); - retrieved.send(record, "processed"); - - verify(testSink).send(record, "processed"); + retrieved.accept("processed"); + verify(testSink).accept("processed"); } @Test @@ -54,7 +47,7 @@ void shouldUnregisterSink() { // Arrange final var testSink = mock(MessageSink.class); final var key = RegistryKey.of("sinkToRemove", Object.class); - registry.register(key, Object.class, testSink); + registry.register(key, testSink); // Act & Assert assertTrue(registry.getAll().containsKey(key)); @@ -68,7 +61,7 @@ void shouldUnregisterSink() { void shouldClearAllSinks() { // Arrange final var testSink = mock(MessageSink.class); - registry.register(RegistryKey.of("testSink", Object.class), Object.class, testSink); + registry.register(RegistryKey.of("testSink", Object.class), testSink); // Act registry.clear(); @@ -86,56 +79,53 @@ void shouldCreatePipelineThatSendsToMultipleSinks() { final var key1 = RegistryKey.of("sink1", Object.class); final var key2 = RegistryKey.of("sink2", Object.class); - registry.register(key1, Object.class, sink1); - registry.register(key2, Object.class, sink2); + registry.register(key1, sink1); + registry.register(key2, sink2); // Act - final var pipeline = registry.pipeline(Object.class, key1, key2); - final var record = new ConsumerRecord("topic", 0, 0, "key", "value"); - pipeline.send(record, "processed"); + final var pipeline = registry.pipeline(key1, key2); + pipeline.accept("processed"); // Assert - verify(sink1).send(record, "processed"); - verify(sink2).send(record, "processed"); + verify(sink1).accept("processed"); + verify(sink2).accept("processed"); } @Test void shouldContinuePipelineWhenOneSinkThrowsException() { // Arrange final var failingSink = mock(MessageSink.class); - doThrow(new RuntimeException("Test failure")).when(failingSink).send(any(), any()); + doThrow(new RuntimeException("Test failure")).when(failingSink).accept(any()); final var workingSink = mock(MessageSink.class); final var keyFailing = RegistryKey.of("failingSink", Object.class); final var keyWorking = RegistryKey.of("workingSink", Object.class); - registry.register(keyFailing, Object.class, failingSink); - registry.register(keyWorking, Object.class, workingSink); + registry.register(keyFailing, failingSink); + registry.register(keyWorking, workingSink); // Act - final var pipeline = registry.pipeline(Object.class, keyFailing, keyWorking); - final var record = new ConsumerRecord("topic", 0, 0, "key", "value"); - pipeline.send(record, "processed"); + final var pipeline = registry.pipeline(keyFailing, keyWorking); + pipeline.accept("processed"); // Assert - verify(workingSink).send(record, "processed"); + verify(workingSink).accept("processed"); } @Test void shouldTrackMetricsForSink() { // Arrange - final var callCount = new AtomicInteger(0); - final MessageSink countingSink = (record, value) -> callCount.incrementAndGet(); + final var callCount = new java.util.concurrent.atomic.AtomicInteger(0); + final MessageSink countingSink = value -> callCount.incrementAndGet(); final var key = RegistryKey.of("countingSink", Object.class); - registry.register(key, Object.class, countingSink); - final var sink = registry.get(key, Object.class); + registry.register(key, countingSink); + final var sink = registry.get(key); // Act - final var record = new ConsumerRecord("topic", 0, 0, "key", "value"); - sink.send(record, "processed"); - sink.send(record, "processed"); + sink.accept("processed"); + sink.accept("processed"); // Assert final var metrics = registry.getMetrics(key); @@ -146,18 +136,17 @@ final var record = new ConsumerRecord("topic", 0, 0, "key", "val @Test void shouldTrackErrorMetricsForFailingSink() { // Arrange - final MessageSink failingSink = (record, value) -> { + final MessageSink failingSink = value -> { throw new RuntimeException("Test failure"); }; final var key = RegistryKey.of("failingSink", Object.class); - registry.register(key, Object.class, failingSink); - final var sink = registry.get(key, Object.class); + registry.register(key, failingSink); + final var sink = registry.get(key); // Act - final var record = new ConsumerRecord("topic", 0, 0, "key", "value"); try { - sink.send(record, "processed"); + sink.accept("processed"); fail("Should have thrown an exception"); } catch (final RuntimeException e) { // Expected @@ -165,29 +154,27 @@ final var record = new ConsumerRecord("topic", 0, 0, "key", "val // Assert final var metrics = registry.getMetrics(key); - assertEquals(0L, metrics.get("invocationCount")); assertEquals(1L, metrics.get("errorCount")); } @Test void shouldWrapSinkWithErrorHandling() { // Arrange - final MessageSink failingSink = (record, value) -> { + final MessageSink failingSink = value -> { throw new RuntimeException("Test failure"); }; // Act final var safeSink = MessageSinkRegistry.withErrorHandling(failingSink); - final var record = new ConsumerRecord("topic", 0, 0, "key", "value"); // Assert - should not throw exception - safeSink.send(record, "processed"); + safeSink.accept("processed"); } @Test void shouldReturnEmptyMetricsForNonExistentSink() { // Act - final var metrics = registry.getMetrics(RegistryKey.of("nonExistentSink", Object.class)); + final var metrics = registry.getMetrics(RegistryKey.of("nonExistent", Object.class)); // Assert assertTrue(metrics.isEmpty()); @@ -199,13 +186,15 @@ void shouldRejectNullOrEmptyName() { final var testSink = mock(MessageSink.class); // Assert - assertThrows(NullPointerException.class, () -> registry.register(null, Object.class, testSink)); + assertThrows(NullPointerException.class, () -> registry.register(null, testSink)); } @Test void shouldRejectNullSink() { // Assert - assertThrows(NullPointerException.class, () -> registry.register(RegistryKey.of("test", Object.class), Object.class, null)); + assertThrows(NullPointerException.class, () -> + registry.register(RegistryKey.of("test", Object.class), null) + ); } @Test @@ -213,26 +202,27 @@ void shouldRegisterAndRetrieveTypedSink() { // Arrange final var key = RegistryKey.of("typedSink", String.class); @SuppressWarnings("unchecked") - final MessageSink testSink = mock(MessageSink.class); - registry.register(key, String.class, testSink); + final MessageSink testSink = mock(MessageSink.class); + registry.register(key, testSink); // Act - final var retrieved = registry.get(key, String.class); - final var record = new ConsumerRecord<>("topic", 0, 0, "key", "value"); - retrieved.send(record, "processed"); + final var retrieved = registry.get(key); + retrieved.accept("processed"); // Assert - assertNotNull(retrieved); - verify(testSink).send(record, "processed"); + verify(testSink).accept("processed"); } @Test void shouldThrowOnTypeMismatch() { // Arrange - final var key = RegistryKey.of("typeMismatchSink", Integer.class); - registry.register(key, String.class, (record, value) -> {}); + final var key = RegistryKey.of("typedSink", String.class); + registry.register(key, msg -> {}); // Act & Assert - assertThrows(IllegalArgumentException.class, () -> registry.get(key, Integer.class)); + assertThrows(ClassCastException.class, () -> { + final MessageSink retrieved = (MessageSink) (MessageSink) registry.get(key); + retrieved.accept(123); + }); } } diff --git a/lib/src/test/java/org/kpipe/registry/OptimizedPipelineTest.java b/lib/src/test/java/org/kpipe/registry/OptimizedPipelineTest.java index acdb4d8..3810321 100644 --- a/lib/src/test/java/org/kpipe/registry/OptimizedPipelineTest.java +++ b/lib/src/test/java/org/kpipe/registry/OptimizedPipelineTest.java @@ -4,11 +4,8 @@ import java.io.ByteArrayOutputStream; import java.nio.charset.StandardCharsets; -import org.apache.avro.Schema; import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericDatumWriter; -import org.apache.avro.generic.GenericRecord; -import org.apache.avro.io.BinaryEncoder; import org.apache.avro.io.EncoderFactory; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -36,7 +33,7 @@ void testOptimizedJsonPipeline() { registry.registerOperator(addStatus, JsonMessageProcessor.addFieldOperator("status", "processed")); // Create optimized pipeline - final var pipeline = registry.jsonPipelineBuilder().add(addSource).add(addStatus).build(); + final var pipeline = registry.pipeline(MessageFormat.JSON).add(addSource, addStatus).build(); // Process message final var input = "{\"id\":\"123\"}".getBytes(StandardCharsets.UTF_8); @@ -50,8 +47,7 @@ void testOptimizedJsonPipeline() { @Test void testOptimizedAvroPipeline() throws java.io.IOException { - final var schemaJson = - """ + final var schemaJson = """ { "type": "record", "name": "User", @@ -73,7 +69,8 @@ void testOptimizedAvroPipeline() throws java.io.IOException { registry.registerOperator(addStatus, AvroMessageProcessor.addFieldOperator("status", "processed")); // Create optimized pipeline - final var pipeline = registry.avroPipelineBuilder("user", 0).add(addSource).add(addStatus).build(); + final var format = ((AvroFormat) MessageFormat.AVRO).withDefaultSchema("user"); + final var pipeline = registry.pipeline(format).add(addSource, addStatus).build(); // Create input record final var record = new GenericData.Record(schema); @@ -91,26 +88,23 @@ final var record = new GenericData.Record(schema); final var output = pipeline.apply(input); // Verify - AvroMessageProcessor.processAvro( - output, - schema, - r -> { - assertEquals("123", r.get("id").toString()); - assertEquals(SOURCE_APP, r.get("source").toString()); - assertEquals("processed", r.get("status").toString()); - return r; - } - ); + assertNotNull(output); + final var decoded = format.deserialize(output); + assertEquals("123", decoded.get("id").toString()); + assertEquals(SOURCE_APP, decoded.get("source").toString()); + assertEquals("processed", decoded.get("status").toString()); } @Test void testOptimizedJsonPipelineDefaultOperators() { // Create optimized pipeline using default operators final var pipeline = registry - .jsonPipelineBuilder() - .add(MessageProcessorRegistry.JSON_ADD_SOURCE) - .add(MessageProcessorRegistry.JSON_ADD_TIMESTAMP) - .add(MessageProcessorRegistry.JSON_MARK_PROCESSED) + .pipeline(MessageFormat.JSON) + .add( + MessageProcessorRegistry.JSON_ADD_SOURCE, + MessageProcessorRegistry.JSON_ADD_TIMESTAMP, + MessageProcessorRegistry.JSON_MARK_PROCESSED + ) .build(); // Process message @@ -126,8 +120,7 @@ void testOptimizedJsonPipelineDefaultOperators() { @Test void testOptimizedAvroPipelineDefaultOperators() throws java.io.IOException { - final var schemaJson = - """ + final var schemaJson = """ { "type": "record", "name": "User", @@ -144,11 +137,23 @@ void testOptimizedAvroPipelineDefaultOperators() throws java.io.IOException { final var schema = AvroMessageProcessor.getSchema("user"); // Create optimized pipeline using default operators + // Note: Default operators are registered for the whole registry if format is JSON, + // but here we are using AVRO. We need to register them for AVRO specifically if we want to use + // them. + registry.registerOperator( + RegistryKey.avro("addSource"), + AvroMessageProcessor.addFieldOperator("source", SOURCE_APP) + ); + registry.registerOperator( + RegistryKey.avro("markProcessed"), + AvroMessageProcessor.addFieldOperator("processed", "true") + ); + registry.registerOperator(RegistryKey.avro("addTimestamp"), AvroMessageProcessor.addTimestampOperator("timestamp")); + + final var format = ((AvroFormat) MessageFormat.AVRO).withDefaultSchema("user"); final var pipeline = registry - .avroPipelineBuilder("user", 0) - .add(RegistryKey.avro("addSource_user")) - .add(RegistryKey.avro("addTimestamp_user")) - .add(RegistryKey.avro("markProcessed_user")) + .pipeline(format) + .add(RegistryKey.avro("addSource"), RegistryKey.avro("markProcessed"), RegistryKey.avro("addTimestamp")) .build(); // Create input record @@ -167,23 +172,17 @@ final var record = new GenericData.Record(schema); final var output = pipeline.apply(input); // Verify - AvroMessageProcessor.processAvro( - output, - schema, - r -> { - assertEquals("123", r.get("id").toString()); - assertEquals(SOURCE_APP, r.get("source").toString()); - assertEquals("true", r.get("processed").toString()); - assertNotNull(r.get("timestamp")); - return r; - } - ); + assertNotNull(output); + final var decoded = format.deserialize(output); + assertEquals("123", decoded.get("id").toString()); + assertEquals(SOURCE_APP, decoded.get("source").toString()); + assertEquals("true", decoded.get("processed").toString()); + assertNotNull(decoded.get("timestamp")); } @Test void testOptimizedAvroPipelineWithOffset() throws java.io.IOException { - final var schemaJson = - """ + final var schemaJson = """ { "type": "record", "name": "User", @@ -201,7 +200,8 @@ void testOptimizedAvroPipelineWithOffset() throws java.io.IOException { registry.registerOperator(addSource, AvroMessageProcessor.addFieldOperator("source", SOURCE_APP)); // Create optimized pipeline with offset 5 (simulating magic bytes) - final var pipeline = registry.avroPipelineBuilder("userOffset", 5).add(addSource).build(); + final var format = ((AvroFormat) MessageFormat.AVRO).withDefaultSchema("userOffset"); + final var pipeline = registry.pipeline(format).skipBytes(5).add(addSource).build(); // Create input record final var record = new GenericData.Record(schema); @@ -221,14 +221,9 @@ final var record = new GenericData.Record(schema); final var output = pipeline.apply(input); // Verify - AvroMessageProcessor.processAvro( - output, - schema, - r -> { - assertEquals("123", r.get("id").toString()); - assertEquals(SOURCE_APP, r.get("source").toString()); - return r; - } - ); + assertNotNull(output); + final var decoded = format.deserialize(output); + assertEquals("123", decoded.get("id").toString()); + assertEquals(SOURCE_APP, decoded.get("source").toString()); } } diff --git a/lib/src/test/java/org/kpipe/registry/PojoPipelineTest.java b/lib/src/test/java/org/kpipe/registry/PojoPipelineTest.java index a38ab3d..a8b0eac 100644 --- a/lib/src/test/java/org/kpipe/registry/PojoPipelineTest.java +++ b/lib/src/test/java/org/kpipe/registry/PojoPipelineTest.java @@ -15,7 +15,7 @@ void shouldProcessPojoPipeline() { registry.registerOperator(userMaskerKey, (UserRecord user) -> new UserRecord(user.id(), "MASKED", user.email())); // Build pipeline - final var pipeline = registry.pojoPipelineBuilder(UserRecord.class).add(userMaskerKey).build(); + final var pipeline = registry.pipeline(MessageFormat.pojo(UserRecord.class)).add(userMaskerKey).build(); // Initial data final var user = new UserRecord("1", "John Doe", "john@example.com"); @@ -41,7 +41,7 @@ void shouldHandleMixedOperatorsInPojoPipeline() { final byte[] inputBytes = format.serialize(user); final var pipeline = registry - .pojoPipelineBuilder(UserRecord.class) + .pipeline(MessageFormat.pojo(UserRecord.class)) .add(u -> new UserRecord(u.id(), u.name().toUpperCase(), u.email())) .add(u -> new UserRecord(u.id(), u.name(), "PROTECTED")) .build(); diff --git a/lib/src/test/java/org/kpipe/registry/RegistryFunctionsTest.java b/lib/src/test/java/org/kpipe/registry/RegistryFunctionsTest.java index 7396db1..250ffb5 100644 --- a/lib/src/test/java/org/kpipe/registry/RegistryFunctionsTest.java +++ b/lib/src/test/java/org/kpipe/registry/RegistryFunctionsTest.java @@ -5,7 +5,6 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicLong; -import java.util.function.BiConsumer; import java.util.function.Function; import org.junit.jupiter.api.Test; @@ -82,12 +81,12 @@ void shouldReturnDefaultValueOnFunctionError() { void shouldExecuteConsumerSuccessfully() { // Arrange final var counter = new AtomicLong(0); - final BiConsumer operation = (s, i) -> counter.incrementAndGet(); + final java.util.function.Consumer operation = s -> counter.incrementAndGet(); final var logger = mock(System.Logger.class); // Act final var safeConsumer = RegistryFunctions.withConsumerErrorHandling(operation, logger); - safeConsumer.accept("test", 42); + safeConsumer.accept("test"); // Assert assertEquals(1, counter.get()); @@ -98,14 +97,14 @@ void shouldExecuteConsumerSuccessfully() { @Test void shouldSuppressAndLogConsumerExceptions() { // Arrange - final BiConsumer operation = (s, i) -> { + final java.util.function.Consumer operation = s -> { throw new IllegalArgumentException("Test consumer exception"); }; final var logger = mock(System.Logger.class); // Act final var safeConsumer = RegistryFunctions.withConsumerErrorHandling(operation, logger); - safeConsumer.accept("test", 42); + safeConsumer.accept("test"); // Assert verify(logger, atLeastOnce()).log(any(System.Logger.Level.class), anyString(), any(Throwable.class)); diff --git a/lib/src/test/java/org/kpipe/sink/AvroConsoleSinkTest.java b/lib/src/test/java/org/kpipe/sink/AvroConsoleSinkTest.java index c812fd6..348b754 100644 --- a/lib/src/test/java/org/kpipe/sink/AvroConsoleSinkTest.java +++ b/lib/src/test/java/org/kpipe/sink/AvroConsoleSinkTest.java @@ -10,16 +10,14 @@ import org.apache.avro.generic.GenericDatumWriter; import org.apache.avro.generic.GenericRecord; import org.apache.avro.io.EncoderFactory; -import org.apache.kafka.clients.consumer.ConsumerRecord; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; class AvroConsoleSinkTest { - private static final Schema TEST_SCHEMA = new Schema.Parser() - .parse( - """ + private static final Schema TEST_SCHEMA = new Schema.Parser().parse( + """ { "type": "record", "name": "Test", @@ -29,9 +27,9 @@ class AvroConsoleSinkTest { ] } """ - ); + ); - private AvroConsoleSink sink; + private AvroConsoleSink sink; private CapturingHandler handler; private Logger julLogger; @@ -56,78 +54,42 @@ private String output() { @Test void shouldLogTopicInOutput() { - final var record = new ConsumerRecord<>("my-topic", 0, 0L, "key1", (Object) "value1"); - sink.send(record, "value1"); - assertTrue(output().contains("my-topic"), "Expected topic in log output"); - } - - @Test - void shouldLogKeyInOutput() { - final var record = new ConsumerRecord<>("test-topic", 0, 0L, "my-key", (Object) "value1"); - sink.send(record, "value1"); - assertTrue(output().contains("my-key"), "Expected key in log output"); - } - - @Test - void shouldLogPartitionAndOffsetInOutput() { - final var record = new ConsumerRecord<>("test-topic", 3, 42L, "key1", (Object) "value1"); - sink.send(record, "value1"); - final var out = output(); - assertTrue(out.contains("\"partition\":3"), "Expected partition:3 in log output"); - assertTrue(out.contains("\"offset\":42"), "Expected offset:42 in log output"); + sink.accept("value1"); + assertTrue(output().contains("processedMessage"), "Expected log output"); } @Test void shouldOutputValidJsonStructure() { - final var record = new ConsumerRecord<>("test-topic", 2, 7L, "my-key", (Object) "my-value"); - sink.send(record, "my-value"); + sink.accept("my-value"); final var out = output(); - assertTrue(out.contains("\"topic\":\"test-topic\""), "Expected topic value"); - assertTrue(out.contains("\"partition\":2"), "Expected partition value"); - assertTrue(out.contains("\"offset\":7"), "Expected offset value"); - assertTrue(out.contains("\"key\":\"my-key\""), "Expected key value"); assertTrue(out.contains("\"processedMessage\":\"my-value\""), "Expected processedMessage value"); } @Test void shouldHandleNullValue() { - final var record = new ConsumerRecord<>("test-topic", 0, 1L, "key1", (Object) null); - sink.send(record, null); + sink.accept(null); assertTrue(output().contains("null"), "Expected 'null' in log output"); } @Test void shouldHandleEmptyByteArray() { - final var record = new ConsumerRecord("test-topic", 0, 2L, "key1", new byte[0]); - sink.send(record, new byte[0]); + sink.accept(new byte[0]); assertTrue(output().contains("empty"), "Expected 'empty' for zero-length byte array"); } - @Test - void shouldHandleNullKey() { - final var record = new ConsumerRecord("test-topic", 0, 3L, null, "value"); - sink.send(record, "value"); - assertTrue(output().contains("null"), "Expected 'null' key in output"); - } - @Test void shouldHandleInvalidAvroDataWithoutThrowing() { final var bytes = "not avro".getBytes(StandardCharsets.UTF_8); - final var record = new ConsumerRecord("test-topic", 0, 5L, "key1", bytes); - sink.send(record, bytes); + sink.accept(bytes); final var out = output(); - assertTrue(out.contains("\"topic\":\"test-topic\""), "Expected topic value"); assertTrue(out.contains("\"processedMessage\":\"\""), "Expected empty processedMessage on Avro parse failure"); } @Test void shouldFormatValidAvroByteArrayAsJson() throws Exception { final var avroBytes = createAvroBytes(); - final var record = new ConsumerRecord("test-topic", 0, 4L, "key1", avroBytes); - sink.send(record, avroBytes); + sink.accept(avroBytes); final var out = output(); - assertTrue(out.contains("\"topic\":\"test-topic\""), "Expected topic value"); - assertTrue(out.contains("\"key\":\"key1\""), "Expected key value"); // Avro JSON encoder encodes string fields as {"string":"value"} assertTrue(out.contains("test-id"), "Expected decoded Avro 'id' value"); assertTrue(out.contains("test-value"), "Expected decoded Avro 'value' value"); diff --git a/lib/src/test/java/org/kpipe/sink/CompositeMessageSinkTest.java b/lib/src/test/java/org/kpipe/sink/CompositeMessageSinkTest.java index dde6631..8c006c8 100644 --- a/lib/src/test/java/org/kpipe/sink/CompositeMessageSinkTest.java +++ b/lib/src/test/java/org/kpipe/sink/CompositeMessageSinkTest.java @@ -4,7 +4,6 @@ import static org.mockito.Mockito.*; import java.util.List; -import org.apache.kafka.clients.consumer.ConsumerRecord; import org.junit.jupiter.api.Test; class CompositeMessageSinkTest { @@ -13,38 +12,36 @@ class CompositeMessageSinkTest { @SuppressWarnings("unchecked") void shouldBroadcastToAllSinks() { // Given - final var sink1 = (MessageSink) mock(MessageSink.class); - final var sink2 = (MessageSink) mock(MessageSink.class); + final var sink1 = (MessageSink) mock(MessageSink.class); + final var sink2 = (MessageSink) mock(MessageSink.class); final var compositeSink = new CompositeMessageSink<>(List.of(sink1, sink2)); - final var record = new ConsumerRecord<>("topic", 0, 0L, "key", "value"); final var processedValue = "processed"; // When - compositeSink.send(record, processedValue); + compositeSink.accept(processedValue); // Then - verify(sink1).send(record, processedValue); - verify(sink2).send(record, processedValue); + verify(sink1).accept(processedValue); + verify(sink2).accept(processedValue); } @Test @SuppressWarnings("unchecked") void shouldContinueOnSinkFailure() { // Given - final var failingSink = (MessageSink) mock(MessageSink.class); - final var successfulSink = (MessageSink) mock(MessageSink.class); + final var failingSink = (MessageSink) mock(MessageSink.class); + final var successfulSink = (MessageSink) mock(MessageSink.class); - doThrow(new RuntimeException("Sink failed")).when(failingSink).send(any(), any()); + doThrow(new RuntimeException("Sink failed")).when(failingSink).accept(any()); final var compositeSink = new CompositeMessageSink<>(List.of(failingSink, successfulSink)); - final var record = new ConsumerRecord<>("topic", 0, 0L, "key", "value"); final var processedValue = "processed"; // When - assertDoesNotThrow(() -> compositeSink.send(record, processedValue)); + assertDoesNotThrow(() -> compositeSink.accept(processedValue)); // Then - verify(failingSink).send(record, processedValue); - verify(successfulSink).send(record, processedValue); + verify(failingSink).accept(processedValue); + verify(successfulSink).accept(processedValue); } } diff --git a/lib/src/test/java/org/kpipe/sink/CompositeSinkIntegrationTest.java b/lib/src/test/java/org/kpipe/sink/CompositeSinkIntegrationTest.java index a5587be..f7766e2 100644 --- a/lib/src/test/java/org/kpipe/sink/CompositeSinkIntegrationTest.java +++ b/lib/src/test/java/org/kpipe/sink/CompositeSinkIntegrationTest.java @@ -5,7 +5,6 @@ import java.sql.DriverManager; import java.util.ArrayList; import java.util.List; -import org.apache.kafka.clients.consumer.ConsumerRecord; import org.junit.jupiter.api.Assumptions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; @@ -51,7 +50,7 @@ void testCompositeSinkWithPostgresAndCapturingSink() throws Exception { } // 2. Define the Postgres Sink - final MessageSink postgresSink = (record, value) -> { + final MessageSink postgresSink = value -> { try ( final var conn = DriverManager.getConnection( postgres.getJdbcUrl(), @@ -60,7 +59,7 @@ void testCompositeSinkWithPostgresAndCapturingSink() throws Exception { ); final var pstmt = conn.prepareStatement("INSERT INTO processed_messages (id, content) VALUES (?, ?)") ) { - pstmt.setString(1, record.key()); + pstmt.setString(1, "msg-123"); pstmt.setString(2, value); pstmt.executeUpdate(); } catch (final Exception e) { @@ -69,34 +68,27 @@ void testCompositeSinkWithPostgresAndCapturingSink() throws Exception { }; // 3. Define a Capturing Sink for verification - final var capturingSink = new MessageSink() { + final var capturingSink = new MessageSink() { private final List values = new ArrayList<>(); - private final List> records = new ArrayList<>(); @Override - public void send(final ConsumerRecord record, final String processedValue) { - records.add(record); + public void accept(final String processedValue) { values.add(processedValue); } public List getValues() { return values; } - - public List> getRecords() { - return records; - } }; // 4. Create the Composite Sink final var compositeSink = new CompositeMessageSink<>(List.of(postgresSink, capturingSink)); // 5. Execute - final var record = new ConsumerRecord<>("test-topic", 0, 0, "msg-123", "original-content"); - compositeSink.send(record, "processed-content"); + compositeSink.accept("processed-content"); // 6. Verify Capturing Sink - assertEquals(1, capturingSink.getRecords().size()); + assertEquals(1, capturingSink.getValues().size()); assertEquals("processed-content", capturingSink.getValues().getFirst()); // 7. Verify Database diff --git a/lib/src/test/java/org/kpipe/sink/JsonConsoleSinkTest.java b/lib/src/test/java/org/kpipe/sink/JsonConsoleSinkTest.java index 5a28025..4308332 100644 --- a/lib/src/test/java/org/kpipe/sink/JsonConsoleSinkTest.java +++ b/lib/src/test/java/org/kpipe/sink/JsonConsoleSinkTest.java @@ -4,14 +4,13 @@ import java.nio.charset.StandardCharsets; import java.util.logging.Logger; -import org.apache.kafka.clients.consumer.ConsumerRecord; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; class JsonConsoleSinkTest { - private JsonConsoleSink sink; + private JsonConsoleSink sink; private CapturingHandler handler; private Logger julLogger; @@ -36,48 +35,29 @@ private String output() { @Test void shouldLogTopicInOutput() { - final var record = new ConsumerRecord<>("my-topic", 0, 0L, "key1", (Object) "value1"); - sink.send(record, "value1"); - assertTrue(output().contains("my-topic"), "Expected topic in log output"); - } - - @Test - void shouldLogKeyInOutput() { - final var record = new ConsumerRecord<>("test-topic", 0, 0L, "my-key", (Object) "value1"); - sink.send(record, "value1"); - assertTrue(output().contains("my-key"), "Expected key in log output"); - } - - @Test - void shouldLogPartitionAndOffsetInOutput() { - final var record = new ConsumerRecord<>("test-topic", 3, 42L, "key1", (Object) "value1"); - sink.send(record, "value1"); - final var out = output(); - assertTrue(out.contains("\"partition\":3"), "Expected partition:3 in log output"); - assertTrue(out.contains("\"offset\":42"), "Expected offset:42 in log output"); + sink.accept("value1"); + assertTrue(output().contains("processedMessage"), "Expected log output"); } @Test void shouldHandleNullValue() { - final var record = new ConsumerRecord<>("test-topic", 0, 1L, "key1", (Object) null); - sink.send(record, null); + sink.accept(null); assertTrue(output().contains("null"), "Expected 'null' in log output"); } @Test void shouldHandleEmptyByteArray() { - final var record = new ConsumerRecord("test-topic", 0, 2L, "key1", new byte[0]); - sink.send(record, new byte[0]); + sink.accept(new byte[0]); assertTrue(output().contains("empty"), "Expected 'empty' for zero-length byte array"); } @Test void shouldFormatJsonByteArray() { final var json = """ - {"field":"value"} - """.strip().getBytes(StandardCharsets.UTF_8); - final var record = new ConsumerRecord("test-topic", 0, 3L, "key1", json); - sink.send(record, json); + {"field":"value"} + """.strip() + .getBytes(StandardCharsets.UTF_8); + sink.accept(json); final var out = output(); assertTrue(out.contains("field") && out.contains("value"), "Expected JSON content in output"); assertFalse(out.contains("Failed to parse/format JSON content"), "Did not expect JSON parse error logs"); @@ -86,59 +66,45 @@ final var record = new ConsumerRecord("test-topic", 0, 3L, "key1 @Test void shouldHandleNonJsonByteArray() { final var bytes = """ - plain text - """.strip().getBytes(StandardCharsets.UTF_8); - final var record = new ConsumerRecord("test-topic", 0, 4L, "key1", bytes); - sink.send(record, bytes); + plain text + """.strip() + .getBytes(StandardCharsets.UTF_8); + sink.accept(bytes); assertTrue(output().contains("plain text"), "Expected raw string for non-JSON bytes"); } @Test void shouldHandleJsonArray() { final var json = """ - [{"a":1},{"b":2}] - """.strip().getBytes(StandardCharsets.UTF_8); - final var record = new ConsumerRecord("test-topic", 0, 7L, "key1", json); - sink.send(record, json); + [{"a":1},{"b":2}] + """.strip() + .getBytes(StandardCharsets.UTF_8); + sink.accept(json); final var out = output(); - assertTrue(out.contains("test-topic"), "Expected topic in output for JSON array"); assertFalse(out.contains("Failed to parse/format JSON content"), "Did not expect JSON parse error logs"); } @Test void shouldHandleInvalidJsonByteArray() { final var bytes = """ - {invalid json - """.strip().getBytes(StandardCharsets.UTF_8); - final var record = new ConsumerRecord("test-topic", 0, 8L, "key1", bytes); - sink.send(record, bytes); + {invalid json + """.strip() + .getBytes(StandardCharsets.UTF_8); + sink.accept(bytes); assertTrue(output().contains("invalid json"), "Expected fallback raw string for invalid JSON"); } @Test void shouldHandleLargeMessage() { final var large = "x".repeat(10_000); - final var record = new ConsumerRecord<>("test-topic", 0, 9L, "key1", (Object) large); - sink.send(record, large); + sink.accept(large); assertTrue(output().contains("x"), "Expected large message content in output"); } - @Test - void shouldHandleNullKey() { - final var record = new ConsumerRecord("test-topic", 0, 10L, null, "value"); - sink.send(record, "value"); - assertTrue(output().contains("null"), "Expected 'null' key in output"); - } - @Test void shouldOutputValidJsonStructure() { - final var record = new ConsumerRecord<>("test-topic", 2, 7L, "my-key", (Object) "my-value"); - sink.send(record, "my-value"); + sink.accept("my-value"); final var out = output(); - assertTrue(out.contains("\"topic\":\"test-topic\""), "Expected topic value"); - assertTrue(out.contains("\"partition\":2"), "Expected partition value"); - assertTrue(out.contains("\"offset\":7"), "Expected offset value"); - assertTrue(out.contains("\"key\":\"my-key\""), "Expected key value"); assertTrue(out.contains("\"processedMessage\":\"my-value\""), "Expected processedMessage value"); } } From af11f44ca7d8965cda772d293360e091c6de686c Mon Sep 17 00:00:00 2001 From: mariano Date: Wed, 1 Apr 2026 06:23:08 +0900 Subject: [PATCH 02/14] feat: support terminal sink in MessagePipeline and update consumer processing logic - Add getSink() method to MessagePipeline interface for terminal sink retrieval - Update KPipeConsumer to invoke pipeline sink if present, with fallback to consumer sink - Adjust TypedPipelineBuilder to implement getSink() - Improve documentation for inScopedCaches methods and registry sink wrapping - Enhance AvroFormat deserialization with error handling - Update App to use withPipeline instead of withProcessor --- app/avro/src/main/java/org/kpipe/App.java | 2 +- app/json/src/main/java/org/kpipe/App.java | 2 +- .../java/org/kpipe/consumer/KPipeConsumer.java | 18 ++++++++++++++++++ .../kpipe/processor/AvroMessageProcessor.java | 4 ++++ .../kpipe/processor/JsonMessageProcessor.java | 4 ++++ .../java/org/kpipe/registry/AvroFormat.java | 6 +++++- .../org/kpipe/registry/MessagePipeline.java | 8 ++++++++ .../registry/MessageProcessorRegistry.java | 9 ++++++++- .../kpipe/registry/TypedPipelineBuilder.java | 8 +++++--- 9 files changed, 54 insertions(+), 7 deletions(-) diff --git a/app/avro/src/main/java/org/kpipe/App.java b/app/avro/src/main/java/org/kpipe/App.java index 4c37b95..27682e0 100644 --- a/app/avro/src/main/java/org/kpipe/App.java +++ b/app/avro/src/main/java/org/kpipe/App.java @@ -132,7 +132,7 @@ public static KPipeConsumer createConsumer( return KPipeConsumer.builder() .withProperties(kafkaProps) .withTopic(config.topic()) - .withProcessor(createAvroProcessorPipeline(processorRegistry, config, schemaRegistryUrl)) + .withPipeline(createAvroProcessorPipeline(processorRegistry, config, schemaRegistryUrl)) .withPollTimeout(config.pollTimeout()) .withCommandQueue(commandQueue) .withOffsetManagerProvider(createOffsetManagerProvider(Duration.ofSeconds(30), commandQueue)) diff --git a/app/json/src/main/java/org/kpipe/App.java b/app/json/src/main/java/org/kpipe/App.java index 28339d2..973469e 100644 --- a/app/json/src/main/java/org/kpipe/App.java +++ b/app/json/src/main/java/org/kpipe/App.java @@ -115,7 +115,7 @@ public static KPipeConsumer createConsumer( return KPipeConsumer.builder() .withProperties(kafkaProps) .withTopic(config.topic()) - .withProcessor(createJsonProcessorPipeline(processorRegistry, config)) + .withPipeline(createJsonProcessorPipeline(processorRegistry, config)) .withPollTimeout(config.pollTimeout()) .withCommandQueue(commandQueue) .withOffsetManagerProvider(createOffsetManagerProvider(Duration.ofSeconds(30), commandQueue)) diff --git a/lib/src/main/java/org/kpipe/consumer/KPipeConsumer.java b/lib/src/main/java/org/kpipe/consumer/KPipeConsumer.java index 467c948..fcdc97b 100644 --- a/lib/src/main/java/org/kpipe/consumer/KPipeConsumer.java +++ b/lib/src/main/java/org/kpipe/consumer/KPipeConsumer.java @@ -815,6 +815,24 @@ private boolean tryProcessRecord(final ConsumerRecord record) { } try { + if (processor instanceof org.kpipe.registry.MessagePipeline typedPipeline) { + final var recordValue = (byte[]) record.value(); + final var deserialized = typedPipeline.deserialize(recordValue); + if (deserialized == null) return false; + + final var processed = typedPipeline.process(deserialized); + if (processed == null) return false; + + final var sink = typedPipeline.getSink(); + // Call configured sink for typed object + if (sink != null) sink.accept(processed); + // Fallback to consumer sink + else messageSink.accept((V) processed); + + if (offsetManager != null) commandQueue.offer(new ConsumerCommand.MarkOffsetProcessed(record)); + return true; + } + final var processedValue = processor.apply(record.value()); messageSink.accept(processedValue); if (offsetManager != null) commandQueue.offer(new ConsumerCommand.MarkOffsetProcessed(record)); diff --git a/lib/src/main/java/org/kpipe/processor/AvroMessageProcessor.java b/lib/src/main/java/org/kpipe/processor/AvroMessageProcessor.java index cef493a..68ed0a2 100644 --- a/lib/src/main/java/org/kpipe/processor/AvroMessageProcessor.java +++ b/lib/src/main/java/org/kpipe/processor/AvroMessageProcessor.java @@ -292,6 +292,10 @@ private static boolean isCompatibleWithSchema(Object value, Schema schema) { }; } + /// Executes an operation within scoped caches for Avro processing. + /// + /// @param operation The operation to execute within the scoped caches. + /// @return The result of the operation. public static T inScopedCaches(final ScopedValue.CallableOp operation) { try { return ScopedValue.where(OUTPUT_STREAM_CACHE, new ByteArrayOutputStream(8192)) diff --git a/lib/src/main/java/org/kpipe/processor/JsonMessageProcessor.java b/lib/src/main/java/org/kpipe/processor/JsonMessageProcessor.java index f09c403..f46cd7c 100644 --- a/lib/src/main/java/org/kpipe/processor/JsonMessageProcessor.java +++ b/lib/src/main/java/org/kpipe/processor/JsonMessageProcessor.java @@ -119,6 +119,10 @@ public static UnaryOperator> mergeWithOperator(final Map T inScopedCaches(final ScopedValue.CallableOp operation) { try { return ScopedValue.where(OUTPUT_STREAM_CACHE, new ByteArrayOutputStream(8192)).call(operation); diff --git a/lib/src/main/java/org/kpipe/registry/AvroFormat.java b/lib/src/main/java/org/kpipe/registry/AvroFormat.java index 62b3a60..e9b0cba 100644 --- a/lib/src/main/java/org/kpipe/registry/AvroFormat.java +++ b/lib/src/main/java/org/kpipe/registry/AvroFormat.java @@ -135,7 +135,11 @@ public GenericRecord deserialize(final byte[] data) { return AvroMessageProcessor.inScopedCaches(() -> { final var datumReader = new org.apache.avro.generic.GenericDatumReader(schema); final var decoder = org.apache.avro.io.DecoderFactory.get().binaryDecoder(data, null); - return datumReader.read(null, decoder); + try { + return datumReader.read(null, decoder); + } catch (final IOException e) { + throw new RuntimeException("Failed to deserialize Avro record", e); + } }); } } diff --git a/lib/src/main/java/org/kpipe/registry/MessagePipeline.java b/lib/src/main/java/org/kpipe/registry/MessagePipeline.java index 9902f33..b069893 100644 --- a/lib/src/main/java/org/kpipe/registry/MessagePipeline.java +++ b/lib/src/main/java/org/kpipe/registry/MessagePipeline.java @@ -1,6 +1,7 @@ package org.kpipe.registry; import java.util.function.UnaryOperator; +import org.kpipe.sink.MessageSink; /// A unified pipeline interface that encapsulates the lifecycle: /// byte[] (Kafka) -> T (Deserialized Object) -> T (Processed Object) -> byte[] (Kafka). @@ -25,6 +26,13 @@ public interface MessagePipeline extends UnaryOperator { /// @return The processed object. T process(T data); + /// Returns the terminal sink configured for this pipeline, if any. + /// + /// @return The message sink, or null if none is configured. + default MessageSink getSink() { + return null; + } + /// Implementation of UnaryOperator.apply that executes the full pipeline lifecycle. /// /// @param data The input bytes. diff --git a/lib/src/main/java/org/kpipe/registry/MessageProcessorRegistry.java b/lib/src/main/java/org/kpipe/registry/MessageProcessorRegistry.java index 0175314..43a6fd5 100644 --- a/lib/src/main/java/org/kpipe/registry/MessageProcessorRegistry.java +++ b/lib/src/main/java/org/kpipe/registry/MessageProcessorRegistry.java @@ -137,7 +137,8 @@ public & UnaryOperator> void registerEnum(final Class The type of data the operator processes /// @param key The type-safe key to retrieve - /// @return The registered operator, or null if not found + /// @param operator The operator to wrap + /// @return The wrapped operator, or the original operator if no wrapping is needed @SuppressWarnings("unchecked") public UnaryOperator wrapOperator(final RegistryKey key, final UnaryOperator operator) { final var entry = (RegistryEntry>) registryMap.get(key); @@ -145,6 +146,12 @@ public UnaryOperator wrapOperator(final RegistryKey key, final UnaryOp return entry.wrapOperator(entry.value); } + /// Wraps a sink with additional functionality, such as metrics collection. + /// + /// @param The type of data the sink processes + /// @param key The type-safe key to retrieve + /// @param sink The sink to wrap + /// @return The wrapped sink, or the original sink if no wrapping is needed @SuppressWarnings("unchecked") public MessageSink wrapSink(final RegistryKey key, final MessageSink sink) { final var entry = (RegistryEntry>) registryMap.get(key); diff --git a/lib/src/main/java/org/kpipe/registry/TypedPipelineBuilder.java b/lib/src/main/java/org/kpipe/registry/TypedPipelineBuilder.java index c2b70a2..feac9de 100644 --- a/lib/src/main/java/org/kpipe/registry/TypedPipelineBuilder.java +++ b/lib/src/main/java/org/kpipe/registry/TypedPipelineBuilder.java @@ -117,6 +117,11 @@ public MessagePipeline build() { final var bytesToSkip = this.skipBytes; return new MessagePipeline() { + @Override + public MessageSink getSink() { + return pipelineSink; + } + @Override public T deserialize(byte[] data) { if (data == null) { @@ -153,9 +158,6 @@ public T process(T data) { return null; } } - if (pipelineSink != null) { - pipelineSink.accept(current); - } return current; } }; From 2e34edb481375e104a681df6033712a44b6d1a1a Mon Sep 17 00:00:00 2001 From: mariano Date: Wed, 1 Apr 2026 06:33:57 +0900 Subject: [PATCH 03/14] refactor: unify processor pipeline creation and update registry API for typed pipelines - Replace format-specific pipeline builders with unified pipeline(MessageFormat) API in MessageProcessorRegistry - Update App to use the new typed pipeline builder for protobuf processing - Simplify consumer creation by removing explicit sink registry wiring - Improve documentation and usage examples for new pipeline API in registry and processor classes --- app/protobuf/src/main/java/org/kpipe/App.java | 31 ++++++------------- .../kpipe/processor/AvroMessageProcessor.java | 7 +++-- .../kpipe/processor/JsonMessageProcessor.java | 7 +++-- .../registry/MessageProcessorRegistry.java | 19 +++++++++--- 4 files changed, 31 insertions(+), 33 deletions(-) diff --git a/app/protobuf/src/main/java/org/kpipe/App.java b/app/protobuf/src/main/java/org/kpipe/App.java index 6b76a90..5acffa4 100644 --- a/app/protobuf/src/main/java/org/kpipe/App.java +++ b/app/protobuf/src/main/java/org/kpipe/App.java @@ -10,6 +10,7 @@ import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Function; +import java.util.function.UnaryOperator; import org.apache.kafka.clients.consumer.Consumer; import org.kpipe.config.AppConfig; import org.kpipe.config.KafkaConsumerConfig; @@ -59,9 +60,10 @@ static void main() { /// /// @param config The application configuration public App(final AppConfig config) { - processorRegistry = new MessageProcessorRegistry(config.appName(), MessageFormat.PROTOBUF); - sinkRegistry = new MessageSinkRegistry(); - kpipeConsumer = createConsumer(config, processorRegistry, sinkRegistry); + this.processorRegistry = new MessageProcessorRegistry(config.appName(), MessageFormat.PROTOBUF); + this.sinkRegistry = processorRegistry.sinkRegistry(); + + this.kpipeConsumer = createConsumer(config, processorRegistry); final var consumerMetricsReporter = ConsumerMetricsReporter.forConsumer(kpipeConsumer::getMetrics); @@ -99,12 +101,10 @@ private ConsumerRunner> createConsumerRunner( /// /// @param config The application configuration /// @param processorRegistry Map of processor functions - /// @param sinkRegistry Map of sink functions /// @return A configured functional consumer public static KPipeConsumer createConsumer( final AppConfig config, - final MessageProcessorRegistry processorRegistry, - final MessageSinkRegistry sinkRegistry + final MessageProcessorRegistry processorRegistry ) { final var kafkaProps = KafkaConsumerConfig.createConsumerConfig(config.bootstrapServers(), config.consumerGroup()); final var commandQueue = new ConcurrentLinkedQueue(); @@ -112,9 +112,8 @@ public static KPipeConsumer createConsumer( return KPipeConsumer.builder() .withProperties(kafkaProps) .withTopic(config.topic()) - .withProcessor(createJsonProcessorPipeline(processorRegistry)) + .withPipeline(createProtobufProcessorPipeline(processorRegistry)) .withPollTimeout(config.pollTimeout()) - .withMessageSink(createSinksPipeline(sinkRegistry)) .withCommandQueue(commandQueue) .withOffsetManagerProvider(createOffsetManagerProvider(Duration.ofSeconds(30), commandQueue)) .withMetrics(true) @@ -134,25 +133,13 @@ private static Function, OffsetManager> KafkaOffsetManager.builder(consumer).withCommandQueue(commandQueue).withCommitInterval(commitInterval).build(); } - /// Creates a message sink pipeline using the provided registry. - /// - /// @param registry the message sink registry - /// @return a message sink that processes messages through the pipeline - private static MessageSink createSinksPipeline(final MessageSinkRegistry registry) { - final var pipeline = registry.pipeline(byte[].class, MessageSinkRegistry.JSON_LOGGING); - return MessageSinkRegistry.withErrorHandling(pipeline); - } - /// Creates a processor pipeline using the provided registry. /// /// @param registry the message processor registry /// @return a function that processes messages through the pipeline - private static Function createJsonProcessorPipeline(final MessageProcessorRegistry registry) { + private static UnaryOperator createProtobufProcessorPipeline(final MessageProcessorRegistry registry) { return registry - .jsonPipelineBuilder() - .add(MessageProcessorRegistry.JSON_ADD_SOURCE) - .add(MessageProcessorRegistry.JSON_MARK_PROCESSED) - .add(MessageProcessorRegistry.JSON_ADD_TIMESTAMP) + .pipeline(MessageFormat.PROTOBUF) .build(); } diff --git a/lib/src/main/java/org/kpipe/processor/AvroMessageProcessor.java b/lib/src/main/java/org/kpipe/processor/AvroMessageProcessor.java index 68ed0a2..3d43582 100644 --- a/lib/src/main/java/org/kpipe/processor/AvroMessageProcessor.java +++ b/lib/src/main/java/org/kpipe/processor/AvroMessageProcessor.java @@ -35,7 +35,7 @@ /// AvroMessageProcessor.registerSchema("userSchema", userSchemaJson); /// /// // Create an optimized pipeline using these processors -/// final var pipeline = registry.avroPipelineBuilder("userSchema") +/// final var pipeline = registry.pipeline(MessageFormat.AVRO) /// .add(RegistryKey.avro("addTimestamp_userSchema")) /// .build(); /// @@ -294,8 +294,9 @@ private static boolean isCompatibleWithSchema(Object value, Schema schema) { /// Executes an operation within scoped caches for Avro processing. /// - /// @param operation The operation to execute within the scoped caches. - /// @return The result of the operation. + /// @param The result type of the operation + /// @param operation The operation to execute within the scoped caches + /// @return The result of the operation public static T inScopedCaches(final ScopedValue.CallableOp operation) { try { return ScopedValue.where(OUTPUT_STREAM_CACHE, new ByteArrayOutputStream(8192)) diff --git a/lib/src/main/java/org/kpipe/processor/JsonMessageProcessor.java b/lib/src/main/java/org/kpipe/processor/JsonMessageProcessor.java index f46cd7c..1abce36 100644 --- a/lib/src/main/java/org/kpipe/processor/JsonMessageProcessor.java +++ b/lib/src/main/java/org/kpipe/processor/JsonMessageProcessor.java @@ -16,7 +16,7 @@ /// /// ```java /// // Create an optimized pipeline using these processors -/// final var pipeline = registry.jsonPipelineBuilder() +/// final var pipeline = registry.pipeline(MessageFormat.JSON) /// .add(RegistryKey.json("addTimestamp")) /// .add(RegistryKey.json("sanitizeData")) /// .build(); @@ -121,8 +121,9 @@ public static UnaryOperator> mergeWithOperator(final Map The result type of the operation + /// @param operation The operation to execute within the scoped caches + /// @return The result of the operation public static T inScopedCaches(final ScopedValue.CallableOp operation) { try { return ScopedValue.where(OUTPUT_STREAM_CACHE, new ByteArrayOutputStream(8192)).call(operation); diff --git a/lib/src/main/java/org/kpipe/registry/MessageProcessorRegistry.java b/lib/src/main/java/org/kpipe/registry/MessageProcessorRegistry.java index 43a6fd5..fb65376 100644 --- a/lib/src/main/java/org/kpipe/registry/MessageProcessorRegistry.java +++ b/lib/src/main/java/org/kpipe/registry/MessageProcessorRegistry.java @@ -12,15 +12,16 @@ /// Registry for managing and composing message processors in KPipe. /// -/// This class allows registration, retrieval, and composition of byte array message processors for -/// different formats (JSON, Avro, Protobuf). It supports schema-based and type-safe pipelines for -/// Kafka message processing, and provides utilities for building and composing processing chains. +/// This class allows registration, retrieval, and composition of message processors for different +/// formats (JSON, Avro, Protobuf, POJO). It supports type-safe pipelines for Kafka message +/// processing and provides utilities for building and composing processing chains via +// [TypedPipelineBuilder]. /// /// Example usage: /// ```java /// final var registry = new MessageProcessorRegistry("my-app"); -/// registry.addSchema("user", "User", "schemas/user.avsc"); -/// var pipeline = registry.avroPipelineBuilder("user").build(); +/// var pipeline = +// registry.pipeline(MessageFormat.JSON).add(RegistryKey.json("addTimestamp")).build(); /// ``` public class MessageProcessorRegistry { @@ -104,6 +105,14 @@ public MessageSinkRegistry sinkRegistry() { /// @param format The message format for serialization/deserialization. /// @param The type of the object in the pipeline. /// @return A new TypedPipelineBuilder. + /// + /// Example usage: + /// ```java + /// final var registry = new MessageProcessorRegistry("my-app"); + /// var pipeline = registry.pipeline(MessageFormat.JSON) + /// .add(RegistryKey.json("addTimestamp")) + /// .build(); + /// ``` public TypedPipelineBuilder pipeline(final MessageFormat format) { return new TypedPipelineBuilder<>(format, this); } From cfc819e14f68fdd41122191677638380f086d949 Mon Sep 17 00:00:00 2001 From: mariano Date: Wed, 1 Apr 2026 06:46:51 +0900 Subject: [PATCH 04/14] refactor: simplify registry wrapping logic and enhance sink registration with additional types - Refactor MessageProcessorRegistry to streamline operator and sink wrapping, consolidating metrics logic and improving type safety - Update getOperator and wrapSink to always return a wrapped function, ensuring consistent behavior - Add predefined registry keys and registrations for JSON map and Avro generic record sinks in MessageSinkRegistry - Improve TypedPipelineBuilder to log a warning when a sink key is missing in the registry - Remove unused startTime field from App --- app/avro/src/main/java/org/kpipe/App.java | 1 - .../registry/MessageProcessorRegistry.java | 85 ++++++++++--------- .../kpipe/registry/MessageSinkRegistry.java | 18 +++- .../kpipe/registry/TypedPipelineBuilder.java | 7 +- 4 files changed, 67 insertions(+), 44 deletions(-) diff --git a/app/avro/src/main/java/org/kpipe/App.java b/app/avro/src/main/java/org/kpipe/App.java index 27682e0..31a6e9f 100644 --- a/app/avro/src/main/java/org/kpipe/App.java +++ b/app/avro/src/main/java/org/kpipe/App.java @@ -36,7 +36,6 @@ public class App implements AutoCloseable { private static final Logger LOGGER = System.getLogger(App.class.getName()); private static final String DEFAULT_SCHEMA_REGISTRY_URL = "http://schema-registry:8081"; - private final AtomicLong startTime = new AtomicLong(System.currentTimeMillis()); private final KPipeConsumer functionalConsumer; private final ConsumerRunner> runner; private final HttpHealthServer healthServer; diff --git a/lib/src/main/java/org/kpipe/registry/MessageProcessorRegistry.java b/lib/src/main/java/org/kpipe/registry/MessageProcessorRegistry.java index fb65376..c8b4598 100644 --- a/lib/src/main/java/org/kpipe/registry/MessageProcessorRegistry.java +++ b/lib/src/main/java/org/kpipe/registry/MessageProcessorRegistry.java @@ -61,33 +61,31 @@ public Map getMetrics() { return metrics; } - public UnaryOperator wrapOperator(final UnaryOperator operator) { - return input -> { - final var start = System.nanoTime(); - try { - final var result = operator.apply(input); - invocationCount.increment(); - totalProcessingTimeNs.add(System.nanoTime() - start); - return result; - } catch (final Exception e) { - errorCount.increment(); - throw e; - } - }; + @SuppressWarnings("unchecked") + public V apply(final V input) { + final var start = System.nanoTime(); + try { + final var result = ((UnaryOperator) value).apply(input); + invocationCount.increment(); + totalProcessingTimeNs.add(System.nanoTime() - start); + return result; + } catch (final Exception e) { + errorCount.increment(); + throw e; + } } - public MessageSink wrapSink(final MessageSink sink) { - return input -> { - final var start = System.nanoTime(); - try { - sink.accept(input); - invocationCount.increment(); - totalProcessingTimeNs.add(System.nanoTime() - start); - } catch (final Exception e) { - errorCount.increment(); - throw e; - } - }; + @SuppressWarnings("unchecked") + public void accept(final V input) { + final var start = System.nanoTime(); + try { + ((MessageSink) value).accept(input); + invocationCount.increment(); + totalProcessingTimeNs.add(System.nanoTime() - start); + } catch (final Exception e) { + errorCount.increment(); + throw e; + } } } @@ -109,7 +107,7 @@ public MessageSinkRegistry sinkRegistry() { /// Example usage: /// ```java /// final var registry = new MessageProcessorRegistry("my-app"); - /// var pipeline = registry.pipeline(MessageFormat.JSON) + /// final var pipeline = registry.pipeline(MessageFormat.JSON) /// .add(RegistryKey.json("addTimestamp")) /// .build(); /// ``` @@ -150,9 +148,11 @@ public & UnaryOperator> void registerEnum(final Class UnaryOperator wrapOperator(final RegistryKey key, final UnaryOperator operator) { - final var entry = (RegistryEntry>) registryMap.get(key); - if (entry == null) return operator; - return entry.wrapOperator(entry.value); + return input -> { + final var entry = (RegistryEntry>) registryMap.get(key); + if (entry == null) return operator.apply(input); + return entry.apply(input); + }; } /// Wraps a sink with additional functionality, such as metrics collection. @@ -163,13 +163,16 @@ public UnaryOperator wrapOperator(final RegistryKey key, final UnaryOp /// @return The wrapped sink, or the original sink if no wrapping is needed @SuppressWarnings("unchecked") public MessageSink wrapSink(final RegistryKey key, final MessageSink sink) { - final var entry = (RegistryEntry>) registryMap.get(key); - if (entry == null) { - final var registeredSink = (MessageSink) sinkRegistry.get(key); - if (registeredSink != null) return registeredSink; - return sink; - } - return entry.wrapSink(entry.value); + return input -> { + final var entry = (RegistryEntry>) registryMap.get(key); + if (entry != null) { + entry.accept(input); + } else { + final var registeredSink = sinkRegistry.get(key); + if (registeredSink != null) registeredSink.accept(input); + else sink.accept(input); + } + }; } /// Retrieves a typed operator using a type-safe RegistryKey. @@ -179,9 +182,13 @@ public MessageSink wrapSink(final RegistryKey key, final MessageSink UnaryOperator getOperator(final RegistryKey key) { - final var entry = (RegistryEntry>) registryMap.get(key); - if (entry == null) return null; - return entry.value; + return input -> { + final var entry = (RegistryEntry>) registryMap.get(key); + if (entry != null) { + return entry.apply(input); + } + return input; + }; } /// Creates a new registry with JSON as the default message format. diff --git a/lib/src/main/java/org/kpipe/registry/MessageSinkRegistry.java b/lib/src/main/java/org/kpipe/registry/MessageSinkRegistry.java index 9cb9673..301a047 100644 --- a/lib/src/main/java/org/kpipe/registry/MessageSinkRegistry.java +++ b/lib/src/main/java/org/kpipe/registry/MessageSinkRegistry.java @@ -6,6 +6,8 @@ import java.util.Objects; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.LongAdder; + +import org.apache.avro.generic.GenericRecord; import org.kpipe.sink.AvroConsoleSink; import org.kpipe.sink.JsonConsoleSink; import org.kpipe.sink.MessageSink; @@ -34,6 +36,11 @@ public class MessageSinkRegistry { /// Pre-defined key for the Avro logging sink. public static final RegistryKey AVRO_LOGGING = RegistryKey.of("avroLogging", byte[].class); + /// Pre-defined key for the JSON map logging sink. + public static final RegistryKey> JSON_MAP_LOGGING = RegistryKey.json("jsonLogging"); + /// Pre-defined key for the Avro generic record logging sink. + public static final RegistryKey AVRO_GENERIC_LOGGING = RegistryKey.avro("avroLogging"); + private static class SinkEntry { final MessageSink sink; @@ -75,6 +82,8 @@ public void accept(final T processedValue) { public MessageSinkRegistry() { register(JSON_LOGGING, new JsonConsoleSink<>()); register(AVRO_LOGGING, new AvroConsoleSink<>()); + register(JSON_MAP_LOGGING, new JsonConsoleSink<>()); + register(AVRO_GENERIC_LOGGING, new AvroConsoleSink<>()); } /// Registers a new message sink with the specified key. @@ -110,9 +119,12 @@ public void clear() { /// @return the sink, or null if not found @SuppressWarnings("unchecked") public MessageSink get(final RegistryKey key) { - final var entry = (SinkEntry) registry.get(key); - if (entry == null) return null; - return entry::accept; + return value -> { + final var entry = (SinkEntry) registry.get(key); + if (entry != null) { + entry.accept(value); + } + }; } /// Creates a composite sink that sends objects to multiple sinks identified by keys. diff --git a/lib/src/main/java/org/kpipe/registry/TypedPipelineBuilder.java b/lib/src/main/java/org/kpipe/registry/TypedPipelineBuilder.java index feac9de..9a16823 100644 --- a/lib/src/main/java/org/kpipe/registry/TypedPipelineBuilder.java +++ b/lib/src/main/java/org/kpipe/registry/TypedPipelineBuilder.java @@ -1,5 +1,7 @@ package org.kpipe.registry; +import java.lang.System.Logger; +import java.lang.System.Logger.Level; import java.util.ArrayList; import java.util.List; import java.util.Objects; @@ -12,6 +14,7 @@ /// @param The type of the object in the pipeline. public final class TypedPipelineBuilder { + private static final Logger LOGGER = System.getLogger(TypedPipelineBuilder.class.getName()); private final MessageFormat format; private final List> operators = new ArrayList<>(); private final MessageProcessorRegistry registry; @@ -96,7 +99,9 @@ public TypedPipelineBuilder toSink(MessageSink sink) { /// @param key The registry key for the sink. /// @return This builder. public TypedPipelineBuilder toSink(RegistryKey key) { - return toSink(registry.wrapSink(key, t -> {})); + return toSink(registry.wrapSink(key, t -> { + LOGGER.log(Level.WARNING, "No sink found in registry for key: {0}", key); + })); } /// Composes a sequence of registry keys into a single sink. From a842f6888954d671f75e4a316ee21b187e68cbd3 Mon Sep 17 00:00:00 2001 From: mariano Date: Wed, 1 Apr 2026 06:56:35 +0900 Subject: [PATCH 05/14] refactor: update registry sink keys to use typed variants and streamline benchmark pipelines - Replace byte[] sink keys with typed RegistryKey variants for JSON and Avro sinks - Remove redundant sink keys and registrations from MessageSinkRegistry - Update benchmarks and App to use new typed pipeline and sink APIs - Refactor manual processing in benchmarks to use MessageFormat for serialization/deserialization - Minor cleanup in TypedPipelineBuilder and test assertions --- app/avro/src/main/java/org/kpipe/App.java | 5 +- app/json/src/main/java/org/kpipe/App.java | 3 +- .../java/org/kpipe/AppIntegrationTest.java | 17 ++++-- app/protobuf/src/main/java/org/kpipe/App.java | 5 +- .../benchmarks/AvroPipelineBenchmark.java | 29 ++++----- .../benchmarks/JsonPipelineBenchmark.java | 61 ++++++++++--------- ...llelProcessingBenchmarkInfrastructure.java | 3 +- .../kpipe/registry/MessageSinkRegistry.java | 20 ++---- .../kpipe/registry/TypedPipelineBuilder.java | 14 +++-- .../registry/MessageSinkRegistryTest.java | 4 +- 10 files changed, 80 insertions(+), 81 deletions(-) diff --git a/app/avro/src/main/java/org/kpipe/App.java b/app/avro/src/main/java/org/kpipe/App.java index 31a6e9f..d93caa6 100644 --- a/app/avro/src/main/java/org/kpipe/App.java +++ b/app/avro/src/main/java/org/kpipe/App.java @@ -7,7 +7,6 @@ import java.util.Map; import java.util.Queue; import java.util.concurrent.ConcurrentLinkedQueue; -import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Function; import org.apache.kafka.clients.consumer.Consumer; @@ -156,7 +155,9 @@ private static Function, OffsetManager> /// /// @param registry the message sink registry /// @return a message sink that processes messages through the pipeline - private static MessageSink createSinksPipeline(final MessageSinkRegistry registry) { + private static MessageSink createSinksPipeline( + final MessageSinkRegistry registry + ) { return registry.pipeline(MessageSinkRegistry.AVRO_LOGGING); } diff --git a/app/json/src/main/java/org/kpipe/App.java b/app/json/src/main/java/org/kpipe/App.java index 973469e..21ee57c 100644 --- a/app/json/src/main/java/org/kpipe/App.java +++ b/app/json/src/main/java/org/kpipe/App.java @@ -10,7 +10,6 @@ import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Function; -import java.util.function.UnaryOperator; import org.apache.kafka.clients.consumer.Consumer; import org.kpipe.config.AppConfig; import org.kpipe.config.KafkaConsumerConfig; @@ -140,7 +139,7 @@ private static Function, OffsetManager> /// /// @param registry the message sink registry /// @return a message sink that processes messages through the pipeline - private static MessageSink createSinksPipeline(final MessageSinkRegistry registry) { + private static MessageSink> createSinksPipeline(final MessageSinkRegistry registry) { return registry.pipeline(MessageSinkRegistry.JSON_LOGGING); } diff --git a/app/json/src/test/java/org/kpipe/AppIntegrationTest.java b/app/json/src/test/java/org/kpipe/AppIntegrationTest.java index 55e5cf6..e733d32 100644 --- a/app/json/src/test/java/org/kpipe/AppIntegrationTest.java +++ b/app/json/src/test/java/org/kpipe/AppIntegrationTest.java @@ -2,7 +2,6 @@ import static org.junit.jupiter.api.Assertions.*; -import com.dslplatform.json.DslJson; import java.lang.System.Logger; import java.lang.System.Logger.Level; import java.nio.charset.StandardCharsets; @@ -19,7 +18,6 @@ import org.junit.jupiter.api.Test; import org.kpipe.config.AppConfig; import org.kpipe.processor.JsonMessageProcessor; -import org.kpipe.registry.MessageSinkRegistry; import org.kpipe.registry.RegistryKey; import org.kpipe.sink.MessageSink; import org.testcontainers.junit.jupiter.Container; @@ -62,9 +60,18 @@ void testJsonAppEndToEnd() throws Exception { System.out.println("[DEBUG_LOG] Registered sink is capturing: " + (sink != null)); // Set up the processor registry - app.getProcessorRegistry().registerOperator(RegistryKey.json("addSource"), JsonMessageProcessor.addFieldOperator("source", "test-app")); - app.getProcessorRegistry().registerOperator(RegistryKey.json("markProcessed"), JsonMessageProcessor.addFieldOperator("status", "processed")); - app.getProcessorRegistry().registerOperator(RegistryKey.json("addTimestamp"), JsonMessageProcessor.addTimestampOperator("processedAt")); + app + .getProcessorRegistry() + .registerOperator(RegistryKey.json("addSource"), JsonMessageProcessor.addFieldOperator("source", "test-app")); + app + .getProcessorRegistry() + .registerOperator( + RegistryKey.json("markProcessed"), + JsonMessageProcessor.addFieldOperator("status", "processed") + ); + app + .getProcessorRegistry() + .registerOperator(RegistryKey.json("addTimestamp"), JsonMessageProcessor.addTimestampOperator("processedAt")); // Start the app in a virtual thread final var appThread = Thread.ofVirtual().start(() -> { diff --git a/app/protobuf/src/main/java/org/kpipe/App.java b/app/protobuf/src/main/java/org/kpipe/App.java index 5acffa4..37ba553 100644 --- a/app/protobuf/src/main/java/org/kpipe/App.java +++ b/app/protobuf/src/main/java/org/kpipe/App.java @@ -26,7 +26,6 @@ import org.kpipe.registry.MessageFormat; import org.kpipe.registry.MessageProcessorRegistry; import org.kpipe.registry.MessageSinkRegistry; -import org.kpipe.sink.MessageSink; /// Application that consumes messages from a Kafka topic and processes them using a configurable /// pipeline of message processors. @@ -138,9 +137,7 @@ private static Function, OffsetManager> /// @param registry the message processor registry /// @return a function that processes messages through the pipeline private static UnaryOperator createProtobufProcessorPipeline(final MessageProcessorRegistry registry) { - return registry - .pipeline(MessageFormat.PROTOBUF) - .build(); + return registry.pipeline(MessageFormat.PROTOBUF).build(); } /// Gets the processor registry used by this application. diff --git a/benchmarks/src/jmh/java/org/kpipe/benchmarks/AvroPipelineBenchmark.java b/benchmarks/src/jmh/java/org/kpipe/benchmarks/AvroPipelineBenchmark.java index ec83863..724eca6 100644 --- a/benchmarks/src/jmh/java/org/kpipe/benchmarks/AvroPipelineBenchmark.java +++ b/benchmarks/src/jmh/java/org/kpipe/benchmarks/AvroPipelineBenchmark.java @@ -10,6 +10,8 @@ import org.apache.avro.generic.GenericDatumWriter; import org.apache.avro.io.EncoderFactory; import org.kpipe.processor.AvroMessageProcessor; +import org.kpipe.registry.AvroFormat; +import org.kpipe.registry.MessageFormat; import org.kpipe.registry.MessageProcessorRegistry; import org.kpipe.registry.RegistryKey; import org.openjdk.jmh.annotations.*; @@ -78,8 +80,10 @@ final var record = new GenericData.Record(schema); avroWithMagicBytes[4] = 1; System.arraycopy(avroBytes, 0, avroWithMagicBytes, 5, avroBytes.length); - final var registry = new MessageProcessorRegistry("benchmark-app", org.kpipe.registry.MessageFormat.AVRO); - registry.addSchema("user", "com.kpipe.User", schemaJson); + final var registry = new MessageProcessorRegistry("benchmark-app", MessageFormat.AVRO); + final var format = (AvroFormat) MessageFormat.AVRO; + format.addSchema("user", "com.kpipe.User", schemaJson); + format.withDefaultSchema("user"); // Register operators final var op1 = RegistryKey.avro("op1"); @@ -88,8 +92,8 @@ final var record = new GenericData.Record(schema); registry.registerOperator(op1, AvroMessageProcessor.addFieldOperator("processed", true)); registry.registerOperator(op2, AvroMessageProcessor.addFieldOperator("name", "PROCESSED")); - kpipePipeline = registry.avroPipelineBuilder("user").add(op1).add(op2).build(); - kpipeMagicPipeline = registry.avroPipelineBuilder("user", 5).add(op1).add(op2).build(); + kpipePipeline = registry.pipeline(format).add(op1).add(op2).build(); + kpipeMagicPipeline = registry.pipeline(format).skipBytes(5).add(op1).add(op2).build(); } @Benchmark @@ -107,15 +111,12 @@ public void kpipeAvroMagicPipeline(final Blackhole bh) { public void manualAvroMagicHandling(final Blackhole bh) { // This mimics the manual way of handling magic bytes with copying final var stripped = Arrays.copyOfRange(avroWithMagicBytes, 5, avroWithMagicBytes.length); - final var result = AvroMessageProcessor.processAvro( - stripped, - schema, - record -> { - record.put("processed", true); - record.put("name", "PROCESSED"); - return record; - } - ); - bh.consume(result); + final var format = (AvroFormat) MessageFormat.AVRO; + final var record = format.deserialize(stripped); + if (record != null) { + record.put("processed", true); + record.put("name", "PROCESSED"); + } + bh.consume(format.serialize(record)); } } diff --git a/benchmarks/src/jmh/java/org/kpipe/benchmarks/JsonPipelineBenchmark.java b/benchmarks/src/jmh/java/org/kpipe/benchmarks/JsonPipelineBenchmark.java index c2eb3ec..17b4283 100644 --- a/benchmarks/src/jmh/java/org/kpipe/benchmarks/JsonPipelineBenchmark.java +++ b/benchmarks/src/jmh/java/org/kpipe/benchmarks/JsonPipelineBenchmark.java @@ -3,7 +3,7 @@ import java.nio.charset.StandardCharsets; import java.util.concurrent.TimeUnit; import java.util.function.Function; -import org.kpipe.processor.JsonMessageProcessor; +import org.kpipe.registry.MessageFormat; import org.kpipe.registry.MessageProcessorRegistry; import org.kpipe.registry.RegistryKey; import org.openjdk.jmh.annotations.*; @@ -54,7 +54,7 @@ public void setup() { StandardCharsets.UTF_8 ); - final var registry = new MessageProcessorRegistry("benchmark-app"); + final var registry = new MessageProcessorRegistry("benchmark-app", MessageFormat.JSON); // Register some operators final var op1 = RegistryKey.json("op1"); final var op2 = RegistryKey.json("op2"); @@ -82,7 +82,7 @@ public void setup() { } ); - kpipePipeline = registry.jsonPipelineBuilder().add(op1).add(op2).add(op3).build(); + kpipePipeline = registry.pipeline(MessageFormat.JSON).add(op1).add(op2).add(op3).build(); } @Benchmark @@ -93,27 +93,29 @@ public void kpipeJsonPipeline(final Blackhole bh) { @Benchmark public void manualJsonSerDeChained(final Blackhole bh) { // This mimics the "bad" way of chaining byte-to-byte functions - final var step1 = JsonMessageProcessor.processJson( - jsonBytes, - map -> { - map.put("processed_by", "manual"); - return map; - } - ); - final var step2 = JsonMessageProcessor.processJson( - step1, - map -> { - map.put("timestamp", BENCHMARK_TIMESTAMP); - return map; - } - ); - final var step3 = JsonMessageProcessor.processJson( - step2, - map -> { - map.remove("email"); - return map; - } - ); + final var format = MessageFormat.JSON; + + // Step 1 + final var map1 = format.deserialize(jsonBytes); + if (map1 != null) { + map1.put("processed_by", "manual"); + } + final var step1 = format.serialize(map1); + + // Step 2 + final var map2 = format.deserialize(step1); + if (map2 != null) { + map2.put("timestamp", BENCHMARK_TIMESTAMP); + } + final var step2 = format.serialize(map2); + + // Step 3 + final var map3 = format.deserialize(step2); + if (map3 != null) { + map3.remove("email"); + } + final var step3 = format.serialize(map3); + bh.consume(step3); } @@ -127,15 +129,14 @@ public void manualJsonSingleSerDe(final Blackhole bh) { // 2. Logic // 3. Serialization - final var result = JsonMessageProcessor.processJson( - jsonBytes, - map -> { + final var format = MessageFormat.JSON; + final var map = format.deserialize(jsonBytes); + if (map != null) { map.put("processed_by", "manual"); map.put("timestamp", BENCHMARK_TIMESTAMP); map.remove("email"); - return map; - } - ); + } + final var result = format.serialize(map); bh.consume(result); } } diff --git a/benchmarks/src/jmh/java/org/kpipe/benchmarks/ParallelProcessingBenchmarkInfrastructure.java b/benchmarks/src/jmh/java/org/kpipe/benchmarks/ParallelProcessingBenchmarkInfrastructure.java index 223da53..5f8b5f8 100644 --- a/benchmarks/src/jmh/java/org/kpipe/benchmarks/ParallelProcessingBenchmarkInfrastructure.java +++ b/benchmarks/src/jmh/java/org/kpipe/benchmarks/ParallelProcessingBenchmarkInfrastructure.java @@ -190,8 +190,7 @@ public void setup(final KafkaContext kafkaContext) { .builder() .withProperties(kpipeProps) .withTopic(TOPIC) - .withMessageSink((record, processedValue) -> {}) - .withProcessor(val -> { + .withPipeline(val -> { processedCount.incrementAndGet(); return val; }) diff --git a/lib/src/main/java/org/kpipe/registry/MessageSinkRegistry.java b/lib/src/main/java/org/kpipe/registry/MessageSinkRegistry.java index 301a047..7750b5f 100644 --- a/lib/src/main/java/org/kpipe/registry/MessageSinkRegistry.java +++ b/lib/src/main/java/org/kpipe/registry/MessageSinkRegistry.java @@ -6,7 +6,6 @@ import java.util.Objects; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.LongAdder; - import org.apache.avro.generic.GenericRecord; import org.kpipe.sink.AvroConsoleSink; import org.kpipe.sink.JsonConsoleSink; @@ -31,15 +30,10 @@ public class MessageSinkRegistry { private static final Logger LOGGER = System.getLogger(MessageSinkRegistry.class.getName()); private final ConcurrentHashMap, SinkEntry> registry = new ConcurrentHashMap<>(); - /// Pre-defined key for the JSON logging sink. - public static final RegistryKey JSON_LOGGING = RegistryKey.of("jsonLogging", byte[].class); - /// Pre-defined key for the Avro logging sink. - public static final RegistryKey AVRO_LOGGING = RegistryKey.of("avroLogging", byte[].class); - - /// Pre-defined key for the JSON map logging sink. - public static final RegistryKey> JSON_MAP_LOGGING = RegistryKey.json("jsonLogging"); - /// Pre-defined key for the Avro generic record logging sink. - public static final RegistryKey AVRO_GENERIC_LOGGING = RegistryKey.avro("avroLogging"); + /// Pre-defined key for the JSON logging sink (Map based). + public static final RegistryKey> JSON_LOGGING = RegistryKey.json("jsonLogging"); + /// Pre-defined key for the Avro logging sink (GenericRecord based). + public static final RegistryKey AVRO_LOGGING = RegistryKey.avro("avroLogging"); private static class SinkEntry { @@ -82,8 +76,6 @@ public void accept(final T processedValue) { public MessageSinkRegistry() { register(JSON_LOGGING, new JsonConsoleSink<>()); register(AVRO_LOGGING, new AvroConsoleSink<>()); - register(JSON_MAP_LOGGING, new JsonConsoleSink<>()); - register(AVRO_GENERIC_LOGGING, new AvroConsoleSink<>()); } /// Registers a new message sink with the specified key. @@ -121,9 +113,7 @@ public void clear() { public MessageSink get(final RegistryKey key) { return value -> { final var entry = (SinkEntry) registry.get(key); - if (entry != null) { - entry.accept(value); - } + if (entry != null) entry.accept(value); }; } diff --git a/lib/src/main/java/org/kpipe/registry/TypedPipelineBuilder.java b/lib/src/main/java/org/kpipe/registry/TypedPipelineBuilder.java index 9a16823..e09fa36 100644 --- a/lib/src/main/java/org/kpipe/registry/TypedPipelineBuilder.java +++ b/lib/src/main/java/org/kpipe/registry/TypedPipelineBuilder.java @@ -45,7 +45,7 @@ public TypedPipelineBuilder skipBytes(int skipBytes) { /// /// @param operator The operator to add. /// @return This builder. - public TypedPipelineBuilder add(UnaryOperator operator) { + public TypedPipelineBuilder add(final UnaryOperator operator) { operators.add(Objects.requireNonNull(operator, "operator cannot be null")); return this; } @@ -54,7 +54,7 @@ public TypedPipelineBuilder add(UnaryOperator operator) { /// /// @param key The registry key for the operator. /// @return This builder. - public TypedPipelineBuilder add(RegistryKey key) { + public TypedPipelineBuilder add(final RegistryKey key) { final var operator = registry.getOperator(key); return add(registry.wrapOperator(key, operator != null ? operator : t -> t)); } @@ -64,7 +64,7 @@ public TypedPipelineBuilder add(RegistryKey key) { /// @param keys The registry keys for the operators. /// @return This builder. @SafeVarargs - public final TypedPipelineBuilder add(RegistryKey... keys) { + public final TypedPipelineBuilder add(final RegistryKey... keys) { for (final var key : keys) { add(key); } @@ -99,9 +99,11 @@ public TypedPipelineBuilder toSink(MessageSink sink) { /// @param key The registry key for the sink. /// @return This builder. public TypedPipelineBuilder toSink(RegistryKey key) { - return toSink(registry.wrapSink(key, t -> { - LOGGER.log(Level.WARNING, "No sink found in registry for key: {0}", key); - })); + return toSink( + registry.wrapSink(key, t -> { + LOGGER.log(Level.WARNING, "No sink found in registry for key: {0}", key); + }) + ); } /// Composes a sequence of registry keys into a single sink. diff --git a/lib/src/test/java/org/kpipe/registry/MessageSinkRegistryTest.java b/lib/src/test/java/org/kpipe/registry/MessageSinkRegistryTest.java index 42413ec..0a23959 100644 --- a/lib/src/test/java/org/kpipe/registry/MessageSinkRegistryTest.java +++ b/lib/src/test/java/org/kpipe/registry/MessageSinkRegistryTest.java @@ -18,8 +18,10 @@ void setUp() { @Test void shouldHaveDefaultConsoleSink() { - // Assert + // Act final var allSinks = registry.getAll(); + + // Assert assertTrue(allSinks.containsKey(MessageSinkRegistry.AVRO_LOGGING)); assertTrue(allSinks.containsKey(MessageSinkRegistry.JSON_LOGGING)); assertTrue(allSinks.get(MessageSinkRegistry.AVRO_LOGGING).contains("AvroConsoleSink")); From 48bac3df123804b21bc9ce5245c891abd799aaee Mon Sep 17 00:00:00 2001 From: mariano Date: Wed, 1 Apr 2026 07:07:11 +0900 Subject: [PATCH 06/14] refactor: streamline registry key factories and pipeline builder operator/sink addition - Remove redundant sink key factory from RegistryKey and clarify type signatures for json/avro helpers - Simplify TypedPipelineBuilder operator addition to support varargs and direct registry lookup - Refactor sink addition in TypedPipelineBuilder to allow multiple sinks and remove unnecessary logging - Update usage in App and benchmarks to use new registry key helpers and streamlined pipeline builder API - Improve documentation for clarity and consistency --- app/avro/src/main/java/org/kpipe/App.java | 2 +- app/json/src/main/java/org/kpipe/App.java | 4 +- .../benchmarks/AvroPipelineBenchmark.java | 4 +- .../benchmarks/JsonPipelineBenchmark.java | 2 +- .../registry/MessageProcessorRegistry.java | 24 +++++---- .../java/org/kpipe/registry/RegistryKey.java | 36 +++++-------- .../kpipe/registry/TypedPipelineBuilder.java | 53 +++++++------------ 7 files changed, 50 insertions(+), 75 deletions(-) diff --git a/app/avro/src/main/java/org/kpipe/App.java b/app/avro/src/main/java/org/kpipe/App.java index d93caa6..c9562bc 100644 --- a/app/avro/src/main/java/org/kpipe/App.java +++ b/app/avro/src/main/java/org/kpipe/App.java @@ -180,7 +180,7 @@ private static java.util.function.UnaryOperator createAvroProcessorPipel final var builder = registry.pipeline(avroFormat); builder.skipBytes(5); for (final var name : config.processors()) builder.add(RegistryKey.avro(name)); - builder.toSink(RegistryKey.of("avroLogging", org.apache.avro.generic.GenericRecord.class)); + builder.toSink(RegistryKey.avro("avroLogging")); return builder.build(); } diff --git a/app/json/src/main/java/org/kpipe/App.java b/app/json/src/main/java/org/kpipe/App.java index 21ee57c..59412c0 100644 --- a/app/json/src/main/java/org/kpipe/App.java +++ b/app/json/src/main/java/org/kpipe/App.java @@ -63,7 +63,7 @@ public App(final AppConfig config) { this.processorRegistry = new MessageProcessorRegistry(config.appName(), MessageFormat.JSON); this.sinkRegistry = processorRegistry.sinkRegistry(); // Pre-register loggers - sinkRegistry.register(RegistryKey.of("jsonLogging", Map.class), new org.kpipe.sink.JsonConsoleSink<>()); + sinkRegistry.register(RegistryKey.json("jsonLogging"), new org.kpipe.sink.JsonConsoleSink<>()); this.kpipeConsumer = createConsumer(config, processorRegistry); final var consumerMetricsReporter = ConsumerMetricsReporter.forConsumer(kpipeConsumer::getMetrics); @@ -154,7 +154,7 @@ private static java.util.function.UnaryOperator createJsonProcessorPipel ) { final var builder = registry.pipeline(MessageFormat.JSON); for (final var name : config.processors()) builder.add(RegistryKey.json(name)); - builder.toSink(RegistryKey.of("jsonLogging", Map.class)); + builder.toSink(RegistryKey.json("jsonLogging")); return builder.build(); } diff --git a/benchmarks/src/jmh/java/org/kpipe/benchmarks/AvroPipelineBenchmark.java b/benchmarks/src/jmh/java/org/kpipe/benchmarks/AvroPipelineBenchmark.java index 724eca6..bc9fa15 100644 --- a/benchmarks/src/jmh/java/org/kpipe/benchmarks/AvroPipelineBenchmark.java +++ b/benchmarks/src/jmh/java/org/kpipe/benchmarks/AvroPipelineBenchmark.java @@ -92,8 +92,8 @@ final var record = new GenericData.Record(schema); registry.registerOperator(op1, AvroMessageProcessor.addFieldOperator("processed", true)); registry.registerOperator(op2, AvroMessageProcessor.addFieldOperator("name", "PROCESSED")); - kpipePipeline = registry.pipeline(format).add(op1).add(op2).build(); - kpipeMagicPipeline = registry.pipeline(format).skipBytes(5).add(op1).add(op2).build(); + kpipePipeline = registry.pipeline(format).add(op1, op2).build(); + kpipeMagicPipeline = registry.pipeline(format).skipBytes(5).add(op1, op2).build(); } @Benchmark diff --git a/benchmarks/src/jmh/java/org/kpipe/benchmarks/JsonPipelineBenchmark.java b/benchmarks/src/jmh/java/org/kpipe/benchmarks/JsonPipelineBenchmark.java index 17b4283..ae0040d 100644 --- a/benchmarks/src/jmh/java/org/kpipe/benchmarks/JsonPipelineBenchmark.java +++ b/benchmarks/src/jmh/java/org/kpipe/benchmarks/JsonPipelineBenchmark.java @@ -82,7 +82,7 @@ public void setup() { } ); - kpipePipeline = registry.pipeline(MessageFormat.JSON).add(op1).add(op2).add(op3).build(); + kpipePipeline = registry.pipeline(MessageFormat.JSON).add(op1, op2, op3).build(); } @Benchmark diff --git a/lib/src/main/java/org/kpipe/registry/MessageProcessorRegistry.java b/lib/src/main/java/org/kpipe/registry/MessageProcessorRegistry.java index c8b4598..749760b 100644 --- a/lib/src/main/java/org/kpipe/registry/MessageProcessorRegistry.java +++ b/lib/src/main/java/org/kpipe/registry/MessageProcessorRegistry.java @@ -157,29 +157,31 @@ public UnaryOperator wrapOperator(final RegistryKey key, final UnaryOp /// Wraps a sink with additional functionality, such as metrics collection. /// - /// @param The type of data the sink processes - /// @param key The type-safe key to retrieve - /// @param sink The sink to wrap - /// @return The wrapped sink, or the original sink if no wrapping is needed + /// @param The type of data the sink processes. + /// @param key The type-safe key to retrieve. + /// @return The wrapped sink. @SuppressWarnings("unchecked") - public MessageSink wrapSink(final RegistryKey key, final MessageSink sink) { + public MessageSink wrapSink(final RegistryKey key) { return input -> { final var entry = (RegistryEntry>) registryMap.get(key); if (entry != null) { entry.accept(input); } else { final var registeredSink = sinkRegistry.get(key); - if (registeredSink != null) registeredSink.accept(input); - else sink.accept(input); + if (registeredSink != null) { + registeredSink.accept(input); + } else { + LOGGER.log(Logger.Level.WARNING, "No sink found in registry for key: {0}", key); + } } }; } - /// Retrieves a typed operator using a type-safe RegistryKey. + /// Retrieves a typed operator from the registry. /// - /// @param The type of data the operator processes - /// @param key The type-safe key to retrieve - /// @return The registered operator, or null if not found + /// @param The type of data the operator processes. + /// @param key The type-safe key to retrieve. + /// @return The registered operator, or a no-op operator if not found. @SuppressWarnings("unchecked") public UnaryOperator getOperator(final RegistryKey key) { return input -> { diff --git a/lib/src/main/java/org/kpipe/registry/RegistryKey.java b/lib/src/main/java/org/kpipe/registry/RegistryKey.java index c5028a3..28dd4eb 100644 --- a/lib/src/main/java/org/kpipe/registry/RegistryKey.java +++ b/lib/src/main/java/org/kpipe/registry/RegistryKey.java @@ -20,38 +20,28 @@ public record RegistryKey(String name, Class type) { /// Creates a type-safe registry key for a given type. /// - /// @param The type of data the key refers to - /// @param name The unique name of the registry entry - /// @param type The class representing the type - /// @return A new type-safe RegistryKey - @SuppressWarnings("unchecked") - public static RegistryKey of(final String name, final Class type) { - return new RegistryKey<>(name, (Class) type); + /// @param The type of data the key refers to. + /// @param name The unique name of the registry entry. + /// @param type The class representing the type. + /// @return A new type-safe RegistryKey. + public static RegistryKey of(final String name, final Class type) { + return new RegistryKey<>(name, type); } - /// Convenience factory for JSON-like map keys (Map). + /// Convenience factory for JSON-like map keys (`Map`). /// - /// @param name The unique name of the registry entry - /// @return A new RegistryKey for JSON map data + /// @param name The unique name of the registry entry. + /// @return A new RegistryKey for JSON map data. + @SuppressWarnings("unchecked") public static RegistryKey> json(final String name) { - return of(name, Map.class); + return of(name, (Class>) (Class) Map.class); } /// Convenience factory for Avro GenericRecord keys. /// - /// @param name The unique name of the registry entry - /// @return A new RegistryKey for Avro GenericRecord data + /// @param name The unique name of the registry entry. + /// @return A new RegistryKey for Avro GenericRecord data. public static RegistryKey avro(final String name) { return of(name, GenericRecord.class); } - - /// Convenience factory for sink keys. - /// - /// @param The type of the processed object the sink accepts - /// @param name The unique name of the sink - /// @param type The class representing the type - /// @return A new type-safe RegistryKey for a sink - public static RegistryKey sink(final String name, final Class type) { - return new RegistryKey<>(name, type); - } } diff --git a/lib/src/main/java/org/kpipe/registry/TypedPipelineBuilder.java b/lib/src/main/java/org/kpipe/registry/TypedPipelineBuilder.java index e09fa36..fe86539 100644 --- a/lib/src/main/java/org/kpipe/registry/TypedPipelineBuilder.java +++ b/lib/src/main/java/org/kpipe/registry/TypedPipelineBuilder.java @@ -1,7 +1,5 @@ package org.kpipe.registry; -import java.lang.System.Logger; -import java.lang.System.Logger.Level; import java.util.ArrayList; import java.util.List; import java.util.Objects; @@ -14,7 +12,6 @@ /// @param The type of the object in the pipeline. public final class TypedPipelineBuilder { - private static final Logger LOGGER = System.getLogger(TypedPipelineBuilder.class.getName()); private final MessageFormat format; private final List> operators = new ArrayList<>(); private final MessageProcessorRegistry registry; @@ -50,23 +47,14 @@ public TypedPipelineBuilder add(final UnaryOperator operator) { return this; } - /// Adds a transformation operator from the registry. - /// - /// @param key The registry key for the operator. - /// @return This builder. - public TypedPipelineBuilder add(final RegistryKey key) { - final var operator = registry.getOperator(key); - return add(registry.wrapOperator(key, operator != null ? operator : t -> t)); - } - - /// Adds multiple transformation operators from the registry. + /// Adds transformation operators from the registry. /// /// @param keys The registry keys for the operators. /// @return This builder. @SafeVarargs public final TypedPipelineBuilder add(final RegistryKey... keys) { for (final var key : keys) { - add(key); + add(registry.getOperator(key)); } return this; } @@ -90,29 +78,28 @@ public TypedPipelineBuilder when(Predicate condition, UnaryOperator ifT /// @param sink The sink to add. /// @return This builder. public TypedPipelineBuilder toSink(MessageSink sink) { - this.sink = Objects.requireNonNull(sink, "sink cannot be null"); + if (this.sink == null) { + this.sink = Objects.requireNonNull(sink, "sink cannot be null"); + } else { + final var currentSink = this.sink; + this.sink = value -> { + currentSink.accept(value); + sink.accept(value); + }; + } return this; } - /// Sets a terminal sink for the pipeline from the registry. - /// - /// @param key The registry key for the sink. - /// @return This builder. - public TypedPipelineBuilder toSink(RegistryKey key) { - return toSink( - registry.wrapSink(key, t -> { - LOGGER.log(Level.WARNING, "No sink found in registry for key: {0}", key); - }) - ); - } - - /// Composes a sequence of registry keys into a single sink. + /// Sets terminal sinks for the pipeline from the registry. /// /// @param sinkKeys The registry keys for the sinks. /// @return This builder. @SafeVarargs public final TypedPipelineBuilder toSink(RegistryKey... sinkKeys) { - return toSink(registry.sinkRegistry().pipeline(sinkKeys)); + for (final var key : sinkKeys) { + toSink(registry.wrapSink(key)); + } + return this; } /// Builds the [MessagePipeline]. @@ -147,17 +134,13 @@ public T deserialize(byte[] data) { @Override public byte[] serialize(T data) { - if (data == null) { - return null; - } + if (data == null) return null; return format.serialize(data); } @Override public T process(T data) { - if (data == null) { - return null; - } + if (data == null) return null; var current = data; for (final var operator : pipelineOperators) { current = operator.apply(current); From a800639fad754129b28a14a15a6eb8d3d4e46e26 Mon Sep 17 00:00:00 2001 From: mariano Date: Wed, 1 Apr 2026 07:15:56 +0900 Subject: [PATCH 07/14] refactor: remove redundant wrapping logic and improve sink retrieval error handling --- .../registry/MessageProcessorRegistry.java | 48 ++----------------- .../kpipe/registry/MessageSinkRegistry.java | 6 ++- .../kpipe/registry/TypedPipelineBuilder.java | 6 +-- 3 files changed, 10 insertions(+), 50 deletions(-) diff --git a/lib/src/main/java/org/kpipe/registry/MessageProcessorRegistry.java b/lib/src/main/java/org/kpipe/registry/MessageProcessorRegistry.java index 749760b..e799344 100644 --- a/lib/src/main/java/org/kpipe/registry/MessageProcessorRegistry.java +++ b/lib/src/main/java/org/kpipe/registry/MessageProcessorRegistry.java @@ -1,6 +1,5 @@ package org.kpipe.registry; -import java.lang.System.Logger; import java.util.Collections; import java.util.Map; import java.util.Objects; @@ -15,18 +14,16 @@ /// This class allows registration, retrieval, and composition of message processors for different /// formats (JSON, Avro, Protobuf, POJO). It supports type-safe pipelines for Kafka message /// processing and provides utilities for building and composing processing chains via -// [TypedPipelineBuilder]. +/// [TypedPipelineBuilder]. /// /// Example usage: /// ```java /// final var registry = new MessageProcessorRegistry("my-app"); /// var pipeline = -// registry.pipeline(MessageFormat.JSON).add(RegistryKey.json("addTimestamp")).build(); +/// registry.pipeline(MessageFormat.JSON).add(RegistryKey.json("addTimestamp")).build(); /// ``` public class MessageProcessorRegistry { - private static final Logger LOGGER = System.getLogger(MessageProcessorRegistry.class.getName()); - /// Pre-defined key for adding a source field to JSON messages. public static final RegistryKey> JSON_ADD_SOURCE = RegistryKey.json("addSource"); /// Pre-defined key for adding a timestamp field to JSON messages. @@ -140,43 +137,6 @@ public & UnaryOperator> void registerEnum(final Class The type of data the operator processes - /// @param key The type-safe key to retrieve - /// @param operator The operator to wrap - /// @return The wrapped operator, or the original operator if no wrapping is needed - @SuppressWarnings("unchecked") - public UnaryOperator wrapOperator(final RegistryKey key, final UnaryOperator operator) { - return input -> { - final var entry = (RegistryEntry>) registryMap.get(key); - if (entry == null) return operator.apply(input); - return entry.apply(input); - }; - } - - /// Wraps a sink with additional functionality, such as metrics collection. - /// - /// @param The type of data the sink processes. - /// @param key The type-safe key to retrieve. - /// @return The wrapped sink. - @SuppressWarnings("unchecked") - public MessageSink wrapSink(final RegistryKey key) { - return input -> { - final var entry = (RegistryEntry>) registryMap.get(key); - if (entry != null) { - entry.accept(input); - } else { - final var registeredSink = sinkRegistry.get(key); - if (registeredSink != null) { - registeredSink.accept(input); - } else { - LOGGER.log(Logger.Level.WARNING, "No sink found in registry for key: {0}", key); - } - } - }; - } - /// Retrieves a typed operator from the registry. /// /// @param The type of data the operator processes. @@ -186,9 +146,7 @@ public MessageSink wrapSink(final RegistryKey key) { public UnaryOperator getOperator(final RegistryKey key) { return input -> { final var entry = (RegistryEntry>) registryMap.get(key); - if (entry != null) { - return entry.apply(input); - } + if (entry != null) return entry.apply(input); return input; }; } diff --git a/lib/src/main/java/org/kpipe/registry/MessageSinkRegistry.java b/lib/src/main/java/org/kpipe/registry/MessageSinkRegistry.java index 7750b5f..e126385 100644 --- a/lib/src/main/java/org/kpipe/registry/MessageSinkRegistry.java +++ b/lib/src/main/java/org/kpipe/registry/MessageSinkRegistry.java @@ -113,7 +113,11 @@ public void clear() { public MessageSink get(final RegistryKey key) { return value -> { final var entry = (SinkEntry) registry.get(key); - if (entry != null) entry.accept(value); + if (entry != null) { + entry.accept(value); + } else { + LOGGER.log(Level.WARNING, "No sink found in registry for key: {0}", key); + } }; } diff --git a/lib/src/main/java/org/kpipe/registry/TypedPipelineBuilder.java b/lib/src/main/java/org/kpipe/registry/TypedPipelineBuilder.java index fe86539..de3918e 100644 --- a/lib/src/main/java/org/kpipe/registry/TypedPipelineBuilder.java +++ b/lib/src/main/java/org/kpipe/registry/TypedPipelineBuilder.java @@ -97,7 +97,7 @@ public TypedPipelineBuilder toSink(MessageSink sink) { @SafeVarargs public final TypedPipelineBuilder toSink(RegistryKey... sinkKeys) { for (final var key : sinkKeys) { - toSink(registry.wrapSink(key)); + toSink(registry.sinkRegistry().get(key)); } return this; } @@ -144,9 +144,7 @@ public T process(T data) { var current = data; for (final var operator : pipelineOperators) { current = operator.apply(current); - if (current == null) { - return null; - } + if (current == null) return null; } return current; } From 4a838a59a18d0ee3f6e8eac83ec2338e3cfcb4a8 Mon Sep 17 00:00:00 2001 From: mariano Date: Wed, 1 Apr 2026 07:18:29 +0900 Subject: [PATCH 08/14] refactor: update integration tests to use typed registry key constructors for sink registration and retrieval --- app/avro/src/test/java/org/kpipe/AppIntegrationTest.java | 2 +- app/json/src/test/java/org/kpipe/AppIntegrationTest.java | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/app/avro/src/test/java/org/kpipe/AppIntegrationTest.java b/app/avro/src/test/java/org/kpipe/AppIntegrationTest.java index 5cd4e38..02d1c5e 100644 --- a/app/avro/src/test/java/org/kpipe/AppIntegrationTest.java +++ b/app/avro/src/test/java/org/kpipe/AppIntegrationTest.java @@ -119,7 +119,7 @@ void testAvroAppEndToEnd() throws Exception { app .getProcessorRegistry() .sinkRegistry() - .register(RegistryKey.of("avroLogging", GenericRecord.class), capturingSink); + .register(RegistryKey.avro("avroLogging"), capturingSink); // Start the app final var appThread = Thread.ofVirtual().start(() -> { diff --git a/app/json/src/test/java/org/kpipe/AppIntegrationTest.java b/app/json/src/test/java/org/kpipe/AppIntegrationTest.java index e733d32..7a5f18c 100644 --- a/app/json/src/test/java/org/kpipe/AppIntegrationTest.java +++ b/app/json/src/test/java/org/kpipe/AppIntegrationTest.java @@ -54,9 +54,9 @@ void testJsonAppEndToEnd() throws Exception { try (final var app = new App(config)) { // Register the capturing sink - app.getProcessorRegistry().sinkRegistry().register(RegistryKey.of("jsonLogging", Map.class), capturingSink); + app.getProcessorRegistry().sinkRegistry().register(RegistryKey.json("jsonLogging"), capturingSink); // Verify registration - final var sink = app.getProcessorRegistry().sinkRegistry().get(RegistryKey.of("jsonLogging", Map.class)); + final var sink = app.getProcessorRegistry().sinkRegistry().get(RegistryKey.json("jsonLogging")); System.out.println("[DEBUG_LOG] Registered sink is capturing: " + (sink != null)); // Set up the processor registry From 587a9495fe15ac88a8e092b4d974a56670f0d974 Mon Sep 17 00:00:00 2001 From: mariano Date: Wed, 1 Apr 2026 07:36:35 +0900 Subject: [PATCH 09/14] refactor: streamline code formatting and simplify control flow in benchmarks and pipeline builder --- .../java/org/kpipe/AppIntegrationTest.java | 5 +- .../benchmarks/AvroPipelineBenchmark.java | 25 +++--- .../benchmarks/JsonPipelineBenchmark.java | 84 ++++++++----------- ...llelProcessingBenchmarkInfrastructure.java | 76 ++++++++--------- .../org/kpipe/consumer/KPipeConsumer.java | 3 +- .../java/org/kpipe/registry/AvroFormat.java | 10 +-- .../kpipe/registry/TypedPipelineBuilder.java | 24 +++--- 7 files changed, 97 insertions(+), 130 deletions(-) diff --git a/app/avro/src/test/java/org/kpipe/AppIntegrationTest.java b/app/avro/src/test/java/org/kpipe/AppIntegrationTest.java index 02d1c5e..62e4d6e 100644 --- a/app/avro/src/test/java/org/kpipe/AppIntegrationTest.java +++ b/app/avro/src/test/java/org/kpipe/AppIntegrationTest.java @@ -116,10 +116,7 @@ void testAvroAppEndToEnd() throws Exception { try (final var app = new App(config, srUrl)) { // Register the capturing sink - app - .getProcessorRegistry() - .sinkRegistry() - .register(RegistryKey.avro("avroLogging"), capturingSink); + app.getProcessorRegistry().sinkRegistry().register(RegistryKey.avro("avroLogging"), capturingSink); // Start the app final var appThread = Thread.ofVirtual().start(() -> { diff --git a/benchmarks/src/jmh/java/org/kpipe/benchmarks/AvroPipelineBenchmark.java b/benchmarks/src/jmh/java/org/kpipe/benchmarks/AvroPipelineBenchmark.java index bc9fa15..96e30c8 100644 --- a/benchmarks/src/jmh/java/org/kpipe/benchmarks/AvroPipelineBenchmark.java +++ b/benchmarks/src/jmh/java/org/kpipe/benchmarks/AvroPipelineBenchmark.java @@ -44,19 +44,18 @@ public class AvroPipelineBenchmark { @Setup public void setup() throws IOException { - String schemaJson = - """ - { - "type": "record", - "name": "User", - "fields": [ - {"name": "id", "type": "long"}, - {"name": "name", "type": "string"}, - {"name": "email", "type": ["null", "string"], "default": null}, - {"name": "processed", "type": "boolean", "default": false} - ] - } - """; + String schemaJson = """ + { + "type": "record", + "name": "User", + "fields": [ + {"name": "id", "type": "long"}, + {"name": "name", "type": "string"}, + {"name": "email", "type": ["null", "string"], "default": null}, + {"name": "processed", "type": "boolean", "default": false} + ] + } + """; schema = new Schema.Parser().parse(schemaJson); final var record = new GenericData.Record(schema); diff --git a/benchmarks/src/jmh/java/org/kpipe/benchmarks/JsonPipelineBenchmark.java b/benchmarks/src/jmh/java/org/kpipe/benchmarks/JsonPipelineBenchmark.java index ae0040d..9d9576d 100644 --- a/benchmarks/src/jmh/java/org/kpipe/benchmarks/JsonPipelineBenchmark.java +++ b/benchmarks/src/jmh/java/org/kpipe/benchmarks/JsonPipelineBenchmark.java @@ -36,23 +36,20 @@ public class JsonPipelineBenchmark { @Setup public void setup() { - jsonBytes = - """ - { - "id": 12345, - "name": "John Doe", - "email": "john.doe@example.com", - "active": true, - "balance": 1250.50, - "tags": ["customer", "premium"], - "metadata": { - "source": "mobile", - "version": "1.2.3" - } - } - """.getBytes( - StandardCharsets.UTF_8 - ); + jsonBytes = """ + { + "id": 12345, + "name": "John Doe", + "email": "john.doe@example.com", + "active": true, + "balance": 1250.50, + "tags": ["customer", "premium"], + "metadata": { + "source": "mobile", + "version": "1.2.3" + } + } + """.getBytes(StandardCharsets.UTF_8); final var registry = new MessageProcessorRegistry("benchmark-app", MessageFormat.JSON); // Register some operators @@ -60,27 +57,18 @@ public void setup() { final var op2 = RegistryKey.json("op2"); final var op3 = RegistryKey.json("op3"); - registry.registerOperator( - op1, - map -> { - map.put("processed_by", "kpipe"); - return map; - } - ); - registry.registerOperator( - op2, - map -> { - map.put("timestamp", BENCHMARK_TIMESTAMP); - return map; - } - ); - registry.registerOperator( - op3, - map -> { - map.remove("email"); - return map; - } - ); + registry.registerOperator(op1, map -> { + map.put("processed_by", "kpipe"); + return map; + }); + registry.registerOperator(op2, map -> { + map.put("timestamp", BENCHMARK_TIMESTAMP); + return map; + }); + registry.registerOperator(op3, map -> { + map.remove("email"); + return map; + }); kpipePipeline = registry.pipeline(MessageFormat.JSON).add(op1, op2, op3).build(); } @@ -94,28 +82,28 @@ public void kpipeJsonPipeline(final Blackhole bh) { public void manualJsonSerDeChained(final Blackhole bh) { // This mimics the "bad" way of chaining byte-to-byte functions final var format = MessageFormat.JSON; - + // Step 1 final var map1 = format.deserialize(jsonBytes); if (map1 != null) { - map1.put("processed_by", "manual"); + map1.put("processed_by", "manual"); } final var step1 = format.serialize(map1); - + // Step 2 final var map2 = format.deserialize(step1); if (map2 != null) { - map2.put("timestamp", BENCHMARK_TIMESTAMP); + map2.put("timestamp", BENCHMARK_TIMESTAMP); } final var step2 = format.serialize(map2); - + // Step 3 final var map3 = format.deserialize(step2); if (map3 != null) { - map3.remove("email"); + map3.remove("email"); } final var step3 = format.serialize(map3); - + bh.consume(step3); } @@ -132,9 +120,9 @@ public void manualJsonSingleSerDe(final Blackhole bh) { final var format = MessageFormat.JSON; final var map = format.deserialize(jsonBytes); if (map != null) { - map.put("processed_by", "manual"); - map.put("timestamp", BENCHMARK_TIMESTAMP); - map.remove("email"); + map.put("processed_by", "manual"); + map.put("timestamp", BENCHMARK_TIMESTAMP); + map.remove("email"); } final var result = format.serialize(map); bh.consume(result); diff --git a/benchmarks/src/jmh/java/org/kpipe/benchmarks/ParallelProcessingBenchmarkInfrastructure.java b/benchmarks/src/jmh/java/org/kpipe/benchmarks/ParallelProcessingBenchmarkInfrastructure.java index 5f8b5f8..1a8b7c9 100644 --- a/benchmarks/src/jmh/java/org/kpipe/benchmarks/ParallelProcessingBenchmarkInfrastructure.java +++ b/benchmarks/src/jmh/java/org/kpipe/benchmarks/ParallelProcessingBenchmarkInfrastructure.java @@ -73,10 +73,10 @@ static void awaitProcessedMessages(final String benchmarkName, final AtomicInteg if (System.nanoTime() >= deadline) { throw new IllegalStateException( "%s timed out waiting for %d messages; processed=%d".formatted( - benchmarkName, - TARGET_MESSAGES, - processedCount.get() - ) + benchmarkName, + TARGET_MESSAGES, + processedCount.get() + ) ); } Thread.onSpinWait(); @@ -138,15 +138,12 @@ private static void seedTopic(final Properties clientProperties) { producerProps.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName()); try (final var producer = new KafkaProducer(producerProps)) { - final var value = - """ - { - "id": 12345, - "message": "Benchmark message" - } - """.getBytes( - StandardCharsets.UTF_8 - ); + final var value = """ + { + "id": 12345, + "message": "Benchmark message" + } + """.getBytes(StandardCharsets.UTF_8); for (int i = 0; i < TARGET_MESSAGES; i++) { producer.send(new ProducerRecord<>(TOPIC, value)).get(); @@ -185,17 +182,15 @@ public void setup(final KafkaContext kafkaContext) { final var kpipeProps = kafkaContext.consumerProps("kpipe-group"); kpipeProps.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false"); - consumer = - KPipeConsumer - .builder() - .withProperties(kpipeProps) - .withTopic(TOPIC) - .withPipeline(val -> { - processedCount.incrementAndGet(); - return val; - }) - .withSequentialProcessing(false) - .build(); + consumer = KPipeConsumer.builder() + .withProperties(kpipeProps) + .withTopic(TOPIC) + .withPipeline(val -> { + processedCount.incrementAndGet(); + return val; + }) + .withSequentialProcessing(false) + .build(); } void start() { @@ -232,16 +227,14 @@ public void setup(final KafkaContext kafkaContext) { consumerProps.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false"); kafkaConsumer = new KafkaConsumer<>(consumerProps); - processor = - ParallelStreamProcessor.createEosStreamProcessor( - ParallelConsumerOptions - .builder() - .ordering(ParallelConsumerOptions.ProcessingOrder.UNORDERED) - .maxConcurrency(100) - .ignoreReflectiveAccessExceptionsForAutoCommitDisabledCheck(true) - .consumer(kafkaConsumer) - .build() - ); + processor = ParallelStreamProcessor.createEosStreamProcessor( + ParallelConsumerOptions.builder() + .ordering(ParallelConsumerOptions.ProcessingOrder.UNORDERED) + .maxConcurrency(100) + .ignoreReflectiveAccessExceptionsForAutoCommitDisabledCheck(true) + .consumer(kafkaConsumer) + .build() + ); processor.subscribe(Collections.singletonList(TOPIC)); } @@ -281,14 +274,13 @@ void start() { .setNumControllerNodes(1) .build(); - cluster = - new KafkaClusterTestKit.Builder(nodes) - .setConfigProp("auto.create.topics.enable", "true") - .setConfigProp("offsets.topic.replication.factor", "1") - .setConfigProp("transaction.state.log.replication.factor", "1") - .setConfigProp("transaction.state.log.min.isr", "1") - .setConfigProp("min.insync.replicas", "1") - .build(); + cluster = new KafkaClusterTestKit.Builder(nodes) + .setConfigProp("auto.create.topics.enable", "true") + .setConfigProp("offsets.topic.replication.factor", "1") + .setConfigProp("transaction.state.log.replication.factor", "1") + .setConfigProp("transaction.state.log.min.isr", "1") + .setConfigProp("min.insync.replicas", "1") + .build(); cluster.format(); cluster.startup(); diff --git a/lib/src/main/java/org/kpipe/consumer/KPipeConsumer.java b/lib/src/main/java/org/kpipe/consumer/KPipeConsumer.java index fcdc97b..6852e35 100644 --- a/lib/src/main/java/org/kpipe/consumer/KPipeConsumer.java +++ b/lib/src/main/java/org/kpipe/consumer/KPipeConsumer.java @@ -16,6 +16,7 @@ import org.apache.kafka.common.errors.WakeupException; import org.kpipe.config.AppConfig; import org.kpipe.consumer.enums.ConsumerState; +import org.kpipe.registry.MessagePipeline; import org.kpipe.sink.JsonConsoleSink; import org.kpipe.sink.MessageSink; @@ -815,7 +816,7 @@ private boolean tryProcessRecord(final ConsumerRecord record) { } try { - if (processor instanceof org.kpipe.registry.MessagePipeline typedPipeline) { + if (processor instanceof MessagePipeline typedPipeline) { final var recordValue = (byte[]) record.value(); final var deserialized = typedPipeline.deserialize(recordValue); if (deserialized == null) return false; diff --git a/lib/src/main/java/org/kpipe/registry/AvroFormat.java b/lib/src/main/java/org/kpipe/registry/AvroFormat.java index e9b0cba..7a1147b 100644 --- a/lib/src/main/java/org/kpipe/registry/AvroFormat.java +++ b/lib/src/main/java/org/kpipe/registry/AvroFormat.java @@ -123,15 +123,9 @@ public byte[] serialize(final GenericRecord data) { @Override public GenericRecord deserialize(final byte[] data) { if (data == null || data.length == 0) return null; - if (defaultSchemaKey == null) { - throw new UnsupportedOperationException( - "Avro deserialization requires a default schema key. Use withDefaultSchema()." - ); - } + if (defaultSchemaKey == null) throw new UnsupportedOperationException("Avro deserialization requires a default schema key. Use withDefaultSchema()."); final var schema = AvroMessageProcessor.getSchema(defaultSchemaKey); - if (schema == null) { - throw new IllegalArgumentException("No schema found for key: " + defaultSchemaKey); - } + if (schema == null) throw new IllegalArgumentException("No schema found for key: %s".formatted(defaultSchemaKey)); return AvroMessageProcessor.inScopedCaches(() -> { final var datumReader = new org.apache.avro.generic.GenericDatumReader(schema); final var decoder = org.apache.avro.io.DecoderFactory.get().binaryDecoder(data, null); diff --git a/lib/src/main/java/org/kpipe/registry/TypedPipelineBuilder.java b/lib/src/main/java/org/kpipe/registry/TypedPipelineBuilder.java index de3918e..fd12490 100644 --- a/lib/src/main/java/org/kpipe/registry/TypedPipelineBuilder.java +++ b/lib/src/main/java/org/kpipe/registry/TypedPipelineBuilder.java @@ -53,9 +53,7 @@ public TypedPipelineBuilder add(final UnaryOperator operator) { /// @return This builder. @SafeVarargs public final TypedPipelineBuilder add(final RegistryKey... keys) { - for (final var key : keys) { - add(registry.getOperator(key)); - } + for (final var key : keys) add(registry.getOperator(key)); return this; } @@ -65,7 +63,11 @@ public final TypedPipelineBuilder add(final RegistryKey... keys) { /// @param ifTrue The operator to apply if the condition is true. /// @param ifFalse The operator to apply if the condition is false. /// @return This builder. - public TypedPipelineBuilder when(Predicate condition, UnaryOperator ifTrue, UnaryOperator ifFalse) { + public TypedPipelineBuilder when( + final Predicate condition, + final UnaryOperator ifTrue, + final UnaryOperator ifFalse + ) { Objects.requireNonNull(condition, "condition cannot be null"); Objects.requireNonNull(ifTrue, "ifTrue operator cannot be null"); Objects.requireNonNull(ifFalse, "ifFalse operator cannot be null"); @@ -77,7 +79,7 @@ public TypedPipelineBuilder when(Predicate condition, UnaryOperator ifT /// /// @param sink The sink to add. /// @return This builder. - public TypedPipelineBuilder toSink(MessageSink sink) { + public TypedPipelineBuilder toSink(final MessageSink sink) { if (this.sink == null) { this.sink = Objects.requireNonNull(sink, "sink cannot be null"); } else { @@ -96,9 +98,7 @@ public TypedPipelineBuilder toSink(MessageSink sink) { /// @return This builder. @SafeVarargs public final TypedPipelineBuilder toSink(RegistryKey... sinkKeys) { - for (final var key : sinkKeys) { - toSink(registry.sinkRegistry().get(key)); - } + for (final var key : sinkKeys) toSink(registry.sinkRegistry().get(key)); return this; } @@ -118,13 +118,9 @@ public MessageSink getSink() { @Override public T deserialize(byte[] data) { - if (data == null) { - return null; - } + if (data == null) return null; if (bytesToSkip > 0) { - if (data.length <= bytesToSkip) { - return null; - } + if (data.length <= bytesToSkip) return null; final var actualData = new byte[data.length - bytesToSkip]; System.arraycopy(data, bytesToSkip, actualData, 0, actualData.length); return format.deserialize(actualData); From bdecb5e4458364bfbaf76103a41ea5cebce76cc2 Mon Sep 17 00:00:00 2001 From: mariano Date: Wed, 1 Apr 2026 07:41:35 +0900 Subject: [PATCH 10/14] refactor: extract retry, typed record processing, and error handling into helper methods in KPipeConsumer --- .../org/kpipe/consumer/KPipeConsumer.java | 94 +++++++++++-------- .../java/org/kpipe/registry/AvroFormat.java | 4 +- 2 files changed, 57 insertions(+), 41 deletions(-) diff --git a/lib/src/main/java/org/kpipe/consumer/KPipeConsumer.java b/lib/src/main/java/org/kpipe/consumer/KPipeConsumer.java index 6852e35..7210b22 100644 --- a/lib/src/main/java/org/kpipe/consumer/KPipeConsumer.java +++ b/lib/src/main/java/org/kpipe/consumer/KPipeConsumer.java @@ -801,42 +801,17 @@ protected void processRecord(final ConsumerRecord record) { private boolean tryProcessRecord(final ConsumerRecord record) { for (int attempt = 0; attempt <= maxRetries; attempt++) { if (attempt > 0) { - if (enableMetrics) metrics.get(METRIC_RETRIES).incrementAndGet(); - LOGGER.log( - Level.INFO, - "Retrying message at offset %d (attempt %d of %d)".formatted(record.offset(), attempt, maxRetries) - ); - - try { - Thread.sleep(retryBackoff.toMillis()); - } catch (final InterruptedException ie) { - Thread.currentThread().interrupt(); - return false; - } + if (!handleRetry(record, attempt)) return false; } try { if (processor instanceof MessagePipeline typedPipeline) { - final var recordValue = (byte[]) record.value(); - final var deserialized = typedPipeline.deserialize(recordValue); - if (deserialized == null) return false; - - final var processed = typedPipeline.process(deserialized); - if (processed == null) return false; - - final var sink = typedPipeline.getSink(); - // Call configured sink for typed object - if (sink != null) sink.accept(processed); - // Fallback to consumer sink - else messageSink.accept((V) processed); - - if (offsetManager != null) commandQueue.offer(new ConsumerCommand.MarkOffsetProcessed(record)); - return true; + return processTypedRecord(record, typedPipeline); } final var processedValue = processor.apply(record.value()); messageSink.accept(processedValue); - if (offsetManager != null) commandQueue.offer(new ConsumerCommand.MarkOffsetProcessed(record)); + markOffsetProcessed(record); return true; } catch (final Exception e) { if (isInterruptionRelated(e)) { @@ -845,18 +820,7 @@ private boolean tryProcessRecord(final ConsumerRecord record) { } if (attempt == maxRetries) { - if (enableMetrics) metrics.get(METRIC_PROCESSING_ERRORS).incrementAndGet(); - LOGGER.log( - Level.WARNING, - "Failed to process message at offset %d after %d attempts: %s".formatted( - record.offset(), - maxRetries + 1, - e.getMessage() - ), - e - ); - errorHandler.accept(new ProcessingError<>(record, e, maxRetries)); - if (offsetManager != null) commandQueue.offer(new ConsumerCommand.MarkOffsetProcessed(record)); + handleProcessingError(record, e); return false; } } @@ -864,6 +828,56 @@ private boolean tryProcessRecord(final ConsumerRecord record) { return false; } + private boolean handleRetry(final ConsumerRecord record, final int attempt) { + if (enableMetrics) metrics.get(METRIC_RETRIES).incrementAndGet(); + LOGGER.log( + Level.INFO, + "Retrying message at offset {0} (attempt {1} of {2})", + new Object[] { record.offset(), attempt, maxRetries } + ); + + try { + Thread.sleep(retryBackoff.toMillis()); + return true; + } catch (final InterruptedException ie) { + Thread.currentThread().interrupt(); + return false; + } + } + + @SuppressWarnings("unchecked") + private boolean processTypedRecord(final ConsumerRecord record, final MessagePipeline typedPipeline) { + final var recordValue = (byte[]) record.value(); + final var deserialized = typedPipeline.deserialize(recordValue); + if (deserialized == null) return false; + + final var processed = typedPipeline.process(deserialized); + if (processed == null) return false; + + final var sink = typedPipeline.getSink(); + if (sink != null) sink.accept(processed); + else messageSink.accept((V) processed); + + markOffsetProcessed(record); + return true; + } + + private void markOffsetProcessed(final ConsumerRecord record) { + if (offsetManager != null) { + commandQueue.offer(new ConsumerCommand.MarkOffsetProcessed(record)); + } + } + + private void handleProcessingError(final ConsumerRecord record, final Exception e) { + if (enableMetrics) metrics.get(METRIC_PROCESSING_ERRORS).incrementAndGet(); + LOGGER.log( + Level.WARNING, + "Failed to process message at offset {0} after {1} attempts: {2}", + record.offset(), maxRetries + 1, e.getMessage()); + errorHandler.accept(new ProcessingError<>(record, e, maxRetries)); + markOffsetProcessed(record); + } + private void checkBackpressure() { if (backpressureController == null) return; diff --git a/lib/src/main/java/org/kpipe/registry/AvroFormat.java b/lib/src/main/java/org/kpipe/registry/AvroFormat.java index 7a1147b..f9b3a0b 100644 --- a/lib/src/main/java/org/kpipe/registry/AvroFormat.java +++ b/lib/src/main/java/org/kpipe/registry/AvroFormat.java @@ -123,7 +123,9 @@ public byte[] serialize(final GenericRecord data) { @Override public GenericRecord deserialize(final byte[] data) { if (data == null || data.length == 0) return null; - if (defaultSchemaKey == null) throw new UnsupportedOperationException("Avro deserialization requires a default schema key. Use withDefaultSchema()."); + if (defaultSchemaKey == null) throw new UnsupportedOperationException( + "Avro deserialization requires a default schema key. Use withDefaultSchema()." + ); final var schema = AvroMessageProcessor.getSchema(defaultSchemaKey); if (schema == null) throw new IllegalArgumentException("No schema found for key: %s".formatted(defaultSchemaKey)); return AvroMessageProcessor.inScopedCaches(() -> { From 362503ae622b44a7d6dbfea701c8ba6346c3159c Mon Sep 17 00:00:00 2001 From: mariano Date: Wed, 1 Apr 2026 07:58:26 +0900 Subject: [PATCH 11/14] refactor: simplify consumer state transitions, logging, and metric initialization in KPipeConsumer --- .../org/kpipe/consumer/KPipeConsumer.java | 221 ++++++------------ 1 file changed, 72 insertions(+), 149 deletions(-) diff --git a/lib/src/main/java/org/kpipe/consumer/KPipeConsumer.java b/lib/src/main/java/org/kpipe/consumer/KPipeConsumer.java index 7210b22..eb26557 100644 --- a/lib/src/main/java/org/kpipe/consumer/KPipeConsumer.java +++ b/lib/src/main/java/org/kpipe/consumer/KPipeConsumer.java @@ -134,19 +134,14 @@ private Builder() {} private String topic; private Function processor; private Duration pollTimeout = Duration.ofMillis(100); - private java.util.function.Consumer> errorHandler = error -> { + private java.util.function.Consumer> errorHandler = e -> LOGGER.log( Level.WARNING, - "Processing failed for record (topic=%s, partition=%d, offset=%d) after %d retries: %s".formatted( - error.record().topic(), - error.record().partition(), - error.record().offset(), - error.retryCount(), - error.exception().getMessage() - ), - error.exception() + "Failed at offset {0} after {1} retries: {2}", + e.record().offset(), + e.retryCount(), + e.exception().getMessage() ); - }; private int maxRetries = 0; private Duration retryBackoff = Duration.ofMillis(500); private boolean enableMetrics = true; @@ -406,13 +401,22 @@ public KPipeConsumer(final Builder builder) { : null; if (enableMetrics) { - metrics.put(METRIC_MESSAGES_RECEIVED, new AtomicLong(0)); - metrics.put(METRIC_MESSAGES_PROCESSED, new AtomicLong(0)); - metrics.put(METRIC_PROCESSING_ERRORS, new AtomicLong(0)); - metrics.put(METRIC_RETRIES, new AtomicLong(0)); + metrics.putAll( + Map.of( + METRIC_MESSAGES_RECEIVED, + new AtomicLong(0), + METRIC_MESSAGES_PROCESSED, + new AtomicLong(0), + METRIC_PROCESSING_ERRORS, + new AtomicLong(0), + METRIC_RETRIES, + new AtomicLong(0) + ) + ); if (backpressureController != null) { - metrics.put(METRIC_BACKPRESSURE_PAUSE_COUNT, new AtomicLong(0)); - metrics.put(METRIC_BACKPRESSURE_TIME_MS, new AtomicLong(0)); + metrics.putAll( + Map.of(METRIC_BACKPRESSURE_PAUSE_COUNT, new AtomicLong(0), METRIC_BACKPRESSURE_TIME_MS, new AtomicLong(0)) + ); } } } @@ -443,47 +447,33 @@ public MessageTracker createMessageTracker() { /// closed public void start() { if (state.get() == ConsumerState.CLOSED) throw new IllegalStateException("Cannot restart a closed consumer"); - - if (!state.compareAndSet(ConsumerState.CREATED, ConsumerState.RUNNING)) { - LOGGER.log(Level.WARNING, "Consumer already running for topic {0}", topic); - return; - } + if (!state.compareAndSet(ConsumerState.CREATED, ConsumerState.RUNNING)) return; if (offsetManager != null) offsetManager.start(); - if (rebalanceListener != null) kafkaConsumer.subscribe(List.of(topic), rebalanceListener); else kafkaConsumer.subscribe(List.of(topic)); - Thread.UncaughtExceptionHandler exceptionHandler = (thread, throwable) -> { - LOGGER.log(Level.ERROR, "Uncaught exception in consumer thread: %s".formatted(thread.getName()), throwable); - state.set(ConsumerState.CLOSING); - }; - final var thread = Thread.ofVirtual() .name("kafka-consumer-%s-%s".formatted(topic, UUID.randomUUID().toString().substring(0, 8))) - .uncaughtExceptionHandler(exceptionHandler) + .uncaughtExceptionHandler((t, e) -> { + LOGGER.log(Level.ERROR, "Uncaught exception in thread {0}", t.getName(), e); + state.set(ConsumerState.CLOSING); + }) .start(() -> { try { while (isRunning()) { processCommands(); checkBackpressure(); - if (!isRunning()) break; - - if (isPaused()) { - Thread.sleep(100); - continue; - } + if (isPaused()) { Thread.sleep(100); continue; } final var records = pollRecords(); if (records != null && !records.isEmpty()) processRecords(records); } } catch (final InterruptedException e) { Thread.currentThread().interrupt(); - LOGGER.log(Level.INFO, "Consumer thread interrupted for topic {0}", topic); } catch (final Exception e) { - LOGGER.log(Level.WARNING, "Error in consumer thread", e); - throw e; + if (isRunning()) LOGGER.log(Level.WARNING, "Error in consumer thread", e); } finally { try { kafkaConsumer.close(); @@ -504,22 +494,9 @@ public void start() { /// ///

This method is idempotent - calling it multiple times has no additional effect. public void pause() { - final var currentState = state.get(); - - // Don't send a pause command if already paused or closed - if (currentState == ConsumerState.PAUSED || currentState == ConsumerState.CLOSED) { - LOGGER.log(Level.INFO, "Consumer already paused or closed for topic {0}", topic); - return; - } - - // Always add command to the queue for proper test verification + if (state.get() == ConsumerState.PAUSED || state.get() == ConsumerState.CLOSED) return; commandQueue.offer(new ConsumerCommand.Pause()); - LOGGER.log(Level.INFO, "Consumer pause requested for topic {0}", topic); - - // Update state if not closed or closing - if (currentState != ConsumerState.CLOSING) { - state.set(ConsumerState.PAUSED); - } + if (state.get() != ConsumerState.CLOSING) state.set(ConsumerState.PAUSED); } /// Processes pending commands from the command queue. @@ -588,22 +565,10 @@ final var record = (ConsumerRecord) cmd.record(); /// /// @throws IllegalStateException if the consumer has been closed public void resume() { - final var currentState = state.get(); - - if (currentState == ConsumerState.CLOSED) throw new IllegalStateException("Cannot resume a closed consumer"); - - // Don't send a resume command if already running - if (currentState == ConsumerState.RUNNING) { - LOGGER.log(Level.INFO, "Consumer already running for topic {0}", topic); - return; - } - - // Always add command to the queue for proper test verification + if (state.get() == ConsumerState.CLOSED) throw new IllegalStateException("Cannot resume a closed consumer"); + if (state.get() == ConsumerState.RUNNING) return; commandQueue.offer(new ConsumerCommand.Resume()); - LOGGER.log(Level.INFO, "Consumer resume requested for topic {0}", topic); - - // Update state if not closing or closed - if (currentState != ConsumerState.CLOSING) state.set(ConsumerState.RUNNING); + if (state.get() != ConsumerState.CLOSING) state.set(ConsumerState.RUNNING); } /// Returns whether the consumer is currently paused. @@ -660,78 +625,54 @@ public boolean isRunning() { ///

This method is idempotent - calling it multiple times has no additional effect. @Override public void close() { - // Only proceed if not already closed or closing if ( !state.compareAndSet(ConsumerState.RUNNING, ConsumerState.CLOSING) && !state.compareAndSet(ConsumerState.PAUSED, ConsumerState.CLOSING) - ) { - return; // Already closed or closing - } + ) return; - final var waitForMessagesMs = waitForMessagesTimeout.toMillis(); - final var tracker = (waitForMessagesMs > 0 && enableMetrics) ? createMessageTracker() : null; - - // Signal shutdown + final var tracker = (waitForMessagesTimeout.toMillis() > 0 && enableMetrics) ? createMessageTracker() : null; pause(); commandQueue.offer(new ConsumerCommand.Close()); - // Wait for in-flight messages - Optional.ofNullable(tracker).ifPresent(t -> { + if (tracker != null && tracker.getInFlightMessageCount() > 0) { + LOGGER.log(Level.INFO, "Waiting for {0} in-flight messages to complete", tracker.getInFlightMessageCount()); + tracker.waitForCompletion(waitForMessagesTimeout.toMillis()); + } + + if (kafkaConsumer != null) { try { - final var inFlight = t.getInFlightMessageCount(); - if (inFlight > 0) { - LOGGER.log(Level.INFO, "Waiting for %d in-flight messages to complete".formatted(inFlight)); - t.waitForCompletion(waitForMessagesMs); - } - } catch (final Exception e) { - LOGGER.log(Level.WARNING, "Error waiting for in-flight messages", e); + kafkaConsumer.wakeup(); + } catch (Exception e) { + LOGGER.log(Level.WARNING, "Error during wakeup", e); } - }); + } - // Wake up consumer and wait for thread termination - Optional.ofNullable(kafkaConsumer).ifPresent(consumer -> { + final var thread = consumerThread.get(); + if (thread != null && thread.isAlive()) { try { - consumer.wakeup(); - } catch (final Exception e) { - LOGGER.log(Level.WARNING, "Error during consumer wakeup", e); + thread.join(threadTerminationTimeout.toMillis()); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); } - }); - - Optional.ofNullable(consumerThread.get()) - .filter(Thread::isAlive) - .ifPresent(thread -> { - try { - thread.join(threadTerminationTimeout.toMillis()); - } catch (final InterruptedException e) { - Thread.currentThread().interrupt(); - LOGGER.log(Level.WARNING, "Interrupted while waiting for consumer thread"); - } - }); + } - // Shutdown executor try { virtualThreadExecutor.shutdown(); if (!virtualThreadExecutor.awaitTermination(executorTerminationTimeout.toMillis(), TimeUnit.MILLISECONDS)) { - LOGGER.log(Level.WARNING, "Not all processing tasks completed during shutdown"); - final var pending = virtualThreadExecutor.shutdownNow(); - LOGGER.log(Level.WARNING, "%d tasks were not processed".formatted(pending.size())); + LOGGER.log(Level.WARNING, "{0} tasks not processed", virtualThreadExecutor.shutdownNow().size()); } - } catch (final InterruptedException e) { + } catch (InterruptedException e) { Thread.currentThread().interrupt(); virtualThreadExecutor.shutdownNow(); - LOGGER.log(Level.WARNING, "Interrupted while waiting for executor termination"); } - // Shutdown offset manager if enabled if (offsetManager != null) { try { offsetManager.close(); - } catch (final Exception e) { + } catch (Exception e) { LOGGER.log(Level.WARNING, "Error closing offset manager", e); } } - - // Ensure the state is set to CLOSED state.set(ConsumerState.CLOSED); } @@ -739,28 +680,19 @@ public void close() { /// /// @param records the batch of records to process protected void processRecords(final ConsumerRecords records) { - if (sequentialProcessing) { - // Process sequentially for cases where order matters - for (final var record : records.records(topic)) { - // Track offset before processing - if (offsetManager != null) commandQueue.offer(new ConsumerCommand.TrackOffset(record)); - inFlightCount.incrementAndGet(); + for (final var record : records.records(topic)) { + if (offsetManager != null) commandQueue.offer(new ConsumerCommand.TrackOffset(record)); + inFlightCount.incrementAndGet(); + + if (sequentialProcessing) { processRecord(record); - } - } else { - // Process in parallel using virtual threads - final var topicRecords = records.records(topic); - for (final var record : topicRecords) { - // Track offset before submitting to virtual thread - if (offsetManager != null) commandQueue.offer(new ConsumerCommand.TrackOffset(record)); - inFlightCount.incrementAndGet(); + } else { try { virtualThreadExecutor.submit(() -> processRecord(record)); } catch (final RejectedExecutionException e) { - // Handle task rejection (typically during shutdown) inFlightCount.decrementAndGet(); if (isRunning()) { - LOGGER.log(Level.WARNING, "Task submission rejected, likely during shutdown", e); + LOGGER.log(Level.WARNING, "Task submission rejected during shutdown", e); if (enableMetrics) metrics.get(METRIC_PROCESSING_ERRORS).incrementAndGet(); errorHandler.accept(new ProcessingError<>(record, e, 0)); } @@ -806,7 +738,7 @@ private boolean tryProcessRecord(final ConsumerRecord record) { try { if (processor instanceof MessagePipeline typedPipeline) { - return processTypedRecord(record, typedPipeline); + return processTypedRecord(record, typedPipeline); } final var processedValue = processor.apply(record.value()); @@ -873,7 +805,10 @@ private void handleProcessingError(final ConsumerRecord record, final Exce LOGGER.log( Level.WARNING, "Failed to process message at offset {0} after {1} attempts: {2}", - record.offset(), maxRetries + 1, e.getMessage()); + record.offset(), + maxRetries + 1, + e.getMessage() + ); errorHandler.accept(new ProcessingError<>(record, e, maxRetries)); markOffsetProcessed(record); } @@ -883,33 +818,21 @@ private void checkBackpressure() { switch (backpressureController.check(kafkaConsumer, isPaused())) { case PAUSE -> { - final long currentValue = backpressureController.getMetric(kafkaConsumer); + final long value = backpressureController.getMetric(kafkaConsumer); LOGGER.log( Level.WARNING, - "Backpressure triggered: pausing consumer (%s=%d, highWatermark=%d) for topic %s".formatted( - backpressureController.getMetricName(), - currentValue, - backpressureController.highWatermark(), - topic - ) + "Backpressure triggered: pausing consumer ({0}={1})", + backpressureController.getMetricName(), + value ); if (enableMetrics) metrics.get(METRIC_BACKPRESSURE_PAUSE_COUNT).incrementAndGet(); backpressurePauseStartTime = System.currentTimeMillis(); pause(); } case RESUME -> { - final long currentValue = backpressureController.getMetric(kafkaConsumer); - final long pauseDurationMs = System.currentTimeMillis() - backpressurePauseStartTime; - if (enableMetrics) metrics.get(METRIC_BACKPRESSURE_TIME_MS).addAndGet(pauseDurationMs); - LOGGER.log( - Level.INFO, - "Backpressure resolved: resuming consumer (paused for %d ms, %s=%d) for topic %s".formatted( - pauseDurationMs, - backpressureController.getMetricName(), - currentValue, - topic - ) - ); + final long duration = System.currentTimeMillis() - backpressurePauseStartTime; + if (enableMetrics) metrics.get(METRIC_BACKPRESSURE_TIME_MS).addAndGet(duration); + LOGGER.log(Level.INFO, "Backpressure resolved: resuming consumer (paused for {0} ms)", duration); resume(); } case NONE -> { From 92fddb3ea81985e5554872dd168fed2ca28af89c Mon Sep 17 00:00:00 2001 From: mariano Date: Wed, 1 Apr 2026 08:13:12 +0900 Subject: [PATCH 12/14] refactor: remove unused constants and streamline control flow in message processors and consumer --- lib/src/main/java/org/kpipe/consumer/KPipeConsumer.java | 6 ++++-- .../java/org/kpipe/processor/AvroMessageProcessor.java | 1 - .../java/org/kpipe/processor/JsonMessageProcessor.java | 7 ------- 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/lib/src/main/java/org/kpipe/consumer/KPipeConsumer.java b/lib/src/main/java/org/kpipe/consumer/KPipeConsumer.java index eb26557..74f8ee6 100644 --- a/lib/src/main/java/org/kpipe/consumer/KPipeConsumer.java +++ b/lib/src/main/java/org/kpipe/consumer/KPipeConsumer.java @@ -71,7 +71,6 @@ public class KPipeConsumer implements AutoCloseable { private static final Logger LOGGER = System.getLogger(KPipeConsumer.class.getName()); - // Metric key constants private static final String METRIC_MESSAGES_RECEIVED = "messagesReceived"; private static final String METRIC_MESSAGES_PROCESSED = "messagesProcessed"; private static final String METRIC_PROCESSING_ERRORS = "processingErrors"; @@ -465,7 +464,10 @@ public void start() { processCommands(); checkBackpressure(); if (!isRunning()) break; - if (isPaused()) { Thread.sleep(100); continue; } + if (isPaused()) { + Thread.sleep(100); + continue; + } final var records = pollRecords(); if (records != null && !records.isEmpty()) processRecords(records); diff --git a/lib/src/main/java/org/kpipe/processor/AvroMessageProcessor.java b/lib/src/main/java/org/kpipe/processor/AvroMessageProcessor.java index 3d43582..acbd7b0 100644 --- a/lib/src/main/java/org/kpipe/processor/AvroMessageProcessor.java +++ b/lib/src/main/java/org/kpipe/processor/AvroMessageProcessor.java @@ -47,7 +47,6 @@ public class AvroMessageProcessor { private AvroMessageProcessor() {} private static final Logger LOGGER = System.getLogger(AvroMessageProcessor.class.getName()); - private static final byte[] EMPTY_AVRO = new byte[0]; private static final ConcurrentHashMap SCHEMA_REGISTRY = new ConcurrentHashMap<>(); private static final ScopedValue SCHEMA_PARSER = ScopedValue.newInstance(); diff --git a/lib/src/main/java/org/kpipe/processor/JsonMessageProcessor.java b/lib/src/main/java/org/kpipe/processor/JsonMessageProcessor.java index 1abce36..b198878 100644 --- a/lib/src/main/java/org/kpipe/processor/JsonMessageProcessor.java +++ b/lib/src/main/java/org/kpipe/processor/JsonMessageProcessor.java @@ -1,9 +1,6 @@ package org.kpipe.processor; -import com.dslplatform.json.DslJson; import java.io.ByteArrayOutputStream; -import java.lang.System.Logger; -import java.nio.charset.StandardCharsets; import java.util.Map; import java.util.function.Function; import java.util.function.UnaryOperator; @@ -27,10 +24,6 @@ public class JsonMessageProcessor { private JsonMessageProcessor() {} - - private static final Logger LOGGER = System.getLogger(JsonMessageProcessor.class.getName()); - private static final DslJson> DSL_JSON = new DslJson<>(); - private static final byte[] EMPTY_JSON = "{}".getBytes(StandardCharsets.UTF_8); private static final ScopedValue OUTPUT_STREAM_CACHE = ScopedValue.newInstance(); /// Creates an operator that adds a field with specified key and value to a JSON map. From 703eea5eb113bd641efe08ce8f67f4e9edc260c6 Mon Sep 17 00:00:00 2001 From: mariano Date: Wed, 1 Apr 2026 08:23:02 +0900 Subject: [PATCH 13/14] chore: add module-info.java and configure build for Java module system compatibility --- lib/build.gradle.kts | 15 +++++++++++++++ lib/src/main/java/module-info.java | 18 ++++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 lib/src/main/java/module-info.java diff --git a/lib/build.gradle.kts b/lib/build.gradle.kts index 60d77eb..7b981f3 100644 --- a/lib/build.gradle.kts +++ b/lib/build.gradle.kts @@ -78,6 +78,21 @@ tasks.test { forkEvery = 200 } +tasks.compileJava { + doFirst { + options.compilerArgs.addAll(listOf("--module-path", classpath.asPath)) + classpath = files() + } +} + +tasks.javadoc { + doFirst { + val javadocOptions = options as StandardJavadocDocletOptions + javadocOptions.addMultilineStringsOption("-module-path").value = listOf(classpath.asPath) + classpath = files() + } +} + tasks.jacocoTestReport { reports { csv.required.set(true) diff --git a/lib/src/main/java/module-info.java b/lib/src/main/java/module-info.java new file mode 100644 index 0000000..98db415 --- /dev/null +++ b/lib/src/main/java/module-info.java @@ -0,0 +1,18 @@ +module org.kpipe { + requires com.fasterxml.jackson.core; + requires java.net.http; + requires jdk.httpserver; + requires org.apache.avro; + requires org.slf4j; + requires dsl.json; + requires kafka.clients; + + exports org.kpipe.config; + exports org.kpipe.consumer; + exports org.kpipe.consumer.enums; + exports org.kpipe.health; + exports org.kpipe.metrics; + exports org.kpipe.processor; + exports org.kpipe.registry; + exports org.kpipe.sink; +} From 2f90d89ca799531a1465d61884227eba2570e2f4 Mon Sep 17 00:00:00 2001 From: mariano Date: Wed, 1 Apr 2026 08:28:19 +0900 Subject: [PATCH 14/14] refactor: rename ConsumerRunner to KPipeRunner and update references across codebase --- app/avro/src/main/java/org/kpipe/App.java | 14 +- app/json/src/main/java/org/kpipe/App.java | 14 +- app/protobuf/src/main/java/org/kpipe/App.java | 10 +- .../{ConsumerRunner.java => KPipeRunner.java} | 84 ++++++------ .../kpipe/processor/JsonMessageProcessor.java | 1 + .../kpipe/consumer/ConsumerRunnerTest.java | 128 ++++++++---------- 6 files changed, 113 insertions(+), 138 deletions(-) rename lib/src/main/java/org/kpipe/consumer/{ConsumerRunner.java => KPipeRunner.java} (88%) diff --git a/app/avro/src/main/java/org/kpipe/App.java b/app/avro/src/main/java/org/kpipe/App.java index c9562bc..0337986 100644 --- a/app/avro/src/main/java/org/kpipe/App.java +++ b/app/avro/src/main/java/org/kpipe/App.java @@ -12,11 +12,7 @@ import org.apache.kafka.clients.consumer.Consumer; import org.kpipe.config.AppConfig; import org.kpipe.config.KafkaConsumerConfig; -import org.kpipe.consumer.ConsumerCommand; -import org.kpipe.consumer.ConsumerRunner; -import org.kpipe.consumer.KPipeConsumer; -import org.kpipe.consumer.KafkaOffsetManager; -import org.kpipe.consumer.OffsetManager; +import org.kpipe.consumer.*; import org.kpipe.health.HttpHealthServer; import org.kpipe.metrics.ConsumerMetricsReporter; import org.kpipe.metrics.MetricsReporter; @@ -36,7 +32,7 @@ public class App implements AutoCloseable { private static final String DEFAULT_SCHEMA_REGISTRY_URL = "http://schema-registry:8081"; private final KPipeConsumer functionalConsumer; - private final ConsumerRunner> runner; + private final KPipeRunner> runner; private final HttpHealthServer healthServer; private final AtomicReference> currentMetrics = new AtomicReference<>(); private final MessageProcessorRegistry processorRegistry; @@ -93,19 +89,19 @@ private static String resolveSchemaRegistryUrl() { } /// Creates the consumer runner with appropriate lifecycle hooks. - private ConsumerRunner> createConsumerRunner( + private KPipeRunner> createConsumerRunner( final AppConfig config, final MetricsReporter consumerMetricsReporter, final MetricsReporter processorMetricsReporter, final MetricsReporter sinkMetricsReporter ) { - return ConsumerRunner.builder(functionalConsumer) + return KPipeRunner.builder(functionalConsumer) .withStartAction(c -> { c.start(); LOGGER.log(Level.INFO, "Kafka consumer application started successfully"); }) .withHealthCheck(KPipeConsumer::isRunning) - .withGracefulShutdown(ConsumerRunner::performGracefulConsumerShutdown) + .withGracefulShutdown(KPipeRunner::performGracefulConsumerShutdown) .withMetricsReporters(List.of(consumerMetricsReporter, processorMetricsReporter, sinkMetricsReporter)) .withMetricsInterval(config.metricsInterval().toMillis()) .withShutdownTimeout(config.shutdownTimeout().toMillis()) diff --git a/app/json/src/main/java/org/kpipe/App.java b/app/json/src/main/java/org/kpipe/App.java index 59412c0..cbeeabc 100644 --- a/app/json/src/main/java/org/kpipe/App.java +++ b/app/json/src/main/java/org/kpipe/App.java @@ -13,11 +13,7 @@ import org.apache.kafka.clients.consumer.Consumer; import org.kpipe.config.AppConfig; import org.kpipe.config.KafkaConsumerConfig; -import org.kpipe.consumer.ConsumerCommand; -import org.kpipe.consumer.ConsumerRunner; -import org.kpipe.consumer.KPipeConsumer; -import org.kpipe.consumer.KafkaOffsetManager; -import org.kpipe.consumer.OffsetManager; +import org.kpipe.consumer.*; import org.kpipe.health.HttpHealthServer; import org.kpipe.metrics.ConsumerMetricsReporter; import org.kpipe.metrics.MetricsReporter; @@ -36,7 +32,7 @@ public class App implements AutoCloseable { private final AtomicLong startTime = new AtomicLong(System.currentTimeMillis()); private final KPipeConsumer kpipeConsumer; - private final ConsumerRunner> runner; + private final KPipeRunner> runner; private final HttpHealthServer healthServer; private final AtomicReference> currentMetrics = new AtomicReference<>(); private final MessageProcessorRegistry processorRegistry; @@ -80,18 +76,18 @@ public App(final AppConfig config) { } /// Creates the consumer runner with appropriate lifecycle hooks. - private ConsumerRunner> createConsumerRunner( + private KPipeRunner> createConsumerRunner( final AppConfig config, final MetricsReporter consumerMetricsReporter, final MetricsReporter processorMetricsReporter ) { - return ConsumerRunner.builder(kpipeConsumer) + return KPipeRunner.builder(kpipeConsumer) .withStartAction(c -> { c.start(); LOGGER.log(Level.INFO, "Kafka consumer application started successfully"); }) .withHealthCheck(KPipeConsumer::isRunning) - .withGracefulShutdown(ConsumerRunner::performGracefulConsumerShutdown) + .withGracefulShutdown(KPipeRunner::performGracefulConsumerShutdown) .withMetricsReporters(List.of(consumerMetricsReporter, processorMetricsReporter)) .withMetricsInterval(config.metricsInterval().toMillis()) .withShutdownTimeout(config.shutdownTimeout().toMillis()) diff --git a/app/protobuf/src/main/java/org/kpipe/App.java b/app/protobuf/src/main/java/org/kpipe/App.java index 37ba553..764c2b0 100644 --- a/app/protobuf/src/main/java/org/kpipe/App.java +++ b/app/protobuf/src/main/java/org/kpipe/App.java @@ -15,7 +15,7 @@ import org.kpipe.config.AppConfig; import org.kpipe.config.KafkaConsumerConfig; import org.kpipe.consumer.ConsumerCommand; -import org.kpipe.consumer.ConsumerRunner; +import org.kpipe.consumer.KPipeRunner; import org.kpipe.consumer.KPipeConsumer; import org.kpipe.consumer.KafkaOffsetManager; import org.kpipe.consumer.OffsetManager; @@ -35,7 +35,7 @@ public class App implements AutoCloseable { private final AtomicLong startTime = new AtomicLong(System.currentTimeMillis()); private final KPipeConsumer kpipeConsumer; - private final ConsumerRunner> runner; + private final KPipeRunner> runner; private final HttpHealthServer healthServer; private final AtomicReference> currentMetrics = new AtomicReference<>(); private final MessageProcessorRegistry processorRegistry; @@ -77,18 +77,18 @@ public App(final AppConfig config) { } /// Creates the consumer runner with appropriate lifecycle hooks. - private ConsumerRunner> createConsumerRunner( + private KPipeRunner> createConsumerRunner( final AppConfig config, final MetricsReporter consumerMetricsReporter, final MetricsReporter processorMetricsReporter ) { - return ConsumerRunner.builder(kpipeConsumer) + return KPipeRunner.builder(kpipeConsumer) .withStartAction(c -> { c.start(); LOGGER.log(Level.INFO, "Kafka consumer application started successfully"); }) .withHealthCheck(KPipeConsumer::isRunning) - .withGracefulShutdown(ConsumerRunner::performGracefulConsumerShutdown) + .withGracefulShutdown(KPipeRunner::performGracefulConsumerShutdown) .withMetricsReporters(List.of(consumerMetricsReporter, processorMetricsReporter)) .withMetricsInterval(config.metricsInterval().toMillis()) .withShutdownTimeout(config.shutdownTimeout().toMillis()) diff --git a/lib/src/main/java/org/kpipe/consumer/ConsumerRunner.java b/lib/src/main/java/org/kpipe/consumer/KPipeRunner.java similarity index 88% rename from lib/src/main/java/org/kpipe/consumer/ConsumerRunner.java rename to lib/src/main/java/org/kpipe/consumer/KPipeRunner.java index 0c20448..663e2a6 100644 --- a/lib/src/main/java/org/kpipe/consumer/ConsumerRunner.java +++ b/lib/src/main/java/org/kpipe/consumer/KPipeRunner.java @@ -18,7 +18,7 @@ /// A thread-safe runner for {@link KPipeConsumer} instances that manages the consumer /// lifecycle. /// -/// The ConsumerRunner provides: +/// The KPipeRunner provides: /// /// * Controlled startup and shutdown /// * Health monitoring @@ -33,7 +33,7 @@ /// .withProcessor(message -> processMessage(message)) /// .build(); /// -/// final var runner = ConsumerRunner.builder(consumer) +/// final var runner = KPipeRunner.builder(consumer) /// .withHealthCheck(KPipeConsumer::isRunning) /// .withShutdownHook(true) /// .withShutdownTimeout(5000) @@ -44,9 +44,9 @@ /// ``` /// /// @param the type of consumer being managed, must extend KPipeConsumer -public class ConsumerRunner> implements AutoCloseable { +public class KPipeRunner> implements AutoCloseable { - private static final Logger LOGGER = System.getLogger(ConsumerRunner.class.getName()); + private static final Logger LOGGER = System.getLogger(KPipeRunner.class.getName()); // Consumer state private final T consumer; @@ -65,7 +65,7 @@ public class ConsumerRunner> implements AutoClosea private final long metricsInterval; private volatile Thread metricsThread; - private ConsumerRunner(final Builder builder) { + private KPipeRunner(final Builder builder) { this.consumer = builder.consumer; this.startAction = builder.startAction; this.healthCheck = builder.healthCheck; @@ -77,7 +77,7 @@ private ConsumerRunner(final Builder builder) { if (builder.useShutdownHook) Runtime.getRuntime().addShutdownHook(new Thread(this::close)); } - /// Creates a new builder for configuring a ConsumerRunner. + /// Creates a new builder for configuring a KPipeRunner. /// /// @param the type of consumer to run /// @param consumer the consumer instance to manage @@ -209,8 +209,7 @@ public void close() { // Pause the consumer first to prevent receiving new messages consumer.pause(); - return Optional - .ofNullable(consumer.createMessageTracker()) + return Optional.ofNullable(consumer.createMessageTracker()) .map(tracker -> { try { // First check for in-flight messages @@ -236,7 +235,8 @@ public void close() { inFlightCount = tracker.getInFlightMessageCount(); final var allProcessed = completed && inFlightCount == 0; - if (allProcessed) LOGGER.log(Level.INFO, "All in-flight messages processed, shutting down"); else LOGGER.log( + if (allProcessed) LOGGER.log(Level.INFO, "All in-flight messages processed, shutting down"); + else LOGGER.log( Level.WARNING, "Shutdown timeout reached with %s messages still in flight".formatted(inFlightCount) ); @@ -274,43 +274,39 @@ private void startMetricsThread() { // Predicate to check if the thread should continue running final Predicate shouldContinue = thread -> !closed.get() && !thread.isInterrupted(); - metricsThread = - Thread - .ofPlatform() - .name("metrics-reporter") - .daemon(true) - .start(() -> { - final var currentThread = Thread.currentThread(); - while (shouldContinue.test(currentThread)) { - try { - reportAllMetrics.run(); - Thread.sleep(metricsInterval); - } catch (final InterruptedException e) { - Thread.currentThread().interrupt(); - break; - } + metricsThread = Thread.ofPlatform() + .name("metrics-reporter") + .daemon(true) + .start(() -> { + final var currentThread = Thread.currentThread(); + while (shouldContinue.test(currentThread)) { + try { + reportAllMetrics.run(); + Thread.sleep(metricsInterval); + } catch (final InterruptedException e) { + Thread.currentThread().interrupt(); + break; } - }); + } + }); } /// Stops the metrics reporting thread if it's running. private void stopMetricsThread() { - Optional - .ofNullable(metricsThread) - .ifPresent(thread -> { - thread.interrupt(); - try { - thread.join(1_000); // Wait up to 1 second for the thread to terminate - } catch (final InterruptedException e) { - Thread.currentThread().interrupt(); - LOGGER.log(Level.WARNING, "Interrupted while stopping metrics thread"); - } finally { - metricsThread = null; - } - }); + Optional.ofNullable(metricsThread).ifPresent(thread -> { + thread.interrupt(); + try { + thread.join(1_000); // Wait up to 1 second for the thread to terminate + } catch (final InterruptedException e) { + Thread.currentThread().interrupt(); + LOGGER.log(Level.WARNING, "Interrupted while stopping metrics thread"); + } finally { + metricsThread = null; + } + }); } - /// Builder for creating ConsumerRunner instances with custom configuration. + /// Builder for creating KPipeRunner instances with custom configuration. /// /// @param the type of consumer being managed public static class Builder> { @@ -328,7 +324,7 @@ private Builder(final T consumer) { this.consumer = consumer; this.startAction = T::start; this.healthCheck = _ -> true; - this.gracefulShutdown = ConsumerRunner::performGracefulConsumerShutdown; + this.gracefulShutdown = KPipeRunner::performGracefulConsumerShutdown; } /// Sets a custom action to perform when starting the consumer. @@ -404,11 +400,11 @@ public Builder with(final Function, Builder> configurer) { return configurer.apply(this); } - /// Builds a new ConsumerRunner with the configured settings. + /// Builds a new KPipeRunner with the configured settings. /// - /// @return a new ConsumerRunner instance - public ConsumerRunner build() { - return new ConsumerRunner<>(this); + /// @return a new KPipeRunner instance + public KPipeRunner build() { + return new KPipeRunner<>(this); } } } diff --git a/lib/src/main/java/org/kpipe/processor/JsonMessageProcessor.java b/lib/src/main/java/org/kpipe/processor/JsonMessageProcessor.java index b198878..dba23e4 100644 --- a/lib/src/main/java/org/kpipe/processor/JsonMessageProcessor.java +++ b/lib/src/main/java/org/kpipe/processor/JsonMessageProcessor.java @@ -24,6 +24,7 @@ public class JsonMessageProcessor { private JsonMessageProcessor() {} + private static final ScopedValue OUTPUT_STREAM_CACHE = ScopedValue.newInstance(); /// Creates an operator that adds a field with specified key and value to a JSON map. diff --git a/lib/src/test/java/org/kpipe/consumer/ConsumerRunnerTest.java b/lib/src/test/java/org/kpipe/consumer/ConsumerRunnerTest.java index af88ddd..d5796e1 100644 --- a/lib/src/test/java/org/kpipe/consumer/ConsumerRunnerTest.java +++ b/lib/src/test/java/org/kpipe/consumer/ConsumerRunnerTest.java @@ -26,17 +26,15 @@ class ConsumerRunnerTest { @Mock private MetricsReporter mockReporter; - private ConsumerRunner> runner; + private KPipeRunner> runner; @Test void shouldStartConsumer() { // Arrange when(mockConsumer.isRunning()).thenReturn(true); - runner = - ConsumerRunner - .builder(mockConsumer) - .withHealthCheck(KPipeConsumer::isRunning) // Use the isRunning method in health check - .build(); + runner = KPipeRunner.builder(mockConsumer) + .withHealthCheck(KPipeConsumer::isRunning) // Use the isRunning method in health check + .build(); // Act runner.start(); @@ -50,7 +48,7 @@ void shouldStartConsumer() { void shouldHandleStartException() { // Arrange doThrow(new RuntimeException("Start failed")).when(mockConsumer).start(); - runner = ConsumerRunner.builder(mockConsumer).build(); + runner = KPipeRunner.builder(mockConsumer).build(); // Act & Assert assertThrows(RuntimeException.class, () -> runner.start()); @@ -61,7 +59,9 @@ void shouldHandleStartException() { void shouldCheckHealthCorrectly() { // Arrange final var healthCheckResult = new AtomicBoolean(true); - runner = ConsumerRunner.builder(mockConsumer).withHealthCheck(c -> healthCheckResult.get()).build(); + runner = KPipeRunner.builder(mockConsumer) + .withHealthCheck(c -> healthCheckResult.get()) + .build(); runner.start(); // Act & Assert - Initially healthy @@ -77,7 +77,7 @@ void shouldShutdownGracefully() { // Arrange when(mockConsumer.createMessageTracker()).thenReturn(mockTracker); when(mockTracker.getInFlightMessageCount()).thenReturn(0L); - runner = ConsumerRunner.builder(mockConsumer).build(); + runner = KPipeRunner.builder(mockConsumer).build(); runner.start(); // Act @@ -96,7 +96,7 @@ void shouldWaitForInFlightMessagesOnShutdown() { when(mockConsumer.createMessageTracker()).thenReturn(mockTracker); when(mockTracker.getInFlightMessageCount()).thenReturn(5L).thenReturn(0L); when(mockTracker.waitForCompletion(anyLong())).thenReturn(Optional.of(true)); - runner = ConsumerRunner.builder(mockConsumer).build(); + runner = KPipeRunner.builder(mockConsumer).build(); runner.start(); // Act @@ -113,7 +113,7 @@ void shouldTimeoutWhenInFlightMessagesRemain() { when(mockConsumer.createMessageTracker()).thenReturn(mockTracker); when(mockTracker.getInFlightMessageCount()).thenReturn(5L).thenReturn(3L); when(mockTracker.waitForCompletion(anyLong())).thenReturn(Optional.of(false)); - runner = ConsumerRunner.builder(mockConsumer).build(); + runner = KPipeRunner.builder(mockConsumer).build(); runner.start(); // Act @@ -132,7 +132,7 @@ void shouldUseCustomGracefulShutdown() { return true; }; - runner = ConsumerRunner.builder(mockConsumer).withGracefulShutdown(customShutdown).build(); + runner = KPipeRunner.builder(mockConsumer).withGracefulShutdown(customShutdown).build(); runner.start(); // Act @@ -146,7 +146,7 @@ void shouldUseCustomGracefulShutdown() { @Test void shouldNotStartTwice() { // Arrange - runner = ConsumerRunner.builder(mockConsumer).build(); + runner = KPipeRunner.builder(mockConsumer).build(); runner.start(); reset(mockConsumer); // Reset to verify no more calls @@ -160,7 +160,7 @@ void shouldNotStartTwice() { @Test void shouldCloseConsumerWhenClosed() { // Arrange - runner = ConsumerRunner.builder(mockConsumer).build(); + runner = KPipeRunner.builder(mockConsumer).build(); runner.start(); // Act @@ -174,7 +174,7 @@ void shouldCloseConsumerWhenClosed() { @Test void shouldHandleMultipleCloses() { // Arrange - runner = ConsumerRunner.builder(mockConsumer).build(); + runner = KPipeRunner.builder(mockConsumer).build(); runner.start(); runner.close(); reset(mockConsumer); @@ -192,7 +192,7 @@ void shouldAwaitShutdownSuccessfully() throws Exception { when(mockConsumer.createMessageTracker()).thenReturn(mockTracker); when(mockTracker.getInFlightMessageCount()).thenReturn(0L); - runner = ConsumerRunner.builder(mockConsumer).build(); + runner = KPipeRunner.builder(mockConsumer).build(); runner.start(); // Create a thread to close the runner after a delay @@ -216,7 +216,7 @@ void shouldAwaitShutdownSuccessfully() throws Exception { @Test void shouldTimeoutWhenAwaitingShutdown() throws Exception { // Arrange - runner = ConsumerRunner.builder(mockConsumer).build(); + runner = KPipeRunner.builder(mockConsumer).build(); runner.start(); // Act @@ -236,12 +236,10 @@ void shouldApplyCustomShutdownTimeout() { return true; }; - runner = - ConsumerRunner - .builder(mockConsumer) - .withShutdownTimeout(customTimeout) - .withGracefulShutdown(timeoutCapturingShutdown) - .build(); + runner = KPipeRunner.builder(mockConsumer) + .withShutdownTimeout(customTimeout) + .withGracefulShutdown(timeoutCapturingShutdown) + .build(); runner.start(); // Act @@ -258,7 +256,7 @@ void performGracefulConsumerShutdownShouldHandleNoInFlightMessages() { when(mockTracker.getInFlightMessageCount()).thenReturn(0L); // Act - final var result = ConsumerRunner.performGracefulConsumerShutdown(mockConsumer, 1000); + final var result = KPipeRunner.performGracefulConsumerShutdown(mockConsumer, 1000); // Assert assertTrue(result); @@ -275,7 +273,7 @@ void performGracefulConsumerShutdownShouldHandleInFlightMessages() { when(mockTracker.waitForCompletion(anyLong())).thenReturn(Optional.of(true)); // Act - final var result = ConsumerRunner.performGracefulConsumerShutdown(mockConsumer, 1000); + final var result = KPipeRunner.performGracefulConsumerShutdown(mockConsumer, 1000); // Assert assertTrue(result); @@ -292,7 +290,7 @@ void performGracefulConsumerShutdownShouldHandlePartiallyProcessedMessages() { when(mockTracker.waitForCompletion(anyLong())).thenReturn(Optional.of(false)); // Act - final var result = ConsumerRunner.performGracefulConsumerShutdown(mockConsumer, 1000); + final var result = KPipeRunner.performGracefulConsumerShutdown(mockConsumer, 1000); // Assert assertFalse(result); @@ -309,7 +307,7 @@ void performGracefulConsumerShutdownShouldHandleExceptionFromTracker() { when(mockTracker.getInFlightMessageCount()).thenThrow(new RuntimeException("Tracker failure")); // Act - final var result = ConsumerRunner.performGracefulConsumerShutdown(mockConsumer, 1000); + final var result = KPipeRunner.performGracefulConsumerShutdown(mockConsumer, 1000); // Assert assertFalse(result); // Expect false when an exception occurs @@ -324,7 +322,7 @@ void performGracefulConsumerShutdownShouldHandleEmptyCompletionResult() { when(mockTracker.waitForCompletion(anyLong())).thenReturn(Optional.empty()); // Act - final var result = ConsumerRunner.performGracefulConsumerShutdown(mockConsumer, 1000); + final var result = KPipeRunner.performGracefulConsumerShutdown(mockConsumer, 1000); // Assert assertFalse(result); @@ -338,7 +336,7 @@ void performGracefulConsumerShutdownShouldHandleNullTracker() { when(mockConsumer.createMessageTracker()).thenReturn(null); // Act - final var result = ConsumerRunner.performGracefulConsumerShutdown(mockConsumer, 1000); + final var result = KPipeRunner.performGracefulConsumerShutdown(mockConsumer, 1000); // Assert assertTrue(result); @@ -355,7 +353,7 @@ void shouldUseCustomStartAction() { consumer.start(); }; - runner = ConsumerRunner.builder(mockConsumer).withStartAction(customStartAction).build(); + runner = KPipeRunner.builder(mockConsumer).withStartAction(customStartAction).build(); // Act runner.start(); @@ -372,7 +370,7 @@ void shouldAddShutdownHook() { // but we can verify the behavior through reflection or functional testing // Arrange & Act - Create runner with shutdown hook - runner = ConsumerRunner.builder(mockConsumer).withShutdownHook(true).build(); + runner = KPipeRunner.builder(mockConsumer).withShutdownHook(true).build(); // No direct assertion possible - this is mostly for coverage // The actual shutdown hook behavior would be tested in integration tests } @@ -381,12 +379,10 @@ void shouldAddShutdownHook() { void shouldStartMetricsThreadWithReporters() throws Exception { // Arrange final long metricsInterval = 100; // Short interval for testing - runner = - ConsumerRunner - .builder(mockConsumer) - .withMetricsReporters(List.of(mockReporter)) - .withMetricsInterval(metricsInterval) - .build(); + runner = KPipeRunner.builder(mockConsumer) + .withMetricsReporters(List.of(mockReporter)) + .withMetricsInterval(metricsInterval) + .build(); // Act runner.start(); @@ -404,12 +400,10 @@ void shouldStartMetricsThreadWithReporters() throws Exception { @Test void shouldNotStartMetricsThreadWithoutReporters() { // Arrange - runner = - ConsumerRunner - .builder(mockConsumer) - .withMetricsReporters(List.of()) // Empty list - .withMetricsInterval(100) - .build(); + runner = KPipeRunner.builder(mockConsumer) + .withMetricsReporters(List.of()) // Empty list + .withMetricsInterval(100) + .build(); // Act runner.start(); @@ -420,12 +414,10 @@ void shouldNotStartMetricsThreadWithoutReporters() { @Test void shouldNotStartMetricsThreadWithNegativeInterval() { // Arrange - runner = - ConsumerRunner - .builder(mockConsumer) - .withMetricsReporters(List.of(mockReporter)) - .withMetricsInterval(-1) // Negative interval - .build(); + runner = KPipeRunner.builder(mockConsumer) + .withMetricsReporters(List.of(mockReporter)) + .withMetricsInterval(-1) // Negative interval + .build(); // Act runner.start(); @@ -438,12 +430,10 @@ void shouldNotStartMetricsThreadWithNegativeInterval() { void shouldStopMetricsThreadOnShutdown() throws Exception { // Arrange final long metricsInterval = 100; - runner = - ConsumerRunner - .builder(mockConsumer) - .withMetricsReporters(List.of(mockReporter)) - .withMetricsInterval(metricsInterval) - .build(); + runner = KPipeRunner.builder(mockConsumer) + .withMetricsReporters(List.of(mockReporter)) + .withMetricsInterval(metricsInterval) + .build(); runner.start(); @@ -469,12 +459,10 @@ void shouldHandleErrorsInMetricsReporting() throws Exception { final long metricsInterval = 100; doThrow(new RuntimeException("Metrics error")).when(mockReporter).reportMetrics(); - runner = - ConsumerRunner - .builder(mockConsumer) - .withMetricsReporters(List.of(mockReporter)) - .withMetricsInterval(metricsInterval) - .build(); + runner = KPipeRunner.builder(mockConsumer) + .withMetricsReporters(List.of(mockReporter)) + .withMetricsInterval(metricsInterval) + .build(); // Act runner.start(); @@ -495,14 +483,12 @@ void shouldSupportCustomConfigurationThroughWithMethod() { final var configFunctionCalled = new AtomicBoolean(false); // Act - runner = - ConsumerRunner - .builder(mockConsumer) - .with(builder -> { - configFunctionCalled.set(true); - return builder.withShutdownTimeout(2000); - }) - .build(); + runner = KPipeRunner.builder(mockConsumer) + .with(builder -> { + configFunctionCalled.set(true); + return builder.withShutdownTimeout(2000); + }) + .build(); // Assert assertTrue(configFunctionCalled.get()); @@ -511,7 +497,7 @@ void shouldSupportCustomConfigurationThroughWithMethod() { @Test void shouldUseDefaultHealthCheckWhenNotSpecified() { // Arrange - runner = ConsumerRunner.builder(mockConsumer).build(); + runner = KPipeRunner.builder(mockConsumer).build(); runner.start(); // Act & Assert @@ -521,7 +507,7 @@ void shouldUseDefaultHealthCheckWhenNotSpecified() { @Test void shouldNotBeHealthyWhenNotStarted() { // Arrange - runner = ConsumerRunner.builder(mockConsumer).build(); + runner = KPipeRunner.builder(mockConsumer).build(); // Act & Assert - Consumer not started assertFalse(runner.isHealthy());