From 1f516bdedc6dc9b4cb9588f6470959f79aff24fa Mon Sep 17 00:00:00 2001
From: Christian Pinto <christian.pinto@ibm.com>
Date: Tue, 24 Mar 2026 10:08:58 +0000
Subject: [PATCH 1/4] feat(vllm_performance): Added
 performance_testing-geospatial-endpoint-custom-dataset experiment

Signed-off-by: Christian Pinto <christian.pinto@ibm.com>
---
 .../performance_testing_geospatial.yaml       | 86 ++++++++++++++++++-
 1 file changed, 85 insertions(+), 1 deletion(-)

diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml
index 3aa9c365..af2c21a0 100644
--- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml
@@ -274,7 +274,7 @@ performance_testing-geospatial-full:
   metadata:
     description: 'VLLM performance testing across compute resource and workload configuration'
 performance_testing-geospatial-full-custom-dataset:
-  identifier: test-geospatial-deployment-custom-dataset-v1
+  identifier: test-geospatial-full-custom-dataset-v1
   actuatorIdentifier: "vllm_performance"
   requiredProperties: # Any entity passed to this experiment must have constitutive properties with these values
     - identifier: 'model'
@@ -463,3 +463,87 @@ performance_testing-geospatial-full-custom-dataset:
     - identifier: "p99_e2el_ms"
   metadata:
     description: 'VLLM performance testing across compute resource and workload configuration'
+performance_testing-geospatial-endpoint-custom-dataset:
+  identifier: test-geospatial-endpoint-custom-dataset-v1
+  actuatorIdentifier: "vllm_performance"
+  requiredProperties: # Any entity passed to this experiment must have constitutive properties with these values
+    - identifier: 'model'
+      metadata:
+        description: 'model to use for testing. Assumed to be served by all endpoints tested. Required to obtain correct tokenizer for benchmarking metrics calculation'
+      propertyDomain:
+        variableType: "CATEGORICAL_VARIABLE_TYPE"
+        values: ["ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11"]
+    - identifier: 'endpoint'
+      metadata:
+        description: 'The endpoint(s) to test'
+      propertyDomain:
+        variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE"
+        values: ["http://localhost:8000"]
+    - identifier: 'request_rate'
+      metadata:
+        description: "The number of requests to send per second"
+      propertyDomain:
+        variableType: 'DISCRETE_VARIABLE_TYPE'
+        domainRange: [-1, 1000]
+        interval: 1 # a request_rate of -1 means send all requests at time 0
+    - identifier: 'dataset'
+      metadata:
+        description: "(benchmark) The dataset to be used for the experiment"
+      propertyDomain:
+        variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE"
+        values: ["custom_dataset.jsonl"]
+  optionalProperties:
+    - identifier: 'num_prompts'
+      metadata:
+        description: "The number of prompts to send (total number of requests)"
+      propertyDomain:
+        variableType: 'DISCRETE_VARIABLE_TYPE'
+        domainRange: [1, 10001]
+        interval: 1
+    - identifier: 'burstiness'
+      metadata:
+        description: "The burstiness of the requests - 1.0 is a Poisson distribution with rate = request_rate. Others are gamma distributions with lambda = request_rate and shape = burstiness."
+      propertyDomain:
+        variableType: 'DISCRETE_VARIABLE_TYPE'
+        domainRange: [0, 10]
+        interval: 1
+    - identifier: 'max_concurrency'
+      metadata:
+        description: "The maximum number of concurrent requests to send"
+      propertyDomain:
+        variableType: 'DISCRETE_VARIABLE_TYPE'
+        domainRange: [-1, 500] # -1 means no concurrency control
+        interval: 1
+    - identifier: 'dataset'
+      metadata:
+        description: "The dataset to be used for the experiment"
+      propertyDomain:
+        variableType: "CATEGORICAL_VARIABLE_TYPE"
+        values: ['india_url_in_b64_out', 'valencia_url_in_b64_out']
+  defaultParameterization:
+    - value: 100
+      property:
+        identifier: 'num_prompts'
+    - value: -1
+      property:
+        identifier: 'max_concurrency'
+    - value: 1.0
+      property:
+        identifier: 'burstiness'
+    - property:
+        identifier: 'dataset'
+      value: 'india_url_in_b64_out'
+  # measurements
+  targetProperties:
+    - identifier: "duration"
+    - identifier: "completed"
+    - identifier: "request_throughput"
+    - identifier: "mean_e2el_ms"
+    - identifier: "median_e2el_ms"
+    - identifier: "std_e2el_ms"
+    - identifier: "p25_e2el_ms"
+    - identifier: "p50_e2el_ms"
+    - identifier: "p75_e2el_ms"
+    - identifier: "p99_e2el_ms"
+  metadata:
+    description: 'Test inference performance of a geospatial model served by vLLM endpoint across inference workload configurations'

From 9ee6e294bcac1eda5edf8077d7dcab6c660067aa Mon Sep 17 00:00:00 2001
From: Michele Gazzetti <michele.gazzetti1@ibm.com>
Date: Tue, 24 Mar 2026 13:43:05 +0000
Subject: [PATCH 2/4] fix(vllm_performance): set correct experiment identifiers
 and property configuration to test existing vllm geospatial endpoint

Signed-off-by: Michele Gazzetti <michele.gazzetti1@ibm.com>
---
 .../vllm_performance/experiment_executor.py         |  3 ++-
 .../experiments/performance_testing_geospatial.yaml | 13 ++-----------
 2 files changed, 4 insertions(+), 12 deletions(-)

diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py
index f243f7d6..30640356 100644
--- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py
@@ -423,6 +423,7 @@ def run_resource_and_workload_experiment(
             if experiment.identifier in [
                 "test-geospatial-deployment-v1",
                 "test-geospatial-deployment-custom-dataset-v1",
+                "test-geospatial-endpoint-custom-dataset-v1",
             ]:
                 logger.info("Using geospatial benchmark for deployment")
                 result = execute_geospatial_benchmark(
@@ -571,7 +572,7 @@ def run_workload_experiment(
             # Will raise VLLMBenchmarkError if there is a problem
             logger.info(f"Executing experiment: {experiment.identifier}")
             result: BenchmarkResult
-            if experiment.identifier == "test-geospatial-endpoint-v1":
+            if experiment.identifier in ["test-geospatial-endpoint-v1", "test-geospatial-endpoint-custom-dataset-v1"]:
                 logger.info("Using geospatial benchmark for endpoint")
                 result = execute_geospatial_benchmark(
                     base_url=benchmark_parameters.endpoint,
diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml
index af2c21a0..910fd5c2 100644
--- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml
@@ -274,7 +274,7 @@ performance_testing-geospatial-full:
   metadata:
     description: 'VLLM performance testing across compute resource and workload configuration'
 performance_testing-geospatial-full-custom-dataset:
-  identifier: test-geospatial-full-custom-dataset-v1
+  identifier: test-geospatial-deployment-custom-dataset-v1
   actuatorIdentifier: "vllm_performance"
   requiredProperties: # Any entity passed to this experiment must have constitutive properties with these values
     - identifier: 'model'
@@ -471,7 +471,7 @@ performance_testing-geospatial-endpoint-custom-dataset:
       metadata:
         description: 'model to use for testing. Assumed to be served by all endpoints tested. Required to obtain correct tokenizer for benchmarking metrics calculation'
       propertyDomain:
-        variableType: "CATEGORICAL_VARIABLE_TYPE"
+        variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE"
         values: ["ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11"]
     - identifier: 'endpoint'
       metadata:
@@ -514,12 +514,6 @@ performance_testing-geospatial-endpoint-custom-dataset:
         variableType: 'DISCRETE_VARIABLE_TYPE'
         domainRange: [-1, 500] # -1 means no concurrency control
         interval: 1
-    - identifier: 'dataset'
-      metadata:
-        description: "The dataset to be used for the experiment"
-      propertyDomain:
-        variableType: "CATEGORICAL_VARIABLE_TYPE"
-        values: ['india_url_in_b64_out', 'valencia_url_in_b64_out']
   defaultParameterization:
     - value: 100
       property:
@@ -530,9 +524,6 @@ performance_testing-geospatial-endpoint-custom-dataset:
     - value: 1.0
       property:
         identifier: 'burstiness'
-    - property:
-        identifier: 'dataset'
-      value: 'india_url_in_b64_out'
   # measurements
   targetProperties:
     - identifier: "duration"

From 6281554bc12324b2e6637b12e90dbfb57397a1de Mon Sep 17 00:00:00 2001
From: Michele Gazzetti <michele.gazzetti1@ibm.com>
Date: Tue, 24 Mar 2026 13:45:29 +0000
Subject: [PATCH 3/4] fix(vllm_performance): update default values to benchmark
 geospatial models

Signed-off-by: Michele Gazzetti <michele.gazzetti1@ibm.com>
---
 .../vllm_performance_test/execute_benchmark.py             | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py
index f6f85687..9d489ef4 100644
--- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py
@@ -135,7 +135,10 @@ def execute_benchmark(
         command.extend(["--max-concurrency", f"{max_concurrency!s}"])
     if custom_args is not None:
         for key, value in custom_args.items():
-            command.extend([key, f"{value!s}"])
+            if key =="--skip-tokenizer-init":
+                command.extend([key])
+            else:
+                command.extend([key, f"{value!s}"])
 
     logger.debug(f"Command line: {command}")
 
@@ -273,7 +276,7 @@ def execute_geospatial_benchmark(
 
     return execute_benchmark(
         base_url=base_url,
-        backend="io-processor-plugin",
+        backend="vllm-pooling",
         model=model,
         dataset="custom",
         interpreter=interpreter,

From e80ebe9857aed677ff02f6898a665531c7820895 Mon Sep 17 00:00:00 2001
From: Christian Pinto <christian.pinto@ibm.com>
Date: Mon, 30 Mar 2026 12:49:26 +0100
Subject: [PATCH 4/4] feat(vllm_performance): Upgraded custom arguments in
 execute_benchmark

Signed-off-by: Christian Pinto <christian.pinto@ibm.com>
---
 .../vllm_performance_test/execute_benchmark.py           | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py
index 9d489ef4..b154ad3c 100644
--- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py
@@ -135,10 +135,9 @@ def execute_benchmark(
         command.extend(["--max-concurrency", f"{max_concurrency!s}"])
     if custom_args is not None:
         for key, value in custom_args.items():
-            if key =="--skip-tokenizer-init":
-                command.extend([key])
-            else:
-                command.extend([key, f"{value!s}"])
+            command.append(key)
+            if value:
+                command.append(f"{value!s}")
 
     logger.debug(f"Command line: {command}")
 
@@ -290,7 +289,7 @@ def execute_geospatial_benchmark(
         custom_args={
             "--dataset-path": f"{dataset_path.resolve()}",
             "--endpoint": "/pooling",
-            "--skip-tokenizer-init": True,
+            "--skip-tokenizer-init": None,
         },
     )