From 1f516bdedc6dc9b4cb9588f6470959f79aff24fa Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Tue, 24 Mar 2026 10:08:58 +0000 Subject: [PATCH 1/4] feat(vllm_performance): Added performance_testing-geospatial-endpoint-custom-dataset experiment Signed-off-by: Christian Pinto --- .../performance_testing_geospatial.yaml | 86 ++++++++++++++++++- 1 file changed, 85 insertions(+), 1 deletion(-) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml index 3aa9c365..af2c21a0 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml @@ -274,7 +274,7 @@ performance_testing-geospatial-full: metadata: description: 'VLLM performance testing across compute resource and workload configuration' performance_testing-geospatial-full-custom-dataset: - identifier: test-geospatial-deployment-custom-dataset-v1 + identifier: test-geospatial-full-custom-dataset-v1 actuatorIdentifier: "vllm_performance" requiredProperties: # Any entity passed to this experiment must have constitutive properties with these values - identifier: 'model' @@ -463,3 +463,87 @@ performance_testing-geospatial-full-custom-dataset: - identifier: "p99_e2el_ms" metadata: description: 'VLLM performance testing across compute resource and workload configuration' +performance_testing-geospatial-endpoint-custom-dataset: + identifier: test-geospatial-endpoint-custom-dataset-v1 + actuatorIdentifier: "vllm_performance" + requiredProperties: # Any entity passed to this experiment must have constitutive properties with these values + - identifier: 'model' + metadata: + description: 'model to use for testing. Assumed to be served by all endpoints tested. Required to obtain correct tokenizer for benchmarking metrics calculation' + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: ["ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11"] + - identifier: 'endpoint' + metadata: + description: 'The endpoint(s) to test' + propertyDomain: + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: ["http://localhost:8000"] + - identifier: 'request_rate' + metadata: + description: "The number of requests to send per second" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [-1, 1000] + interval: 1 # -1 means send all requests at time 0 + - identifier: 'dataset' + metadata: + description: "(benchmark) The dataset to be used for the experiment" + propertyDomain: + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: ["custom_dataset.jsonl"] + optionalProperties: + - identifier: 'num_prompts' + metadata: + description: "The number of prompts to send (total number of requests)" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [1, 10001] + interval: 1 + - identifier: 'burstiness' + metadata: + description: "The burstiness of the requests - 1.0 is a Poisson distribution with rate = request_rate. Others are gamma distributions with lambda = request_rate and shape = burstiness." + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [0, 10] + interval: 1 + - identifier: 'max_concurrency' + metadata: + description: "The maximum number of concurrent requests to send" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [-1, 500] # -1 means no concurrency control + interval: 1 + - identifier: 'dataset' + metadata: + description: "The dataset to be used for the experiment" + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: ['india_url_in_b64_out', 'valencia_url_in_b64_out'] + defaultParameterization: + - value: 100 + property: + identifier: 'num_prompts' + - value: -1 + property: + identifier: 'max_concurrency' + - value: 1.0 + property: + identifier: 'burstiness' + - property: + identifier: 'dataset' + value: 'india_url_in_b64_out' + # measurements + targetProperties: + - identifier: "duration" + - identifier: "completed" + - identifier: "request_throughput" + - identifier: "mean_e2el_ms" + - identifier: "median_e2el_ms" + - identifier: "std_e2el_ms" + - identifier: "p25_e2el_ms" + - identifier: "p50_e2el_ms" + - identifier: "p75_e2el_ms" + - identifier: "p99_e2el_ms" + metadata: + description: 'Test inference performance of a geospatial model served by vLLM endpoint across inference workload configurations' From 9ee6e294bcac1eda5edf8077d7dcab6c660067aa Mon Sep 17 00:00:00 2001 From: Michele Gazzetti Date: Tue, 24 Mar 2026 13:43:05 +0000 Subject: [PATCH 2/4] fix(vllm_performance): set correct experiment identifiers and property configuration to test existing vllm geospatial endpoint Signed-off-by: Michele Gazzetti --- .../vllm_performance/experiment_executor.py | 3 ++- .../experiments/performance_testing_geospatial.yaml | 13 ++----------- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py index f243f7d6..30640356 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py @@ -423,6 +423,7 @@ def run_resource_and_workload_experiment( if experiment.identifier in [ "test-geospatial-deployment-v1", "test-geospatial-deployment-custom-dataset-v1", + "test-geospatial-endpoint-custom-dataset-v1", ]: logger.info("Using geospatial benchmark for deployment") result = execute_geospatial_benchmark( @@ -571,7 +572,7 @@ def run_workload_experiment( # Will raise VLLMBenchmarkError if there is a problem logger.info(f"Executing experiment: {experiment.identifier}") result: BenchmarkResult - if experiment.identifier == "test-geospatial-endpoint-v1": + if experiment.identifier in ["test-geospatial-endpoint-v1", "test-geospatial-endpoint-custom-dataset-v1"]: logger.info("Using geospatial benchmark for endpoint") result = execute_geospatial_benchmark( base_url=benchmark_parameters.endpoint, diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml index af2c21a0..910fd5c2 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml @@ -274,7 +274,7 @@ performance_testing-geospatial-full: metadata: description: 'VLLM performance testing across compute resource and workload configuration' performance_testing-geospatial-full-custom-dataset: - identifier: test-geospatial-full-custom-dataset-v1 + identifier: test-geospatial-deployment-custom-dataset-v1 actuatorIdentifier: "vllm_performance" requiredProperties: # Any entity passed to this experiment must have constitutive properties with these values - identifier: 'model' @@ -471,7 +471,7 @@ performance_testing-geospatial-endpoint-custom-dataset: metadata: description: 'model to use for testing. Assumed to be served by all endpoints tested. Required to obtain correct tokenizer for benchmarking metrics calculation' propertyDomain: - variableType: "CATEGORICAL_VARIABLE_TYPE" + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" values: ["ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11"] - identifier: 'endpoint' metadata: @@ -514,12 +514,6 @@ performance_testing-geospatial-endpoint-custom-dataset: variableType: 'DISCRETE_VARIABLE_TYPE' domainRange: [-1, 500] # -1 means no concurrency control interval: 1 - - identifier: 'dataset' - metadata: - description: "The dataset to be used for the experiment" - propertyDomain: - variableType: "CATEGORICAL_VARIABLE_TYPE" - values: ['india_url_in_b64_out', 'valencia_url_in_b64_out'] defaultParameterization: - value: 100 property: @@ -530,9 +524,6 @@ performance_testing-geospatial-endpoint-custom-dataset: - value: 1.0 property: identifier: 'burstiness' - - property: - identifier: 'dataset' - value: 'india_url_in_b64_out' # measurements targetProperties: - identifier: "duration" From 6281554bc12324b2e6637b12e90dbfb57397a1de Mon Sep 17 00:00:00 2001 From: Michele Gazzetti Date: Tue, 24 Mar 2026 13:45:29 +0000 Subject: [PATCH 3/4] fix(vllm_performance): update default values to benchmark geospatial models Signed-off-by: Michele Gazzetti --- .../vllm_performance_test/execute_benchmark.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py index f6f85687..9d489ef4 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py @@ -135,7 +135,10 @@ def execute_benchmark( command.extend(["--max-concurrency", f"{max_concurrency!s}"]) if custom_args is not None: for key, value in custom_args.items(): - command.extend([key, f"{value!s}"]) + if key =="--skip-tokenizer-init": + command.extend([key]) + else: + command.extend([key, f"{value!s}"]) logger.debug(f"Command line: {command}") @@ -273,7 +276,7 @@ def execute_geospatial_benchmark( return execute_benchmark( base_url=base_url, - backend="io-processor-plugin", + backend="vllm-pooling", model=model, dataset="custom", interpreter=interpreter, From e80ebe9857aed677ff02f6898a665531c7820895 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Mon, 30 Mar 2026 12:49:26 +0100 Subject: [PATCH 4/4] feat(vllm_performance): Upgraded custom arguments in execute_benchmark Signed-off-by: Christian Pinto --- .../vllm_performance_test/execute_benchmark.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py index 9d489ef4..b154ad3c 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py @@ -135,10 +135,9 @@ def execute_benchmark( command.extend(["--max-concurrency", f"{max_concurrency!s}"]) if custom_args is not None: for key, value in custom_args.items(): - if key =="--skip-tokenizer-init": - command.extend([key]) - else: - command.extend([key, f"{value!s}"]) + command.append(key) + if value: + command.append(f"{value!s}") logger.debug(f"Command line: {command}") @@ -290,7 +289,7 @@ def execute_geospatial_benchmark( custom_args={ "--dataset-path": f"{dataset_path.resolve()}", "--endpoint": "/pooling", - "--skip-tokenizer-init": True, + "--skip-tokenizer-init": None, }, )