diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py index f243f7d63..306403561 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py @@ -423,6 +423,7 @@ def run_resource_and_workload_experiment( if experiment.identifier in [ "test-geospatial-deployment-v1", "test-geospatial-deployment-custom-dataset-v1", + "test-geospatial-endpoint-custom-dataset-v1", ]: logger.info("Using geospatial benchmark for deployment") result = execute_geospatial_benchmark( @@ -571,7 +572,7 @@ def run_workload_experiment( # Will raise VLLMBenchmarkError if there is a problem logger.info(f"Executing experiment: {experiment.identifier}") result: BenchmarkResult - if experiment.identifier == "test-geospatial-endpoint-v1": + if experiment.identifier in ["test-geospatial-endpoint-v1", "test-geospatial-endpoint-custom-dataset-v1"]: logger.info("Using geospatial benchmark for endpoint") result = execute_geospatial_benchmark( base_url=benchmark_parameters.endpoint, diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml index 3aa9c3657..910fd5c23 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml @@ -463,3 +463,78 @@ performance_testing-geospatial-full-custom-dataset: - identifier: "p99_e2el_ms" metadata: description: 'VLLM performance testing across compute resource and workload configuration' 
+performance_testing-geospatial-endpoint-custom-dataset: + identifier: test-geospatial-endpoint-custom-dataset-v1 + actuatorIdentifier: "vllm_performance" + requiredProperties: # Any entity passed to this experiment must have constitutive properties with these values + - identifier: 'model' + metadata: + description: 'model to use for testing. Assumed to be served by all endpoints tested. Required to obtain correct tokenizer for benchmarking metrics calculation' + propertyDomain: + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: ["ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11"] + - identifier: 'endpoint' + metadata: + description: 'The endpoint(s) to test' + propertyDomain: + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: ["http://localhost:8000"] + - identifier: 'request_rate' + metadata: + description: "The number of requests to send per second" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [-1, 1000] + interval: 1 # -1 means send all requests at time 0 + - identifier: 'dataset' + metadata: + description: "(benchmark) The dataset to be used for the experiment" + propertyDomain: + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: ["custom_dataset.jsonl"] + optionalProperties: + - identifier: 'num_prompts' + metadata: + description: "The number of prompts to send (total number of requests)" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [1, 10001] + interval: 1 + - identifier: 'burstiness' + metadata: + description: "The burstiness of the requests - 1.0 is a Poisson distribution with rate = request_rate. Others are gamma distributions with lambda = request_rate and shape = burstiness." 
+ propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [0, 10] + interval: 1 + - identifier: 'max_concurrency' + metadata: + description: "The maximum number of concurrent requests to send" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [-1, 500] # -1 means no concurrency control + interval: 1 + defaultParameterization: + - value: 100 + property: + identifier: 'num_prompts' + - value: -1 + property: + identifier: 'max_concurrency' + - value: 1.0 + property: + identifier: 'burstiness' + # measurements + targetProperties: + - identifier: "duration" + - identifier: "completed" + - identifier: "request_throughput" + - identifier: "mean_e2el_ms" + - identifier: "median_e2el_ms" + - identifier: "std_e2el_ms" + - identifier: "p25_e2el_ms" + - identifier: "p50_e2el_ms" + - identifier: "p75_e2el_ms" + - identifier: "p99_e2el_ms" + metadata: + description: 'Test inference performance of a geospatial model served by vLLM endpoint across inference workload configurations' diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py index f6f856870..b154ad3cc 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py @@ -135,7 +135,9 @@ def execute_benchmark( command.extend(["--max-concurrency", f"{max_concurrency!s}"]) if custom_args is not None: for key, value in custom_args.items(): - command.extend([key, f"{value!s}"]) + command.append(key) + if value is not None: + command.append(f"{value!s}") logger.debug(f"Command line: {command}") @@ -273,7 +275,7 @@ def execute_geospatial_benchmark( return execute_benchmark( base_url=base_url, - backend="io-processor-plugin", + backend="vllm-pooling",
model=model, dataset="custom", interpreter=interpreter, @@ -287,7 +289,7 @@ def execute_geospatial_benchmark( custom_args={ "--dataset-path": f"{dataset_path.resolve()}", "--endpoint": "/pooling", - "--skip-tokenizer-init": True, + "--skip-tokenizer-init": None, }, )