Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,7 @@ def run_resource_and_workload_experiment(
if experiment.identifier in [
"test-geospatial-deployment-v1",
"test-geospatial-deployment-custom-dataset-v1",
"test-geospatial-endpoint-custom-dataset-v1",
]:
logger.info("Using geospatial benchmark for deployment")
result = execute_geospatial_benchmark(
Expand Down Expand Up @@ -571,7 +572,7 @@ def run_workload_experiment(
# Will raise VLLMBenchmarkError if there is a problem
logger.info(f"Executing experiment: {experiment.identifier}")
result: BenchmarkResult
if experiment.identifier == "test-geospatial-endpoint-v1":
if experiment.identifier in ["test-geospatial-endpoint-v1", "test-geospatial-endpoint-custom-dataset-v1"]:
logger.info("Using geospatial benchmark for endpoint")
result = execute_geospatial_benchmark(
base_url=benchmark_parameters.endpoint,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -463,3 +463,78 @@ performance_testing-geospatial-full-custom-dataset:
- identifier: "p99_e2el_ms"
metadata:
description: 'VLLM performance testing across compute resource and workload configuration'
# Experiment definition: measures inference performance of a geospatial model
# served by an existing vLLM endpoint, driven by a custom dataset file.
performance_testing-geospatial-endpoint-custom-dataset:
identifier: test-geospatial-endpoint-custom-dataset-v1
actuatorIdentifier: "vllm_performance"
requiredProperties: # Any entity passed to this experiment must have constitutive properties with these values
- identifier: 'model'
metadata:
description: 'model to use for testing. Assumed to be served by all endpoints tested. Required to obtain correct tokenizer for benchmarking metrics calculation'
propertyDomain:
variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE"
values: ["ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11"]
- identifier: 'endpoint'
metadata:
description: 'The endpoint(s) to test'
propertyDomain:
variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE"
values: ["http://localhost:8000"]
- identifier: 'request_rate'
metadata:
description: "The number of requests to send per second"
propertyDomain:
variableType: 'DISCRETE_VARIABLE_TYPE'
domainRange: [-1, 1000] # -1 means send all requests at time 0
interval: 1
- identifier: 'dataset'
metadata:
description: "(benchmark) The dataset to be used for the experiment"
propertyDomain:
variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE"
values: ["custom_dataset.jsonl"]
# Each optional property below has a matching entry under defaultParameterization.
optionalProperties:
- identifier: 'num_prompts'
metadata:
description: "The number of prompts to send (total number of requests)"
propertyDomain:
variableType: 'DISCRETE_VARIABLE_TYPE'
domainRange: [1, 10001]
interval: 1
# NOTE(review): the domain is discrete (interval 1) and includes 0, while the
# description treats burstiness as a gamma shape parameter and the default
# below is 1.0 - confirm that 0 and integer-only steps are intended.
- identifier: 'burstiness'
metadata:
description: "The burstiness of the requests - 1.0 is a Poisson distribution with rate = request_rate. Others are gamma distributions with lambda = request_rate and shape = burstiness."
propertyDomain:
variableType: 'DISCRETE_VARIABLE_TYPE'
domainRange: [0, 10]
interval: 1
- identifier: 'max_concurrency'
metadata:
description: "The maximum number of concurrent requests to send"
propertyDomain:
variableType: 'DISCRETE_VARIABLE_TYPE'
domainRange: [-1, 500] # -1 means no concurrency control
interval: 1
# Values paired with the optional properties above (presumably applied when an
# entity does not set them - confirm against the actuator schema).
defaultParameterization:
- value: 100
property:
identifier: 'num_prompts'
- value: -1
property:
identifier: 'max_concurrency'
- value: 1.0
property:
identifier: 'burstiness'
# measurements
targetProperties:
- identifier: "duration"
- identifier: "completed"
- identifier: "request_throughput"
- identifier: "mean_e2el_ms"
- identifier: "median_e2el_ms"
- identifier: "std_e2el_ms"
- identifier: "p25_e2el_ms"
- identifier: "p50_e2el_ms"
- identifier: "p75_e2el_ms"
- identifier: "p99_e2el_ms"
metadata:
description: 'Test inference performance of a geospatial model served by vLLM endpoint across inference workload configurations'
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,9 @@ def execute_benchmark(
command.extend(["--max-concurrency", f"{max_concurrency!s}"])
if custom_args is not None:
for key, value in custom_args.items():
command.extend([key, f"{value!s}"])
command.append(key)
if value:
command.append(f"{value!s}")

logger.debug(f"Command line: {command}")

Expand Down Expand Up @@ -273,7 +275,7 @@ def execute_geospatial_benchmark(

return execute_benchmark(
base_url=base_url,
backend="io-processor-plugin",
backend="vllm-pooling",
model=model,
dataset="custom",
interpreter=interpreter,
Expand All @@ -287,7 +289,7 @@ def execute_geospatial_benchmark(
custom_args={
"--dataset-path": f"{dataset_path.resolve()}",
"--endpoint": "/pooling",
"--skip-tokenizer-init": True,
"--skip-tokenizer-init": None,
},
)

Expand Down
Loading