From 58a11ccfd2820fb1fa5b4402ec0cdf95fba818fa Mon Sep 17 00:00:00 2001 From: Siddarth Chalasani Date: Fri, 19 Dec 2025 17:19:54 -0800 Subject: [PATCH] feat(sdk): add BenchmarkOps and AsyncBenchmarkOps to SDK --- src/runloop_api_client/sdk/__init__.py | 23 ++++++-- src/runloop_api_client/sdk/_types.py | 10 ++++ src/runloop_api_client/sdk/async_.py | 56 ++++++++++++++++++ src/runloop_api_client/sdk/sync.py | 56 ++++++++++++++++++ tests/sdk/test_async_ops.py | 60 ++++++++++++++++++++ tests/sdk/test_ops.py | 57 +++++++++++++++++++ tests/smoketests/sdk/test_async_benchmark.py | 19 +++---- tests/smoketests/sdk/test_benchmark.py | 17 +++--- 8 files changed, 271 insertions(+), 27 deletions(-) diff --git a/src/runloop_api_client/sdk/__init__.py b/src/runloop_api_client/sdk/__init__.py index 170389e4c..610017b79 100644 --- a/src/runloop_api_client/sdk/__init__.py +++ b/src/runloop_api_client/sdk/__init__.py @@ -5,7 +5,17 @@ from __future__ import annotations -from .sync import AgentOps, DevboxOps, ScorerOps, RunloopSDK, ScenarioOps, SnapshotOps, BlueprintOps, StorageObjectOps +from .sync import ( + AgentOps, + DevboxOps, + ScorerOps, + RunloopSDK, + ScenarioOps, + SnapshotOps, + BenchmarkOps, + BlueprintOps, + StorageObjectOps, +) from .agent import Agent from ._types import ScenarioPreview from .async_ import ( @@ -15,6 +25,7 @@ AsyncRunloopSDK, AsyncScenarioOps, AsyncSnapshotOps, + AsyncBenchmarkOps, AsyncBlueprintOps, AsyncStorageObjectOps, ) @@ -51,6 +62,8 @@ # Management interfaces "AgentOps", "AsyncAgentOps", + "BenchmarkOps", + "AsyncBenchmarkOps", "DevboxOps", "AsyncDevboxOps", "BlueprintOps", @@ -66,6 +79,10 @@ # Resource classes "Agent", "AsyncAgent", + "Benchmark", + "AsyncBenchmark", + "BenchmarkRun", + "AsyncBenchmarkRun", "Devbox", "AsyncDevbox", "Execution", @@ -89,8 +106,4 @@ "AsyncStorageObject", "NamedShell", "AsyncNamedShell", - "BenchmarkRun", - "AsyncBenchmarkRun", - "Benchmark", - "AsyncBenchmark", ] diff --git a/src/runloop_api_client/sdk/_types.py 
class AsyncBenchmarkOps:
    """Asynchronous helper for creating, looking up and listing benchmarks.

    Reached through ``runloop.benchmark`` on an ``AsyncRunloopSDK`` instance.

    Example:
        >>> runloop = AsyncRunloopSDK()
        >>> benchmarks = await runloop.benchmark.list()
        >>> benchmark = runloop.benchmark.from_id("bmd_xxx")
        >>> run = await benchmark.start_run(run_name="evaluation-v1")
    """

    def __init__(self, client: AsyncRunloop) -> None:
        """Keep a reference to the API client used for every benchmark call.

        :param client: AsyncRunloop client instance
        :type client: AsyncRunloop
        """
        self._client = client

    async def create(self, **params: Unpack[SDKBenchmarkCreateParams]) -> AsyncBenchmark:
        """Create a benchmark and return a wrapper bound to its ID.

        All keyword arguments are passed unchanged to ``client.benchmarks.create``.

        :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkCreateParams` for available parameters
        :return: Wrapper for the newly created benchmark
        :rtype: AsyncBenchmark
        """
        created = await self._client.benchmarks.create(**params)
        new_id = created.id
        return AsyncBenchmark(self._client, new_id)

    def from_id(self, benchmark_id: str) -> AsyncBenchmark:
        """Wrap an existing benchmark ID without awaiting any client call.

        :param benchmark_id: ID of the benchmark
        :type benchmark_id: str
        :return: AsyncBenchmark instance bound to ``benchmark_id``
        :rtype: AsyncBenchmark
        """
        client = self._client
        return AsyncBenchmark(client, benchmark_id)

    async def list(self, **params: Unpack[SDKBenchmarkListParams]) -> list[AsyncBenchmark]:
        """List benchmarks, optionally filtered by the given parameters.

        The wrappers are built from the ``benchmarks`` attribute of the object
        returned by a single ``client.benchmarks.list`` call.

        :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkListParams` for available parameters
        :return: One wrapper per entry in the response's ``benchmarks``
        :rtype: list[AsyncBenchmark]
        """
        listing = await self._client.benchmarks.list(**params)
        wrapped: list[AsyncBenchmark] = []
        for view in listing.benchmarks:
            wrapped.append(AsyncBenchmark(self._client, view.id))
        return wrapped
class BenchmarkOps:
    """Synchronous helper for creating, looking up and listing benchmarks.

    Reached through ``runloop.benchmark`` on a ``RunloopSDK`` instance.

    Example:
        >>> runloop = RunloopSDK()
        >>> benchmarks = runloop.benchmark.list()
        >>> benchmark = runloop.benchmark.from_id("bmd_xxx")
        >>> run = benchmark.start_run(run_name="evaluation-v1")
    """

    def __init__(self, client: Runloop) -> None:
        """Keep a reference to the API client used for every benchmark call.

        :param client: Runloop client instance
        :type client: Runloop
        """
        self._client = client

    def create(self, **params: Unpack[SDKBenchmarkCreateParams]) -> Benchmark:
        """Create a benchmark and return a wrapper bound to its ID.

        All keyword arguments are passed unchanged to ``client.benchmarks.create``.

        :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkCreateParams` for available parameters
        :return: Wrapper for the newly created benchmark
        :rtype: Benchmark
        """
        created = self._client.benchmarks.create(**params)
        new_id = created.id
        return Benchmark(self._client, new_id)

    def from_id(self, benchmark_id: str) -> Benchmark:
        """Wrap an existing benchmark ID without making any client call.

        :param benchmark_id: ID of the benchmark
        :type benchmark_id: str
        :return: Benchmark instance bound to ``benchmark_id``
        :rtype: Benchmark
        """
        client = self._client
        return Benchmark(client, benchmark_id)

    def list(self, **params: Unpack[SDKBenchmarkListParams]) -> list[Benchmark]:
        """List benchmarks, optionally filtered by the given parameters.

        The wrappers are built from the ``benchmarks`` attribute of the object
        returned by a single ``client.benchmarks.list`` call.

        :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkListParams` for available parameters
        :return: One wrapper per entry in the response's ``benchmarks``
        :rtype: list[Benchmark]
        """
        listing = self._client.benchmarks.list(**params)
        wrapped: list[Benchmark] = []
        for view in listing.benchmarks:
            wrapped.append(Benchmark(self._client, view.id))
        return wrapped
class TestAsyncBenchmarkOps:
    """Unit tests covering AsyncBenchmarkOps."""

    @pytest.mark.asyncio
    async def test_create(self, mock_async_client: AsyncMock, benchmark_view: MockBenchmarkView) -> None:
        """create() awaits the client with the same kwargs and wraps the returned id."""
        mock_async_client.benchmarks.create = AsyncMock(return_value=benchmark_view)
        scenario_ids = ["scn_001", "scn_002"]

        created = await AsyncBenchmarkOps(mock_async_client).create(
            name="test-benchmark",
            scenario_ids=scenario_ids,
        )

        assert isinstance(created, AsyncBenchmark)
        # The benchmark_view fixture is expected to carry this id (defined outside this class).
        assert created.id == "bmd_123"
        mock_async_client.benchmarks.create.assert_awaited_once_with(
            name="test-benchmark", scenario_ids=["scn_001", "scn_002"]
        )

    def test_from_id(self, mock_async_client: AsyncMock) -> None:
        """from_id() is synchronous and returns a wrapper bound to the given id."""
        wrapper = AsyncBenchmarkOps(mock_async_client).from_id("bmd_123")

        assert isinstance(wrapper, AsyncBenchmark)
        assert wrapper.id == "bmd_123"

    @pytest.mark.asyncio
    async def test_list_multiple(self, mock_async_client: AsyncMock) -> None:
        """list() wraps every entry of the response's ``benchmarks`` in order."""
        views = [
            MockBenchmarkView(id="bmd_001", name="benchmark-1"),
            MockBenchmarkView(id="bmd_002", name="benchmark-2"),
        ]
        mock_async_client.benchmarks.list = AsyncMock(return_value=SimpleNamespace(benchmarks=views))

        listed = await AsyncBenchmarkOps(mock_async_client).list(limit=10)

        assert len(listed) == 2
        for wrapper, expected_id in zip(listed, ["bmd_001", "bmd_002"]):
            assert isinstance(wrapper, AsyncBenchmark)
            assert wrapper.id == expected_id
        mock_async_client.benchmarks.list.assert_awaited_once_with(limit=10)

    @pytest.mark.asyncio
    async def test_list_with_name_filter(self, mock_async_client: AsyncMock, benchmark_view: MockBenchmarkView) -> None:
        """list() passes the ``name`` filter through to the client unchanged."""
        single_page = SimpleNamespace(benchmarks=[benchmark_view])
        mock_async_client.benchmarks.list = AsyncMock(return_value=single_page)

        listed = await AsyncBenchmarkOps(mock_async_client).list(name="test-benchmark", limit=10)

        assert len(listed) == 1
        mock_async_client.benchmarks.list.assert_awaited_once_with(name="test-benchmark", limit=10)
class TestBenchmarkOps:
    """Unit tests covering BenchmarkOps."""

    def test_create(self, mock_client: Mock, benchmark_view: MockBenchmarkView) -> None:
        """create() calls the client with the same kwargs and wraps the returned id."""
        mock_client.benchmarks.create.return_value = benchmark_view
        scenario_ids = ["scn_001", "scn_002"]

        created = BenchmarkOps(mock_client).create(
            name="test-benchmark",
            scenario_ids=scenario_ids,
        )

        assert isinstance(created, Benchmark)
        # The benchmark_view fixture is expected to carry this id (defined outside this class).
        assert created.id == "bmd_123"
        mock_client.benchmarks.create.assert_called_once_with(
            name="test-benchmark", scenario_ids=["scn_001", "scn_002"]
        )

    def test_from_id(self, mock_client: Mock) -> None:
        """from_id() returns a wrapper bound to the given id."""
        wrapper = BenchmarkOps(mock_client).from_id("bmd_123")

        assert isinstance(wrapper, Benchmark)
        assert wrapper.id == "bmd_123"

    def test_list_multiple(self, mock_client: Mock) -> None:
        """list() wraps every entry of the response's ``benchmarks`` in order."""
        views = [
            MockBenchmarkView(id="bmd_001", name="benchmark-1"),
            MockBenchmarkView(id="bmd_002", name="benchmark-2"),
        ]
        mock_client.benchmarks.list.return_value = SimpleNamespace(benchmarks=views)

        listed = BenchmarkOps(mock_client).list(limit=10)

        assert len(listed) == 2
        for wrapper, expected_id in zip(listed, ["bmd_001", "bmd_002"]):
            assert isinstance(wrapper, Benchmark)
            assert wrapper.id == expected_id
        mock_client.benchmarks.list.assert_called_once_with(limit=10)

    def test_list_with_name_filter(self, mock_client: Mock, benchmark_view: MockBenchmarkView) -> None:
        """list() passes the ``name`` filter through to the client unchanged."""
        single_page = SimpleNamespace(benchmarks=[benchmark_view])
        mock_client.benchmarks.list.return_value = single_page

        listed = BenchmarkOps(mock_client).list(name="test-benchmark", limit=10)

        assert len(listed) == 1
        mock_client.benchmarks.list.assert_called_once_with(name="test-benchmark", limit=10)
scenario_ids=scenario_ids, - description="Smoketest benchmark for SDK testing", - ) - ).id, + return await async_sdk_client.benchmark.create( + name=name, + scenario_ids=scenario_ids, + description="Smoketest benchmark for SDK testing", ) diff --git a/tests/smoketests/sdk/test_benchmark.py b/tests/smoketests/sdk/test_benchmark.py index 70658fb9d..2dfe5bb6c 100644 --- a/tests/smoketests/sdk/test_benchmark.py +++ b/tests/smoketests/sdk/test_benchmark.py @@ -52,19 +52,16 @@ def get_or_create_benchmark( ) -> Benchmark: """Get an existing benchmark by name or create a new one.""" # Check if benchmark already exists - benchmarks_page = sdk_client.api.benchmarks.list(name=name, limit=1) - for benchmark in benchmarks_page.benchmarks: + benchmarks = sdk_client.benchmark.list(name=name, limit=1) + for benchmark in benchmarks: # Return the first matching benchmark - return Benchmark(sdk_client.api, benchmark.id) + return benchmark # Create a new benchmark - return Benchmark( - sdk_client.api, - sdk_client.api.benchmarks.create( - name=name, - scenario_ids=scenario_ids, - description="Smoketest benchmark for SDK testing", - ).id, + return sdk_client.benchmark.create( + name=name, + scenario_ids=scenario_ids, + description="Smoketest benchmark for SDK testing", )