diff --git a/devops/scripts/benchmarks/benches/compute/compute.py b/devops/scripts/benchmarks/benches/compute/compute.py index 90cf0fe81831e..1923d56e6417d 100644 --- a/devops/scripts/benchmarks/benches/compute/compute.py +++ b/devops/scripts/benchmarks/benches/compute/compute.py @@ -558,6 +558,25 @@ def createTorchLinearKernelSizeBench(variant_name: str, **kwargs): ) ) + # Add TorchEventRecordQuery benchmarks + for runtime in filter(lambda x: x != RUNTIMES.UR, RUNTIMES): + if runtime == RUNTIMES.LEVEL_ZERO: + # XXX: verify why L0 benchmarks with counter based events cause issues + continue + for profiler_type in list(PROFILERS): + benches.append( + TorchEventRecordQuery( + self, + runtime, + "medium", + profiler_type, + Profiling=0, + KernelWGCount=256, + KernelWGSize=512, + EventQueryIterations=1000, + ) + ) + # # Note: Graph benchmarks segfault on pvc on L0 # diff --git a/devops/scripts/benchmarks/benches/compute/compute_torch.py b/devops/scripts/benchmarks/benches/compute/compute_torch.py index 614dc3f7858bf..fe9f5be232dc8 100644 --- a/devops/scripts/benchmarks/benches/compute/compute_torch.py +++ b/devops/scripts/benchmarks/benches/compute/compute_torch.py @@ -251,6 +251,25 @@ def __init__( ) +class TorchEventRecordQuery(TorchBenchmark): + def __init__( + self, + suite, + runtime: RUNTIMES, + variant_name: str, + profiler_type: PROFILERS, + **kwargs, + ): + super().__init__( + suite, + runtime, + "KernelSubmitEventRecordQuery", + variant_name, + profiler_type, + **kwargs, + ) + + class TorchGraphVllmMock(TorchBenchmark): def __init__( self, diff --git a/devops/scripts/benchmarks/tests/test_integration.py b/devops/scripts/benchmarks/tests/test_integration.py index 8922508e0a624..10ab5b1ac3b41 100644 --- a/devops/scripts/benchmarks/tests/test_integration.py +++ b/devops/scripts/benchmarks/tests/test_integration.py @@ -231,6 +231,11 @@ def test_torch_l0(self): # "KernelSubmitEventRecordWait medium", # {"pytorch", "L0"}, # ) + # self._checkCase( + # "torch_benchmark_l0 KernelSubmitEventRecordQuery EventQueryIterations 1000, KernelWGCount 256, KernelWGSize 512, Profiling 0 CPU count", + # "KernelSubmitEventRecordQuery medium, CPU count", + # {"pytorch", "L0"}, + # ) self._checkCase( "torch_benchmark_l0 KernelSubmitSingleQueue KernelDataType Int32, KernelWGCount 4096, KernelWGSize 512", "KernelSubmitSingleQueue Int32Large", @@ -292,6 +297,11 @@ def test_torch_sycl(self): "KernelSubmitEventRecordWait medium, CPU count", {"pytorch", "SYCL"}, ) + self._checkCase( + "torch_benchmark_sycl KernelSubmitEventRecordQuery EventQueryIterations 1000, KernelWGCount 256, KernelWGSize 512, Profiling 0", + "KernelSubmitEventRecordQuery medium", + {"pytorch", "SYCL"}, + ) self._checkCase( "torch_benchmark_sycl KernelSubmitSingleQueue KernelDataType Mixed, KernelWGCount 512, KernelWGSize 256", "KernelSubmitSingleQueue MixedMedium",