intel · lukaszstolarczuk · Apr 3, 2026
@@ -558,6 +558,22 @@ def createTorchLinearKernelSizeBench(variant_name: str, **kwargs):
                     )
                 )
 
+        # Add TorchEventRecordQuery benchmarks
+        for runtime in filter(lambda x: x != RUNTIMES.UR, RUNTIMES):
+            for profiler_type in list(PROFILERS):
+                benches.append(
+                    TorchEventRecordQuery(
+                        self,
+                        runtime,
+                        "medium",
+                        profiler_type,
+                        Profiling=0,
+                        KernelWGCount=256,
+                        KernelWGSize=512,
+                        EventQueryIterations=1000,
+                    )
+                )
+
         #
         # Note: Graph benchmarks segfault on pvc on L0
         #

@@ -251,6 +251,25 @@ def __init__(
         )
 
 
+class TorchEventRecordQuery(TorchBenchmark):
+    """Torch benchmark bound to the "KernelSubmitEventRecordQuery" test.
+
+    The constructor adds no logic of its own: it forwards its arguments to
+    ``TorchBenchmark.__init__`` and inserts the fixed string
+    "KernelSubmitEventRecordQuery" between ``runtime`` and ``variant_name``.
+    """
+
+    def __init__(
+        self,
+        suite,
+        runtime: RUNTIMES,
+        variant_name: str,
+        profiler_type: PROFILERS,
+        **kwargs,
+    ):
+        # Presumably the benchmark/test name the base class expects in its
+        # third positional slot -- confirm against TorchBenchmark.__init__.
+        test_name = "KernelSubmitEventRecordQuery"
+        # Extra keyword arguments are passed through to the base class as-is.
+        super().__init__(
+            suite, runtime, test_name, variant_name, profiler_type, **kwargs
+        )
+
+
 class TorchGraphVllmMock(TorchBenchmark):
     def __init__(
         self,

@@ -231,6 +231,11 @@ def test_torch_l0(self):
         #     "KernelSubmitEventRecordWait medium",
         #     {"pytorch", "L0"},
         # )
+        self._checkCase(
+            "torch_benchmark_l0 KernelSubmitEventRecordQuery EventQueryIterations 1000, KernelWGCount 256, KernelWGSize 512, Profiling 0 CPU count",
+            "KernelSubmitEventRecordQuery medium, CPU count",
+            {"pytorch", "L0"},
+        )
         self._checkCase(
             "torch_benchmark_l0 KernelSubmitSingleQueue KernelDataType Int32, KernelWGCount 4096, KernelWGSize 512",
             "KernelSubmitSingleQueue Int32Large",
@@ -292,6 +297,11 @@ def test_torch_sycl(self):
             "KernelSubmitEventRecordWait medium, CPU count",
             {"pytorch", "SYCL"},
         )
+        self._checkCase(
+            "torch_benchmark_sycl KernelSubmitEventRecordQuery EventQueryIterations 1000, KernelWGCount 256, KernelWGSize 512, Profiling 0",
+            "KernelSubmitEventRecordQuery medium",
+            {"pytorch", "SYCL"},
+        )
         self._checkCase(
             "torch_benchmark_sycl KernelSubmitSingleQueue KernelDataType Mixed, KernelWGCount 512, KernelWGSize 256",
             "KernelSubmitSingleQueue MixedMedium",