Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
134 changes: 115 additions & 19 deletions BackendBench/backends/kernel_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,11 @@
import os
from typing import Callable, Dict

from BackendBench.utils import compile_kernel_from_string, op_name_to_folder_name
from BackendBench.utils import (
compile_kernel_from_string,
extract_operator_name,
op_name_to_folder_name,
)

from .base import Backend

Expand Down Expand Up @@ -154,7 +158,7 @@ def _adapt_kernel_function_name(self, kernel_code: str, op_name: str) -> str:
Returns:
Modified kernel code with correct function name
"""
folder_name = os.path.basename(os.path.dirname(kernel_code))
folder_name = op_name_to_folder_name(op_name)
expected_name = f"{folder_name}_kernel_impl"

# Replace 'def kernel_function' with 'def {op_name}_kernel_impl'
Expand All @@ -179,38 +183,40 @@ def {expected_name}(*args, **kwargs):
'''
return kernel_code + wrapper_code

def _generate_kernel_file_path(self, folder_name: str, attempt: int) -> str:
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so we had some existing setup directory utils IIRC, wouldn't this just duplicate stuff?

Copy link
Copy Markdown
Contributor Author

@jiannanWang jiannanWang Jan 15, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We do have a similar function in the llm backend and this function is a duplicate for that.

This kernel_file_path is only used in the llm and kernel_agent backends because those backends requires generating kernels.

Since kernel_file_path is only used in the llm and kernel_agent backends, I could create a new class, GenerativeBackend, which inherits from Backend. I can move the shared functions (e.g., _generate_kernel_file_path) from both llm and kernel_agent into GenerativeBackend. Then, both the LLM and kernel_agent classes can inherit from GenerativeBackend instead of directly from Backend, which will help reduce duplication.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

would it not be possible to have the same function?

"""Generate the file path for a kernel, creating the operator subdirectory if needed."""
op_dir = os.path.join(self.kernels_dir, folder_name)
os.makedirs(op_dir, exist_ok=True)
return os.path.join(op_dir, f"{folder_name}_implementation_v{attempt}.py")

def compile_kernel_from_string(
self, kernel_code: str, op_name: str, attempt: int = 1
) -> Callable:
"""Compile a kernel from string code and return a callable."""
folder_name = op_name_to_folder_name(op_name)
adapted_code = self._adapt_kernel_function_name(kernel_code, op_name)
kernel_file_path = os.path.join(self.kernels_dir, f"{folder_name}_kernel.py")
expected_fn_name = f"{folder_name}_kernel_impl"
module_name = f"kernel_agent_{folder_name}"

kernel_file_path = self._generate_kernel_file_path(folder_name, attempt)
module_name = f"{folder_name}_implementation_v{attempt}"
kernel = None
try:
kernel = compile_kernel_from_string(
kernel_code=adapted_code,
op_name=op_name,
kernel_file_path=kernel_file_path,
expected_fn_name=expected_fn_name,
expected_fn_name=folder_name,
module_name=module_name,
)
except Exception as e:
raise e
return kernel

def add_kernel(self, op, kernel_code: str, op_name: str):
def add_kernel(self, op, kernel_code: str, op_name: str, attempt: int = 1):
"""Add a kernel implementation for a specific operator."""
compiled_kernel = self.compile_kernel_from_string(kernel_code, op_name, attempt=1)
self.compiled_kernels[op] = compiled_kernel

# Save the original KernelAgent code as well
folder_name = op_name_to_folder_name(op_name)
original_file = os.path.join(self.kernels_dir, f"{folder_name}_original_kernel_agent.py")
with open(original_file, "w") as f:
f.write(kernel_code)
try:
compiled_kernel = self.compile_kernel_from_string(kernel_code, op_name, attempt=attempt)
self.compiled_kernels[op] = compiled_kernel
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this function means something totally different now, we went from something like write_kernel_to_file to bind the compiled op? so maybe the function just needs a better name

except Exception as e:
print(f"❌ Failed to compile kernel for {op_name}: {e}")

def generate_kernel_with_agent(self, op, op_name: str) -> tuple[str, bool]:
"""
Expand Down Expand Up @@ -260,14 +266,104 @@ def generate_kernel_with_agent(self, op, op_name: str) -> tuple[str, bool]:
except Exception as e:
print(f" Warning: Could not preserve session: {e}")

return result["kernel_code"], True
return result["kernel_code"], result["rounds"], True
else:
print(f"❌ KernelAgent failed for {op_name}: {result['message']}")
return "", False
return "", result["rounds"], False

except Exception as e:
print(f"❌ KernelAgent error for {op_name}: {e}")
return "", False
return "", 0, False

def generate_kernels(self, suite, daemon=True):
"""Generate kernels for all operators in the suite with comprehensive feedback."""
self.daemon = daemon
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what is this doing? I'd have expected a daemon to be a service and not a boolean. Also generally boolean args in a function typically hint that you might need 2 seperate functions

successful_ops = 0
total_ops = 0

for op_test in suite:
total_ops += 1
op = op_test.op
op_str = str(op)
op_name = extract_operator_name(op_str)
folder_name = op_name_to_folder_name(op_name)

logger.info(f"Generating kernel for {op_name} (full op: {op_str})")

# Generate kernel with feedback-driven retry
kernel_code, best_kernel_attempt, success = self.generate_kernel_with_agent(
op=op, op_name=op_name
)

# Add kernel to backend and track success
self.add_kernel(op, kernel_code, op_name)
if success:
successful_ops += 1
logger.info(f"✓ Success! Best attempt: {best_kernel_attempt}")
else:
logger.info(f"✗ Failed after {best_kernel_attempt} rounds")

# Write operation summary in the operator subdirectory
self._write_summary(
folder_name=folder_name,
op_name=op_name,
op_str=op_str,
best_kernel_attempt=best_kernel_attempt,
success=success,
)

# Generate and save overall summary
self._write_overall_summary(successful_ops, total_ops)

def _write_summary(
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel like a few stuff is getting mixed here

  1. What the eval is expected to output
  2. What kernelagent is expected to output
  3. What the specific integration of kernelagent and the eval is supposed to follow

For instance Sahan also had some extensive reporting code in the past, so I'd like to hear some ideas for how to clean all this up

self,
folder_name: str,
op_name: str,
op_str: str,
best_kernel_attempt: int,
success: bool,
):
"""Write operation summary to the operator subdirectory."""
op_dir = os.path.join(self.kernels_dir, folder_name)
os.makedirs(op_dir, exist_ok=True)
summary_file = os.path.join(op_dir, f"{folder_name}_summary.txt")
with open(summary_file, "w") as f:
f.write(f"Operation: {op_name}\n")
f.write(f"Full op: {op_str}\n")
f.write("Backend: KernelAgent\n")
f.write(f"Workers: {self.num_workers}\n")
f.write(f"Max rounds: {self.max_rounds}\n")
f.write(f"Best kernel attempt: {best_kernel_attempt}\n")
f.write(f"Final Status: {'✓ Success' if success else '✗ Failure'}\n")
f.write(f"Final kernel file: {folder_name}_implementation_v{best_kernel_attempt}.py\n")

def _write_overall_summary(self, successful_ops: int, total_ops: int):
"""Write overall summary of kernel generation results."""
failed_ops = total_ops - successful_ops
success_rate = f"{successful_ops / total_ops * 100:.1f}%" if total_ops > 0 else "0.0%"

summary_lines = [
"=" * 60,
"KERNEL AGENT BACKEND SETUP SUMMARY",
"=" * 60,
f"Total operations attempted: {total_ops}",
f"Successfully created correct kernels for: {successful_ops} ops",
f"Failed to create correct kernels for: {failed_ops} ops",
f"Success rate: {success_rate}",
f"Workers: {self.num_workers}",
f"Max rounds: {self.max_rounds}",
f"Generated kernels saved to: {self.kernels_dir}",
"Backend: KernelAgent",
"=" * 60,
]

# Log summary
for line in summary_lines:
logger.info(line)

# Save to file
with open(os.path.join(self.kernels_dir, "OVERALL_SUMMARY.txt"), "w") as f:
f.write("\n".join(summary_lines))

def __getitem__(self, key):
if key in self.compiled_kernels:
Expand Down
7 changes: 5 additions & 2 deletions BackendBench/data_loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
from datasets import load_dataset
from tqdm import tqdm

from BackendBench.utils import extract_operator_name

# constants for downloading the test set from huggingface
# you can explore the dataset here
# https://huggingface.co/datasets/GPUMODE/backendbench_tests
Expand Down Expand Up @@ -224,9 +226,10 @@ def _load_from_parquet(
table = pq.read_table(source)

df = table.to_pandas()
# Apply filter if provided
# Apply filter if provided - use exact matching on extracted operator names
# e.g., "relu.default" should match "aten.relu.default" but NOT "aten.leaky_relu.default"
if filter:
mask = df["op_name"].apply(lambda op: any(f in op for f in filter))
mask = df["op_name"].apply(lambda op: extract_operator_name(op) in filter)
df = df[mask]

return df.to_dict("records")
Expand Down
4 changes: 3 additions & 1 deletion BackendBench/scripts/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,8 @@ def cli(
elif backend == "kernel_agent":
if backends.KernelAgentBackend is None:
raise NotImplementedError("KernelAgent backend is for internal use only")
backend = backends.KernelAgentBackend()
backend.generate_kernels(suite, daemon=daemon)
elif backend == "directory":
if dsl == "cuda":
backend = backends.DirectoryBackend(ops_directory, load_cpp_source=load_cpp_source)
Expand All @@ -233,7 +235,6 @@ def cli(
backend = {
"aten": backends.AtenBackend,
"flag_gems": backends.FlagGemsBackend,
"kernel_agent": backends.KernelAgentBackend,
"directory": backends.DirectoryBackend,
}[backend]()

Expand All @@ -253,6 +254,7 @@ def cli(

if num_workers is None:
for test in suite:
print(test.op)
if test.op not in backend:
continue

Expand Down