13 changes: 13 additions & 0 deletions evaluators/builtin/src/agent_control_evaluators/__init__.py
@@ -9,6 +9,7 @@
- list: List-based value matching
- json: JSON validation
- sql: SQL query validation
- budget: Cumulative LLM token and cost tracking

Naming convention:
- Built-in: "regex", "list", "json", "sql"
@@ -47,6 +48,13 @@
from agent_control_evaluators.json import JSONEvaluator, JSONEvaluatorConfig
from agent_control_evaluators.list import ListEvaluator, ListEvaluatorConfig
from agent_control_evaluators.regex import RegexEvaluator, RegexEvaluatorConfig
from agent_control_evaluators.budget import (
BudgetEvaluator,
BudgetEvaluatorConfig,
BudgetSnapshot,
BudgetStore,
InMemoryBudgetStore,
)
from agent_control_evaluators.sql import SQLEvaluator, SQLEvaluatorConfig

__all__ = [
@@ -73,4 +81,9 @@
"JSONEvaluatorConfig",
"SQLEvaluator",
"SQLEvaluatorConfig",
"BudgetEvaluator",
"BudgetEvaluatorConfig",
"BudgetSnapshot",
"BudgetStore",
"InMemoryBudgetStore",
]
17 changes: 17 additions & 0 deletions evaluators/builtin/src/agent_control_evaluators/budget/__init__.py
@@ -0,0 +1,17 @@
"""Budget evaluator for per-agent LLM cost and token tracking."""

from agent_control_evaluators.budget.config import BudgetEvaluatorConfig
from agent_control_evaluators.budget.evaluator import BudgetEvaluator
from agent_control_evaluators.budget.store import (
BudgetSnapshot,
BudgetStore,
InMemoryBudgetStore,
)

__all__ = [
"BudgetEvaluator",
"BudgetEvaluatorConfig",
"BudgetSnapshot",
"BudgetStore",
"InMemoryBudgetStore",
]
81 changes: 81 additions & 0 deletions evaluators/builtin/src/agent_control_evaluators/budget/config.py
@@ -0,0 +1,81 @@
"""Configuration for the budget evaluator."""

from __future__ import annotations

import math
from typing import Any, Literal

from pydantic import Field, field_validator, model_validator

from agent_control_evaluators._base import EvaluatorConfig


class BudgetLimitRule(EvaluatorConfig):
"""A single budget limit rule.

Each rule defines a ceiling (USD and/or tokens) for a combination
of scope dimensions and time window. Multiple rules can apply to
the same step -- the evaluator checks all of them and triggers
on the first breach.

Attributes:
scope: Static scope dimensions that must match for this rule
to apply. Empty dict = global rule.

Contributor: Worth it to give some examples here for scope?

Author: Sure, will add. Something like {"agent": "summarizer", "channel": "slack"}.
per: If set, the limit is applied independently for each unique
value of this metadata field (e.g. "user_id" creates per-user
budgets within the scope).

Contributor: I don't quite understand this. Can't we just handle separate budgets by having multiple rules with different scope dicts?

Author: For static scopes, agreed: multiple rules work. My concern is the dynamic case, "each user gets $10/day", where users aren't known at config time. With per, one rule covers all future users. Without it, you'd need to generate rules on the fly. Would a group_by field work? e.g. group_by: "user_id" would mean "apply this limit independently per distinct user_id value." Open to other approaches if you have something in mind.
window: Time window for accumulation. None = cumulative (no reset).
limit_usd: Maximum USD spend in the window. None = uncapped.
limit_tokens: Maximum tokens in the window. None = uncapped.
"""

scope: dict[str, str] = Field(default_factory=dict)
per: str | None = None
window: Literal["daily", "weekly", "monthly"] | None = None
Contributor: What if we want hourly, or half an hour? Maybe it's better to define "window" as an integer in seconds or minutes; that way you can express whatever window you want.

Author: Agreed. Will change to window_seconds: int. I'll keep a few named constants (DAILY = 86400, etc.) as a convenience, but the field itself will be raw seconds.

limit_usd: float | None = None
Contributor: This seems like the wrong abstraction for budgeting. What if the budget is in a different currency? Why do we need separate fields for tokens vs. USD? It might be better to use an integer limit and a "currency" enum, which could be USD, tokens, euros, etc. I don't think there's a use case for floating point for USD or euros, no?

Author: Good point. I'll switch to an integer limit plus a Currency enum (USD, EUR, tokens, etc.). Float was unnecessary; cents-level precision is handled by using integer cents anyway.

limit_tokens: int | None = None

@model_validator(mode="after")
def at_least_one_limit(self) -> "BudgetLimitRule":
if self.limit_usd is None and self.limit_tokens is None:
raise ValueError("At least one of limit_usd or limit_tokens must be set")
return self

@field_validator("limit_usd")
@classmethod
def validate_limit_usd(cls, v: float | None) -> float | None:
if v is not None and (not math.isfinite(v) or v <= 0):
raise ValueError("limit_usd must be a finite positive number")
return v

@field_validator("limit_tokens")
@classmethod
def validate_limit_tokens(cls, v: int | None) -> int | None:
if v is not None and v <= 0:
raise ValueError("limit_tokens must be positive")
return v


class BudgetEvaluatorConfig(EvaluatorConfig):
"""Configuration for the budget evaluator.

Attributes:
limits: List of budget limit rules. Each is checked independently.
pricing: Optional model pricing table. Maps model name to per-1K
token rates. Used to derive cost_usd from token counts when
cost is not provided in step data.
token_path: Dot-notation path to extract token usage from step
data (e.g. "usage.total_tokens"). If None, looks for standard
fields (input_tokens, output_tokens, total_tokens, usage).
cost_path: Dot-notation path to extract cost from step data.
model_path: Dot-notation path to extract model name (for pricing lookup).
metadata_paths: Mapping of metadata field name to dot-notation path
in step data. Used to extract scope dimensions (channel, user_id, etc).
"""

limits: list[BudgetLimitRule] = Field(min_length=1)
pricing: dict[str, dict[str, float]] | None = None
token_path: str | None = None
cost_path: str | None = None
Contributor: Shouldn't this evaluator be computing cost in USD based on model and token count? It doesn't seem like an LLM step should be passing it down here, but I may be wrong on this.

Author: My thinking was that the caller already has cost from the LLM response, so passing it avoids maintaining a pricing table. But I see the argument: if the evaluator owns cost computation, the contract is simpler and the caller can't lie about cost. One question: should the evaluator maintain its own pricing table, or pull from an external source (e.g. LiteLLM's model cost map)?

Contributor: I believe you already include a pricing table in the evaluator config, so for version 1 of this evaluator we can just rely on that.

Author: Makes sense. I'll have the evaluator compute cost from the config pricing table plus model and token counts. That lets me drop cost_path entirely; I just need token_path and a new model_path to extract the model name from the input. If the model isn't in the pricing table, fail closed.
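A sketch of the cost derivation agreed on above, assuming per-1K rates keyed by "input" and "output" (the rate-key names and model name here are assumptions, not part of this PR):

```python
def derive_cost_usd(
    pricing: dict[str, dict[str, float]],
    model: str,
    input_tokens: int,
    output_tokens: int,
) -> float:
    """Compute cost from per-1K token rates. Unknown models raise
    (fail closed) rather than being silently treated as free."""
    if model not in pricing:
        raise ValueError(f"no pricing entry for model {model!r}")
    rates = pricing[model]
    return (input_tokens / 1000) * rates["input"] + \
           (output_tokens / 1000) * rates["output"]

pricing = {"gpt-x": {"input": 0.01, "output": 0.03}}  # hypothetical rates
assert abs(derive_cost_usd(pricing, "gpt-x", 2000, 1000) - 0.05) < 1e-9
```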

model_path: str | None = None
metadata_paths: dict[str, str] = Field(default_factory=dict)