diff --git a/packages/testing/src/execution_testing/cli/pytest_commands/plugins/filler/tests/test_verify_traces.py b/packages/testing/src/execution_testing/cli/pytest_commands/plugins/filler/tests/test_verify_traces.py new file mode 100644 index 00000000000..5682cddb71c --- /dev/null +++ b/packages/testing/src/execution_testing/cli/pytest_commands/plugins/filler/tests/test_verify_traces.py @@ -0,0 +1,95 @@ +"""Tests for baseline trace loading from dump directories.""" + +import json +from pathlib import Path + +from execution_testing.cli.pytest_commands.plugins.filler.verify_traces import ( # noqa: E501 + _load_traces_from_dump_dir, +) +from execution_testing.client_clis.cli_types import ( + Traces, +) + + +def _write_trace_file( + path: Path, + trace_lines: list[dict] | None = None, + output: str = "0x", + gas_used: str = "0x5208", +) -> None: + """Write a minimal .jsonl trace file.""" + if trace_lines is None: + trace_lines = [ + { + "pc": 0, + "op": 96, + "gas": "0x5f5e100", + "gasCost": "0x3", + "memSize": 0, + "stack": [], + "depth": 1, + "refund": 0, + "opName": "PUSH1", + } + ] + path.parent.mkdir(parents=True, exist_ok=True) + with path.open("w") as f: + for line in trace_lines: + f.write(json.dumps(line) + "\n") + f.write(json.dumps({"output": output, "gasUsed": gas_used}) + "\n") + + +class TestLoadTracesFromDumpDir: + """Test _load_traces_from_dump_dir.""" + + def test_empty_directory(self, tmp_path: Path) -> None: + """Empty directory returns empty list.""" + result = _load_traces_from_dump_dir(tmp_path) + assert result == [] + + def test_single_call_dir_two_trace_files(self, tmp_path: Path) -> None: + """Single call dir with two trace files returns one Traces.""" + call_dir = tmp_path / "0" + call_dir.mkdir() + _write_trace_file(call_dir / "trace-0-0xaaa.jsonl") + _write_trace_file(call_dir / "trace-1-0xbbb.jsonl") + + result = _load_traces_from_dump_dir(tmp_path) + assert len(result) == 1 + assert isinstance(result[0], Traces) + assert 
len(result[0].root) == 2 + + def test_multiple_call_dirs(self, tmp_path: Path) -> None: + """Multiple call dirs (0, 1, 2) return correctly ordered list.""" + for i in range(3): + call_dir = tmp_path / str(i) + call_dir.mkdir() + _write_trace_file(call_dir / f"trace-0-0x{i:03x}.jsonl") + + result = _load_traces_from_dump_dir(tmp_path) + assert len(result) == 3 + for traces in result: + assert isinstance(traces, Traces) + assert len(traces.root) == 1 + + def test_non_numeric_subdirs_ignored(self, tmp_path: Path) -> None: + """Non-numeric subdirectories are ignored.""" + (tmp_path / "0").mkdir() + _write_trace_file(tmp_path / "0" / "trace-0-0xaaa.jsonl") + (tmp_path / "metadata").mkdir() + (tmp_path / "metadata" / "info.json").write_text("{}") + + result = _load_traces_from_dump_dir(tmp_path) + assert len(result) == 1 + + def test_numeric_sorting_not_lexical(self, tmp_path: Path) -> None: + """Call dirs are sorted numerically (2 before 10).""" + for i in [10, 2, 0]: + call_dir = tmp_path / str(i) + call_dir.mkdir() + _write_trace_file(call_dir / f"trace-0-0x{i:03x}.jsonl") + + result = _load_traces_from_dump_dir(tmp_path) + assert len(result) == 3 + # Verify they are in order 0, 2, 10 by checking the list + # length — ordering is guaranteed by the implementation diff --git a/packages/testing/src/execution_testing/cli/pytest_commands/plugins/filler/verify_traces.py b/packages/testing/src/execution_testing/cli/pytest_commands/plugins/filler/verify_traces.py new file mode 100644 index 00000000000..56847ed0765 --- /dev/null +++ b/packages/testing/src/execution_testing/cli/pytest_commands/plugins/filler/verify_traces.py @@ -0,0 +1,267 @@ +"""Pytest plugin for trace verification against a baseline.""" + +from __future__ import annotations + +from pathlib import Path +from typing import Any, Generator + +import pytest +from _pytest.terminal import TerminalReporter + +from execution_testing.cli.pytest_commands.plugins.filler.filler import ( + node_to_test_info, +) +from 
execution_testing.client_clis.cli_types import ( + Traces, + TransactionTraces, +) +from execution_testing.client_clis.trace_comparators import ( + TraceComparator, + TraceComparatorType, + TraceComparisonResult, + create_comparator, +) +from execution_testing.client_clis.trace_report_formatter import ( + JsonTracesDiffReportFormatter, + TextTracesDiffReportFormatter, + TracesDiffReportFormatter, +) + +# --------------------------------------------------------------------------- +# Baseline loading +# --------------------------------------------------------------------------- + + +def _load_traces_from_dump_dir(dump_dir: Path) -> list[Traces]: + """Load traces from numbered call subdirectories.""" + traces_list: list[Traces] = [] + call_dirs = sorted( + (d for d in dump_dir.iterdir() if d.is_dir() and d.name.isdigit()), + key=lambda d: int(d.name), + ) + for call_dir in call_dirs: + traces = Traces(root=[]) + trace_files = sorted(call_dir.glob("trace-*.jsonl")) + for trace_file in trace_files: + traces.append(TransactionTraces.from_file(trace_file)) + traces_list.append(traces) + return traces_list + + +# --------------------------------------------------------------------------- +# CLI flags +# --------------------------------------------------------------------------- + + +def pytest_addoption(parser: pytest.Parser) -> None: + """Register --verify-traces and --verify-traces-comparator.""" + group = parser.getgroup("verify_traces", "Trace verification options") + group.addoption( + "--verify-traces", + action="store", + dest="verify_traces_dir", + type=Path, + default=None, + help=( + "Baseline trace dump directory. " + "Compares current traces against baseline. " + "Implies --traces." + ), + ) + all_comparators = ",".join(c.value for c in TraceComparatorType) + group.addoption( + "--verify-traces-comparator", + action="store", + dest="verify_traces_comparator", + type=str, + default=all_comparators, + help=( + "Comma-separated comparator names. 
" + f"Choices: {all_comparators}. " + f"Default: {all_comparators}." + ), + ) + group.addoption( + "--verify-traces-json", + action="store", + dest="verify_traces_json", + type=Path, + default=None, + help="Write the trace verification report to a JSON file.", + ) + + +# --------------------------------------------------------------------------- +# Plugin registration +# --------------------------------------------------------------------------- + + +@pytest.hookimpl(tryfirst=True) +def pytest_configure(config: pytest.Config) -> None: + """Register the TraceVerifier plugin if --verify-traces is set.""" + verify_traces_dir = config.getoption("verify_traces_dir", None) + if verify_traces_dir is None: + return + + config.collect_traces = True # type: ignore[attr-defined] + config.option.evm_collect_traces = True + + comparator_names = config.getoption("verify_traces_comparator").split(",") + comparators = [ + create_comparator(TraceComparatorType(name.strip())) + for name in comparator_names + ] + + formatter = TextTracesDiffReportFormatter() + + json_path = config.getoption("verify_traces_json", None) + json_formatter = ( + JsonTracesDiffReportFormatter(Path(json_path)) + if json_path is not None + else None + ) + + filler_path = Path(config.getoption("filler_path")) + + config.pluginmanager.register( + TraceVerifier( + config=config, + comparators=comparators, + formatter=formatter, + json_formatter=json_formatter, + baseline_dir=Path(verify_traces_dir), + filler_path=filler_path, + ), + "trace-verifier", + ) + + +# --------------------------------------------------------------------------- +# Plugin class +# --------------------------------------------------------------------------- + + +class TraceVerifier: + """Pytest plugin for trace verification against a baseline.""" + + def __init__( + self, + config: pytest.Config, + comparators: list[TraceComparator], + formatter: TracesDiffReportFormatter, + baseline_dir: Path, + filler_path: Path, + json_formatter: 
JsonTracesDiffReportFormatter | None = None, + ) -> None: + """Initialize with comparators, formatter, and baseline path.""" + self.config = config + self.comparators = comparators + self.formatter = formatter + self.json_formatter = json_formatter + self.baseline_dir = baseline_dir + self.filler_path = filler_path + self.test_results: dict[str, dict[str, TraceComparisonResult]] = {} + + @pytest.hookimpl(hookwrapper=True) + def pytest_runtest_makereport( + self, item: pytest.Item, call: pytest.CallInfo[None] + ) -> Generator[None, Any, None]: + """Collect trace diffs after each test's call phase.""" + outcome = yield + report = outcome.get_result() + + if call.when != "call" or report.failed: + return + + t8n = getattr(item.config, "t8n", None) + if t8n is None: + return + + current_traces_list = t8n.get_traces() + if current_traces_list is None: + return + + test_info = node_to_test_info(item) + baseline_dump_dir = test_info.get_dump_dir_path( + self.baseline_dir, + self.filler_path, + level="test_parameter", + ) + if baseline_dump_dir is None or not baseline_dump_dir.exists(): + return + + baseline_traces_list = _load_traces_from_dump_dir(baseline_dump_dir) + + if not current_traces_list: + return # No traces collected (e.g. t8n cache hit) + + # Compare each pair of Traces objects (one per t8n call). + # Run "exact" last and skip it if any other comparator failed, + # since exact is strictly stricter than the others. 
+ exact_comparator = None + other_comparators = [] + for c in self.comparators: + if c.name == TraceComparatorType.EXACT: + exact_comparator = c + else: + other_comparators.append(c) + + results: dict[str, TraceComparisonResult] = {} + any_failed = False + for comparator in other_comparators: + all_diffs = [] + all_equivalent = True + for baseline, current in zip( + baseline_traces_list, current_traces_list, strict=False + ): + result = comparator.compare_traces(baseline, current) + all_diffs.extend(result.differences) + if not result.equivalent: + all_equivalent = False + results[comparator.name] = TraceComparisonResult( + equivalent=all_equivalent, + differences=all_diffs, + ) + if not all_equivalent: + any_failed = True + + if exact_comparator is not None and not any_failed: + all_diffs = [] + all_equivalent = True + for baseline, current in zip( + baseline_traces_list, + current_traces_list, + strict=False, + ): + result = exact_comparator.compare_traces(baseline, current) + all_diffs.extend(result.differences) + if not result.equivalent: + all_equivalent = False + results[exact_comparator.name] = TraceComparisonResult( + equivalent=all_equivalent, + differences=all_diffs, + ) + + if results: + self.test_results[item.nodeid] = results + + def pytest_terminal_summary( + self, + terminalreporter: TerminalReporter, + exitstatus: int, # noqa: ARG002 + config: pytest.Config, # noqa: ARG002 + ) -> None: + """Print the aggregated trace verification report.""" + if not self.test_results: + return + + output = self.formatter.format_summary(self.test_results) + terminalreporter.write_sep("=", "trace verification report") + for line in output.splitlines(): + terminalreporter.write_line(line) + + if self.json_formatter is not None: + self.json_formatter.write(self.test_results) + terminalreporter.write_line( + f"JSON report written to: {self.json_formatter.output_path}" + ) diff --git 
a/packages/testing/src/execution_testing/cli/pytest_commands/pytest_ini_files/pytest-fill.ini b/packages/testing/src/execution_testing/cli/pytest_commands/pytest_ini_files/pytest-fill.ini index e72c1ffea0d..179ac2082f0 100644 --- a/packages/testing/src/execution_testing/cli/pytest_commands/pytest_ini_files/pytest-fill.ini +++ b/packages/testing/src/execution_testing/cli/pytest_commands/pytest_ini_files/pytest-fill.ini @@ -7,6 +7,7 @@ testpaths = tests/ addopts = -p execution_testing.cli.pytest_commands.plugins.shared.execute_fill -p execution_testing.cli.pytest_commands.plugins.filler.filler + -p execution_testing.cli.pytest_commands.plugins.filler.verify_traces -p execution_testing.cli.pytest_commands.plugins.forks.forks -p execution_testing.cli.pytest_commands.plugins.concurrency -p execution_testing.cli.pytest_commands.plugins.filler.pre_alloc diff --git a/packages/testing/src/execution_testing/client_clis/__init__.py b/packages/testing/src/execution_testing/client_clis/__init__.py index e44839fe578..9f44666cf07 100644 --- a/packages/testing/src/execution_testing/client_clis/__init__.py +++ b/packages/testing/src/execution_testing/client_clis/__init__.py @@ -7,6 +7,7 @@ BlockExceptionWithMessage, LazyAlloc, Result, + TraceFieldDiff, Traces, TransactionExceptionWithMessage, TransitionToolOutput, @@ -25,6 +26,16 @@ from .clis.nimbus import NimbusTransitionTool from .ethereum_cli import CLINotFoundInPathError, UnknownCLIError from .fixture_consumer_tool import FixtureConsumerTool +from .trace_comparators import ( + FieldExclusionTraceComparator, + GasExhaustionTraceComparator, + TraceComparator, + TraceComparatorType, + TraceComparisonResult, + TraceDifference, + TransactionCountMismatch, + create_comparator, +) from .transition_tool import TransitionTool TransitionTool.set_default_tool(ExecutionSpecsTransitionTool) @@ -41,7 +52,9 @@ "EvmOneStateFixtureConsumer", "EvmOneBlockchainFixtureConsumer", "ExecutionSpecsTransitionTool", + "FieldExclusionTraceComparator", 
"FixtureConsumerTool", + "GasExhaustionTraceComparator", "GethFixtureConsumer", "GethTransitionTool", "LazyAlloc", @@ -49,9 +62,16 @@ "NethtestFixtureConsumer", "NimbusTransitionTool", "Result", + "TraceComparator", + "TraceComparatorType", + "TraceComparisonResult", + "TraceDifference", + "TraceFieldDiff", "Traces", + "TransactionCountMismatch", "TransactionExceptionWithMessage", "TransitionTool", "TransitionToolOutput", "UnknownCLIError", + "create_comparator", ) diff --git a/packages/testing/src/execution_testing/client_clis/cli_types.py b/packages/testing/src/execution_testing/client_clis/cli_types.py index 07dff0d72ae..21d89279870 100644 --- a/packages/testing/src/execution_testing/client_clis/cli_types.py +++ b/packages/testing/src/execution_testing/client_clis/cli_types.py @@ -3,7 +3,16 @@ import json from dataclasses import dataclass from pathlib import Path -from typing import Annotated, Any, Dict, Generic, List, Self, TypeVar +from typing import ( + Annotated, + Any, + Dict, + Generic, + List, + NamedTuple, + Self, + TypeVar, +) from pydantic import Field, PlainSerializer, PlainValidator @@ -81,20 +90,60 @@ class TraceLine(CamelModel): error: str | None = None return_data: str | None = None - def are_equivalent(self, other: Self) -> bool: - """Return True if the only difference is the gas counter.""" - self_dict = self.model_dump(mode="python", exclude={"gas", "gas_cost"}) - other_dict = other.model_dump( - mode="python", exclude={"gas", "gas_cost"} - ) - if self_dict != other_dict: + _DEFAULT_EXCLUDE: set[str] = {"gas", "gas_cost"} + + def compare( + self, + other: Self, + exclude_fields: set[str] | None = None, + ) -> tuple[dict[str, str], dict[str, str]]: + """ + Compare two trace lines field-by-field. + + Return (baseline_fields, current_fields) dicts containing only + the fields that differ. Both dicts are empty when lines match. 
+ """ + if exclude_fields is None: + exclude_fields = self._DEFAULT_EXCLUDE + self_dict = self.model_dump(mode="json", exclude=exclude_fields) + other_dict = other.model_dump(mode="json", exclude=exclude_fields) + baseline_diff: dict[str, str] = {} + current_diff: dict[str, str] = {} + for k in self_dict: + if self_dict[k] != other_dict[k]: + baseline_diff[k] = str(self_dict[k]) + current_diff[k] = str(other_dict[k]) + return baseline_diff, current_diff + + def are_equivalent( + self, + other: Self, + exclude_fields: set[str] | None = None, + ) -> bool: + """Return True if the only difference is in excluded fields.""" + baseline_diff, _ = self.compare(other, exclude_fields) + if baseline_diff: logger.debug( - f"Trace lines are not equivalent: {self_dict} != {other_dict}." + f"Trace lines are not equivalent: " + f"differing fields: {list(baseline_diff.keys())}." ) return False return True +class TraceFieldDiff(NamedTuple): + """ + A single diff entry from TransactionTraces.compare(). + + line_index is None for structural diffs (trace_length, output, + gas_used). Field dicts map field name to string value. + """ + + line_index: int | None + baseline_fields: dict[str, str] + current_fields: dict[str, str] + + class TransactionTraces(CamelModel): """Traces of a single transaction.""" @@ -132,41 +181,90 @@ def remove_gas(traces: List[TraceLine]) -> None: # Remove the result of calling `Op.GAS` from the stack. trace.stack[-1] = None - def are_equivalent( - self, other: Self, enable_post_processing: bool - ) -> bool: - """Return True if the only difference is the gas counter.""" + def compare( + self, + other: Self, + exclude_fields: set[str] | None = None, + enable_post_processing: bool = False, + ) -> List[TraceFieldDiff]: + """ + Compare traces and return per-line differing fields. + + Return a list of TraceFieldDiff entries. line_index is None for + structural diffs (trace_length, output, gas_used). Field dicts + map field name to string value. 
+ + When exclude_fields is None, no fields are excluded. Pass an + explicit set to skip fields (e.g. {"gas", "gas_cost"}). + """ + line_exclude = exclude_fields or set() + diffs: List[TraceFieldDiff] = [] + if len(self.traces) != len(other.traces): - logger.debug( - f"Traces have different lengths: " - f"{len(self.traces)} != {len(other.traces)}." + diffs.append( + TraceFieldDiff( + None, + {"trace_length": str(len(self.traces))}, + {"trace_length": str(len(other.traces))}, + ) ) - return False + return diffs + if self.output != other.output: - logger.debug( - f"Traces have different outputs: " - f"{self.output} != {other.output}." + diffs.append( + TraceFieldDiff( + None, + {"output": str(self.output)}, + {"output": str(other.output)}, + ) ) - return False - if self.gas_used != other.gas_used and not enable_post_processing: - logger.debug( - f"Traces have different gas used: " - f"{self.gas_used} != {other.gas_used}." + + if not enable_post_processing and self.gas_used != other.gas_used: + diffs.append( + TraceFieldDiff( + None, + {"gas_used": str(self.gas_used)}, + {"gas_used": str(other.gas_used)}, + ) ) - return False + own_traces = self.traces.copy() other_traces = other.traces.copy() if enable_post_processing: - logger.debug( - "Removing gas from traces (enable_post_processing=True)." 
- ) TransactionTraces.remove_gas(own_traces) TransactionTraces.remove_gas(other_traces) - for i in range(len(self.traces)): - if not own_traces[i].are_equivalent(other_traces[i]): - logger.debug(f"Trace line {i} is not equivalent.") - return False - return True + + for i, (b_line, c_line) in enumerate( + zip(own_traces, other_traces, strict=False) + ): + baseline_diff, current_diff = b_line.compare(c_line, line_exclude) + if baseline_diff: + diffs.append(TraceFieldDiff(i, baseline_diff, current_diff)) + + return diffs + + def are_equivalent( + self, other: Self, enable_post_processing: bool + ) -> bool: + """Return True if the only difference is the gas counter.""" + diffs = self.compare( + other, + exclude_fields={"gas", "gas_cost"}, + enable_post_processing=enable_post_processing, + ) + for diff in diffs: + if diff.line_index is None: + for field_name in diff.baseline_fields: + logger.debug( + f"Traces have different {field_name}: " + f"{diff.baseline_fields[field_name]} != " + f"{diff.current_fields[field_name]}." + ) + else: + logger.debug( + f"Trace line {diff.line_index} is not equivalent." 
+ ) + return len(diffs) == 0 def print(self) -> None: """Print the traces in a readable format.""" diff --git a/packages/testing/src/execution_testing/client_clis/tests/test_trace_comparators.py b/packages/testing/src/execution_testing/client_clis/tests/test_trace_comparators.py new file mode 100644 index 00000000000..14f0c643b38 --- /dev/null +++ b/packages/testing/src/execution_testing/client_clis/tests/test_trace_comparators.py @@ -0,0 +1,1199 @@ +"""Tests for trace comparator types and ABC.""" + +import pytest + +from execution_testing.base_types import HexNumber +from execution_testing.client_clis.cli_types import ( + TraceLine, + Traces, + TransactionTraces, +) +from execution_testing.client_clis.trace_comparators import ( + FieldExclusionTraceComparator, + GasExhaustionTraceComparator, + TraceComparator, + TraceComparatorType, + TraceComparisonResult, + TraceDifference, + TransactionCountMismatch, + create_comparator, +) + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture() +def trace_line() -> TraceLine: + """Return a default TraceLine.""" + return _make_trace_line() + + +@pytest.fixture() +def transaction_traces(trace_line: TraceLine) -> TransactionTraces: + """Return a default TransactionTraces with one trace line.""" + return _make_transaction_traces([trace_line]) + + +@pytest.fixture() +def traces(transaction_traces: TransactionTraces) -> Traces: + """Return a default Traces with one transaction.""" + return _make_traces([transaction_traces]) + + +@pytest.fixture() +def spy_comparator() -> "_SpyComparator": + """Return a spy comparator that records calls.""" + return _SpyComparator() + + +@pytest.fixture() +def failing_comparator() -> "_FailingComparator": + """Return a comparator that always reports a difference.""" + return _FailingComparator() + + +# 
--------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _make_trace_line(**overrides: object) -> TraceLine: + """Create a TraceLine with sensible defaults.""" + defaults = { + "pc": 0, + "op": 0x60, + "gas": 0x5F5E100, + "gas_cost": 0x3, + "mem_size": 0, + "stack": [], + "depth": 1, + "refund": 0, + "op_name": "PUSH1", + } + defaults.update(overrides) + return TraceLine.model_validate(defaults) + + +def _make_transaction_traces( + trace_lines: list[TraceLine] | None = None, + output: str | None = "0x", +) -> TransactionTraces: + """Create a TransactionTraces with default trace lines.""" + if trace_lines is None: + trace_lines = [_make_trace_line()] + return TransactionTraces.model_validate( + {"traces": trace_lines, "output": output} + ) + + +def _make_traces( + transactions: list[TransactionTraces] | None = None, +) -> Traces: + """Create a Traces object.""" + if transactions is None: + transactions = [_make_transaction_traces()] + return Traces(root=transactions) + + +class _SpyComparator(TraceComparator): + """Concrete subclass that records calls for testing the ABC.""" + + @property + def name(self) -> str: + """Return the comparator's name.""" + return "spy" + + def __init__(self) -> None: + self.calls: list[tuple[int]] = [] + + def compare_transaction_traces( + self, + baseline: TransactionTraces, # noqa: ARG002 + current: TransactionTraces, # noqa: ARG002 + transaction_index: int, + ) -> TraceComparisonResult: + self.calls.append((transaction_index,)) + return TraceComparisonResult(equivalent=True, differences=[]) + + +class _FailingComparator(TraceComparator): + """Concrete subclass that always reports a difference.""" + + @property + def name(self) -> str: + """Return the comparator's name.""" + return "failing" + + def compare_transaction_traces( + self, + baseline: TransactionTraces, # noqa: ARG002 + current: TransactionTraces, # noqa: 
ARG002 + transaction_index: int, + ) -> TraceComparisonResult: + return TraceComparisonResult( + equivalent=False, + differences=[ + TraceDifference( + transaction_index=transaction_index, + trace_line_index=0, + baseline="PUSH1 (pc=0)", + current="PUSH1 (pc=1)", + ) + ], + ) + + +# --------------------------------------------------------------------------- +# Phase 1: Types and ABC +# --------------------------------------------------------------------------- + + +class TestTraceDifference: + """Test TraceDifference dataclass.""" + + def test_construction_and_field_access(self) -> None: + """Verify all fields are accessible after construction.""" + diff = TraceDifference( + transaction_index=2, + trace_line_index=5, + baseline="PUSH1 (pc=0xa)", + current="PUSH1 (pc=0x14)", + ) + assert diff.transaction_index == 2 + assert diff.trace_line_index == 5 + assert "PUSH1" in diff.baseline + assert "PUSH1" in diff.current + + +class TestTransactionCountMismatch: + """Test TransactionCountMismatch subclass.""" + + def test_is_trace_difference_subclass(self) -> None: + """Verify it is a TraceDifference subclass.""" + mismatch = TransactionCountMismatch(baseline_count=3, current_count=2) + assert isinstance(mismatch, TraceDifference) + + def test_stores_counts(self) -> None: + """Verify baseline and current counts are stored.""" + mismatch = TransactionCountMismatch(baseline_count=3, current_count=2) + assert mismatch.baseline_count == 3 + assert mismatch.current_count == 2 + + +class TestTraceComparisonResult: + """Test TraceComparisonResult dataclass.""" + + def test_equivalent_when_no_differences(self) -> None: + """Result is equivalent when differences list is empty.""" + result = TraceComparisonResult(equivalent=True, differences=[]) + assert result.equivalent is True + assert result.differences == [] + + def test_not_equivalent_when_differences_exist(self) -> None: + """Result is not equivalent when differences list is non-empty.""" + diff = TraceDifference( + 
transaction_index=0, + trace_line_index=0, + baseline="PUSH1", + current="PUSH2", + ) + result = TraceComparisonResult(equivalent=False, differences=[diff]) + assert result.equivalent is False + assert len(result.differences) == 1 + + +class TestTraceComparatorType: + """Test TraceComparatorType enum.""" + + @pytest.mark.parametrize( + "member,value", + [ + (TraceComparatorType.EXACT, "exact"), + (TraceComparatorType.EXACT_NO_GAS, "exact-no-gas"), + (TraceComparatorType.EXACT_NO_STACK, "exact-no-stack"), + (TraceComparatorType.GAS_EXHAUSTION, "gas-exhaustion"), + ], + ) + def test_enum_values( + self, member: TraceComparatorType, value: str + ) -> None: + """Verify enum member values.""" + assert member == value + + +class TestTraceComparatorABC: + """Test TraceComparator base class compare_traces logic.""" + + def test_identical_traces_are_equivalent( + self, traces: Traces, spy_comparator: _SpyComparator + ) -> None: + """Two identical Traces objects produce an equivalent result.""" + result = spy_comparator.compare_traces(traces, traces) + assert result.equivalent is True + assert result.differences == [] + + def test_mismatched_transaction_count( + self, spy_comparator: _SpyComparator + ) -> None: + """Different transaction counts produce a TransactionCountMismatch.""" + baseline = _make_traces( + [ + _make_transaction_traces(), + _make_transaction_traces(), + ] + ) + current = _make_traces([_make_transaction_traces()]) + result = spy_comparator.compare_traces(baseline, current) + assert result.equivalent is False + assert len(result.differences) == 1 + diff = result.differences[0] + assert isinstance(diff, TransactionCountMismatch) + assert diff.baseline_count == 2 + assert diff.current_count == 1 + + def test_delegates_to_compare_transaction_traces( + self, spy_comparator: _SpyComparator + ) -> None: + """Base class calls compare_transaction_traces for each pair.""" + tx1 = _make_transaction_traces() + tx2 = _make_transaction_traces() + baseline = 
_make_traces([tx1, tx2]) + current = _make_traces([tx1, tx2]) + spy_comparator.compare_traces(baseline, current) + assert len(spy_comparator.calls) == 2 + assert spy_comparator.calls[0] == (0,) + assert spy_comparator.calls[1] == (1,) + + def test_aggregates_differences_from_subclass( + self, failing_comparator: _FailingComparator + ) -> None: + """Base class aggregates differences returned by the subclass.""" + baseline = _make_traces([_make_transaction_traces()]) + current = _make_traces([_make_transaction_traces()]) + result = failing_comparator.compare_traces(baseline, current) + assert result.equivalent is False + assert len(result.differences) == 1 + + def test_empty_traces_are_equivalent( + self, spy_comparator: _SpyComparator + ) -> None: + """Two empty Traces objects are equivalent.""" + baseline = _make_traces([]) + current = _make_traces([]) + result = spy_comparator.compare_traces(baseline, current) + assert result.equivalent is True + assert spy_comparator.calls == [] + + def test_single_transaction_delegates_once( + self, spy_comparator: _SpyComparator + ) -> None: + """Single transaction pair calls compare_transaction_traces once.""" + baseline = _make_traces([_make_transaction_traces()]) + current = _make_traces([_make_transaction_traces()]) + spy_comparator.compare_traces(baseline, current) + assert len(spy_comparator.calls) == 1 + assert spy_comparator.calls[0] == (0,) + + +class TestCreateComparator: + """Test the create_comparator factory function.""" + + @pytest.mark.parametrize( + "comparator_type,expected_name", + [ + (TraceComparatorType.EXACT, "exact"), + (TraceComparatorType.EXACT_NO_GAS, "exact-no-gas"), + (TraceComparatorType.EXACT_NO_STACK, "exact-no-stack"), + (TraceComparatorType.GAS_EXHAUSTION, "gas-exhaustion"), + ], + ) + def test_create_supported_comparators( + self, comparator_type: TraceComparatorType, expected_name: str + ) -> None: + """Factory creates the right comparator for supported types.""" + comparator = 
create_comparator(comparator_type) + assert comparator.name == expected_name + + +# --------------------------------------------------------------------------- +# Phase 2: TraceLine.compare(), TransactionTraces.compare(), +# FieldExclusionTraceComparator +# --------------------------------------------------------------------------- + + +class TestTraceLineCompare: + """Test TraceLine.compare() and are_equivalent().""" + + def test_gas_excluded_by_default(self) -> None: + """Gas-only difference produces empty diffs with defaults.""" + baseline = _make_trace_line(gas=0x100) + current = _make_trace_line(gas=0x200) + b_diff, c_diff = baseline.compare(current) + assert b_diff == {} + assert c_diff == {} + + def test_custom_exclude_fields(self) -> None: + """Custom exclude_fields skips specified fields only.""" + baseline = _make_trace_line(pc=0, gas=0x100) + current = _make_trace_line(pc=5, gas=0x200) + b_diff, c_diff = baseline.compare(current, exclude_fields={"pc"}) + assert "pc" not in b_diff + assert "gas" in b_diff + + def test_are_equivalent_with_custom_exclude(self) -> None: + """are_equivalent respects custom exclude_fields.""" + baseline = _make_trace_line(pc=0, gas=0x100) + current = _make_trace_line(pc=0, gas=0x200) + # Default excludes gas → equivalent + assert baseline.are_equivalent(current) + # Exclude nothing → not equivalent + assert not baseline.are_equivalent(current, exclude_fields=set()) + + +class TestTransactionTracesCompare: + """Test TransactionTraces.compare() shared method.""" + + def test_identical_traces_empty_diffs(self) -> None: + """Two identical TransactionTraces produce no diffs.""" + tx = _make_transaction_traces() + assert tx.compare(tx) == [] + + def test_different_trace_lengths(self) -> None: + """Different trace lengths return a structural diff.""" + baseline = _make_transaction_traces( + [_make_trace_line(), _make_trace_line()] + ) + current = _make_transaction_traces([_make_trace_line()]) + diffs = baseline.compare(current) + 
assert len(diffs) == 1 + assert diffs[0].line_index is None + assert "trace_length" in diffs[0].baseline_fields + + def test_different_output(self) -> None: + """Different output returns a structural diff.""" + baseline = _make_transaction_traces(output="0xaa") + current = _make_transaction_traces(output="0xbb") + diffs = baseline.compare(current) + assert any( + d.line_index is None and "output" in d.baseline_fields + for d in diffs + ) + + def test_different_gas_used_without_post_processing(self) -> None: + """Different gas_used is reported when not post-processing.""" + baseline = _make_transaction_traces() + current = _make_transaction_traces() + baseline.gas_used = HexNumber(0x5208) + current.gas_used = HexNumber(0x6000) + diffs = baseline.compare(current, enable_post_processing=False) + assert any( + d.line_index is None and "gas_used" in d.baseline_fields + for d in diffs + ) + + def test_different_gas_used_with_post_processing(self) -> None: + """Different gas_used is ignored when post-processing.""" + baseline = _make_transaction_traces() + current = _make_transaction_traces() + baseline.gas_used = HexNumber(0x5208) + current.gas_used = HexNumber(0x6000) + diffs = baseline.compare(current, enable_post_processing=True) + assert not any( + d.line_index is None and "gas_used" in d.baseline_fields + for d in diffs + ) + + def test_single_field_diff_on_line(self) -> None: + """Single-field difference on a line returns line index and fields.""" + baseline = _make_transaction_traces([_make_trace_line(pc=0)]) + current = _make_transaction_traces([_make_trace_line(pc=5)]) + diffs = baseline.compare(current) + assert len(diffs) == 1 + assert diffs[0].line_index == 0 + assert "pc" in diffs[0].baseline_fields + assert "pc" in diffs[0].current_fields + + def test_multiple_fields_diff_on_one_line(self) -> None: + """Multiple field diffs on one line are grouped together.""" + baseline = _make_transaction_traces([_make_trace_line(pc=0, op=0x60)]) + current = 
_make_transaction_traces([_make_trace_line(pc=5, op=0x61)]) + diffs = baseline.compare(current) + assert len(diffs) == 1 + assert "pc" in diffs[0].baseline_fields + assert "op" in diffs[0].baseline_fields + + def test_exclude_fields(self) -> None: + """Excluded fields are not reported.""" + baseline = _make_transaction_traces( + [_make_trace_line(gas=0x100, pc=0)] + ) + current = _make_transaction_traces([_make_trace_line(gas=0x200, pc=5)]) + diffs = baseline.compare(current, exclude_fields={"gas", "gas_cost"}) + assert len(diffs) == 1 + assert "pc" in diffs[0].baseline_fields + assert "gas" not in diffs[0].baseline_fields + + def test_exclude_fields_all_diffs_excluded(self) -> None: + """When all differing fields are excluded, no diffs reported.""" + baseline = _make_transaction_traces([_make_trace_line(gas=0x100)]) + current = _make_transaction_traces([_make_trace_line(gas=0x200)]) + diffs = baseline.compare(current, exclude_fields={"gas", "gas_cost"}) + assert diffs == [] + + def test_post_processing_removes_gas_stack_pollution(self) -> None: + """GAS opcode stack pollution is cleaned with post-processing.""" + # GAS opcode pushes remaining gas onto stack; next line has it + gas_line = _make_trace_line( + pc=0, op=0x5A, op_name="GAS", depth=1, stack=[] + ) + # Next line: stack has gas value (differs between runs) + next_line_baseline = _make_trace_line( + pc=1, + op=0x60, + op_name="PUSH1", + depth=1, + stack=[0xAAAA], + ) + next_line_current = _make_trace_line( + pc=1, + op=0x60, + op_name="PUSH1", + depth=1, + stack=[0xBBBB], + ) + baseline = _make_transaction_traces([gas_line, next_line_baseline]) + current = _make_transaction_traces([gas_line, next_line_current]) + # Without post-processing: stack differs + diffs_no_pp = baseline.compare( + current, + exclude_fields={"gas", "gas_cost"}, + enable_post_processing=False, + ) + assert len(diffs_no_pp) == 1 + assert "stack" in diffs_no_pp[0].baseline_fields + + # With post-processing: GAS result nullified, 
equivalent + diffs_pp = baseline.compare( + current, + exclude_fields={"gas", "gas_cost"}, + enable_post_processing=True, + ) + assert diffs_pp == [] + + +class TestTransactionTracesAreEquivalentRegression: + """Regression tests for are_equivalent() after refactoring.""" + + def test_identical_traces_equivalent(self) -> None: + """Identical traces are equivalent.""" + tx = _make_transaction_traces() + assert tx.are_equivalent(tx, enable_post_processing=False) + + def test_different_length_not_equivalent(self) -> None: + """Different lengths are not equivalent.""" + baseline = _make_transaction_traces( + [_make_trace_line(), _make_trace_line()] + ) + current = _make_transaction_traces([_make_trace_line()]) + assert not baseline.are_equivalent( + current, enable_post_processing=False + ) + + def test_different_output_not_equivalent(self) -> None: + """Different output is not equivalent.""" + baseline = _make_transaction_traces(output="0xaa") + current = _make_transaction_traces(output="0xbb") + assert not baseline.are_equivalent( + current, enable_post_processing=False + ) + + def test_gas_only_difference_is_equivalent(self) -> None: + """Gas-only difference is equivalent (gas excluded by default).""" + baseline = _make_transaction_traces([_make_trace_line(gas=0x100)]) + current = _make_transaction_traces([_make_trace_line(gas=0x200)]) + assert baseline.are_equivalent(current, enable_post_processing=False) + + def test_pc_difference_not_equivalent(self) -> None: + """Non-gas field difference is not equivalent.""" + baseline = _make_transaction_traces([_make_trace_line(pc=0)]) + current = _make_transaction_traces([_make_trace_line(pc=5)]) + assert not baseline.are_equivalent( + current, enable_post_processing=False + ) + + def test_gas_used_checked_without_post_processing(self) -> None: + """gas_used difference is caught without post-processing.""" + baseline = _make_transaction_traces() + current = _make_transaction_traces() + baseline.gas_used = HexNumber(0x5208) + 
current.gas_used = HexNumber(0x6000) + assert not baseline.are_equivalent( + current, enable_post_processing=False + ) + + def test_gas_used_ignored_with_post_processing(self) -> None: + """gas_used difference is ignored with post-processing.""" + baseline = _make_transaction_traces() + current = _make_transaction_traces() + baseline.gas_used = HexNumber(0x5208) + current.gas_used = HexNumber(0x6000) + assert baseline.are_equivalent(current, enable_post_processing=True) + + +class TestExactComparator: + """Test FieldExclusionTraceComparator with exact config.""" + + @pytest.fixture() + def comparator(self) -> FieldExclusionTraceComparator: + """Return an exact comparator.""" + return create_comparator(TraceComparatorType.EXACT) # type: ignore[return-value] + + def test_identical_traces_are_equivalent( + self, comparator: FieldExclusionTraceComparator + ) -> None: + """Two identical TransactionTraces produce an equivalent result.""" + tx = _make_transaction_traces() + result = comparator.compare_transaction_traces(tx, tx, 0) + assert result.equivalent is True + assert result.differences == [] + + @pytest.mark.parametrize( + "field,baseline_val,current_val", + [ + ("pc", 0, 5), + ("op", 0x60, 0x61), + ("gas", 0x100, 0x200), + ("gas_cost", 0x3, 0x5), + ], + ids=["pc", "op", "gas", "gas_cost"], + ) + def test_single_field_difference( + self, + comparator: FieldExclusionTraceComparator, + field: str, + baseline_val: int, + current_val: int, + ) -> None: + """Single-field differences are detected.""" + baseline = _make_transaction_traces( + [_make_trace_line(**{field: baseline_val})] + ) + current = _make_transaction_traces( + [_make_trace_line(**{field: current_val})] + ) + result = comparator.compare_transaction_traces(baseline, current, 0) + assert result.equivalent is False + assert len(result.differences) == 1 + assert result.differences[0].transaction_index == 0 + assert result.differences[0].trace_line_index == 0 + + def test_differing_stack( + self, comparator: 
FieldExclusionTraceComparator + ) -> None: + """Different stack values are detected.""" + baseline = _make_transaction_traces( + [_make_trace_line(stack=[0x1, 0x2])] + ) + current = _make_transaction_traces( + [_make_trace_line(stack=[0x1, 0x3])] + ) + result = comparator.compare_transaction_traces(baseline, current, 0) + assert result.equivalent is False + assert "stack" in result.differences[0].baseline + + def test_different_trace_lengths( + self, comparator: FieldExclusionTraceComparator + ) -> None: + """Different trace lengths produce a trace_length diff.""" + baseline = _make_transaction_traces( + [_make_trace_line(), _make_trace_line()] + ) + current = _make_transaction_traces([_make_trace_line()]) + result = comparator.compare_transaction_traces(baseline, current, 0) + assert result.equivalent is False + diff = result.differences[0] + assert "trace_length" in diff.baseline + assert "trace_length" in diff.current + + def test_different_output( + self, comparator: FieldExclusionTraceComparator + ) -> None: + """Different output field is detected.""" + baseline = _make_transaction_traces(output="0xaa") + current = _make_transaction_traces(output="0xbb") + result = comparator.compare_transaction_traces(baseline, current, 0) + assert result.equivalent is False + diff = result.differences[0] + assert "0xaa" in diff.baseline + assert "0xbb" in diff.current + + def test_multiple_differences_in_one_transaction( + self, comparator: FieldExclusionTraceComparator + ) -> None: + """Multiple field diffs on one line produce one TraceDifference.""" + baseline = _make_transaction_traces([_make_trace_line(pc=0, op=0x60)]) + current = _make_transaction_traces([_make_trace_line(pc=5, op=0x61)]) + result = comparator.compare_transaction_traces(baseline, current, 0) + assert result.equivalent is False + # One TraceDifference per line, with all differing fields + assert len(result.differences) == 1 + assert "pc" in result.differences[0].baseline + assert "op" in 
result.differences[0].baseline + + def test_assembly_format_baseline_and_current( + self, comparator: FieldExclusionTraceComparator + ) -> None: + """Diff strings contain opcode name and differing field values.""" + baseline = _make_transaction_traces([_make_trace_line(pc=0)]) + current = _make_transaction_traces([_make_trace_line(pc=5)]) + result = comparator.compare_transaction_traces(baseline, current, 0) + diff = result.differences[0] + assert diff.baseline.startswith("PUSH1") + assert diff.current.startswith("PUSH1") + assert "pc=" in diff.baseline + assert "pc=" in diff.current + + def test_full_compare_traces_multi_transaction( + self, comparator: FieldExclusionTraceComparator + ) -> None: + """Integration: multi-tx Traces with one differing tx.""" + identical_tx = _make_transaction_traces([_make_trace_line(pc=0)]) + baseline_diff_tx = _make_transaction_traces([_make_trace_line(pc=10)]) + current_diff_tx = _make_transaction_traces([_make_trace_line(pc=20)]) + baseline = _make_traces([identical_tx, baseline_diff_tx]) + current = _make_traces([identical_tx, current_diff_tx]) + result = comparator.compare_traces(baseline, current) + assert result.equivalent is False + assert all(d.transaction_index == 1 for d in result.differences) + + +# --------------------------------------------------------------------------- +# Phase 3: ExactNoGas config +# --------------------------------------------------------------------------- + + +class TestExactNoGasComparator: + """Test FieldExclusionTraceComparator with exact-no-gas config.""" + + @pytest.fixture() + def comparator(self) -> FieldExclusionTraceComparator: + """Return an exact-no-gas comparator.""" + return create_comparator(TraceComparatorType.EXACT_NO_GAS) # type: ignore[return-value] + + def test_gas_field_difference_is_equivalent( + self, comparator: FieldExclusionTraceComparator + ) -> None: + """Traces differing only in remaining gas are equivalent.""" + baseline = 
_make_transaction_traces([_make_trace_line(gas=0x100)]) + current = _make_transaction_traces([_make_trace_line(gas=0x200)]) + result = comparator.compare_transaction_traces(baseline, current, 0) + assert result.equivalent is True + + def test_gas_and_non_gas_difference( + self, comparator: FieldExclusionTraceComparator + ) -> None: + """Remaining gas diff ignored but non-gas diff (op_name) reported.""" + baseline = _make_transaction_traces( + [_make_trace_line(gas=0x100, op_name="PUSH1")] + ) + current = _make_transaction_traces( + [_make_trace_line(gas=0x200, op_name="PUSH2")] + ) + result = comparator.compare_transaction_traces(baseline, current, 0) + assert result.equivalent is False + diff = result.differences[0] + assert "op_name" in diff.baseline + assert "gas" not in diff.baseline + + def test_stack_difference_detected( + self, comparator: FieldExclusionTraceComparator + ) -> None: + """Non-gas fields like stack are still checked.""" + baseline = _make_transaction_traces([_make_trace_line(stack=[0x1])]) + current = _make_transaction_traces([_make_trace_line(stack=[0x2])]) + result = comparator.compare_transaction_traces(baseline, current, 0) + assert result.equivalent is False + assert "stack" in result.differences[0].baseline + + def test_length_mismatch( + self, comparator: FieldExclusionTraceComparator + ) -> None: + """Different trace lengths are detected.""" + baseline = _make_transaction_traces( + [_make_trace_line(), _make_trace_line()] + ) + current = _make_transaction_traces([_make_trace_line()]) + result = comparator.compare_transaction_traces(baseline, current, 0) + assert result.equivalent is False + assert "trace_length" in result.differences[0].baseline + + def test_output_mismatch( + self, comparator: FieldExclusionTraceComparator + ) -> None: + """Different output field is detected.""" + baseline = _make_transaction_traces(output="0xaa") + current = _make_transaction_traces(output="0xbb") + result = 
comparator.compare_transaction_traces(baseline, current, 0) + assert result.equivalent is False + assert "0xaa" in result.differences[0].baseline + + def test_gas_used_difference_is_equivalent( + self, comparator: FieldExclusionTraceComparator + ) -> None: + """gas_used difference is ignored (post-processing enabled).""" + baseline = _make_transaction_traces() + current = _make_transaction_traces() + baseline.gas_used = HexNumber(0x5208) + current.gas_used = HexNumber(0x6000) + result = comparator.compare_transaction_traces(baseline, current, 0) + assert result.equivalent is True + assert result.differences == [] + + def test_gas_stack_pollution_is_equivalent( + self, comparator: FieldExclusionTraceComparator + ) -> None: + """GAS opcode stack pollution is cleaned by remove_gas().""" + gas_line = _make_trace_line( + pc=0, op=0x5A, op_name="GAS", depth=1, stack=[] + ) + next_baseline = _make_trace_line( + pc=1, + op=0x60, + op_name="PUSH1", + depth=1, + stack=[0xAAAA], + ) + next_current = _make_trace_line( + pc=1, + op=0x60, + op_name="PUSH1", + depth=1, + stack=[0xBBBB], + ) + baseline = _make_transaction_traces([gas_line, next_baseline]) + current = _make_transaction_traces([gas_line, next_current]) + result = comparator.compare_transaction_traces(baseline, current, 0) + assert result.equivalent is True + assert result.differences == [] + + +# --------------------------------------------------------------------------- +# ExactNoStack config +# --------------------------------------------------------------------------- + + +class TestExactNoStackComparator: + """Test FieldExclusionTraceComparator with exact-no-stack config.""" + + @pytest.fixture() + def comparator(self) -> FieldExclusionTraceComparator: + """Return an exact-no-stack comparator.""" + return create_comparator(TraceComparatorType.EXACT_NO_STACK) # type: ignore[return-value] + + def test_identical_traces_are_equivalent( + self, comparator: FieldExclusionTraceComparator + ) -> None: + """Two identical 
TransactionTraces are equivalent.""" + tx = _make_transaction_traces() + result = comparator.compare_transaction_traces(tx, tx, 0) + assert result.equivalent is True + assert result.differences == [] + + def test_stack_difference_is_equivalent( + self, comparator: FieldExclusionTraceComparator + ) -> None: + """Traces differing only in stack are equivalent.""" + baseline = _make_transaction_traces( + [_make_trace_line(stack=[0x1, 0x2])] + ) + current = _make_transaction_traces( + [_make_trace_line(stack=[0xA, 0xB])] + ) + result = comparator.compare_transaction_traces(baseline, current, 0) + assert result.equivalent is True + + def test_gas_difference_detected( + self, comparator: FieldExclusionTraceComparator + ) -> None: + """Traces differing in gas are detected.""" + baseline = _make_transaction_traces([_make_trace_line(gas=0x100)]) + current = _make_transaction_traces([_make_trace_line(gas=0x200)]) + result = comparator.compare_transaction_traces(baseline, current, 0) + assert result.equivalent is False + assert "gas" in result.differences[0].baseline + + def test_gas_used_difference_detected( + self, comparator: FieldExclusionTraceComparator + ) -> None: + """gas_used difference is detected.""" + baseline = _make_transaction_traces() + current = _make_transaction_traces() + baseline.gas_used = HexNumber(0x5208) + current.gas_used = HexNumber(0x6000) + result = comparator.compare_transaction_traces(baseline, current, 0) + assert result.equivalent is False + + def test_pc_difference_detected( + self, comparator: FieldExclusionTraceComparator + ) -> None: + """Non-stack, non-gas field diffs are detected.""" + baseline = _make_transaction_traces([_make_trace_line(pc=0)]) + current = _make_transaction_traces([_make_trace_line(pc=5)]) + result = comparator.compare_transaction_traces(baseline, current, 0) + assert result.equivalent is False + assert len(result.differences) == 1 + assert "pc" in result.differences[0].baseline + + def test_op_name_difference_detected( 
+ self, comparator: FieldExclusionTraceComparator + ) -> None: + """op_name differences are detected.""" + baseline = _make_transaction_traces( + [_make_trace_line(op_name="PUSH1")] + ) + current = _make_transaction_traces([_make_trace_line(op_name="PUSH2")]) + result = comparator.compare_transaction_traces(baseline, current, 0) + assert result.equivalent is False + assert "op_name" in result.differences[0].baseline + + def test_depth_difference_detected( + self, comparator: FieldExclusionTraceComparator + ) -> None: + """Depth differences are detected.""" + baseline = _make_transaction_traces([_make_trace_line(depth=1)]) + current = _make_transaction_traces([_make_trace_line(depth=2)]) + result = comparator.compare_transaction_traces(baseline, current, 0) + assert result.equivalent is False + + def test_length_mismatch( + self, comparator: FieldExclusionTraceComparator + ) -> None: + """Different trace lengths are detected.""" + baseline = _make_transaction_traces( + [_make_trace_line(), _make_trace_line()] + ) + current = _make_transaction_traces([_make_trace_line()]) + result = comparator.compare_transaction_traces(baseline, current, 0) + assert result.equivalent is False + assert "trace_length" in result.differences[0].baseline + + def test_output_mismatch( + self, comparator: FieldExclusionTraceComparator + ) -> None: + """Different output field is detected.""" + baseline = _make_transaction_traces(output="0xaa") + current = _make_transaction_traces(output="0xbb") + result = comparator.compare_transaction_traces(baseline, current, 0) + assert result.equivalent is False + assert "0xaa" in result.differences[0].baseline + + +# --------------------------------------------------------------------------- +# ExactNoStackNoGas config +# --------------------------------------------------------------------------- + + +class TestExactNoStackNoGasComparator: + """Test FieldExclusionTraceComparator with exact-no-stack-no-gas config.""" + + @pytest.fixture() + def 
comparator(self) -> FieldExclusionTraceComparator: + """Return an exact-no-stack-no-gas comparator.""" + return create_comparator(TraceComparatorType.EXACT_NO_STACK_NO_GAS) # type: ignore[return-value] + + def test_identical_traces_are_equivalent( + self, comparator: FieldExclusionTraceComparator + ) -> None: + """Two identical TransactionTraces are equivalent.""" + tx = _make_transaction_traces() + result = comparator.compare_transaction_traces(tx, tx, 0) + assert result.equivalent is True + assert result.differences == [] + + def test_stack_difference_is_equivalent( + self, comparator: FieldExclusionTraceComparator + ) -> None: + """Traces differing only in stack are equivalent.""" + baseline = _make_transaction_traces( + [_make_trace_line(stack=[0x1, 0x2])] + ) + current = _make_transaction_traces( + [_make_trace_line(stack=[0xA, 0xB])] + ) + result = comparator.compare_transaction_traces(baseline, current, 0) + assert result.equivalent is True + + def test_gas_field_difference_is_equivalent( + self, comparator: FieldExclusionTraceComparator + ) -> None: + """Traces differing only in remaining gas are equivalent.""" + baseline = _make_transaction_traces([_make_trace_line(gas=0x100)]) + current = _make_transaction_traces([_make_trace_line(gas=0x200)]) + result = comparator.compare_transaction_traces(baseline, current, 0) + assert result.equivalent is True + + def test_stack_and_gas_difference_is_equivalent( + self, comparator: FieldExclusionTraceComparator + ) -> None: + """Traces differing in stack and remaining gas are equivalent.""" + baseline = _make_transaction_traces( + [_make_trace_line(stack=[0x1, 0x2], gas=0x100)] + ) + current = _make_transaction_traces( + [_make_trace_line(stack=[0xA, 0xB], gas=0x200)] + ) + result = comparator.compare_transaction_traces(baseline, current, 0) + assert result.equivalent is True + + def test_gas_used_difference_detected( + self, comparator: FieldExclusionTraceComparator + ) -> None: + """gas_used difference is 
detected.""" + baseline = _make_transaction_traces() + current = _make_transaction_traces() + baseline.gas_used = HexNumber(0x5208) + current.gas_used = HexNumber(0x6000) + result = comparator.compare_transaction_traces(baseline, current, 0) + assert result.equivalent is False + + def test_pc_difference_detected( + self, comparator: FieldExclusionTraceComparator + ) -> None: + """Non-stack, non-gas field diffs are detected.""" + baseline = _make_transaction_traces([_make_trace_line(pc=0)]) + current = _make_transaction_traces([_make_trace_line(pc=5)]) + result = comparator.compare_transaction_traces(baseline, current, 0) + assert result.equivalent is False + assert len(result.differences) == 1 + assert "pc" in result.differences[0].baseline + + def test_op_name_difference_detected( + self, comparator: FieldExclusionTraceComparator + ) -> None: + """op_name differences are detected.""" + baseline = _make_transaction_traces( + [_make_trace_line(op_name="PUSH1")] + ) + current = _make_transaction_traces([_make_trace_line(op_name="PUSH2")]) + result = comparator.compare_transaction_traces(baseline, current, 0) + assert result.equivalent is False + assert "op_name" in result.differences[0].baseline + + def test_depth_difference_detected( + self, comparator: FieldExclusionTraceComparator + ) -> None: + """Depth differences are detected.""" + baseline = _make_transaction_traces([_make_trace_line(depth=1)]) + current = _make_transaction_traces([_make_trace_line(depth=2)]) + result = comparator.compare_transaction_traces(baseline, current, 0) + assert result.equivalent is False + + def test_length_mismatch( + self, comparator: FieldExclusionTraceComparator + ) -> None: + """Different trace lengths are detected.""" + baseline = _make_transaction_traces( + [_make_trace_line(), _make_trace_line()] + ) + current = _make_transaction_traces([_make_trace_line()]) + result = comparator.compare_transaction_traces(baseline, current, 0) + assert result.equivalent is False + assert 
"trace_length" in result.differences[0].baseline + + def test_output_mismatch( + self, comparator: FieldExclusionTraceComparator + ) -> None: + """Different output field is detected.""" + baseline = _make_transaction_traces(output="0xaa") + current = _make_transaction_traces(output="0xbb") + result = comparator.compare_transaction_traces(baseline, current, 0) + assert result.equivalent is False + assert "0xaa" in result.differences[0].baseline + + +# --------------------------------------------------------------------------- +# GasExhaustionTraceComparator +# --------------------------------------------------------------------------- + + +class TestGasExhaustionTraceComparator: + """Test GasExhaustionTraceComparator.compare_transaction_traces.""" + + @pytest.fixture() + def comparator(self) -> GasExhaustionTraceComparator: + """Return a GasExhaustionTraceComparator.""" + return GasExhaustionTraceComparator() + + def test_no_oog_in_either_is_equivalent( + self, comparator: GasExhaustionTraceComparator + ) -> None: + """Both sides without OOG are equivalent.""" + tx = _make_transaction_traces() + result = comparator.compare_transaction_traces(tx, tx, 0) + assert result.equivalent is True + assert result.differences == [] + + def test_oog_at_same_line_is_equivalent( + self, comparator: GasExhaustionTraceComparator + ) -> None: + """Both sides with OOG at the same line are equivalent.""" + oog_line = _make_trace_line(error="out of gas") + baseline = _make_transaction_traces([_make_trace_line(), oog_line]) + current = _make_transaction_traces([_make_trace_line(), oog_line]) + result = comparator.compare_transaction_traces(baseline, current, 0) + assert result.equivalent is True + + def test_baseline_oog_current_no_oog( + self, comparator: GasExhaustionTraceComparator + ) -> None: + """Baseline has out-of-gas but current does not — different.""" + oog_line = _make_trace_line(error="out of gas") + baseline = _make_transaction_traces([_make_trace_line(), oog_line]) + 
current = _make_transaction_traces( + [_make_trace_line(), _make_trace_line()] + ) + result = comparator.compare_transaction_traces(baseline, current, 0) + assert result.equivalent is False + assert len(result.differences) == 1 + diff = result.differences[0] + assert diff.trace_line_index == 1 + assert "error=out of gas" in diff.baseline + assert diff.current == "no out-of-gas" + + def test_current_oog_baseline_no_oog( + self, comparator: GasExhaustionTraceComparator + ) -> None: + """Current has out-of-gas but baseline does not — different.""" + oog_line = _make_trace_line(error="out of gas") + baseline = _make_transaction_traces([_make_trace_line()]) + current = _make_transaction_traces([oog_line]) + result = comparator.compare_transaction_traces(baseline, current, 0) + assert result.equivalent is False + diff = result.differences[0] + assert diff.trace_line_index == 0 + assert diff.baseline == "no out-of-gas" + assert "error=out of gas" in diff.current + + def test_oog_at_different_lines( + self, comparator: GasExhaustionTraceComparator + ) -> None: + """OOG at different line indices — different.""" + baseline = _make_transaction_traces( + [ + _make_trace_line(), + _make_trace_line(error="out of gas"), + _make_trace_line(), + ] + ) + current = _make_transaction_traces( + [ + _make_trace_line(), + _make_trace_line(), + _make_trace_line(error="out of gas"), + ] + ) + result = comparator.compare_transaction_traces(baseline, current, 0) + assert result.equivalent is False + + def test_case_insensitive_oog_detection( + self, comparator: GasExhaustionTraceComparator + ) -> None: + """OOG detection is case-insensitive.""" + baseline = _make_transaction_traces( + [_make_trace_line(error="Out Of Gas")] + ) + current = _make_transaction_traces( + [_make_trace_line(error="out of gas")] + ) + result = comparator.compare_transaction_traces(baseline, current, 0) + assert result.equivalent is True + + def test_multiple_oog_points_same( + self, comparator: 
GasExhaustionTraceComparator + ) -> None: + """Multiple OOG points at same indices are equivalent.""" + baseline = _make_transaction_traces( + [ + _make_trace_line(error="out of gas"), + _make_trace_line(), + _make_trace_line(error="out of gas"), + ] + ) + current = _make_transaction_traces( + [ + _make_trace_line(error="out of gas"), + _make_trace_line(), + _make_trace_line(error="out of gas"), + ] + ) + result = comparator.compare_transaction_traces(baseline, current, 0) + assert result.equivalent is True + + def test_multiple_oog_points_different( + self, comparator: GasExhaustionTraceComparator + ) -> None: + """Different out-of-gas points — each side's line shown.""" + baseline = _make_transaction_traces( + [ + _make_trace_line(error="out of gas"), + _make_trace_line(), + ] + ) + current = _make_transaction_traces( + [ + _make_trace_line(), + _make_trace_line(error="out of gas"), + ] + ) + result = comparator.compare_transaction_traces(baseline, current, 0) + assert result.equivalent is False + assert len(result.differences) == 2 + # Line 0: baseline out-of-gas, current not + assert result.differences[0].trace_line_index == 0 + assert "error=out of gas" in result.differences[0].baseline + assert result.differences[0].current == "no out-of-gas" + # Line 1: current out-of-gas, baseline not + assert result.differences[1].trace_line_index == 1 + assert result.differences[1].baseline == "no out-of-gas" + assert "error=out of gas" in result.differences[1].current + + def test_non_oog_errors_ignored( + self, comparator: GasExhaustionTraceComparator + ) -> None: + """Non-OOG errors like 'stack underflow' are not OOG points.""" + baseline = _make_transaction_traces( + [_make_trace_line(error="stack underflow")] + ) + current = _make_transaction_traces( + [_make_trace_line(error="stack underflow")] + ) + result = comparator.compare_transaction_traces(baseline, current, 0) + assert result.equivalent is True + + def test_transaction_index_in_difference( + self, 
comparator: GasExhaustionTraceComparator + ) -> None: + """Transaction index is set correctly in differences.""" + baseline = _make_transaction_traces( + [_make_trace_line(error="out of gas")] + ) + current = _make_transaction_traces([_make_trace_line()]) + result = comparator.compare_transaction_traces(baseline, current, 3) + assert result.differences[0].transaction_index == 3 + + def test_multi_transaction_via_compare_traces( + self, comparator: GasExhaustionTraceComparator + ) -> None: + """Integration: multi-tx with OOG diff in one tx only.""" + ok_tx = _make_transaction_traces([_make_trace_line()]) + b_oog_tx = _make_transaction_traces( + [_make_trace_line(error="out of gas")] + ) + c_ok_tx = _make_transaction_traces([_make_trace_line()]) + baseline = _make_traces([ok_tx, b_oog_tx]) + current = _make_traces([ok_tx, c_ok_tx]) + result = comparator.compare_traces(baseline, current) + assert result.equivalent is False + assert all(d.transaction_index == 1 for d in result.differences) diff --git a/packages/testing/src/execution_testing/client_clis/tests/test_trace_report_formatter.py b/packages/testing/src/execution_testing/client_clis/tests/test_trace_report_formatter.py new file mode 100644 index 00000000000..b274bd1e918 --- /dev/null +++ b/packages/testing/src/execution_testing/client_clis/tests/test_trace_report_formatter.py @@ -0,0 +1,215 @@ +"""Tests for trace report formatters.""" + +import json + +import pytest + +from execution_testing.client_clis.trace_comparators import ( + TraceComparisonResult, + TraceDifference, +) +from execution_testing.client_clis.trace_report_formatter import ( + JsonTracesDiffReportFormatter, + TextTracesDiffReportFormatter, +) + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture() +def formatter() -> TextTracesDiffReportFormatter: + """Return a default formatter.""" + return 
TextTracesDiffReportFormatter() + + +def _make_result( + equivalent: bool = True, + differences: list[TraceDifference] | None = None, +) -> TraceComparisonResult: + """Create a TraceComparisonResult.""" + return TraceComparisonResult( + equivalent=equivalent, + differences=differences or [], + ) + + +def _make_diff( + tx: int = 0, + line: int = 0, + baseline: str = "PUSH1 (pc=0)", + current: str = "PUSH1 (pc=1)", +) -> TraceDifference: + """Create a TraceDifference.""" + return TraceDifference( + transaction_index=tx, + trace_line_index=line, + baseline=baseline, + current=current, + ) + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + + +class TestTextFormatTestResult: + """Test TextTracesDiffReportFormatter.format_test_result.""" + + def test_single_comparator_equivalent( + self, formatter: TextTracesDiffReportFormatter + ) -> None: + """All equivalent returns None.""" + output = formatter.format_test_result( + "test_foo", {"exact": _make_result(equivalent=True)} + ) + assert output is None + + def test_single_comparator_with_differences( + self, formatter: TextTracesDiffReportFormatter + ) -> None: + """Single comparator with diffs shows DIFFERENT and details.""" + diffs = [ + _make_diff( + tx=0, + line=12, + baseline="ADD (gas=0x4e20)", + current="ADD (gas=0x4e10)", + ), + ] + output = formatter.format_test_result( + "test_bar", + {"exact": _make_result(equivalent=False, differences=diffs)}, + ) + assert output is not None + assert "DIFFERENT" in output + assert "0x4e20" in output + assert "0x4e10" in output + + def test_multiple_comparators_mixed( + self, formatter: TextTracesDiffReportFormatter + ) -> None: + """Only non-equivalent comparators are shown.""" + diffs = [_make_diff()] + output = formatter.format_test_result( + "test_baz", + { + "exact": _make_result(equivalent=False, differences=diffs), + "exact-no-gas": 
_make_result(equivalent=True), + }, + ) + assert output is not None + assert "[exact]" in output + assert "DIFFERENT" in output + assert "exact-no-gas" not in output + assert "EQUIVALENT" not in output + + def test_differences_capped(self) -> None: + """Only the first max_differences diffs are shown.""" + fmt = TextTracesDiffReportFormatter(max_differences=3) + diffs = [_make_diff(line=i) for i in range(10)] + output = fmt.format_test_result( + "test_cap", + {"exact": _make_result(equivalent=False, differences=diffs)}, + ) + assert output is not None + assert "7 more" in output + + +class TestTextFormatSummary: + """Test TextTracesDiffReportFormatter.format_summary.""" + + def test_no_results( + self, formatter: TextTracesDiffReportFormatter + ) -> None: + """Empty results produce minimal output.""" + output = formatter.format_summary({}) + assert "0 tests verified" in output + + def test_multiple_tests_aggregation( + self, formatter: TextTracesDiffReportFormatter + ) -> None: + """Summary correctly counts tests and those with differences.""" + all_results = { + "test_a": {"exact": _make_result(equivalent=True)}, + "test_b": { + "exact": _make_result( + equivalent=False, + differences=[_make_diff()], + ) + }, + } + output = formatter.format_summary(all_results) + assert "2 tests verified" in output + assert "1 with differences" in output + + def test_all_equivalent( + self, formatter: TextTracesDiffReportFormatter + ) -> None: + """When all tests pass, only summary line is shown.""" + all_results = { + "test_a": {"exact": _make_result(equivalent=True)}, + "test_b": {"exact": _make_result(equivalent=True)}, + } + output = formatter.format_summary(all_results) + assert "2 tests verified" in output + assert "0 with differences" in output + assert "test_a" not in output + assert "test_b" not in output + + +# --------------------------------------------------------------------------- +# JSON formatter +# 
# ---------------------------------------------------------------------------


class TestJsonTracesDiffReportFormatter:
    """Exercise JsonTracesDiffReportFormatter.write."""

    # NOTE(review): pytest documents the `tmp_path` fixture as a
    # `pathlib.Path`; the `TempPathFactory` annotations below are what force
    # the `type: ignore[operator]` comments. Consider annotating as `Path`.

    def test_writes_json_file(self, tmp_path: pytest.TempPathFactory) -> None:
        """A report with one difference is written as valid JSON."""
        report_path = tmp_path / "report.json"  # type: ignore[operator]
        failing = _make_result(
            equivalent=False,
            differences=[_make_diff(tx=0, line=3)],
        )
        writer = JsonTracesDiffReportFormatter(report_path)
        writer.write({"test_a": {"exact": failing}})

        loaded = json.loads(report_path.read_text())
        assert "test_a" in loaded
        assert loaded["test_a"]["exact"]["equivalent"] is False
        entries = loaded["test_a"]["exact"]["differences"]
        assert len(entries) == 1
        first = entries[0]
        assert first["transaction_index"] == 0
        assert first["trace_line_index"] == 3

    def test_equivalent_tests_included(
        self, tmp_path: pytest.TempPathFactory
    ) -> None:
        """Equivalent tests still appear in JSON (unlike the text report)."""
        report_path = tmp_path / "report.json"  # type: ignore[operator]
        passing = {"test_a": {"exact": _make_result(equivalent=True)}}
        JsonTracesDiffReportFormatter(report_path).write(passing)

        exact = json.loads(report_path.read_text())["test_a"]["exact"]
        assert exact["equivalent"] is True
        assert exact["differences"] == []

    def test_creates_parent_directories(
        self, tmp_path: pytest.TempPathFactory
    ) -> None:
        """Missing parent directories are created before writing."""
        nested = tmp_path / "sub" / "dir" / "report.json"  # type: ignore[operator]
        writer = JsonTracesDiffReportFormatter(nested)
        writer.write({"test_a": {"exact": _make_result(equivalent=True)}})
        assert nested.exists()
for verifying EVM execution traces against a baseline.""" + +from abc import ABC, abstractmethod +from dataclasses import dataclass, field +from enum import StrEnum + +from execution_testing.client_clis.cli_types import ( + TraceLine, + Traces, + TransactionTraces, +) + + +class TraceComparatorType(StrEnum): + """Supported trace comparator strategies.""" + + EXACT = "exact" + EXACT_NO_GAS = "exact-no-gas" + EXACT_NO_STACK = "exact-no-stack" + EXACT_NO_STACK_NO_GAS = "exact-no-stack-no-gas" + GAS_EXHAUSTION = "gas-exhaustion" + + +def _format_trace_line_diff( + trace_line: TraceLine, + differing_fields: dict[str, str], +) -> str: + """ + Format a trace line as an assembly-like string with diffs. + + Return the opcode name, with differing field values in brackets + if any non-opcode fields differ. + Example: "PUSH1 (pc=0x3, stack=['0x4'])" + """ + if not differing_fields: + return trace_line.op_name + fields_str = ", ".join(f"{k}={v}" for k, v in differing_fields.items()) + return f"{trace_line.op_name} ({fields_str})" + + +@dataclass +class TraceDifference: + """A difference between baseline and current trace at a specific line.""" + + transaction_index: int + trace_line_index: int + baseline: str + current: str + + +@dataclass +class TransactionCountMismatch(TraceDifference): + """Structural mismatch: different number of transactions.""" + + transaction_index: int = 0 + trace_line_index: int = -1 + baseline: str = "" + current: str = "" + baseline_count: int = 0 + current_count: int = 0 + + +@dataclass +class TraceComparisonResult: + """Result of comparing two Traces objects.""" + + equivalent: bool + differences: list[TraceDifference] = field(default_factory=list) + + +class TraceComparator(ABC): + """Abstract base class for trace comparison strategies.""" + + @property + @abstractmethod + def name(self) -> str: + """Return the comparator's name.""" + ... 
+ + @abstractmethod + def compare_transaction_traces( + self, + baseline: TransactionTraces, + current: TransactionTraces, + transaction_index: int, + ) -> TraceComparisonResult: + """Compare a single transaction's traces.""" + ... + + def compare_traces( + self, + baseline: Traces, + current: Traces, + ) -> TraceComparisonResult: + """Compare two Traces objects by iterating transaction pairs.""" + if len(baseline.root) != len(current.root): + return TraceComparisonResult( + equivalent=False, + differences=[ + TransactionCountMismatch( + baseline_count=len(baseline.root), + current_count=len(current.root), + ) + ], + ) + + all_differences: list[TraceDifference] = [] + for i, (b_tx, c_tx) in enumerate( + zip(baseline.root, current.root, strict=False) + ): + result = self.compare_transaction_traces(b_tx, c_tx, i) + all_differences.extend(result.differences) + + return TraceComparisonResult( + equivalent=len(all_differences) == 0, + differences=all_differences, + ) + + +def _build_result_from_compare( + baseline: TransactionTraces, + current: TransactionTraces, + transaction_index: int, + exclude_fields: set[str] | None = None, + enable_post_processing: bool = False, +) -> TraceComparisonResult: + """ + Build a TraceComparisonResult from TransactionTraces.compare(). + + Convert the raw diff tuples from compare() into TraceDifference + objects with assembly-like strings. + """ + raw_diffs = baseline.compare( + current, + exclude_fields=exclude_fields, + enable_post_processing=enable_post_processing, + ) + if not raw_diffs: + return TraceComparisonResult(equivalent=True) + + # Only report the first difference: once traces diverge, subsequent + # lines will mostly differ too. 
+ diff = raw_diffs[0] + if diff.line_index is None: + b_str = ", ".join(f"{k}={v}" for k, v in diff.baseline_fields.items()) + c_str = ", ".join(f"{k}={v}" for k, v in diff.current_fields.items()) + trace_diff = TraceDifference( + transaction_index=transaction_index, + trace_line_index=-1, + baseline=b_str, + current=c_str, + ) + else: + b_line = baseline.traces[diff.line_index] + c_line = current.traces[diff.line_index] + trace_diff = TraceDifference( + transaction_index=transaction_index, + trace_line_index=diff.line_index, + baseline=_format_trace_line_diff(b_line, diff.baseline_fields), + current=_format_trace_line_diff(c_line, diff.current_fields), + ) + return TraceComparisonResult( + equivalent=False, + differences=[trace_diff], + ) + + +class FieldExclusionTraceComparator(TraceComparator): + """Compare traces field-by-field, optionally excluding fields.""" + + def __init__( + self, + comparator_name: str, + exclude_fields: set[str] | None = None, + enable_post_processing: bool = False, + ) -> None: + self._name = comparator_name + self._exclude_fields = exclude_fields + self._enable_post_processing = enable_post_processing + + @property + def name(self) -> str: + """Return the comparator's name.""" + return self._name + + def compare_transaction_traces( + self, + baseline: TransactionTraces, + current: TransactionTraces, + transaction_index: int, + ) -> TraceComparisonResult: + """Compare trace fields, excluding configured fields.""" + return _build_result_from_compare( + baseline, + current, + transaction_index, + exclude_fields=self._exclude_fields, + enable_post_processing=self._enable_post_processing, + ) + + +def _is_out_of_gas_error(error: str | None) -> bool: + """Return True if the error string indicates an out-of-gas condition.""" + if error is None: + return False + return "out of gas" in error.lower() + + +def _find_gas_exhaustion_points( + tx: TransactionTraces, +) -> list[int]: + """Return trace line indices where an out-of-gas error occurs.""" 
+ return [ + i + for i, line in enumerate(tx.traces) + if _is_out_of_gas_error(line.error) + ] + + +def _format_oog_trace_line( + tx: TransactionTraces, + line_index: int, +) -> str: + """Format a trace line showing its error field for OOG reporting.""" + if line_index >= len(tx.traces): + return "no trace line" + line = tx.traces[line_index] + return _format_trace_line_diff(line, {"error": str(line.error)}) + + +class GasExhaustionTraceComparator(TraceComparator): + """ + Detect differences in gas exhaustion between traces. + + Equivalent when both sides have no out-of-gas errors or when + both run out of gas at the same trace line(s). Different when + the out-of-gas points diverge. + """ + + @property + def name(self) -> str: + """Return the comparator's name.""" + return "gas-exhaustion" + + def compare_transaction_traces( + self, + baseline: TransactionTraces, + current: TransactionTraces, + transaction_index: int, + ) -> TraceComparisonResult: + """Compare gas exhaustion points between two transaction traces.""" + b_set = set(_find_gas_exhaustion_points(baseline)) + c_set = set(_find_gas_exhaustion_points(current)) + + if b_set == c_set: + return TraceComparisonResult(equivalent=True) + + differences: list[TraceDifference] = [] + for line_index in sorted(b_set - c_set): + differences.append( + TraceDifference( + transaction_index=transaction_index, + trace_line_index=line_index, + baseline=_format_oog_trace_line(baseline, line_index), + current="no out-of-gas", + ) + ) + for line_index in sorted(c_set - b_set): + differences.append( + TraceDifference( + transaction_index=transaction_index, + trace_line_index=line_index, + baseline="no out-of-gas", + current=_format_oog_trace_line(current, line_index), + ) + ) + + return TraceComparisonResult( + equivalent=False, + differences=differences, + ) + + +_FIELD_EXCLUSION_CONFIGS: dict[ + TraceComparatorType, + tuple[set[str] | None, bool], +] = { + TraceComparatorType.EXACT: (None, False), + 
TraceComparatorType.EXACT_NO_GAS: ({"gas"}, True), + TraceComparatorType.EXACT_NO_STACK: ({"stack"}, False), + TraceComparatorType.EXACT_NO_STACK_NO_GAS: ({"gas", "stack"}, False), +} + + +def create_comparator( + comparator_type: TraceComparatorType, +) -> TraceComparator: + """Create a comparator instance from the given type.""" + if comparator_type == TraceComparatorType.GAS_EXHAUSTION: + return GasExhaustionTraceComparator() + if comparator_type in _FIELD_EXCLUSION_CONFIGS: + exclude_fields, post_processing = _FIELD_EXCLUSION_CONFIGS[ + comparator_type + ] + return FieldExclusionTraceComparator( + comparator_type.value, + exclude_fields=exclude_fields, + enable_post_processing=post_processing, + ) + raise ValueError(f"Unknown comparator type: {comparator_type}") diff --git a/packages/testing/src/execution_testing/client_clis/trace_report_formatter.py b/packages/testing/src/execution_testing/client_clis/trace_report_formatter.py new file mode 100644 index 00000000000..fd0d2cd9dae --- /dev/null +++ b/packages/testing/src/execution_testing/client_clis/trace_report_formatter.py @@ -0,0 +1,134 @@ +"""Report formatters for trace comparison results.""" + +import json +from abc import ABC, abstractmethod +from pathlib import Path +from typing import Any + +from execution_testing.client_clis.trace_comparators import ( + TraceComparisonResult, +) + + +class TracesDiffReportFormatter(ABC): + """Abstract base class for formatting trace comparison reports.""" + + @abstractmethod + def format_test_result( + self, + test_id: str, + results: dict[str, TraceComparisonResult], + ) -> str | None: + """ + Format one test's comparison results. + + Return None if there is nothing to report (e.g. all equivalent). + """ + ... + + @abstractmethod + def format_summary( + self, + all_results: dict[str, dict[str, TraceComparisonResult]], + ) -> str: + """Format the aggregated report for all tests.""" + ... 
class TextTracesDiffReportFormatter(TracesDiffReportFormatter):
    """Human-readable plain text formatter."""

    def __init__(self, max_differences: int = 10) -> None:
        """Initialize with a cap on displayed differences per comparator."""
        self.max_differences = max_differences

    def format_test_result(
        self,
        test_id: str,
        results: dict[str, TraceComparisonResult],
    ) -> str | None:
        """
        Format one test's comparison results.

        Emit a header line per non-equivalent comparator, then up to
        ``max_differences`` baseline/current line pairs, then a
        "... (N more)" line when differences were cut off.

        Return None if all comparators are equivalent.
        """
        # A negative cap would make the slice below silently drop entries
        # from the end of the list; treat it as "show none".
        limit = max(self.max_differences, 0)
        diff_lines: list[str] = []
        for name, result in results.items():
            if result.equivalent:
                continue
            count = len(result.differences)
            noun = "difference" if count == 1 else "differences"
            diff_lines.append(f" [{name}] DIFFERENT ({count} {noun})")
            shown = result.differences[:limit]
            for diff in shown:
                loc = (
                    f"tx[{diff.transaction_index}] "
                    f"line[{diff.trace_line_index}]"
                )
                diff_lines.append(f" {loc} baseline: {diff.baseline}")
                diff_lines.append(f" {loc} current: {diff.current}")
            remaining = count - len(shown)
            if remaining > 0:
                diff_lines.append(f" ... ({remaining} more)")
        if not diff_lines:
            return None
        return "\n".join([f"{test_id}:", *diff_lines])

    def format_summary(
        self,
        all_results: dict[str, dict[str, TraceComparisonResult]],
    ) -> str:
        """
        Format the aggregated report for all tests.

        List every test that has differences (each followed by a blank
        line) and end with a summary line giving the total number of
        tests and how many had differences.
        """
        lines: list[str] = []
        with_diffs = 0
        for test_id, results in all_results.items():
            formatted = self.format_test_result(test_id, results)
            if formatted is None:
                continue
            lines.extend((formatted, ""))
            with_diffs += 1

        total = len(all_results)
        lines.append(
            f"Summary: {total} tests verified, {with_diffs} with differences"
        )
        return "\n".join(lines)


class JsonTracesDiffReportFormatter:
    """Write trace comparison results to a JSON file."""

    def __init__(self, output_path: Path) -> None:
        """Initialize with the output file path."""
        self.output_path = output_path

    @staticmethod
    def _result_to_dict(
        result: TraceComparisonResult,
    ) -> dict[str, Any]:
        """
        Convert a TraceComparisonResult to a JSON-serializable dict.

        Dataclass differences are serialized with all of their fields so
        that extra fields on subclasses are kept. Any other object
        contributes the four core fields only.
        """
        from dataclasses import asdict, is_dataclass

        def _difference_to_dict(diff: Any) -> dict[str, Any]:
            # is_dataclass() is also True for dataclass *types*; asdict()
            # only accepts instances.
            if is_dataclass(diff) and not isinstance(diff, type):
                return asdict(diff)
            return {
                "transaction_index": diff.transaction_index,
                "trace_line_index": diff.trace_line_index,
                "baseline": diff.baseline,
                "current": diff.current,
            }

        return {
            "equivalent": result.equivalent,
            "differences": [
                _difference_to_dict(diff) for diff in result.differences
            ],
        }

    def write(
        self,
        all_results: dict[str, dict[str, TraceComparisonResult]],
    ) -> None:
        """
        Write the full report to the JSON file.

        Create missing parent directories, then write the report as
        indented JSON with a trailing newline, encoded as UTF-8.
        """
        report: dict[str, Any] = {
            test_id: {
                name: self._result_to_dict(result)
                for name, result in comparator_results.items()
            }
            for test_id, comparator_results in all_results.items()
        }
        self.output_path.parent.mkdir(parents=True, exist_ok=True)
        self.output_path.write_text(
            json.dumps(report, indent=2) + "\n", encoding="utf-8"
        )
a/packages/testing/src/execution_testing/client_clis/transition_tool.py +++ b/packages/testing/src/execution_testing/client_clis/transition_tool.py @@ -927,6 +927,8 @@ def evaluate( if self.output_cache is not None: cached_result = self.output_cache.get(current_call_id) if cached_result is not None: + if self.trace and cached_result.result.traces is not None: + self.append_traces(cached_result.result.traces) return self.process_result(cached_result) debug_output_path = self.get_next_transition_tool_output_path( current_call_id