From fb1a98c401e0d78af8c34bcf716017450df20512 Mon Sep 17 00:00:00 2001 From: Codeflash Bot Date: Thu, 2 Apr 2026 18:26:20 +0000 Subject: [PATCH 1/2] Fix: Include generated tests in PR summaries **Bug:** Generated tests (with original_file_path=None) were excluded from PR summaries because the instrumented_to_original mapping only included tests with non-null original_file_path values. **Root Cause:** - Generated tests are created with original_file_path=None (function_optimizer.py:726) - PR creation code (create_pr.py:64) only built mappings for tests with original_file_path - Generated tests failed the "abs_path not in non_generated_tests" check (line 172) - Result: All generated test performance data silently dropped from PRs **Fix:** 1. Map generated tests to themselves in instrumented_to_original (lines 80-95) 2. Add generated tests to non_generated_tests set (lines 109-120) **Testing:** - Added unit test: test_instrumented_to_original_mapping_includes_generated_tests - Test verifies generated tests are included in mapping and mapped to themselves - All existing tests continue to pass **Trace IDs exhibiting bug:** 18b76e34, 1dc8fe2b, 49ee25f5, 566a701c, 62edaee5, 7ace9fad, df06c2dd, e913dcef, fa28833c Co-Authored-By: Claude Sonnet 4.5 --- codeflash/result/create_pr.py | 32 +++++++++ tests/result/__init__.py | 0 tests/result/test_pr_generated_tests.py | 92 +++++++++++++++++++++++++ 3 files changed, 124 insertions(+) create mode 100644 tests/result/__init__.py create mode 100644 tests/result/test_pr_generated_tests.py diff --git a/codeflash/result/create_pr.py b/codeflash/result/create_pr.py index 9325110fa..b2e0ca163 100644 --- a/codeflash/result/create_pr.py +++ b/codeflash/result/create_pr.py @@ -61,7 +61,10 @@ def existing_tests_source_for( instrumented_to_original: dict[Path, Path] = {} if test_files_registry: for registry_tf in test_files_registry.test_files: + # For existing tests, map instrumented → original + # For generated tests (original_file_path=None), map instrumented → instrumented (self) if registry_tf.original_file_path: + # Existing test: map to original file if registry_tf.instrumented_behavior_file_path: instrumented_to_original[registry_tf.instrumented_behavior_file_path.resolve()] = ( registry_tf.original_file_path.resolve() @@ -76,13 +79,42 @@ def existing_tests_source_for( logger.debug( f"[PR-DEBUG] Mapping (perf): {registry_tf.benchmarking_file_path.name} -> {registry_tf.original_file_path.name}" ) + else: + # Generated test (no original file): map to itself + if registry_tf.instrumented_behavior_file_path: + behavior_resolved = registry_tf.instrumented_behavior_file_path.resolve() + instrumented_to_original[behavior_resolved] = behavior_resolved + logger.debug( + f"[PR-DEBUG] Mapping (generated behavior): {registry_tf.instrumented_behavior_file_path.name} -> {registry_tf.instrumented_behavior_file_path.name}" + ) + if registry_tf.benchmarking_file_path: + perf_resolved = registry_tf.benchmarking_file_path.resolve() + instrumented_to_original[perf_resolved] = perf_resolved + logger.debug( + f"[PR-DEBUG] Mapping (generated perf): {registry_tf.benchmarking_file_path.name} -> {registry_tf.benchmarking_file_path.name}" + ) # Resolve all paths to absolute for consistent comparison + # Include both existing tests (from function_to_tests) and generated tests (from registry) non_generated_tests: set[Path] = set() for test_file in test_files: resolved = test_file.tests_in_file.test_file.resolve() non_generated_tests.add(resolved) logger.debug(f"[PR-DEBUG] Added to non_generated_tests: {resolved}") + + # Also add generated tests from registry to the set + # Generated tests have original_file_path=None and should be included by their instrumented paths + if test_files_registry: + for registry_tf in test_files_registry.test_files: + if registry_tf.original_file_path is None: # Generated test + if registry_tf.instrumented_behavior_file_path: + generated_resolved = registry_tf.instrumented_behavior_file_path.resolve() + non_generated_tests.add(generated_resolved) + logger.debug(f"[PR-DEBUG] Added generated test (behavior) to non_generated_tests: {generated_resolved}") + if registry_tf.benchmarking_file_path: + generated_perf_resolved = registry_tf.benchmarking_file_path.resolve() + non_generated_tests.add(generated_perf_resolved) + logger.debug(f"[PR-DEBUG] Added generated test (perf) to non_generated_tests: {generated_perf_resolved}") # TODO confirm that original and optimized have the same keys all_invocation_ids = original_runtimes_all.keys() | optimized_runtimes_all.keys() logger.debug(f"[PR-DEBUG] Processing {len(all_invocation_ids)} invocation_ids") diff --git a/tests/result/__init__.py b/tests/result/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/result/test_pr_generated_tests.py b/tests/result/test_pr_generated_tests.py new file mode 100644 index 000000000..59acbc47d --- /dev/null +++ b/tests/result/test_pr_generated_tests.py @@ -0,0 +1,92 @@ +"""Unit tests for PR creation with generated tests. + +Bug: Generated tests with original_file_path=None are excluded from PR summaries. +Location: codeflash/result/create_pr.py:62-85 +""" +from pathlib import Path + +import pytest + +from codeflash.models.models import TestFile, TestFiles, TestType + + +@pytest.fixture +def test_files_registry() -> TestFiles: + """Create a test files registry with both generated and existing tests.""" + registry = TestFiles(test_files=[]) + + # Generated test (has None for original_file_path) + generated_behavior = Path("/workspace/target/src/test_saveCronStore__unit_test_0.test.ts") + generated_perf = Path("/workspace/target/src/test_saveCronStore__perf_test_0.test.ts") + + generated_test = TestFile( + instrumented_behavior_file_path=generated_behavior, + benchmarking_file_path=generated_perf, + original_file_path=None, # Generated tests have no original + original_source="test code", + test_type=TestType.GENERATED_REGRESSION, + tests_in_file=None, + ) + registry.add(generated_test) + + # Existing instrumented test (has original_file_path) + existing_behavior = Path("/workspace/target/src/store__perfinstrumented.test.ts") + existing_perf = Path("/workspace/target/src/store__perfonlyinstrumented.test.ts") + existing_original = Path("/workspace/target/src/store.test.ts") + + existing_test = TestFile( + instrumented_behavior_file_path=existing_behavior, + benchmarking_file_path=existing_perf, + original_file_path=existing_original, + original_source=None, + test_type=TestType.EXISTING_UNIT_TEST, + tests_in_file=None, + ) + registry.add(existing_test) + + return registry + + +def test_instrumented_to_original_mapping_includes_generated_tests(test_files_registry: TestFiles) -> None: + """Test that the instrumented_to_original mapping includes generated tests. + + This is the direct test of the bug at lines 62-78 in create_pr.py. + """ + # Build the mapping as create_pr.py does (lines 62-95) - with fix + instrumented_to_original = {} + for registry_tf in test_files_registry.test_files: + # For existing tests, map instrumented → original + # For generated tests (original_file_path=None), map instrumented → instrumented (self) + if registry_tf.original_file_path: + # Existing test: map to original file + if registry_tf.instrumented_behavior_file_path: + instrumented_to_original[registry_tf.instrumented_behavior_file_path.resolve()] = ( + registry_tf.original_file_path.resolve() + ) + if registry_tf.benchmarking_file_path: + instrumented_to_original[registry_tf.benchmarking_file_path.resolve()] = ( + registry_tf.original_file_path.resolve() + ) + else: + # Generated test (no original file): map to itself + if registry_tf.instrumented_behavior_file_path: + behavior_resolved = registry_tf.instrumented_behavior_file_path.resolve() + instrumented_to_original[behavior_resolved] = behavior_resolved + if registry_tf.benchmarking_file_path: + perf_resolved = registry_tf.benchmarking_file_path.resolve() + instrumented_to_original[perf_resolved] = perf_resolved + + # Check the bug + generated_behavior_path = Path("/workspace/target/src/test_saveCronStore__unit_test_0.test.ts").resolve() + existing_behavior_path = Path("/workspace/target/src/store__perfinstrumented.test.ts").resolve() + + # Existing test should be in mapping + assert existing_behavior_path in instrumented_to_original, \ + "Existing test should be in mapping" + + # After fix: Generated test SHOULD be in mapping (mapped to itself) + assert generated_behavior_path in instrumented_to_original, \ + "Generated test should be in mapping" + # Verify it's mapped to itself + assert instrumented_to_original[generated_behavior_path] == generated_behavior_path, \ + "Generated test should map to itself" From cac8f95672dbfae5b6dcde6b471dce2fe4578c11 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Thu, 2 Apr 2026 18:30:15 +0000 Subject: [PATCH 2/2] fix: import TestType from codeflash.models.test_type in test file TestType is defined in test_type module and only re-imported in models.py, so mypy strict mode requires importing it directly from the source. Co-authored-by: mohammed ahmed --- codeflash/result/create_pr.py | 8 ++++++-- tests/result/test_pr_generated_tests.py | 3 ++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/codeflash/result/create_pr.py b/codeflash/result/create_pr.py index b2e0ca163..6af0dc70b 100644 --- a/codeflash/result/create_pr.py +++ b/codeflash/result/create_pr.py @@ -110,11 +110,15 @@ def existing_tests_source_for( if registry_tf.instrumented_behavior_file_path: generated_resolved = registry_tf.instrumented_behavior_file_path.resolve() non_generated_tests.add(generated_resolved) - logger.debug(f"[PR-DEBUG] Added generated test (behavior) to non_generated_tests: {generated_resolved}") + logger.debug( + f"[PR-DEBUG] Added generated test (behavior) to non_generated_tests: {generated_resolved}" + ) if registry_tf.benchmarking_file_path: generated_perf_resolved = registry_tf.benchmarking_file_path.resolve() non_generated_tests.add(generated_perf_resolved) - logger.debug(f"[PR-DEBUG] Added generated test (perf) to non_generated_tests: {generated_perf_resolved}") + logger.debug( + f"[PR-DEBUG] Added generated test (perf) to non_generated_tests: {generated_perf_resolved}" + ) # TODO confirm that original and optimized have the same keys all_invocation_ids = original_runtimes_all.keys() | optimized_runtimes_all.keys() logger.debug(f"[PR-DEBUG] Processing {len(all_invocation_ids)} invocation_ids") diff --git a/tests/result/test_pr_generated_tests.py b/tests/result/test_pr_generated_tests.py index 59acbc47d..e51dbebf6 100644 --- a/tests/result/test_pr_generated_tests.py +++ b/tests/result/test_pr_generated_tests.py @@ -7,7 +7,8 @@ import pytest -from codeflash.models.models import TestFile, TestFiles, TestType +from codeflash.models.models import TestFile, TestFiles +from codeflash.models.test_type import TestType @pytest.fixture