diff --git a/features/feature-spec-discovery.md b/features/feature-spec-discovery.md index 416ff2c..dea0da0 100644 --- a/features/feature-spec-discovery.md +++ b/features/feature-spec-discovery.md @@ -94,6 +94,22 @@ Add shared discovery infrastructure for Issues #125 and #126: centralized parser **When** `PythonTestMiner` executes **Then** it reports `MinerErrorKind.PARSE_ERROR` for parse failures and still returns items from valid files. +### Story 9: TypeScript/JavaScript test-function mining +**Scenario:** As a discovery pipeline, I need to extract Jest/Vitest test signals from TS/JS test files. +**Given** TypeScript/JavaScript test files selected from `FileIndex` +**When** `TypeScriptTestMiner` runs +**Then** it emits `DiscoveredItem(kind=TEST_FUNCTION)` entries for `it(...)` and `test(...)` calls, including nested calls inside `describe(...)` blocks. + +**Scenario:** As a miner maintainer, I need describe context and todo fidelity. +**Given** a nested `describe("Auth", () => { it.todo("pending test") })` block +**When** test items are emitted +**Then** metadata validates against `TestFunctionMeta` with `class_name="Auth"`, `call_style="it"`, and `has_todo=True`. + +**Scenario:** As a pipeline operator, I need resilient parse handling. +**Given** one malformed TypeScript/JavaScript test file and one valid file +**When** `TypeScriptTestMiner` executes +**Then** it reports `MinerErrorKind.PARSE_ERROR` for parse failures and still returns items from valid files. + ## Acceptance Criteria - Language abstraction returns `SupportedLanguage` members for `.py`, `.ts`, `.tsx`, `.js`, `.jsx`, `.mjs` and `None` otherwise. - `LanguageRegistry().parse(path_to_py_file)` returns `(node, SupportedLanguage.PYTHON)` for valid Python input. @@ -124,3 +140,8 @@ Add shared discovery infrastructure for Issues #125 and #126: centralized parser - `PythonTestMiner` uses precomputed frameworks from `ctx.frameworks[SupportedLanguage.PYTHON]` rather than re-detecting frameworks. 
- Python test metadata validates against `TestFunctionMeta`, including `is_parametrized` and `class_name` values. - Parse failures in individual test files set `MinerResult.error_kind=PARSE_ERROR` without aborting extraction from remaining files. +- `TypeScriptTestMiner` reads candidate files from `ctx.file_index.files_matching("*.test.ts", "*.spec.ts", "*.test.js", "*.spec.js", "*.test.tsx", "*.spec.tsx")` and does not walk the filesystem directly. +- `TypeScriptTestMiner` uses precomputed frameworks from `ctx.frameworks[SupportedLanguage.TYPESCRIPT]` / `ctx.frameworks[SupportedLanguage.JAVASCRIPT]` instead of re-detecting frameworks. +- TypeScript/JavaScript test metadata validates against `TestFunctionMeta`, including `call_style`, `has_todo`, and describe-block `class_name`. +- `.ts` test files emit `language=SupportedLanguage.TYPESCRIPT`; `.js` files emit `language=SupportedLanguage.JAVASCRIPT`. +- Confidence scoring is `0.9` for known framework + `.spec.` filename and `0.7` otherwise. 
diff --git a/src/specleft/discovery/miners/__init__.py b/src/specleft/discovery/miners/__init__.py index 359cba5..c059ee1 100644 --- a/src/specleft/discovery/miners/__init__.py +++ b/src/specleft/discovery/miners/__init__.py @@ -7,10 +7,12 @@ from specleft.discovery.miners.python.tests import PythonTestMiner from specleft.discovery.miners.shared.docstrings import DocstringMiner from specleft.discovery.miners.shared.readme import ReadmeOverviewMiner +from specleft.discovery.miners.typescript.tests import TypeScriptTestMiner __all__ = [ "DocstringMiner", "PythonTestMiner", "ReadmeOverviewMiner", + "TypeScriptTestMiner", "default_miners", ] diff --git a/src/specleft/discovery/miners/defaults.py b/src/specleft/discovery/miners/defaults.py index 51d8fd6..a6c3a31 100644 --- a/src/specleft/discovery/miners/defaults.py +++ b/src/specleft/discovery/miners/defaults.py @@ -10,6 +10,7 @@ from specleft.discovery.miners.python.tests import PythonTestMiner from specleft.discovery.miners.shared.docstrings import DocstringMiner from specleft.discovery.miners.shared.readme import ReadmeOverviewMiner +from specleft.discovery.miners.typescript.tests import TypeScriptTestMiner if TYPE_CHECKING: from specleft.discovery.pipeline import BaseMiner @@ -17,4 +18,9 @@ def default_miners() -> list[BaseMiner]: """Return default miners in deterministic execution order.""" - return [ReadmeOverviewMiner(), PythonTestMiner(), DocstringMiner()] + return [ + ReadmeOverviewMiner(), + PythonTestMiner(), + TypeScriptTestMiner(), + DocstringMiner(), + ] diff --git a/src/specleft/discovery/miners/typescript/__init__.py b/src/specleft/discovery/miners/typescript/__init__.py index 96fabc5..7235086 100644 --- a/src/specleft/discovery/miners/typescript/__init__.py +++ b/src/specleft/discovery/miners/typescript/__init__.py @@ -4,5 +4,6 @@ """TypeScript/JavaScript-specific discovery miners.""" from specleft.discovery.miners.typescript.jsdoc import extract_jsdoc_items +from 
# SPDX-License-Identifier: Apache-2.0
# Copyright (c) 2026 SpecLeft Contributors

"""TypeScript/JavaScript test-function miner."""

from __future__ import annotations

import time
import uuid
from pathlib import Path
from typing import Any

from specleft.discovery.context import MinerContext
from specleft.discovery.miners.shared.common import elapsed_ms, line_number, node_text
from specleft.discovery.models import (
    DiscoveredItem,
    ItemKind,
    MinerErrorKind,
    MinerResult,
    SupportedLanguage,
    TestFunctionMeta,
)

# Glob patterns handed to the file index to select candidate test files.
_TEST_PATTERNS = (
    "*.test.ts",
    "*.spec.ts",
    "*.test.js",
    "*.spec.js",
    "*.test.tsx",
    "*.spec.tsx",
)
# Callee names that declare a single test case.
_TEST_CALL_NAMES = frozenset({"it", "test"})
# Node types accepted as the callback argument of a ``describe`` call.
_CALLBACK_NODE_TYPES = frozenset(
    {
        "arrow_function",
        "function",
        "function_expression",
    }
)
# Node types accepted as a test or describe title.
_STRING_NODE_TYPES = frozenset({"string", "template_string"})
# Frameworks that raise the confidence score for ``.spec.`` files.
_KNOWN_FRAMEWORKS = frozenset({"jest", "vitest"})


class TypeScriptTestMiner:
    """Extract Jest/Vitest test calls from TypeScript/JavaScript test files."""

    miner_id = uuid.UUID("aa5151b6-3805-419c-a726-a56755300dda")
    name = "typescript_test_functions"
    languages = frozenset({SupportedLanguage.TYPESCRIPT, SupportedLanguage.JAVASCRIPT})

    def mine(self, ctx: MinerContext) -> MinerResult:
        """Mine every indexed test file and collect the discovered test calls.

        Candidate files come from ``ctx.file_index`` and are parsed through
        ``ctx.registry``. A file whose parse result is ``None`` or whose bytes
        cannot be read is recorded as a failure and skipped; the remaining
        files are still processed. Files parsed as a language outside
        ``self.languages`` are skipped without being reported.

        Returns:
            A ``MinerResult`` holding all discovered items. ``error_kind`` is
            ``MinerErrorKind.PARSE_ERROR`` and ``error`` lists the failed
            paths when at least one file failed; both are ``None`` otherwise.
        """
        clock_start = time.perf_counter()
        discovered: list[DiscoveredItem] = []
        failed_paths: list[Path] = []

        for relative in ctx.file_index.files_matching(*_TEST_PATTERNS):
            absolute = ctx.root / relative

            outcome = ctx.registry.parse(absolute)
            if outcome is None:
                failed_paths.append(relative)
                continue

            tree_root, detected_language = outcome
            if detected_language not in self.languages:
                continue

            try:
                raw_source = absolute.read_bytes()
            except OSError:
                # An unreadable file is reported through the same channel
                # as an unparsable one.
                failed_paths.append(relative)
                continue

            framework_name = _primary_framework(ctx, detected_language)
            discovered += _extract_test_items(
                root_node=tree_root,
                source_bytes=raw_source,
                file_path=relative,
                language=detected_language,
                framework=framework_name,
            )

        failure_kind: MinerErrorKind | None
        failure_text: str | None
        if failed_paths:
            listing = ", ".join(path.as_posix() for path in failed_paths)
            failure_kind = MinerErrorKind.PARSE_ERROR
            failure_text = f"Failed to parse TypeScript/JavaScript test files: {listing}"
        else:
            failure_kind = None
            failure_text = None

        return MinerResult(
            miner_id=self.miner_id,
            miner_name=self.name,
            items=discovered,
            error=failure_text,
            error_kind=failure_kind,
            duration_ms=elapsed_ms(clock_start),
        )
def _primary_framework(ctx: MinerContext, language: SupportedLanguage) -> str:
    """Return the first precomputed framework name for ``language``.

    When ``ctx.frameworks`` has no entry (or an empty one) for the language,
    JavaScript falls back to the TypeScript list and TypeScript falls back to
    the JavaScript list.

    Returns:
        The first framework name found, or ``"unknown"`` when none is
        available.
    """
    detected = ctx.frameworks.get(language, [])
    if not detected:
        if language is SupportedLanguage.JAVASCRIPT:
            detected = ctx.frameworks.get(SupportedLanguage.TYPESCRIPT, [])
        elif language is SupportedLanguage.TYPESCRIPT:
            detected = ctx.frameworks.get(SupportedLanguage.JAVASCRIPT, [])
    return detected[0] if detected else "unknown"


def _extract_test_items(
    *,
    root_node: Any,
    source_bytes: bytes,
    file_path: Path,
    language: SupportedLanguage,
    framework: str,
) -> list[DiscoveredItem]:
    """Collect one ``DiscoveredItem`` per test call found under ``root_node``.

    The tree is visited in pre-order (a node first, then its
    ``named_children`` left to right), so items come out in that order. The
    traversal uses an explicit stack rather than recursion: recursing once
    per syntax-tree level can exceed Python's recursion limit on deeply
    nested sources and abort the whole miner with ``RecursionError``.

    Args:
        root_node: Root of the parsed syntax tree.
        source_bytes: Raw file contents, passed to the node-text helpers.
        file_path: Path recorded on every emitted item.
        language: Language recorded on every emitted item.
        framework: Framework name stored in each item's metadata.

    Returns:
        The discovered test items; empty when the tree has no test calls.
    """
    items: list[DiscoveredItem] = []
    # The score depends only on the file and framework, so compute it once.
    confidence = _confidence_for(file_path, framework)

    # Each entry pairs a node with the title of its innermost enclosing
    # ``describe`` block (``None`` at the top level).
    pending: list[tuple[Any, str | None]] = [(root_node, None)]
    while pending:
        node, describe_name = pending.pop()

        describe = _describe_payload(node, source_bytes)
        if describe is not None:
            title, callback = describe
            if callback is not None:
                # Only the callback body is searched; an untitled describe
                # keeps the enclosing title.
                pending.append((callback, title or describe_name))
                continue

        test_call = _test_call_payload(node, source_bytes)
        if test_call is not None:
            test_name, call_style, has_todo = test_call
            metadata = TestFunctionMeta(
                framework=framework,
                class_name=describe_name,
                has_docstring=False,
                docstring=None,
                is_parametrized=False,
                call_style=call_style,
                has_todo=has_todo,
            )
            items.append(
                DiscoveredItem(
                    kind=ItemKind.TEST_FUNCTION,
                    name=test_name,
                    file_path=file_path,
                    line_number=line_number(node),
                    language=language,
                    raw_text=None,
                    metadata=metadata.model_dump(),
                    confidence=confidence,
                )
            )

        # Push children reversed so the leftmost child is popped first,
        # which preserves pre-order.
        children = list(getattr(node, "named_children", ()))
        pending.extend((child, describe_name) for child in reversed(children))

    return items
def _describe_payload(node: Any, source_bytes: bytes) -> tuple[str | None, Any] | None:
    """Return ``(title, callback)`` when ``node`` is a ``describe`` call.

    Returns:
        ``None`` when the node is not a call expression, has no ``function``
        field, or does not target ``describe``. Otherwise a pair whose title
        is the first non-empty string argument (or ``None``) and whose
        callback is the first function-like argument (or ``None``).
    """
    if getattr(node, "type", "") != "call_expression":
        return None

    callee = node.child_by_field_name("function")
    if callee is None:
        return None

    target, _ = _call_target(callee, source_bytes)
    if target != "describe":
        return None

    arguments = _call_arguments(node)
    title = _first_string_arg(arguments, source_bytes)
    return title, _callback_arg(arguments)


def _test_call_payload(
    node: Any,
    source_bytes: bytes,
) -> tuple[str, str, bool] | None:
    """Return ``(name, call_style, has_todo)`` when ``node`` declares a test.

    Returns:
        ``None`` when the node is not a call expression, has no ``function``
        field, does not target ``it``/``test``, or has no non-empty string
        argument to use as the test name.
    """
    if getattr(node, "type", "") != "call_expression":
        return None

    callee = node.child_by_field_name("function")
    if callee is None:
        return None

    target, has_todo = _call_target(callee, source_bytes)
    if target not in _TEST_CALL_NAMES:
        return None

    title = _first_string_arg(_call_arguments(node), source_bytes)
    if title is None:
        return None

    return title, target, has_todo


# Member modifiers that still declare a test taking ``(name, fn)``:
# Jest documents ``only``, ``skip``, ``concurrent`` and ``failing``;
# Vitest documents ``only``, ``skip``, ``concurrent``, ``sequential`` and
# ``fails``. ``todo`` is handled separately because it sets ``has_todo``.
_TEST_MODIFIERS = frozenset(
    {"only", "skip", "concurrent", "failing", "fails", "sequential"}
)


def _todo_flag_for_modifier(property_name: str) -> bool | None:
    """Classify the property of an ``it.<property>`` / ``test.<property>`` callee.

    Returns:
        ``True`` for ``todo``, ``False`` for any other recognised test
        modifier, and ``None`` when the property does not declare a test.
    """
    if property_name == "todo":
        return True
    if property_name in _TEST_MODIFIERS:
        return False
    return None


def _call_target(function_node: Any, source_bytes: bytes) -> tuple[str | None, bool]:
    """Resolve a callee node to ``(target_name, has_todo)``.

    ``target_name`` is ``"it"``, ``"test"`` or ``"describe"`` when the callee
    is one of those identifiers or a supported member access on them, and
    ``None`` otherwise. Any ``describe.<x>`` member counts as ``describe``.
    For ``it``/``test``, ``.todo`` sets ``has_todo`` and the modifiers in
    ``_TEST_MODIFIERS`` (for example ``it.only`` and ``test.skip``) are
    accepted so focused and skipped tests are still discovered.
    """
    text = node_text(function_node, source_bytes).strip()

    if text == "describe" or text in _TEST_CALL_NAMES:
        return text, False

    if text in {"it.todo", "test.todo"}:
        return text.split(".", maxsplit=1)[0], True

    if getattr(function_node, "type", "") != "member_expression":
        return None, False

    object_name = _strip_quotes(_field_value(function_node, "object", source_bytes))
    property_name = _strip_quotes(_field_value(function_node, "property", source_bytes))

    if object_name == "describe":
        return object_name, False

    if object_name in _TEST_CALL_NAMES:
        todo_flag = _todo_flag_for_modifier(property_name)
        if todo_flag is not None:
            return object_name, todo_flag

    return None, False
def _call_arguments(node: Any) -> list[Any]:
    """Return the named children of the call's ``arguments`` field.

    Returns an empty list when the node has no ``arguments`` field.
    """
    arguments_node = node.child_by_field_name("arguments")
    if arguments_node is None:
        return []
    return list(getattr(arguments_node, "named_children", ()))


def _first_string_arg(args: list[Any], source_bytes: bytes) -> str | None:
    """Return the cleaned text of the first non-empty string-like argument.

    Arguments whose type is not in ``_STRING_NODE_TYPES`` and string
    arguments that clean to an empty value are skipped.
    """
    for arg in args:
        if getattr(arg, "type", "") not in _STRING_NODE_TYPES:
            continue
        value = _clean_string(node_text(arg, source_bytes))
        if value:
            return value
    return None


def _callback_arg(args: list[Any]) -> Any | None:
    """Return the first argument whose type is in ``_CALLBACK_NODE_TYPES``."""
    return next(
        (arg for arg in args if getattr(arg, "type", "") in _CALLBACK_NODE_TYPES),
        None,
    )


def _field_value(node: Any, field: str, source_bytes: bytes) -> str:
    """Return the stripped text of ``node``'s ``field`` child, or ``""`` if absent."""
    field_node = node.child_by_field_name(field)
    if field_node is None:
        return ""
    return node_text(field_node, source_bytes).strip()


# Characters accepted as string delimiters, and the characters whose
# backslash escape is resolved when cleaning a literal.
_QUOTE_CHARS = "\"'`"
_UNESCAPABLE_CHARS = _QUOTE_CHARS + "\\"


def _unescape_quotes(text: str) -> str:
    """Resolve backslash-escaped quotes and backslashes in ``text``.

    Only ``\\"``, ``\\'``, ``\\``` and ``\\\\`` are rewritten; every other
    escape sequence (``\\n``, ``\\u....`` and so on) is left exactly as
    written.
    """
    if "\\" not in text:
        return text

    pieces: list[str] = []
    chars = iter(text)
    for char in chars:
        if char != "\\":
            pieces.append(char)
            continue
        following = next(chars, "")
        pieces.append(following if following in _UNESCAPABLE_CHARS and following else char + following)
    return "".join(pieces)


def _clean_string(raw: str) -> str:
    """Turn the source text of a string literal into a display name.

    Surrounding whitespace and the delimiting quotes are removed, then
    escaped quotes are resolved so that ``'doesn\\'t crash'`` yields
    ``doesn't crash`` instead of keeping the backslash.
    """
    return _unescape_quotes(_strip_quotes(raw.strip()))


def _strip_quotes(value: str) -> str:
    """Remove one pair of matching ``"``, ``'`` or backtick delimiters.

    The result is whitespace-stripped whether or not quotes were found.
    """
    if len(value) >= 2 and value[0] in _QUOTE_CHARS and value[-1] == value[0]:
        return value[1:-1].strip()
    return value.strip()


def _confidence_for(file_path: Path, framework: str) -> float:
    """Score ``0.9`` for a known framework in a ``.spec.`` file, else ``0.7``."""
    is_spec_file = ".spec." in file_path.name
    if is_spec_file and framework in _KNOWN_FRAMEWORKS:
        return 0.9
    return 0.7
# SPDX-License-Identifier: Apache-2.0
# Copyright (c) 2026 SpecLeft Contributors

"""Tests for TypeScript/JavaScript test-function discovery miner."""

from __future__ import annotations

from dataclasses import dataclass, field
from pathlib import Path
from typing import Any

from specleft.discovery.config import DiscoveryConfig
from specleft.discovery.context import MinerContext
from specleft.discovery.file_index import FileIndex
from specleft.discovery.miners.typescript.tests import TypeScriptTestMiner
from specleft.discovery.models import (
    MinerErrorKind,
    SupportedLanguage,
    TestFunctionMeta as _TestFunctionMeta,
)


@dataclass
class _FakeNode:
    """Hand-built stand-in for a parsed syntax-tree node."""

    type: str
    # Source text the node reports through ``text``.
    text_value: str = ""
    children: list[_FakeNode] = field(default_factory=list)
    named_children: list[_FakeNode] = field(default_factory=list)
    # Field name -> child node, served by ``child_by_field_name``.
    fields: dict[str, _FakeNode] = field(default_factory=dict)
    start_point: tuple[int, int] = (0, 0)
    end_point: tuple[int, int] = (0, 0)

    @property
    def text(self) -> bytes:
        """Return ``text_value`` encoded as UTF-8."""
        return bytes(self.text_value, "utf-8")

    def child_by_field_name(self, name: str) -> _FakeNode | None:
        """Return the child registered under ``name``, or ``None``."""
        return self.fields.get(name)


class _RegistryStub:
    """Registry double that serves canned parse results and records calls."""

    def __init__(
        self, mapping: dict[Path, tuple[Any, SupportedLanguage] | None]
    ) -> None:
        self._mapping = mapping
        self.calls: list[Path] = []

    def parse(self, file_path: Path) -> tuple[Any, SupportedLanguage] | None:
        """Record ``file_path`` and return its canned result (``None`` if unmapped)."""
        self.calls.append(file_path)
        canned = self._mapping.get(file_path)
        return canned


def _context(
    root: Path,
    registry: _RegistryStub,
    *,
    frameworks: dict[SupportedLanguage, list[str]] | None = None,
) -> MinerContext:
    """Build a ``MinerContext`` rooted at ``root`` that parses through ``registry``."""
    framework_map = frameworks if frameworks else {}
    return MinerContext(
        root=root,
        registry=registry,  # type: ignore[arg-type]
        file_index=FileIndex(root),
        frameworks=framework_map,
        config=DiscoveryConfig.default(),
    )
def _leaf(node_type: str, value: str, row: int) -> _FakeNode:
    """Build a childless node spanning ``value`` on line ``row``."""
    return _FakeNode(
        type=node_type,
        text_value=value,
        start_point=(row, 0),
        end_point=(row, len(value)),
    )


def _branch(
    node_type: str,
    members: list[_FakeNode],
    row: int,
    end_point: tuple[int, int],
    fields: dict[str, _FakeNode] | None = None,
) -> _FakeNode:
    """Build a node whose ``children`` and ``named_children`` both list ``members``."""
    extra = {} if fields is None else {"fields": fields}
    return _FakeNode(
        type=node_type,
        children=list(members),
        named_children=list(members),
        start_point=(row, 0),
        end_point=end_point,
        **extra,
    )


def _identifier(value: str, row: int) -> _FakeNode:
    """Build an ``identifier`` node."""
    return _leaf("identifier", value, row)


def _property_identifier(value: str, row: int) -> _FakeNode:
    """Build a ``property_identifier`` node."""
    return _leaf("property_identifier", value, row)


def _string(value: str, row: int) -> _FakeNode:
    """Build a ``string`` node; ``value`` is the literal including its quotes."""
    return _leaf("string", value, row)


def _statement_block(statements: list[_FakeNode], row: int) -> _FakeNode:
    """Build a ``statement_block`` holding ``statements``."""
    return _branch("statement_block", statements, row, (row + 1, 0))


def _arrow_function(body: _FakeNode, row: int) -> _FakeNode:
    """Build an ``arrow_function`` whose ``body`` field is ``body``."""
    return _branch("arrow_function", [body], row, (row + 1, 0), {"body": body})


def _member_expression(object_name: str, property_name: str, row: int) -> _FakeNode:
    """Build ``object_name.property_name`` as a ``member_expression``."""
    target = _identifier(object_name, row)
    member = _property_identifier(property_name, row)
    width = len(object_name) + len(property_name) + 1
    return _branch(
        "member_expression",
        [target, member],
        row,
        (row, width),
        {"object": target, "property": member},
    )


def _call_expression(callee: _FakeNode, args: list[_FakeNode], row: int) -> _FakeNode:
    """Build a ``call_expression`` with ``function`` and ``arguments`` fields."""
    arguments_node = _branch("arguments", args, row, (row, 0))
    return _branch(
        "call_expression",
        [callee, arguments_node],
        row,
        (row + 1, 0),
        {"function": callee, "arguments": arguments_node},
    )
def _expression_statement(expression: _FakeNode, row: int) -> _FakeNode:
    """Wrap ``expression`` in an ``expression_statement`` node."""
    return _FakeNode(
        type="expression_statement",
        children=[expression],
        named_children=[expression],
        start_point=(row, 0),
        end_point=(row + 1, 0),
    )


def _program(statement: _FakeNode, last_row: int) -> _FakeNode:
    """Build a ``program`` root that holds a single top-level statement."""
    return _FakeNode(
        type="program",
        children=[statement],
        named_children=[statement],
        start_point=(0, 0),
        end_point=(last_row, 0),
    )


def _empty_callback(row: int) -> _FakeNode:
    """Build ``() => {}`` on line ``row``."""
    return _arrow_function(_statement_block([], row), row)


def _typescript_test_tree() -> _FakeNode:
    """Build ``describe('Auth', ...)`` containing ``it``, ``it.todo``, ``test`` and ``expect`` calls."""
    calls_by_row = {
        3: _call_expression(
            _identifier("it", 3),
            [_string("'logs in valid user'", 3), _empty_callback(3)],
            3,
        ),
        4: _call_expression(
            _member_expression("it", "todo", 4),
            [_string("'pending test'", 4)],
            4,
        ),
        5: _call_expression(
            _identifier("test", 5),
            [_string("'does thing'", 5), _empty_callback(5)],
            5,
        ),
        # ``expect`` is not a test callee, so the miner must ignore it.
        6: _call_expression(
            _identifier("expect", 6),
            [_string("'not a test call'", 6)],
            6,
        ),
    }
    body = _statement_block(
        [_expression_statement(call, row) for row, call in calls_by_row.items()],
        2,
    )
    describe_call = _call_expression(
        _identifier("describe", 2),
        [_string("'Auth'", 2), _arrow_function(body, 2)],
        2,
    )
    return _program(_expression_statement(describe_call, 2), 8)


def _javascript_test_tree() -> _FakeNode:
    """Build a program with one top-level ``test('handles js path', ...)`` call."""
    return _single_test_tree("handles js path", 1)


def _single_test_tree(name: str, row: int = 1) -> _FakeNode:
    """Build a program with one top-level ``test`` call titled ``name`` on ``row``."""
    test_call = _call_expression(
        _identifier("test", row),
        [_string(f"'{name}'", row), _empty_callback(row)],
        row,
    )
    return _program(_expression_statement(test_call, row), row + 2)
def test_typescript_test_miner_extracts_describe_context_and_call_styles(
    tmp_path: Path,
) -> None:
    """Items carry describe context, call style, todo flag, language and confidence."""
    tests_dir = tmp_path / "tests"
    source_dir = tmp_path / "src"
    tests_dir.mkdir(parents=True)
    source_dir.mkdir(parents=True)

    ts_file = tests_dir / "auth.spec.ts"
    js_file = tests_dir / "helpers.test.js"
    # Not a test file name, so the miner must never ask the registry for it.
    ignored = source_dir / "helpers.ts"

    ts_file.write_text("describe('Auth', () => { it('x', () => {}) })\n")
    js_file.write_text("test('js', () => {})\n")
    ignored.write_text("export const helper = 1\n")

    registry = _RegistryStub(
        {
            ts_file: (_typescript_test_tree(), SupportedLanguage.TYPESCRIPT),
            js_file: (_javascript_test_tree(), SupportedLanguage.JAVASCRIPT),
            ignored: (_javascript_test_tree(), SupportedLanguage.TYPESCRIPT),
        }
    )
    ctx = _context(
        tmp_path,
        registry,
        frameworks={
            SupportedLanguage.TYPESCRIPT: ["jest"],
            SupportedLanguage.JAVASCRIPT: ["vitest"],
        },
    )

    result = TypeScriptTestMiner().mine(ctx)

    assert result.error is None
    assert result.error_kind is None
    assert registry.calls == [ts_file, js_file]
    assert len(result.items) == 4
    assert all(
        isinstance(item.typed_meta(), _TestFunctionMeta) for item in result.items
    )

    by_name = {item.name: item for item in result.items}
    assert set(by_name) == {
        "logs in valid user",
        "pending test",
        "does thing",
        "handles js path",
    }

    login = by_name["logs in valid user"]
    pending = by_name["pending test"]
    plain = by_name["does thing"]
    js_item = by_name["handles js path"]

    assert login.metadata["class_name"] == "Auth"
    assert pending.metadata["class_name"] == "Auth"
    assert pending.metadata["has_todo"] is True
    assert pending.metadata["call_style"] == "it"
    assert plain.metadata["call_style"] == "test"

    assert login.language == SupportedLanguage.TYPESCRIPT
    assert js_item.language == SupportedLanguage.JAVASCRIPT

    assert login.metadata["framework"] == "jest"
    assert js_item.metadata["framework"] == "vitest"

    assert login.confidence == 0.9
    assert pending.confidence == 0.9
    assert js_item.confidence == 0.7
def test_typescript_test_miner_reports_parse_failures_and_keeps_valid_items(
    tmp_path: Path,
) -> None:
    """A file the registry cannot parse is reported while valid files still yield items."""
    tests_dir = tmp_path / "tests"
    tests_dir.mkdir(parents=True)

    good_file = tests_dir / "ok.test.ts"
    bad_file = tests_dir / "bad.spec.js"
    good_file.write_text("test('ok', () => {})\n")
    bad_file.write_text("test('broken'\n")

    # ``None`` is what the registry stub returns for the broken file.
    canned_results = {
        good_file: (_single_test_tree("ok path"), SupportedLanguage.TYPESCRIPT),
        bad_file: None,
    }
    registry = _RegistryStub(canned_results)

    result = TypeScriptTestMiner().mine(_context(tmp_path, registry))

    assert result.error_kind == MinerErrorKind.PARSE_ERROR
    assert result.error is not None
    assert "tests/bad.spec.js" in result.error
    assert len(result.items) == 1

    survivor = result.items[0]
    assert survivor.name == "ok path"
    assert survivor.metadata["framework"] == "unknown"
    assert survivor.metadata["call_style"] == "test"
    assert survivor.confidence == 0.7