diff --git a/features/feature-spec-discovery.md b/features/feature-spec-discovery.md index 623634a..6e64157 100644 --- a/features/feature-spec-discovery.md +++ b/features/feature-spec-discovery.md @@ -1,7 +1,7 @@ -# Feature Spec: Discovery language registry and file indexing +# Feature Spec: Discovery pipeline foundations ## Purpose -Add shared discovery infrastructure for Issue #125: centralized parser abstraction and one-pass filesystem indexing. +Add shared discovery infrastructure for Issues #125 and #126: centralized parser abstraction, one-pass filesystem indexing, framework/config detection, and pipeline orchestration. ## User Stories @@ -33,6 +33,35 @@ Add shared discovery infrastructure for Issue #125: centralized parser abstracti **When** calling `detect_project_languages(index)` **Then** it returns detected languages above the ratio threshold, computed against total indexed files. +### Story 4: Discovery configuration and framework detection +**Scenario:** As a pipeline maintainer, I need project-local discovery settings. +**Given** a repository root with `[tool.specleft.discovery]` in `pyproject.toml` +**When** I load `DiscoveryConfig.from_pyproject(root)` +**Then** it should return configured values with safe defaults for missing/invalid fields. + +**Scenario:** As a discovery pipeline, I need framework signals shared across miners. +**Given** a repository with `pytest` configuration and matching test files +**When** I call `FrameworkDetector().detect(root, file_index)` +**Then** it should return `{SupportedLanguage.PYTHON: ["pytest"]}`. + +### Story 5: Orchestrated miner execution +**Scenario:** As a discovery pipeline, I need deterministic and resilient miner execution. +**Given** a set of registered miners +**When** one miner raises an exception +**Then** the pipeline records the error in that miner result and continues running the remaining miners. + +**Scenario:** As a pipeline consumer, I need correct filtering semantics. 
+**Given** detected project languages and miner language scopes +**When** a miner has no overlap with detected languages +**Then** it is skipped silently. +**And** language-agnostic miners (`languages = frozenset()`) always run. + +### Story 6: Default pipeline wiring +**Scenario:** As a command entrypoint (`specleft discover` / `specleft start`), I need one constructor that wires everything. +**Given** a project root +**When** I call `build_default_pipeline(root).run()` +**Then** a `DiscoveryReport` is returned with run duration, detected languages, miner results, and total item counts. + ## Acceptance Criteria - Language abstraction returns `SupportedLanguage` members for `.py`, `.ts`, `.tsx`, `.js`, `.jsx`, `.mjs` and `None` otherwise. - `LanguageRegistry().parse(path_to_py_file)` returns `(node, SupportedLanguage.PYTHON)` for valid Python input. @@ -41,5 +70,17 @@ Add shared discovery infrastructure for Issue #125: centralized parser abstracti - Grammar/parser handling is cached and does not recreate parser objects per call. - `FileIndex` builds once per root and exposes query helpers used by miners. - `detect_project_languages()` thresholds are applied against total indexed files, not only supported-language files. -- Tests cover registry parsing, caching behavior, index filtering, and language detection thresholding. -- Feature spec is updated to document the new discovery layer behavior for issue #125. +- `DiscoveryConfig.from_pyproject(root)` loads custom settings from `[tool.specleft.discovery]`. +- `DiscoveryConfig.from_pyproject(root)` returns defaults when the section is missing. +- `FrameworkDetector.detect()` returns `{PYTHON: ["pytest"]}` on the SpecLeft repo. +- `FrameworkDetector` is called once per pipeline run and the result is shared through one `MinerContext`. +- `MinerContext` is constructed once and reused for all miner calls in that run. +- Per-miner exceptions are captured into `MinerResult.error`/`error_kind` without stopping the run. 
+- `DiscoveryReport.total_items` excludes items from miners that errored. +- Miners with no language overlap are skipped; language-agnostic miners always run. +- `register()` raises `ValueError` for duplicate `miner_id` UUIDs. +- `MinerResult.miner_id` and `miner_name` in output are populated from the miner instance. +- `build_default_pipeline(root).run()` returns a valid `DiscoveryReport` even when all registered miners fail. +- Integration on the SpecLeft repository produces `report.total_items > 0`. +- Tests cover config parsing, framework detection, pipeline registration/filtering/error isolation, and default pipeline integration. +- Feature spec is updated to document the discovery layer behavior introduced in issues #125 and #126. diff --git a/src/specleft/commands/features.py b/src/specleft/commands/features.py index e670049..c360ef3 100644 --- a/src/specleft/commands/features.py +++ b/src/specleft/commands/features.py @@ -815,7 +815,7 @@ def features_add( _ensure_interactive(interactive) if interactive: - title_input = click.prompt("Feature title", type=str).strip() + title_input = click.prompt("Feature title").strip() default_feature_id = generate_feature_id(title_input) feature_id_input = click.prompt( "Feature ID", @@ -1036,8 +1036,8 @@ def features_add_scenario( _ensure_interactive(interactive) if interactive: - feature_input = click.prompt("Feature ID", type=str).strip() - title_input = click.prompt("Scenario title", type=str).strip() + feature_input = click.prompt("Feature ID").strip() + title_input = click.prompt("Scenario title").strip() default_scenario_id = generate_scenario_id(title_input) scenario_id_input = click.prompt( "Scenario ID", diff --git a/src/specleft/discovery/__init__.py b/src/specleft/discovery/__init__.py index 75b2a66..096509a 100644 --- a/src/specleft/discovery/__init__.py +++ b/src/specleft/discovery/__init__.py @@ -2,6 +2,14 @@ from specleft.discovery.models import * # noqa: F401,F403 +from specleft.discovery.config import 
@dataclass(frozen=True)
class DiscoveryConfig:
    """Configuration for discovery pipeline and miners.

    Instances are immutable. Build one with :meth:`default` or
    :meth:`from_pyproject`.
    """

    # Directory names excluded from indexing (handed to ``FileIndex``).
    exclude_dirs: frozenset[str] = DEFAULT_EXCLUDE_DIRS
    # NOTE(review): presumably the directories treated as source roots by
    # miners; no consumer is visible here, so confirm against callers.
    source_dirs: tuple[str, ...] = ("src", "lib", "app", "core")
    # NOTE(review): presumably an upper bound for a git-history miner; confirm.
    max_git_commits: int = 200

    @classmethod
    def from_pyproject(cls, root: Path) -> DiscoveryConfig:
        """Load discovery config from ``[tool.specleft.discovery]`` if present.

        Every field falls back to its default independently when the value is
        missing, has the wrong type, or is empty. Non-string entries inside
        ``exclude_dirs`` / ``source_dirs`` are dropped.

        Args:
            root: Directory expected to contain ``pyproject.toml``.

        Returns:
            The resolved configuration; all defaults when the file or section
            is missing or unreadable.
        """
        section = _extract_discovery_section(_load_pyproject(root))
        if not section:
            return cls.default()

        default = cls.default()

        raw_exclude_dirs = section.get("exclude_dirs")
        exclude_dirs = (
            frozenset(value for value in raw_exclude_dirs if isinstance(value, str))
            if isinstance(raw_exclude_dirs, list)
            else default.exclude_dirs
        )
        if not exclude_dirs:
            exclude_dirs = default.exclude_dirs

        raw_source_dirs = section.get("source_dirs")
        source_dirs = (
            tuple(value for value in raw_source_dirs if isinstance(value, str))
            if isinstance(raw_source_dirs, list)
            else default.source_dirs
        )
        if not source_dirs:
            source_dirs = default.source_dirs

        raw_max_git_commits = section.get("max_git_commits")
        # ``bool`` is a subclass of ``int``; without the explicit exclusion a
        # TOML ``max_git_commits = true`` would be accepted as the limit ``True``.
        if (
            isinstance(raw_max_git_commits, int)
            and not isinstance(raw_max_git_commits, bool)
            and raw_max_git_commits > 0
        ):
            max_git_commits = raw_max_git_commits
        else:
            max_git_commits = default.max_git_commits

        return cls(
            exclude_dirs=exclude_dirs,
            source_dirs=source_dirs,
            max_git_commits=max_git_commits,
        )

    @classmethod
    def default(cls) -> DiscoveryConfig:
        """Return config with all defaults."""
        return cls()


def _extract_discovery_section(data: dict[str, Any]) -> dict[str, Any]:
    """Return the ``tool.specleft.discovery`` table, or ``{}`` if absent.

    Any intermediate value that is not a table also yields ``{}``.
    """
    tool = data.get("tool")
    if not isinstance(tool, dict):
        return {}

    specleft = tool.get("specleft")
    if not isinstance(specleft, dict):
        return {}

    discovery = specleft.get("discovery")
    if not isinstance(discovery, dict):
        return {}

    return discovery


def _load_pyproject(root: Path) -> dict[str, Any]:
    """Parse ``root/pyproject.toml``; return ``{}`` on any failure.

    Failure covers a missing file, an unreadable file, no TOML parser being
    importable, and undecodable or invalid TOML.
    """
    pyproject_path = root / "pyproject.toml"
    if not pyproject_path.is_file():
        return {}

    try:
        raw = pyproject_path.read_bytes()
    except OSError:
        return {}

    toml_module = _resolve_toml_loader()
    if toml_module is None:
        return {}

    try:
        parsed = toml_module.loads(raw.decode("utf-8"))
    except Exception:
        # Deliberately best-effort: a broken manifest must not abort
        # discovery, so decode and parse errors both mean "no config".
        return {}

    if not isinstance(parsed, dict):
        return {}

    return parsed


def _resolve_toml_loader() -> Any | None:
    """Return ``tomllib``, else the ``tomli`` backport, else ``None``."""
    try:
        import tomllib

        return tomllib
    except ModuleNotFoundError:
        try:
            import tomli  # type: ignore[import-not-found]

            return tomli
        except ModuleNotFoundError:
            return None
specleft.discovery.frameworks.python.policies import PythonFrameworkPolicy +from specleft.discovery.frameworks.types import LanguagePolicy +from specleft.discovery.frameworks.typescript.policies import TypeScriptFrameworkPolicy +from specleft.discovery.models import SupportedLanguage + + +class FrameworkDetector: + """Detect test frameworks by combining manifest and file-pattern signals.""" + + def __init__(self, policies: tuple[LanguagePolicy, ...] | None = None) -> None: + self._policies = policies if policies is not None else _default_policies() + + def detect( + self, + root: Path, + file_index: FileIndex, + ) -> dict[SupportedLanguage, list[str]]: + """Detect framework names by language.""" + ctx = DetectionContext(root=root, file_index=file_index) + detected: dict[SupportedLanguage, list[str]] = {} + + for policy in self._policies: + frameworks = policy.detect(ctx) + if frameworks: + detected[policy.language] = frameworks + + return detected + + +@dataclass(frozen=True) +class DetectionContext: + """Cached evidence shared by all framework policies and rules.""" + + root: Path + file_index: FileIndex + + @cached_property + def pyproject(self) -> dict[str, object]: + return io.load_pyproject(self.root) + + @cached_property + def package_json(self) -> dict[str, object]: + return io.load_package_json(self.root) + + @cached_property + def requirements_lines(self) -> tuple[str, ...]: + lines: list[str] = [] + for requirements_file in sorted(self.root.glob("requirements*.txt")): + raw = io.read_text(requirements_file) + if raw is None: + continue + lines.extend(line.strip().lower() for line in raw.splitlines()) + return tuple(lines) + + @cached_property + def python_test_files(self) -> list[Path]: + return [ + path + for path in self.file_index.files_matching("test_*.py") + if io.is_project_file(path) + ] + + @cached_property + def conftest_files(self) -> list[Path]: + return [ + path + for path in self.file_index.files_matching("conftest.py") + if 
io.is_project_file(path) + ] + + @cached_property + def has_unittest_testcases(self) -> bool: + for python_file in self.python_test_files: + source = io.read_text(self.root / python_file) + if source is None: + continue + if io.contains_unittest_testcase(source): + return True + + return False + + @cached_property + def typescript_manifest_frameworks(self) -> set[str]: + return io.manifest_typescript_frameworks(self.package_json) + + @cached_property + def jest_configs(self) -> list[Path]: + return self.file_index.files_matching( + "jest.config.js", + "jest.config.ts", + "jest.config.mjs", + "jest.config.cjs", + "jest.config.json", + ) + + @cached_property + def vite_configs(self) -> list[Path]: + return self.file_index.files_matching( + "vite.config.js", + "vite.config.ts", + "vite.config.mjs", + "vite.config.cjs", + ) + + @cached_property + def vitest_tests(self) -> list[Path]: + return self.file_index.files_matching( + "*.test.ts", + "*.test.tsx", + "*.test.js", + "*.test.jsx", + ) + + +def _default_policies() -> tuple[LanguagePolicy, ...]: + return cast( + tuple[LanguagePolicy, ...], + (PythonFrameworkPolicy(), TypeScriptFrameworkPolicy()), + ) + + +__all__ = ["DetectionContext", "FrameworkDetector"] diff --git a/src/specleft/discovery/frameworks/__init__.py b/src/specleft/discovery/frameworks/__init__.py new file mode 100644 index 0000000..7672edd --- /dev/null +++ b/src/specleft/discovery/frameworks/__init__.py @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: Apache-2.0 +# Copyright (c) 2026 SpecLeft Contributors + +"""Framework detection building blocks.""" + +from specleft.discovery.frameworks.types import ( + FrameworkRule, + FrameworkSignals, + LanguagePolicy, +) + +__all__ = ["FrameworkSignals", "FrameworkRule", "LanguagePolicy"] diff --git a/src/specleft/discovery/frameworks/io.py b/src/specleft/discovery/frameworks/io.py new file mode 100644 index 0000000..748ceb3 --- /dev/null +++ b/src/specleft/discovery/frameworks/io.py @@ -0,0 +1,153 @@ +# 
SPDX-License-Identifier: Apache-2.0 +# Copyright (c) 2026 SpecLeft Contributors + +"""IO and parsing helpers for framework detection.""" + +from __future__ import annotations + +import json +import re +from pathlib import Path +from typing import Any + +_PYTEST_REQUIREMENT_PATTERN = re.compile(r"^\s*pytest(?:\b|[<>=!~])") +_UNITTEST_CLASS_PATTERN = re.compile( + r"class\s+\w+\s*\(\s*(?:\w+\.)?TestCase\s*\)\s*:", +) + + +def is_pytest_requirement_line(line: str) -> bool: + """Return whether a requirements line references pytest.""" + return _PYTEST_REQUIREMENT_PATTERN.match(line) is not None + + +def contains_unittest_testcase(source: str) -> bool: + """Return whether source contains an explicit unittest TestCase class.""" + if "TestCase" not in source: + return False + if "unittest.TestCase" in source: + return True + return _UNITTEST_CLASS_PATTERN.search(source) is not None + + +def manifest_signals_pytest(pyproject: dict[str, Any]) -> bool: + """Return whether pyproject manifest indicates pytest usage.""" + tool = pyproject.get("tool") + if isinstance(tool, dict): + pytest_tool = tool.get("pytest") + if isinstance(pytest_tool, dict) and isinstance( + pytest_tool.get("ini_options"), + dict, + ): + return True + + build_system = pyproject.get("build-system") + if not isinstance(build_system, dict): + return False + + requires = build_system.get("requires") + if not isinstance(requires, list): + return False + + return any(isinstance(dep, str) and "pytest" in dep.lower() for dep in requires) + + +def manifest_typescript_frameworks(package_json: dict[str, Any]) -> set[str]: + """Return TS/JS frameworks inferred from package.json dependencies.""" + framework_names: set[str] = set() + dependency_groups = ( + package_json.get("dependencies"), + package_json.get("devDependencies"), + ) + + for group in dependency_groups: + if not isinstance(group, dict): + continue + + lower_keys = {str(key).lower() for key in group} + if {"jest", "@jest/core"} & lower_keys: + 
framework_names.add("jest") + if {"vitest", "@vitest/ui"} & lower_keys: + framework_names.add("vitest") + + return framework_names + + +def read_text(path: Path) -> str | None: + """Read UTF-8 text from path, returning None on file/decode errors.""" + try: + return path.read_text(encoding="utf-8") + except OSError: + return None + except UnicodeDecodeError: + return None + + +def load_pyproject(root: Path) -> dict[str, Any]: + """Load pyproject TOML as dict, returning {} when unavailable/invalid.""" + path = root / "pyproject.toml" + if not path.is_file(): + return {} + + raw = read_text(path) + if raw is None: + return {} + + toml_module = resolve_toml_loader() + if toml_module is None: + return {} + + try: + parsed = toml_module.loads(raw) + except Exception: + return {} + + if not isinstance(parsed, dict): + return {} + + return parsed + + +def load_package_json(root: Path) -> dict[str, Any]: + """Load package.json as dict, returning {} when unavailable/invalid.""" + path = root / "package.json" + if not path.is_file(): + return {} + + raw = read_text(path) + if raw is None: + return {} + + try: + parsed = json.loads(raw) + except json.JSONDecodeError: + return {} + + if not isinstance(parsed, dict): + return {} + + return parsed + + +def is_project_file(path: Path) -> bool: + """Return whether an indexed path should be considered project-owned.""" + if not path.parts: + return False + if path.parts[0].startswith("."): + return False + return "site-packages" not in path.parts + + +def resolve_toml_loader() -> Any | None: + """Return TOML module object (`tomllib` or fallback `tomli`).""" + try: + import tomllib + + return tomllib + except ModuleNotFoundError: + try: + import tomli # type: ignore[import-not-found] + + return tomli + except ModuleNotFoundError: + return None diff --git a/src/specleft/discovery/frameworks/python/__init__.py b/src/specleft/discovery/frameworks/python/__init__.py new file mode 100644 index 0000000..55b26cf --- /dev/null +++ 
b/src/specleft/discovery/frameworks/python/__init__.py @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: Apache-2.0 +# Copyright (c) 2026 SpecLeft Contributors + +"""Python framework detection rules and policies.""" + +from specleft.discovery.frameworks.python.policies import PythonFrameworkPolicy +from specleft.discovery.frameworks.python.rules import PytestRule, UnittestRule + +__all__ = ["PythonFrameworkPolicy", "PytestRule", "UnittestRule"] diff --git a/src/specleft/discovery/frameworks/python/policies.py b/src/specleft/discovery/frameworks/python/policies.py new file mode 100644 index 0000000..b0511b4 --- /dev/null +++ b/src/specleft/discovery/frameworks/python/policies.py @@ -0,0 +1,45 @@ +# SPDX-License-Identifier: Apache-2.0 +# Copyright (c) 2026 SpecLeft Contributors + +"""Python framework policy resolution.""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import TYPE_CHECKING, cast + +from specleft.discovery.frameworks.python.rules import PytestRule, UnittestRule +from specleft.discovery.frameworks.types import FrameworkRule +from specleft.discovery.models import SupportedLanguage + +if TYPE_CHECKING: + from specleft.discovery.framework_detector import DetectionContext + + +@dataclass(frozen=True) +class PythonFrameworkPolicy: + """Resolve Python frameworks with explicit ambiguity handling.""" + + language: SupportedLanguage = SupportedLanguage.PYTHON + rules: tuple[FrameworkRule, ...] = cast( + tuple[FrameworkRule, ...], + (PytestRule(), UnittestRule()), + ) + + def detect(self, ctx: DetectionContext) -> list[str]: + signals = {rule.name: rule.signals(ctx) for rule in self.rules} + pytest_signals = signals.get("pytest") + + # Explicit pytest config with no test files is ambiguous. 
+ if pytest_signals and pytest_signals.manifest and not ctx.python_test_files: + return ["unknown"] + + frameworks: list[str] = [] + for rule in self.rules: + evidence = signals[rule.name] + if (evidence.manifest or evidence.pattern or evidence.confirmed) and ( + evidence.confirmed + ): + frameworks.append(rule.name) + + return frameworks diff --git a/src/specleft/discovery/frameworks/python/rules.py b/src/specleft/discovery/frameworks/python/rules.py new file mode 100644 index 0000000..0f2e5d9 --- /dev/null +++ b/src/specleft/discovery/frameworks/python/rules.py @@ -0,0 +1,48 @@ +# SPDX-License-Identifier: Apache-2.0 +# Copyright (c) 2026 SpecLeft Contributors + +"""Python framework detection rules.""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import TYPE_CHECKING + +from specleft.discovery.frameworks import io +from specleft.discovery.frameworks.types import FrameworkSignals +from specleft.discovery.models import SupportedLanguage + +if TYPE_CHECKING: + from specleft.discovery.framework_detector import DetectionContext + + +@dataclass(frozen=True) +class PytestRule: + """Detect pytest using manifest + file pattern confirmation.""" + + name: str = "pytest" + language: SupportedLanguage = SupportedLanguage.PYTHON + + def signals(self, ctx: DetectionContext) -> FrameworkSignals: + manifest = io.manifest_signals_pytest(ctx.pyproject) or any( + io.is_pytest_requirement_line(line) for line in ctx.requirements_lines + ) + pattern = bool(ctx.python_test_files or ctx.conftest_files) + return FrameworkSignals(manifest=manifest, pattern=pattern, confirmed=pattern) + + +@dataclass(frozen=True) +class UnittestRule: + """Detect unittest using requirement hints + class confirmation.""" + + name: str = "unittest" + language: SupportedLanguage = SupportedLanguage.PYTHON + + def signals(self, ctx: DetectionContext) -> FrameworkSignals: + manifest = any("unittest" in line for line in ctx.requirements_lines) + confirmed = 
ctx.has_unittest_testcases + return FrameworkSignals( + manifest=manifest, + pattern=confirmed, + confirmed=confirmed, + ) diff --git a/src/specleft/discovery/frameworks/types.py b/src/specleft/discovery/frameworks/types.py new file mode 100644 index 0000000..d198e7e --- /dev/null +++ b/src/specleft/discovery/frameworks/types.py @@ -0,0 +1,40 @@ +# SPDX-License-Identifier: Apache-2.0 +# Copyright (c) 2026 SpecLeft Contributors + +"""Shared types for framework detection rules and policies.""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import TYPE_CHECKING, Protocol + +from specleft.discovery.models import SupportedLanguage + +if TYPE_CHECKING: + from specleft.discovery.framework_detector import DetectionContext + + +@dataclass(frozen=True) +class FrameworkSignals: + """Signals collected for one framework candidate.""" + + manifest: bool = False + pattern: bool = False + confirmed: bool = False + + +class FrameworkRule(Protocol): + """Rule contract for one framework within one language.""" + + name: str + language: SupportedLanguage + + def signals(self, ctx: DetectionContext) -> FrameworkSignals: ... + + +class LanguagePolicy(Protocol): + """Policy contract for resolving frameworks of a language.""" + + language: SupportedLanguage + + def detect(self, ctx: DetectionContext) -> list[str]: ... 
diff --git a/src/specleft/discovery/frameworks/typescript/__init__.py b/src/specleft/discovery/frameworks/typescript/__init__.py new file mode 100644 index 0000000..65ef29d --- /dev/null +++ b/src/specleft/discovery/frameworks/typescript/__init__.py @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: Apache-2.0 +# Copyright (c) 2026 SpecLeft Contributors + +"""TypeScript framework detection rules and policies.""" + +from specleft.discovery.frameworks.typescript.policies import TypeScriptFrameworkPolicy +from specleft.discovery.frameworks.typescript.rules import JestRule, VitestRule + +__all__ = ["TypeScriptFrameworkPolicy", "JestRule", "VitestRule"] diff --git a/src/specleft/discovery/frameworks/typescript/policies.py b/src/specleft/discovery/frameworks/typescript/policies.py new file mode 100644 index 0000000..72f476e --- /dev/null +++ b/src/specleft/discovery/frameworks/typescript/policies.py @@ -0,0 +1,36 @@ +# SPDX-License-Identifier: Apache-2.0 +# Copyright (c) 2026 SpecLeft Contributors + +"""TypeScript/JavaScript framework policy resolution.""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import TYPE_CHECKING, cast + +from specleft.discovery.frameworks.types import FrameworkRule +from specleft.discovery.frameworks.typescript.rules import JestRule, VitestRule +from specleft.discovery.models import SupportedLanguage + +if TYPE_CHECKING: + from specleft.discovery.framework_detector import DetectionContext + + +@dataclass(frozen=True) +class TypeScriptFrameworkPolicy: + """Resolve TS/JS frameworks where file patterns are source of truth.""" + + language: SupportedLanguage = SupportedLanguage.TYPESCRIPT + rules: tuple[FrameworkRule, ...] 
= cast( + tuple[FrameworkRule, ...], + (JestRule(), VitestRule()), + ) + + def detect(self, ctx: DetectionContext) -> list[str]: + signals = {rule.name: rule.signals(ctx) for rule in self.rules} + + pattern_hits = [rule.name for rule in self.rules if signals[rule.name].pattern] + if pattern_hits: + return pattern_hits + + return [rule.name for rule in self.rules if signals[rule.name].manifest] diff --git a/src/specleft/discovery/frameworks/typescript/rules.py b/src/specleft/discovery/frameworks/typescript/rules.py new file mode 100644 index 0000000..f4fd88c --- /dev/null +++ b/src/specleft/discovery/frameworks/typescript/rules.py @@ -0,0 +1,41 @@ +# SPDX-License-Identifier: Apache-2.0 +# Copyright (c) 2026 SpecLeft Contributors + +"""TypeScript/JavaScript framework detection rules.""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import TYPE_CHECKING + +from specleft.discovery.frameworks.types import FrameworkSignals +from specleft.discovery.models import SupportedLanguage + +if TYPE_CHECKING: + from specleft.discovery.framework_detector import DetectionContext + + +@dataclass(frozen=True) +class JestRule: + """Detect Jest via manifest and config-file patterns.""" + + name: str = "jest" + language: SupportedLanguage = SupportedLanguage.TYPESCRIPT + + def signals(self, ctx: DetectionContext) -> FrameworkSignals: + manifest = "jest" in ctx.typescript_manifest_frameworks + pattern = bool(ctx.jest_configs) + return FrameworkSignals(manifest=manifest, pattern=pattern, confirmed=pattern) + + +@dataclass(frozen=True) +class VitestRule: + """Detect Vitest via manifest and vite+test patterns.""" + + name: str = "vitest" + language: SupportedLanguage = SupportedLanguage.TYPESCRIPT + + def signals(self, ctx: DetectionContext) -> FrameworkSignals: + manifest = "vitest" in ctx.typescript_manifest_frameworks + pattern = bool(ctx.vite_configs and ctx.vitest_tests) + return FrameworkSignals(manifest=manifest, pattern=pattern, 
confirmed=pattern) diff --git a/src/specleft/discovery/pipeline.py b/src/specleft/discovery/pipeline.py new file mode 100644 index 0000000..ed4d9fe --- /dev/null +++ b/src/specleft/discovery/pipeline.py @@ -0,0 +1,245 @@ +# SPDX-License-Identifier: Apache-2.0 +# Copyright (c) 2026 SpecLeft Contributors + +"""Discovery pipeline orchestration.""" + +from __future__ import annotations + +import time +import uuid +from pathlib import Path +from typing import Protocol + +from specleft.discovery.config import DiscoveryConfig +from specleft.discovery.context import MinerContext +from specleft.discovery.file_index import FileIndex +from specleft.discovery.framework_detector import FrameworkDetector +from specleft.discovery.language_detect import detect_project_languages +from specleft.discovery.language_registry import LanguageRegistry +from specleft.discovery.models import ( + DiscoveryReport, + DiscoveredItem, + DocstringMeta, + ItemKind, + MinerErrorKind, + MinerResult, + SupportedLanguage, +) + + +class BaseMiner(Protocol): + """Protocol for pipeline miner implementations.""" + + miner_id: uuid.UUID + name: str + languages: frozenset[SupportedLanguage] + + def mine(self, ctx: MinerContext) -> MinerResult: ... 
class DiscoveryPipeline:
    """Run registered miners with shared context and failure isolation.

    A single ``MinerContext`` is built per :meth:`run` and handed to every
    miner, so language detection and framework detection happen once per run.
    A miner that raises is reported through its own ``MinerResult`` and never
    aborts the run.
    """

    def __init__(
        self,
        root: Path,
        languages: list[SupportedLanguage] | None = None,
        *,
        config: DiscoveryConfig | None = None,
        registry: LanguageRegistry | None = None,
        file_index: FileIndex | None = None,
        framework_detector: FrameworkDetector | None = None,
    ) -> None:
        """Wire the pipeline, building defaults for collaborators not supplied.

        Args:
            root: Project root; stored as a resolved (absolute) path.
            languages: Explicit language scope. ``None`` means "detect from the
                file index when the pipeline runs". Duplicates are dropped and
                first-seen order is kept.
            config: Discovery settings; defaults to ``DiscoveryConfig.default()``.
            registry: Language registry passed on to miners via the context.
            file_index: Pre-built index. When omitted, one is built for ``root``
                using ``config.exclude_dirs``.
            framework_detector: Detector invoked once per run.
        """
        self._root = root.resolve()
        self._config = config if config is not None else DiscoveryConfig.default()
        self._registry = registry if registry is not None else LanguageRegistry()
        self._file_index = (
            file_index
            if file_index is not None
            else FileIndex(self._root, exclude_dirs=self._config.exclude_dirs)
        )
        self._framework_detector = (
            framework_detector
            if framework_detector is not None
            else FrameworkDetector()
        )
        # Insertion-ordered, so miners later run in registration order.
        self._miners: dict[uuid.UUID, BaseMiner] = {}
        self._languages = _normalize_languages(languages)

    def register(self, miner: BaseMiner) -> None:
        """Register a miner by unique UUID.

        Raises:
            ValueError: If a miner with the same ``miner_id`` is already
                registered.
        """
        if miner.miner_id in self._miners:
            raise ValueError(f"Miner UUID already registered: {miner.miner_id}")
        self._miners[miner.miner_id] = miner

    def run(self) -> DiscoveryReport:
        """Execute all eligible miners and aggregate their results.

        Languages come from the constructor when given, otherwise they are
        detected from the file index. A miner is eligible when it declares no
        languages (language-agnostic) or shares at least one language with the
        detected set; ineligible miners are skipped without a result entry.

        Returns:
            A report holding the per-miner results, the error messages of the
            miners that failed, the item count of the miners that succeeded,
            and the wall-clock duration of the run in milliseconds.
        """
        started = time.perf_counter()

        detected_languages = self._languages
        if detected_languages is None:
            detected_languages = detect_project_languages(self._file_index)

        # Built once and shared: every miner receives this same object.
        context = MinerContext(
            root=self._root,
            registry=self._registry,
            file_index=self._file_index,
            frameworks=self._framework_detector.detect(self._root, self._file_index),
            config=self._config,
        )

        language_set = set(detected_languages)
        miner_results: list[MinerResult] = []
        for miner in self._miners.values():
            # An empty ``languages`` set marks a language-agnostic miner.
            if miner.languages and miner.languages.isdisjoint(language_set):
                continue
            miner_results.append(_run_miner(miner, context))

        errors = [
            result.error for result in miner_results if result.error is not None
        ]
        # Items attached to a failed result are not counted.
        total_items = sum(
            len(result.items) for result in miner_results if result.error is None
        )

        return DiscoveryReport(
            project_root=self._root,
            languages_detected=detected_languages,
            miner_results=miner_results,
            total_items=total_items,
            errors=errors,
            duration_ms=_elapsed_ms(started),
        )


def build_default_pipeline(
    root: Path,
    languages: list[SupportedLanguage] | None = None,
    config: DiscoveryConfig | None = None,
) -> DiscoveryPipeline:
    """Build pipeline with default config and built-in miners registered.

    Args:
        root: Project root to analyse.
        languages: Optional explicit language scope, forwarded to the pipeline.
        config: Discovery settings; when ``None`` they are loaded with
            ``DiscoveryConfig.from_pyproject(root)``.
    """
    if config is None:
        config = DiscoveryConfig.from_pyproject(root)
    pipeline = DiscoveryPipeline(root, languages=languages, config=config)

    for miner in _default_miners():
        pipeline.register(miner)

    return pipeline


def _run_miner(miner: BaseMiner, context: MinerContext) -> MinerResult:
    """Run one miner and always hand back a result, even when it raises."""
    miner_started = time.perf_counter()
    try:
        raw_result = miner.mine(context)
        # Identity and timing are stamped here, overriding whatever the miner
        # put into its own result.
        return raw_result.model_copy(
            update={
                "miner_id": miner.miner_id,
                "miner_name": miner.name,
                "duration_ms": _elapsed_ms(miner_started),
            }
        )
    except Exception as exc:  # isolation boundary: one miner must not stop the run
        return MinerResult(
            miner_id=miner.miner_id,
            miner_name=miner.name,
            items=[],
            error=_format_miner_error(miner.name, exc),
            error_kind=_error_kind_for(exc),
            duration_ms=_elapsed_ms(miner_started),
        )


def _format_miner_error(miner_name: str, exc: BaseException) -> str:
    """Return ``"<miner>: <message>"`` for a failed miner.

    Exceptions raised without a message stringify to ``""``; the exception
    class name is used instead so the report never ends in a bare colon.
    """
    detail = str(exc) or type(exc).__name__
    return f"{miner_name}: {detail}"


def _elapsed_ms(started: float) -> int:
    """Return whole milliseconds since ``started`` (a ``perf_counter`` value), never negative."""
    return max(0, int((time.perf_counter() - started) * 1000))


def _error_kind_for(exc: Exception) -> MinerErrorKind:
    """Classify an exception raised by a miner into a ``MinerErrorKind``."""
    # Imported here because it is only needed on the error path.
    import subprocess

    if isinstance(exc, PermissionError):
        return MinerErrorKind.PERMISSION
    # subprocess.TimeoutExpired derives from SubprocessError, not TimeoutError,
    # so it has to be listed explicitly to be reported as a timeout.
    if isinstance(exc, (TimeoutError, subprocess.TimeoutExpired)):
        return MinerErrorKind.TIMEOUT
    return MinerErrorKind.UNKNOWN


def _normalize_languages(
    languages: list[SupportedLanguage] | None,
) -> list[SupportedLanguage] | None:
    """Drop duplicate languages, keeping first-seen order; ``None`` passes through."""
    if languages is None:
        return None
    # dict keys are unique and insertion-ordered.
    return list(dict.fromkeys(languages))


def _readme_headline(data: bytes) -> str | None:
    """Return the first non-blank line of README bytes, stripped.

    Returns ``"Project overview"`` when there is no non-blank line and ``None``
    when the bytes are not valid UTF-8. ``utf-8-sig`` is used so a leading
    byte-order mark is removed instead of ending up in the headline.
    """
    try:
        text = data.decode("utf-8-sig")
    except UnicodeDecodeError:
        return None
    return next(
        (line.strip() for line in text.splitlines() if line.strip()),
        "Project overview",
    )


class _ReadmeMiner:
    """Minimal built-in miner used as default pipeline baseline.

    Emits at most one ``DOCSTRING`` item (confidence 0.3) whose text is the
    first non-blank line of the first readable README in the project root.
    """

    miner_id = uuid.UUID("2f87e7a5-a362-4adc-a005-84457b6abc04")
    name = "readme_overview"
    # Empty set: language-agnostic, so the pipeline's language filter never skips it.
    languages: frozenset[SupportedLanguage] = frozenset()

    # Tried in this order; the first candidate that can be read and decoded wins.
    _CANDIDATES = (Path("README.md"), Path("README.rst"), Path("README.txt"))

    def mine(self, ctx: MinerContext) -> MinerResult:
        """Return a result with zero or one overview item for ``ctx.root``.

        Unreadable or non-UTF-8 candidates are skipped (best effort).
        ``duration_ms`` is a placeholder; the pipeline stamps the measured value.
        """
        items: list[DiscoveredItem] = []
        for rel_path in self._CANDIDATES:
            abs_path = ctx.root / rel_path
            if not abs_path.is_file():
                continue

            try:
                headline = _readme_headline(abs_path.read_bytes())
            except OSError:
                continue
            if headline is None:
                continue

            items.append(
                DiscoveredItem(
                    kind=ItemKind.DOCSTRING,
                    name="project_overview",
                    file_path=rel_path,
                    line_number=1,
                    language=None,
                    raw_text=headline,
                    metadata=DocstringMeta(
                        target_kind="module",
                        target_name="README",
                        text=headline,
                    ).model_dump(),
                    confidence=0.3,
                )
            )
            break

        return MinerResult(
            miner_id=self.miner_id,
            miner_name=self.name,
            items=items,
            duration_ms=0,
        )


def _default_miners() -> list[BaseMiner]:
    """Return fresh instances of the miners registered by ``build_default_pipeline``."""
    return [_ReadmeMiner()]
# tests/discovery/test_config.py


def _write_pyproject(root: Path, toml_text: str) -> None:
    """Write ``toml_text`` (outer whitespace stripped) to ``root/pyproject.toml``.

    The encoding is pinned because TOML documents are UTF-8 by specification,
    whereas ``Path.write_text`` would otherwise use the locale's encoding.
    """
    (root / "pyproject.toml").write_text(toml_text.strip(), encoding="utf-8")


def _assert_defaults(config: DiscoveryConfig) -> None:
    """Assert that every field of ``config`` carries its default value."""
    assert config.exclude_dirs == DEFAULT_EXCLUDE_DIRS
    assert config.source_dirs == ("src", "lib", "app", "core")
    assert config.max_git_commits == 200


def test_discovery_config_from_pyproject_loads_custom_values(tmp_path: Path) -> None:
    """Values under ``[tool.specleft.discovery]`` override the defaults."""
    _write_pyproject(
        tmp_path,
        """
[tool.specleft.discovery]
exclude_dirs = [".git", "node_modules", "dist"]
source_dirs = ["src", "lib", "service"]
max_git_commits = 50
""",
    )

    config = DiscoveryConfig.from_pyproject(tmp_path)

    assert config.exclude_dirs == frozenset({".git", "node_modules", "dist"})
    assert config.source_dirs == ("src", "lib", "service")
    assert config.max_git_commits == 50


def test_discovery_config_from_pyproject_uses_defaults_when_section_missing(
    tmp_path: Path,
) -> None:
    """A pyproject without the discovery section yields the default config."""
    _write_pyproject(
        tmp_path,
        """
[tool.black]
line-length = 88
""",
    )

    _assert_defaults(DiscoveryConfig.from_pyproject(tmp_path))


def test_discovery_config_default_returns_default_values() -> None:
    """``DiscoveryConfig.default()`` needs no file and returns the defaults."""
    _assert_defaults(DiscoveryConfig.default())


def test_discovery_config_invalid_values_fall_back_to_defaults(tmp_path: Path) -> None:
    """Empty lists and a negative commit limit are rejected in favour of defaults."""
    _write_pyproject(
        tmp_path,
        """
[tool.specleft.discovery]
exclude_dirs = []
source_dirs = []
max_git_commits = -1
""",
    )

    _assert_defaults(DiscoveryConfig.from_pyproject(tmp_path))
def _write(path: Path, text: str) -> None:
    """Write ``text`` to ``path`` as UTF-8, independent of the locale encoding."""
    path.write_text(text, encoding="utf-8")


# tests/discovery/test_framework_detector.py


def test_framework_detector_detects_pytest_from_pyproject_and_patterns(
    tmp_path: Path,
) -> None:
    """A pytest section in pyproject plus a ``test_*.py`` file yields pytest."""
    _write(
        tmp_path / "pyproject.toml",
        """
[tool.pytest.ini_options]
addopts = ["-q"]
""".strip(),
    )
    tests_dir = tmp_path / "tests"
    tests_dir.mkdir()
    _write(tests_dir / "test_sample.py", "def test_ok() -> None:\n assert True\n")

    index = FileIndex(tmp_path)
    frameworks = FrameworkDetector().detect(tmp_path, index)

    assert frameworks == {SupportedLanguage.PYTHON: ["pytest"]}


def test_framework_detector_returns_unknown_for_pytest_manifest_without_tests(
    tmp_path: Path,
) -> None:
    """pytest configuration without any matching test file is reported as unknown."""
    _write(
        tmp_path / "pyproject.toml",
        """
[tool.pytest.ini_options]
addopts = ["-q"]
""".strip(),
    )
    (tmp_path / "src").mkdir()
    _write(tmp_path / "src" / "module.py", "def value() -> int:\n return 1\n")

    index = FileIndex(tmp_path)
    frameworks = FrameworkDetector().detect(tmp_path, index)

    assert frameworks == {SupportedLanguage.PYTHON: ["unknown"]}


def test_framework_detector_detects_jest_and_vitest_by_patterns(
    tmp_path: Path,
) -> None:
    """Both JS test frameworks are reported for a TypeScript project."""
    _write(
        tmp_path / "package.json",
        '{"devDependencies": {"jest": "^29.0.0", "vitest": "^1.0.0"}}',
    )
    _write(tmp_path / "jest.config.ts", "export default {}\n")
    _write(tmp_path / "vite.config.ts", "export default {}\n")
    (tmp_path / "src").mkdir()
    _write(tmp_path / "src" / "math.test.ts", "describe('x', () => {})\n")

    index = FileIndex(tmp_path)
    frameworks = FrameworkDetector().detect(tmp_path, index)

    assert frameworks == {SupportedLanguage.TYPESCRIPT: ["jest", "vitest"]}


def test_framework_detector_detects_specleft_repo_pytest_only() -> None:
    """Running against this repository (two levels above the test dir) finds pytest only."""
    root = Path(__file__).resolve().parents[2]
    index = FileIndex(root)

    frameworks = FrameworkDetector().detect(root, index)

    assert frameworks == {SupportedLanguage.PYTHON: ["pytest"]}


def test_framework_detector_supports_injected_policies(tmp_path: Path) -> None:
    """Policies passed to the detector are used for detection."""

    class _CustomPolicy:
        language = SupportedLanguage.PYTHON

        def detect(self, ctx: object) -> list[str]:
            _ = ctx
            return ["custom-framework"]

    index = FileIndex(tmp_path)
    detector = FrameworkDetector(policies=(_CustomPolicy(),))

    frameworks = detector.detect(tmp_path, index)

    assert frameworks == {SupportedLanguage.PYTHON: ["custom-framework"]}


# tests/discovery/test_pipeline.py


def _doc_item(name: str) -> DiscoveredItem:
    """Build a README docstring item whose name and text are both ``name``."""
    return DiscoveredItem(
        kind=ItemKind.DOCSTRING,
        name=name,
        file_path=Path("README.md"),
        line_number=1,
        language=None,
        raw_text=name,
        metadata=DocstringMeta(
            target_kind="module",
            target_name="README",
            text=name,
        ).model_dump(),
        confidence=0.5,
    )


class _StaticMiner:
    """Miner that returns a fixed item list under a deliberately wrong identity."""

    def __init__(
        self,
        miner_id: uuid.UUID,
        name: str,
        languages: frozenset[SupportedLanguage],
        items: list[DiscoveredItem],
    ) -> None:
        self.miner_id = miner_id
        self.name = name
        self.languages = languages
        self._items = items

    def mine(self, ctx: MinerContext) -> MinerResult:
        _ = ctx
        # The id and name here differ from the miner's own on purpose, so tests
        # can check that the pipeline replaces them.
        return MinerResult(
            miner_id=uuid.UUID("11111111-1111-1111-1111-111111111111"),
            miner_name="ignored",
            items=list(self._items),
            duration_ms=0,
        )


class _FailingMiner:
    """Miner whose ``mine`` always raises ``RuntimeError`` with a given message."""

    def __init__(
        self,
        miner_id: uuid.UUID,
        name: str,
        languages: frozenset[SupportedLanguage],
        message: str,
    ) -> None:
        self.miner_id = miner_id
        self.name = name
        self.languages = languages
        self._message = message

    def mine(self, ctx: MinerContext) -> MinerResult:
        _ = ctx
        raise RuntimeError(self._message)


class _RecordingMiner:
    """Language-agnostic miner that appends every context it receives to a shared list."""

    def __init__(
        self, miner_id: uuid.UUID, name: str, contexts: list[MinerContext]
    ) -> None:
        self.miner_id = miner_id
        self.name = name
        self.languages: frozenset[SupportedLanguage] = frozenset()
        self._contexts = contexts

    def mine(self, ctx: MinerContext) -> MinerResult:
        self._contexts.append(ctx)
        return MinerResult(
            miner_id=self.miner_id,
            miner_name=self.name,
            items=[_doc_item(self.name)],
            duration_ms=0,
        )


class _FrameworkDetectorSpy:
    """Detector stand-in that counts calls and returns a fixed pytest mapping."""

    def __init__(self) -> None:
        self.calls = 0

    def detect(
        self,
        root: Path,
        file_index: object,
    ) -> dict[SupportedLanguage, list[str]]:
        _ = file_index
        self.calls += 1
        # The pipeline is expected to pass its resolved root.
        assert root.is_absolute()
        return {SupportedLanguage.PYTHON: ["pytest"]}


def test_register_raises_for_duplicate_miner_uuid(tmp_path: Path) -> None:
    """Registering a second miner with an already-used UUID is rejected."""
    pipeline = DiscoveryPipeline(tmp_path)

    duplicate_id = uuid.UUID("cccccccc-cccc-cccc-cccc-cccccccccccc")
    first = _StaticMiner(duplicate_id, "first", frozenset(), [_doc_item("one")])
    second = _StaticMiner(duplicate_id, "second", frozenset(), [_doc_item("two")])

    pipeline.register(first)
    with pytest.raises(ValueError, match="already registered"):
        pipeline.register(second)


def test_pipeline_calls_framework_detector_once_and_shares_context(
    tmp_path: Path,
) -> None:
    """One detector call per run, and every miner sees the same context object."""
    _write(tmp_path / "README.md", "# Demo\n")

    contexts: list[MinerContext] = []
    detector = _FrameworkDetectorSpy()
    pipeline = DiscoveryPipeline(tmp_path, framework_detector=detector)
    pipeline.register(
        _RecordingMiner(
            uuid.UUID("aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"),
            "recording_a",
            contexts,
        )
    )
    pipeline.register(
        _RecordingMiner(
            uuid.UUID("bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb"),
            "recording_b",
            contexts,
        )
    )

    report = pipeline.run()

    assert detector.calls == 1
    assert len(contexts) == 2
    assert contexts[0] is contexts[1]
    assert contexts[0].frameworks == {SupportedLanguage.PYTHON: ["pytest"]}
    assert report.total_items == 2


def test_pipeline_captures_miner_exception_and_continues(tmp_path: Path) -> None:
    """A raising miner becomes an error result while the other miner still counts."""
    pipeline = DiscoveryPipeline(tmp_path, languages=[SupportedLanguage.PYTHON])
    pipeline.register(
        _StaticMiner(
            uuid.UUID("11111111-2222-3333-4444-555555555555"),
            "ok_miner",
            frozenset({SupportedLanguage.PYTHON}),
            [_doc_item("one"), _doc_item("two")],
        )
    )
    pipeline.register(
        _FailingMiner(
            uuid.UUID("66666666-7777-8888-9999-000000000000"),
            "bad_miner",
            frozenset({SupportedLanguage.PYTHON}),
            "boom",
        )
    )

    report = pipeline.run()

    assert len(report.miner_results) == 2
    assert report.total_items == 2

    error_result = next(r for r in report.miner_results if r.miner_name == "bad_miner")
    assert error_result.error == "bad_miner: boom"
    assert error_result.error_kind == MinerErrorKind.UNKNOWN
    assert report.errors == ["bad_miner: boom"]


def test_pipeline_language_filter_skips_non_overlapping_miners(tmp_path: Path) -> None:
    """Only miners matching the language scope, plus agnostic ones, produce results."""
    pipeline = DiscoveryPipeline(tmp_path, languages=[SupportedLanguage.PYTHON])

    pipeline.register(
        _StaticMiner(
            uuid.UUID("10000000-0000-0000-0000-000000000000"),
            "python_miner",
            frozenset({SupportedLanguage.PYTHON}),
            [_doc_item("python")],
        )
    )
    pipeline.register(
        _StaticMiner(
            uuid.UUID("20000000-0000-0000-0000-000000000000"),
            "agnostic_miner",
            frozenset(),
            [_doc_item("agnostic")],
        )
    )
    pipeline.register(
        _StaticMiner(
            uuid.UUID("30000000-0000-0000-0000-000000000000"),
            "typescript_miner",
            frozenset({SupportedLanguage.TYPESCRIPT}),
            [_doc_item("typescript")],
        )
    )

    report = pipeline.run()

    names = [result.miner_name for result in report.miner_results]
    assert names == ["python_miner", "agnostic_miner"]
    assert report.total_items == 2


def test_pipeline_sets_result_miner_id_and_name_from_miner(tmp_path: Path) -> None:
    """The id and name in a result come from the miner, not from what it returned."""
    expected_id = uuid.UUID("12345678-1234-1234-1234-1234567890ab")
    expected_name = "identity_miner"

    pipeline = DiscoveryPipeline(tmp_path, languages=[SupportedLanguage.PYTHON])
    pipeline.register(
        _StaticMiner(
            expected_id,
            expected_name,
            frozenset({SupportedLanguage.PYTHON}),
            [_doc_item("x")],
        )
    )

    report = pipeline.run()

    result = report.miner_results[0]
    assert result.miner_id == expected_id
    assert result.miner_name == expected_name


def test_build_default_pipeline_returns_report_even_if_all_miners_error(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A run in which every miner fails still produces a report with zero items."""
    _write(tmp_path / "pyproject.toml", "[tool.specleft]\n")

    miners = [
        _FailingMiner(
            uuid.UUID("aaaaaaaa-1111-1111-1111-111111111111"),
            "fail_one",
            frozenset(),
            "first",
        ),
        _FailingMiner(
            uuid.UUID("bbbbbbbb-2222-2222-2222-222222222222"),
            "fail_two",
            frozenset(),
            "second",
        ),
    ]
    # Swap the built-in miner list for the two failing miners.
    monkeypatch.setattr(pipeline_module, "_default_miners", lambda: miners)

    pipeline = build_default_pipeline(tmp_path)
    report = pipeline.run()

    assert isinstance(report, DiscoveryReport)
    assert len(report.miner_results) == 2
    assert all(result.error is not None for result in report.miner_results)
    assert report.total_items == 0


def test_build_default_pipeline_uses_config_from_pyproject(tmp_path: Path) -> None:
    """With ``config=None`` the settings are read from the project's pyproject."""
    _write(
        tmp_path / "pyproject.toml",
        """
[tool.specleft.discovery]
exclude_dirs = [".git", "dist"]
source_dirs = ["src", "service"]
max_git_commits = 42
""".strip(),
    )

    pipeline = build_default_pipeline(tmp_path, config=None)

    assert pipeline._config == DiscoveryConfig(  # pyright: ignore[reportPrivateUsage]
        exclude_dirs=frozenset({".git", "dist"}),
        source_dirs=("src", "service"),
        max_git_commits=42,
    )


def test_build_default_pipeline_integration_on_specleft_repo_has_items() -> None:
    """The default pipeline finds at least one item in this repository."""
    root = Path(__file__).resolve().parents[2]

    report = build_default_pipeline(root).run()

    assert report.total_items > 0