From 044c48e860df75018b2c644c5c04b2407753145a Mon Sep 17 00:00:00 2001 From: Avi Fenesh Date: Sun, 22 Mar 2026 23:49:32 +0200 Subject: [PATCH 1/6] chore: update plugin references from git-map/repo-map to repo-intel Both git-map and repo-map plugins are being consolidated into repo-intel. Updates /repo-map init suggestions to /repo-intel init, and updates getPluginRoot('git-map') and lib/git-map/queries to repo-intel equivalents. --- agents/exploration-agent.md | 4 ++-- agents/implementation-agent.md | 2 +- agents/planning-agent.md | 10 +++++----- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/agents/exploration-agent.md b/agents/exploration-agent.md index 251ad1a..0741700 100644 --- a/agents/exploration-agent.md +++ b/agents/exploration-agent.md @@ -54,7 +54,7 @@ const repoMap = require(path.join(pluginRoot, 'lib/repo-map')); const map = repoMap.load(process.cwd()); if (!map) { - console.log('Repo map not found. Consider: /repo-map init'); + console.log('Repo map not found. Consider: /repo-intel init'); } else { console.log(`Repo map loaded: ${Object.keys(map.files).length} files, ${map.stats.totalSymbols} symbols`); } @@ -109,7 +109,7 @@ if (fs.existsSync(mapFile)) { console.log(`Repo intel loaded: hotspots=${repoIntel.hotspots?.length || 0}, bugspots=${repoIntel.bugspots?.length || 0}`); } else { - console.log('Repo intel not found. Consider: /git-map build'); + console.log('Repo intel not found. Consider: /repo-intel init'); } ``` diff --git a/agents/implementation-agent.md b/agents/implementation-agent.md index ee0975f..761e0c7 100644 --- a/agents/implementation-agent.md +++ b/agents/implementation-agent.md @@ -91,7 +91,7 @@ const map = repoMap.load(process.cwd()); if (map) { console.log(`Repo map loaded: ${map.stats.totalSymbols} symbols`); } else { - console.log('Repo map not found. Consider /repo-map init for faster lookups.'); + console.log('Repo map not found. Consider /repo-intel init for faster lookups.'); } ``` diff --git a/agents/planning-agent.md b/agents/planning-agent.md index 9847361..825d389 100644 --- a/agents/planning-agent.md +++ b/agents/planning-agent.md @@ -76,7 +76,7 @@ const repoMap = require(path.join(pluginRoot, 'lib/repo-map')); const map = repoMap.load(process.cwd()); if (!map) { - console.log('Repo map missing. Suggest /repo-map init if needed.'); + console.log('Repo map missing. Suggest /repo-intel init if needed.'); } ``` @@ -201,19 +201,19 @@ If the exploration output includes repo-intel data (hotspots, bugspots, coupling const { getPluginRoot } = require('./lib/cross-platform'); const path = require('path'); -const pluginRoot = getPluginRoot('git-map'); +const pluginRoot = getPluginRoot('repo-intel'); let repoIntel = null; if (pluginRoot) { try { - const queries = require(path.join(pluginRoot, 'lib/git-map/queries')); + const queries = require(path.join(pluginRoot, 'lib/repo-intel/queries')); repoIntel = queries; console.log('[OK] repo-intel available - augmenting risk assessment'); } catch { - console.log('[WARN] git-map plugin found but queries failed to load'); + console.log('[WARN] repo-intel plugin found but queries failed to load'); } } else { - console.log('[WARN] git-map plugin not installed - using heuristic risk assessment only'); + console.log('[WARN] repo-intel plugin not installed - using heuristic risk assessment only'); } ``` From 95edaad98df413d30a58d02322cbf7b2558500b0 Mon Sep 17 00:00:00 2001 From: Avi Fenesh Date: Wed, 25 Mar 2026 19:13:42 +0200 Subject: [PATCH 2/6] refactor: decouple delivery agents to prepare-delivery plugin Move delivery-validator, test-coverage-checker agents and orchestrate-review, validate-delivery skills to prepare-delivery plugin. next-task now references these as prepare-delivery:* cross-plugin agents. Phases 8-11 functionality unchanged - just owned by prepare-delivery now. --- AGENTS.md | 12 ++++++++---- CLAUDE.md | 12 ++++++++---- commands/next-task.md | 6 +++--- components.json | 6 +----- hooks/hooks.json | 2 +- 5 files changed, 21 insertions(+), 17 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 33bc4bb..b49ec56 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -6,20 +6,24 @@ - ci-fixer - ci-monitor -- delivery-validator - exploration-agent - implementation-agent - planning-agent - simple-fixer - task-discoverer -- test-coverage-checker - worktree-manager ## Skills - discover-tasks -- orchestrate-review -- validate-delivery + +## Cross-Plugin Agents (from prepare-delivery) + +Phases 8-11 use agents/skills from the prepare-delivery plugin: +- `prepare-delivery:delivery-validator` +- `prepare-delivery:test-coverage-checker` +- `prepare-delivery:orchestrate-review` (skill) +- `prepare-delivery:validate-delivery` (skill) ## Commands diff --git a/CLAUDE.md b/CLAUDE.md index 33bc4bb..b49ec56 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -6,20 +6,24 @@ - ci-fixer - ci-monitor -- delivery-validator - exploration-agent - implementation-agent - planning-agent - simple-fixer - task-discoverer -- test-coverage-checker - worktree-manager ## Skills - discover-tasks -- orchestrate-review -- validate-delivery + +## Cross-Plugin Agents (from prepare-delivery) + +Phases 8-11 use agents/skills from the prepare-delivery plugin: +- `prepare-delivery:delivery-validator` +- `prepare-delivery:test-coverage-checker` +- `prepare-delivery:orchestrate-review` (skill) +- `prepare-delivery:validate-delivery` (skill) ## Commands diff --git a/commands/next-task.md b/commands/next-task.md index 682d9ff..57ee3f3 100644 --- a/commands/next-task.md +++ b/commands/next-task.md @@ -363,7 +363,7 @@ await Task({ ## Phase 8: Pre-Review Gates -**Parallel**: `deslop:deslop-agent` (Task) + `next-task:test-coverage-checker` (Task) + `/simplify` (Skill, orchestrator) +**Parallel**: `deslop:deslop-agent` (Task) + `prepare-delivery:test-coverage-checker` (Task) + `/simplify` (Skill, orchestrator) ```javascript workflowState.startPhase('pre-review-gates'); @@ -407,7 +407,7 @@ Thoroughness: normal Return structured results between === DESLOP_RESULT === markers.` }), - Task({ subagent_type: "next-task:test-coverage-checker", prompt: `Validate test coverage.${testGapsContext}` }), + Task({ subagent_type: "prepare-delivery:test-coverage-checker", prompt: `Validate test coverage.${testGapsContext}` }), Skill({ name: "simplify" }) ]); @@ -582,7 +582,7 @@ workflowState.completePhase({ approved, iterations, remaining }); ```javascript workflowState.startPhase('delivery-validation'); const result = await Task({ - subagent_type: "next-task:delivery-validator", + subagent_type: "prepare-delivery:delivery-validator", prompt: `Validate completion. Check: tests pass, build passes, requirements met.` }); if (!result.approved) { diff --git a/components.json b/components.json index 4c19835..d1ce16b 100644 --- a/components.json +++ b/components.json @@ -2,19 +2,15 @@ "agents": [ "ci-fixer", "ci-monitor", - "delivery-validator", "exploration-agent", "implementation-agent", "planning-agent", "simple-fixer", "task-discoverer", - "test-coverage-checker", "worktree-manager" ], "skills": [ - "discover-tasks", - "orchestrate-review", - "validate-delivery" + "discover-tasks" ], "commands": [ "delivery-approval", diff --git a/hooks/hooks.json b/hooks/hooks.json index 9cecbb1..cd60b20 100644 --- a/hooks/hooks.json +++ b/hooks/hooks.json @@ -5,7 +5,7 @@ "hooks": [ { "type": "prompt", - "prompt": "\n## Workflow Enforcement - SubagentStop Hook\n\nA subagent has completed. Determine and execute the next workflow phase.\n\n\n### Verification Gates\n\nBefore proceeding to any next phase, verify the required previous steps completed.\n\n---\n\n## Gate 0: Before Task Discovery (Phase 2)\n\n**Required**: Policy decisions must be cached in preference file.\n\n```javascript\nconst sourceCache = require(path.join(pluginRoot, 'lib/sources/source-cache'));\nconst pref = sourceCache.getPreference();\nconst state = workflowState.readState();\nif (!pref && !state?.policy?.taskSource) {\n console.error('[BLOCKED] Cannot proceed to task discovery - no policy cached');\n console.error('Phase 1 must capture user decisions before Phase 2 starts');\n return { blocked: true, reason: 'preference-not-cached' };\n}\nconsole.log(`[VERIFIED] Policy cached: source=${pref?.source || state.policy.taskSource.source}`);\n```\n\n**Forbidden**:\n- Proceeding to task-discoverer without user policy decisions\n- Skipping the AskUserQuestion step in Phase 1\n\n---\n\n## Gate 1: Before Exploration (Phase 4)\n\n**Required**: Worktree must have been created via `next-task:worktree-manager` agent.\n\n```javascript\nconst state = workflowState.readState();\nif (!state.worktree || !state.worktree.path) {\n console.error('[BLOCKED] Cannot proceed to exploration - worktree not created');\n return { blocked: true, reason: 'worktree-missing' };\n}\nconsole.log(`[VERIFIED] Worktree exists: ${state.worktree.path}`);\n```\n\n**Forbidden**:\n- Using `git checkout -b` directly\n- Using `git branch` directly\n- Proceeding to exploration without worktree verification\n\n---\n\n## Gate 2: Before Delivery Validation (Phase 10)\n\n**Required**: Review loop must have run with proper iterations.\n\n```javascript\nconst state = workflowState.readState();\nconst reviewResult = state.phases?.reviewLoop;\nif (!reviewResult || reviewResult.iterations < 1) {\n console.error('[BLOCKED] Review loop must run at least 1 iteration');\n return { blocked: true, reason: 'review-iterations-zero' };\n}\nif (!reviewResult.approved && !reviewResult.orchestratorOverride) {\n console.error('[BLOCKED] Review not approved');\n return { blocked: true, reason: 'review-not-approved' };\n}\nconsole.log(`[VERIFIED] Review: ${reviewResult.iterations} iterations`);\n```\n\n**Forbidden**:\n- Skipping to delivery without running review loop\n- Running review with 0 iterations\n\n---\n\n## Gate 3: Before ship:ship Merge\n\n**Required**: All PR comments must be addressed.\n\nEnforced in ship:ship command:\n1. Phase 4 CI & Review Monitor Loop must run\n2. 3-minute wait for auto-reviewers must complete\n3. All comments must be addressed before merge\n\n\n---\n\n\n### Decision Tree\n\n1. **worktree-manager completed**: Verify worktree path, then run exploration-agent\n2. **implementation-agent completed**: Run deslop:deslop-agent + test-coverage-checker + /simplify (parallel)\n3. **pre-review gates completed**: Run review loop (min 1 iteration), then delivery-validator\n4. **delivery-validator completed**: If approved, run sync-docs:sync-docs-agent. If not, return to implementation.\n5. **sync-docs:sync-docs-agent completed**: Invoke ship:ship command\n\n\n---\n\n\n### Enforcement\n\nEvery step exists for a reason. Taking shortcuts defeats the purpose of automation.\n\n- Do not skip worktree-manager (enables parallel task isolation)\n- Do not skip review iterations (catches bugs humans miss)\n- Do not skip 3-minute wait in ship:ship (auto-reviewers need time)\n- Do not skip addressing PR comments (blocks merge)\n\nIf you think a step is unnecessary, you are wrong.\n\n\n---\n\n\n### Workflow Sequence\n\n0. [GATE] policy-cached (preference file must exist)\n1. task-discoverer\n2. [GATE] worktree-manager (must use agent)\n3. [VERIFY] worktree exists\n4. exploration-agent\n5. planning-agent\n6. implementation-agent\n7. pre-review gates (deslop + test-coverage + /simplify)\n8. review loop (1+ iterations)\n9. [GATE] delivery-validator\n10. sync-docs:sync-docs-agent\n11. ship:ship command (must run Phase 4 loop)\n\n\nReturn: {\"ok\": true, \"nextPhase\": \"\", \"verified\": [\"\"]}\n" + "prompt": "\n## Workflow Enforcement - SubagentStop Hook\n\nA subagent has completed. Determine and execute the next workflow phase.\n\n\n### Verification Gates\n\nBefore proceeding to any next phase, verify the required previous steps completed.\n\n---\n\n## Gate 0: Before Task Discovery (Phase 2)\n\n**Required**: Policy decisions must be cached in preference file.\n\n```javascript\nconst sourceCache = require(path.join(pluginRoot, 'lib/sources/source-cache'));\nconst pref = sourceCache.getPreference();\nconst state = workflowState.readState();\nif (!pref && !state?.policy?.taskSource) {\n console.error('[BLOCKED] Cannot proceed to task discovery - no policy cached');\n console.error('Phase 1 must capture user decisions before Phase 2 starts');\n return { blocked: true, reason: 'preference-not-cached' };\n}\nconsole.log(`[VERIFIED] Policy cached: source=${pref?.source || state.policy.taskSource.source}`);\n```\n\n**Forbidden**:\n- Proceeding to task-discoverer without user policy decisions\n- Skipping the AskUserQuestion step in Phase 1\n\n---\n\n## Gate 1: Before Exploration (Phase 4)\n\n**Required**: Worktree must have been created via `next-task:worktree-manager` agent.\n\n```javascript\nconst state = workflowState.readState();\nif (!state.worktree || !state.worktree.path) {\n console.error('[BLOCKED] Cannot proceed to exploration - worktree not created');\n return { blocked: true, reason: 'worktree-missing' };\n}\nconsole.log(`[VERIFIED] Worktree exists: ${state.worktree.path}`);\n```\n\n**Forbidden**:\n- Using `git checkout -b` directly\n- Using `git branch` directly\n- Proceeding to exploration without worktree verification\n\n---\n\n## Gate 2: Before Delivery Validation (Phase 10)\n\n**Required**: Review loop must have run with proper iterations.\n\n```javascript\nconst state = workflowState.readState();\nconst reviewResult = state.phases?.reviewLoop;\nif (!reviewResult || reviewResult.iterations < 1) {\n console.error('[BLOCKED] Review loop must run at least 1 iteration');\n return { blocked: true, reason: 'review-iterations-zero' };\n}\nif (!reviewResult.approved && !reviewResult.orchestratorOverride) {\n console.error('[BLOCKED] Review not approved');\n return { blocked: true, reason: 'review-not-approved' };\n}\nconsole.log(`[VERIFIED] Review: ${reviewResult.iterations} iterations`);\n```\n\n**Forbidden**:\n- Skipping to delivery without running review loop\n- Running review with 0 iterations\n\n---\n\n## Gate 3: Before ship:ship Merge\n\n**Required**: All PR comments must be addressed.\n\nEnforced in ship:ship command:\n1. Phase 4 CI & Review Monitor Loop must run\n2. 3-minute wait for auto-reviewers must complete\n3. All comments must be addressed before merge\n\n\n---\n\n\n### Decision Tree\n\n1. **worktree-manager completed**: Verify worktree path, then run exploration-agent\n2. **implementation-agent completed**: Run deslop:deslop-agent + prepare-delivery:test-coverage-checker + /simplify (parallel)\n3. **pre-review gates completed**: Run review loop (min 1 iteration), then prepare-delivery:delivery-validator\n4. **prepare-delivery:delivery-validator completed**: If approved, run sync-docs:sync-docs-agent. If not, return to implementation.\n5. **sync-docs:sync-docs-agent completed**: Invoke ship:ship command\n\n\n---\n\n\n### Enforcement\n\nEvery step exists for a reason. Taking shortcuts defeats the purpose of automation.\n\n- Do not skip worktree-manager (enables parallel task isolation)\n- Do not skip review iterations (catches bugs humans miss)\n- Do not skip 3-minute wait in ship:ship (auto-reviewers need time)\n- Do not skip addressing PR comments (blocks merge)\n\nIf you think a step is unnecessary, you are wrong.\n\n\n---\n\n\n### Workflow Sequence\n\n0. [GATE] policy-cached (preference file must exist)\n1. task-discoverer\n2. [GATE] worktree-manager (must use agent)\n3. [VERIFY] worktree exists\n4. exploration-agent\n5. planning-agent\n6. implementation-agent\n7. pre-review gates (deslop + prepare-delivery:test-coverage-checker + /simplify)\n8. review loop (1+ iterations)\n9. [GATE] prepare-delivery:delivery-validator\n10. sync-docs:sync-docs-agent\n11. ship:ship command (must run Phase 4 loop)\n\n\nReturn: {\"ok\": true, \"nextPhase\": \"\", \"verified\": [\"\"]}\n" } ] } From ec118b75edc721c0100fc4f573e2f9f2441d8310 Mon Sep 17 00:00:00 2001 From: Avi Fenesh Date: Wed, 25 Mar 2026 19:45:14 +0200 Subject: [PATCH 3/6] fix: delete orphaned agent/skill files after decoupling Remove delivery-validator, test-coverage-checker agents and orchestrate-review, validate-delivery skills that were copied to prepare-delivery but not deleted here. The files were not in components.json but could shadow the prepare-delivery versions. Also fix stale Phase 10 comment (next-task: -> prepare-delivery:). --- agents/delivery-validator.md | 118 ------ agents/test-coverage-checker.md | 589 ----------------------------- commands/next-task.md | 2 +- skills/orchestrate-review/SKILL.md | 362 ------------------ skills/validate-delivery/SKILL.md | 329 ---------------- 5 files changed, 1 insertion(+), 1399 deletions(-) delete mode 100644 agents/delivery-validator.md delete mode 100644 agents/test-coverage-checker.md delete mode 100644 skills/orchestrate-review/SKILL.md delete mode 100644 skills/validate-delivery/SKILL.md diff --git a/agents/delivery-validator.md b/agents/delivery-validator.md deleted file mode 100644 index 074660a..0000000 --- a/agents/delivery-validator.md +++ /dev/null @@ -1,118 +0,0 @@ ---- -name: delivery-validator -description: Validate task completion autonomously. Use this agent after review approval to run validation checks and either approve for shipping or return to implementation with fix instructions. -tools: - - Skill - - Bash(git:*) - - Bash(npm:*) - - Read - - Grep - - Glob -model: sonnet ---- - -# Delivery Validator Agent - -Autonomously validate that the task is complete and ready to ship. -This is NOT manual approval - it's an autonomous validation gate. - -## Execution - -You MUST execute the `validate-delivery` skill to perform validation. The skill contains: -- Review status check -- Test runner detection and execution -- Build verification -- Requirements comparison -- Regression detection -- Fix instructions generator - -## Validation Checks - -| Check | What it validates | -|-------|-------------------| -| reviewClean | Review approved or override | -| testsPassing | Test suite passes | -| buildPassing | Build completes | -| requirementsMet | Task requirements implemented | -| noRegressions | No tests lost | -| diffRisk | Risk-weighted analysis of changed files (optional, advisory) | - -## Your Role - -1. Invoke the `validate-delivery` skill -2. Load task context from workflow state -3. Run all 5 validation checks -4. Run diff-risk scoring (optional - only when repo-intel map exists) -5. Aggregate results with risk annotations -6. If all pass: approve for shipping (include risk summary if available) -7. If any fail: return fix instructions - -## Decision Logic - -**All checks pass:** -- Update state with `deliveryApproved: true` -- STOP - SubagentStop hook triggers sync-docs:sync-docs-agent - -**Any check fails:** -- Update state with failure and fix instructions -- STOP - workflow returns to implementation phase - -## [CRITICAL] Workflow Position - -``` -Phase 9 review loop (MUST have approved) - ↓ -delivery-validator (YOU ARE HERE) - ↓ - STOP after validation - ↓ - SubagentStop hook triggers sync-docs:sync-docs-agent -``` - -**MUST NOT do:** -- Create PRs -- Push to remote -- Invoke ship:ship -- Skip sync-docs:sync-docs-agent - -## State Updates - -```javascript -// On success -workflowState.completePhase({ approved: true, checks }); - -// On failure -workflowState.failPhase('Validation failed', { failedChecks, fixInstructions }); -``` - -## Output Format - -```json -{ - "approved": true|false, - "checks": { ... }, - "failedChecks": [], - "fixInstructions": [], - "riskSummary": "3 files at elevated risk (>0.5), 1 file at high risk (>0.7)" -} -``` - -## Constraints - -- Do not bypass the skill - it contains the authoritative patterns -- NO manual approval required -- Fully autonomous retry loop on failure -- STOP after validation - hooks handle next phase - -## Quality Multiplier - -Uses **sonnet** model because: -- Validation checks are structured and deterministic -- Comparing requirements needs moderate reasoning -- Faster than opus, sufficient for validation logic - -## Integration Points - -This agent is invoked by: -- Phase 10 of `/next-task` workflow -- After Phase 9 review loop approval diff --git a/agents/test-coverage-checker.md b/agents/test-coverage-checker.md deleted file mode 100644 index 6353ace..0000000 --- a/agents/test-coverage-checker.md +++ /dev/null @@ -1,589 +0,0 @@ ---- -name: test-coverage-checker -description: Validate test coverage quality for new code. Use this agent before the first review round to verify tests exist, are meaningful, and actually exercise the new code (not just path matching). -tools: - - Bash(git:*) - - Read - - Grep - - Glob -model: sonnet ---- - -# Test Coverage Checker Agent - -Validate that new work has appropriate, meaningful test coverage. -This is an advisory agent - it reports coverage gaps but does NOT block the workflow. - -**Important**: This agent validates test QUALITY, not just test EXISTENCE. A test file -that exists but doesn't meaningfully exercise the new code is flagged as a gap. - -## Scope - -Analyze files in: `git diff --name-only origin/${BASE_BRANCH}..HEAD` - -## Phase 1: Get Changed Files - -```bash -# Get base branch -BASE_BRANCH=$(git symbolic-ref refs/remotes/origin/HEAD 2>/dev/null | sed 's@^refs/remotes/origin/@@' || echo "main") - -# Get changed source files (exclude test files) -CHANGED_SOURCE=$(git diff --name-only origin/${BASE_BRANCH}..HEAD 2>/dev/null | \ - grep -E '\.(js|ts|jsx|tsx|py|rs|go|rb|java|kt|swift|cpp|c|cs)$' | \ - grep -v -E '(test|spec|_test|Test)\.') - -# Get changed test files -CHANGED_TESTS=$(git diff --name-only origin/${BASE_BRANCH}..HEAD 2>/dev/null | \ - grep -E '(test|spec|_test|Test)\.') - -echo "SOURCE_FILES=$CHANGED_SOURCE" -echo "TEST_FILES=$CHANGED_TESTS" -``` - -## Phase 1.5: Gather Repo Intel (If Available) - -Check for repo-intel data to enrich coverage checks with historical risk signals. This step is optional - the checker works without it. - -```javascript -const { binary } = require('@agentsys/lib'); -const fs = require('fs'); -const path = require('path'); - -const { getStateDirPath } = require('@agentsys/lib/platform/state-dir'); -const cwd = process.cwd(); -const mapFile = path.join(getStateDirPath(cwd), 'repo-intel.json'); - -let repoIntel = null; - -if (fs.existsSync(mapFile)) { - repoIntel = {}; - - // Test gaps: hot files with no co-changing test file - try { - const json = binary.runAnalyzer([ - 'repo-intel', 'query', 'test-gaps', - '--top', '20', '--map-file', mapFile, cwd - ]); - repoIntel.testGaps = JSON.parse(json); - } catch (e) { repoIntel.testGaps = null; } - - // Bugspots: files with highest bug-fix density - try { - const json = binary.runAnalyzer([ - 'repo-intel', 'query', 'bugspots', - '--top', '20', '--map-file', mapFile, cwd - ]); - repoIntel.bugspots = JSON.parse(json); - } catch (e) { repoIntel.bugspots = null; } - - console.log(`Repo intel loaded: testGaps=${repoIntel.testGaps?.length || 0}, bugspots=${repoIntel.bugspots?.length || 0}`); -} else { - console.log('Repo intel not found - proceeding without risk weighting'); -} - -// Build lookup maps for quick access during coverage checks -const testGapSet = new Set( - (repoIntel?.testGaps || []).map(g => g.path) -); -const bugspotMap = new Map( - (repoIntel?.bugspots || []).map(b => [b.path, b]) -); -``` - -## Phase 2: Detect Test Conventions - -Detect the project's test file naming convention: - -```bash -# Check for common test patterns -if ls tests/ 2>/dev/null | head -1; then - echo "TEST_DIR=tests" -elif ls __tests__/ 2>/dev/null | head -1; then - echo "TEST_DIR=__tests__" -elif ls test/ 2>/dev/null | head -1; then - echo "TEST_DIR=test" -elif ls spec/ 2>/dev/null | head -1; then - echo "TEST_DIR=spec" -fi - -# Check naming convention -if ls **/*.test.* 2>/dev/null | head -1; then - echo "TEST_PATTERN=.test." -elif ls **/*.spec.* 2>/dev/null | head -1; then - echo "TEST_PATTERN=.spec." -elif ls **/test_*.* 2>/dev/null | head -1; then - echo "TEST_PATTERN=test_" -fi -``` - -## Phase 3: Map Source to Test Files - -For each source file, find corresponding test file: - -```javascript -const testMappings = { - // JavaScript/TypeScript patterns - 'src/foo.ts': ['tests/foo.test.ts', '__tests__/foo.test.ts', 'src/foo.test.ts', 'src/__tests__/foo.test.ts'], - 'lib/bar.js': ['tests/bar.test.js', 'lib/bar.test.js', 'test/bar.test.js'], - - // Python patterns - 'src/module.py': ['tests/test_module.py', 'test/test_module.py', 'src/test_module.py'], - - // Rust patterns - 'src/lib.rs': ['tests/lib_test.rs', 'src/lib_tests.rs'], - - // Go patterns - 'pkg/handler.go': ['pkg/handler_test.go'] -}; - -function findTestFile(sourceFile) { - const basename = sourceFile.split('/').pop().replace(/\.[^.]+$/, ''); - const dir = sourceFile.split('/').slice(0, -1).join('/'); - const ext = sourceFile.split('.').pop(); - - // Generate possible test file locations - const candidates = [ - `tests/${basename}.test.${ext}`, - `tests/${basename}.spec.${ext}`, - `test/${basename}.test.${ext}`, - `__tests__/${basename}.test.${ext}`, - `${dir}/${basename}.test.${ext}`, - `${dir}/${basename}.spec.${ext}`, - `${dir}/__tests__/${basename}.test.${ext}`, - // Python style - `tests/test_${basename}.${ext}`, - `test/test_${basename}.${ext}`, - // Go style (test in same dir) - `${dir}/${basename}_test.${ext}` - ]; - - return candidates; -} -``` - -## Phase 4: Check Coverage - -For each changed source file: -1. Find corresponding test file -2. Check if test file exists -3. If source modified, check if test was also modified -4. Analyze new functions/classes for test coverage - -```javascript -const gaps = []; -const covered = []; - -for (const sourceFile of changedSourceFiles) { - const testCandidates = findTestFile(sourceFile); - const existingTest = testCandidates.find(t => fileExists(t)); - - if (!existingTest) { - gaps.push({ - file: sourceFile, - reason: 'No test file found', - candidates: testCandidates.slice(0, 3) - }); - continue; - } - - // Check if test was updated along with source - const testModified = changedTestFiles.includes(existingTest); - - if (!testModified) { - gaps.push({ - file: sourceFile, - reason: 'Source modified but test file not updated', - testFile: existingTest - }); - } else { - covered.push({ - file: sourceFile, - testFile: existingTest - }); - } -} -``` - -## Phase 4.5: Risk-Weighted Validation - -When repo-intel data is available, apply stricter checks to historically risky files. - -```javascript -const riskIssues = []; - -for (const sourceFile of changedSourceFiles) { - const inTestGaps = testGapSet.has(sourceFile); - const bugspot = bugspotMap.get(sourceFile); - const existingTest = findTestFile(sourceFile).find(t => fileExists(t)); - const testModified = existingTest && changedTestFiles.includes(existingTest); - - // FAIL: file appears in both test-gaps AND bugspots with no new test added - if (inTestGaps && bugspot && !testModified) { - riskIssues.push({ - file: sourceFile, - severity: 'critical', - type: 'high-risk-untested', - bugFixRate: bugspot.bugFixRate, - message: `${sourceFile} has ${(bugspot.bugFixRate * 100).toFixed(0)}% bug-fix rate AND no co-changing test file in git history - test coverage is required` - }); - } - - // WARN: file is in bugspots (bugFixRate > 0.3) and test only does basic assertions - if (bugspot && bugspot.bugFixRate > 0.3 && testModified && existingTest) { - const testContent = await readFile(existingTest); - const trivialPatterns = [ - /expect\s*\(\s*true\s*\)/, - /expect\s*\(\s*1\s*\)\s*\.toBe\s*\(\s*1\s*\)/, - /assert\s*\(\s*True\s*\)/, - /\.toBeDefined\s*\(\s*\)/ - ]; - const hasTrivialOnly = trivialPatterns.some(p => p.test(testContent)); - - if (hasTrivialOnly) { - riskIssues.push({ - file: sourceFile, - severity: 'warning', - type: 'weak-test-for-risky-file', - bugFixRate: bugspot.bugFixRate, - testFile: existingTest, - message: `${sourceFile} has ${(bugspot.bugFixRate * 100).toFixed(0)}% bug-fix rate but test only has trivial assertions - test quality is critical` - }); - } - } - - // INFO: annotate bugspot context for the coverage report - if (bugspot) { - riskIssues.push({ - file: sourceFile, - severity: 'info', - type: 'bugspot-context', - bugFixRate: bugspot.bugFixRate, - bugFixes: bugspot.bugFixes, - totalChanges: bugspot.totalChanges, - message: `${sourceFile} has ${(bugspot.bugFixRate * 100).toFixed(0)}% bug-fix rate (${bugspot.bugFixes}/${bugspot.totalChanges} changes are fixes) - test quality is critical` - }); - } -} -``` - -## Phase 5: Analyze New Exports - -Check for new functions/classes that might need tests: - -```javascript -async function findNewExports(file) { - // Get diff for the file - const diff = await exec(`git diff origin/${BASE_BRANCH}..HEAD -- ${file}`); - - // Find added function/class declarations - const newExports = []; - const patterns = [ - /^\+\s*export\s+(function|const|class|async function)\s+(\w+)/gm, - /^\+\s*export\s+default\s+(function|class)\s*(\w*)/gm, - /^\+\s*module\.exports\s*=\s*\{([^}]+)\}/gm, - /^\+\s*def\s+(\w+)\(/gm, // Python - /^\+\s*pub\s+fn\s+(\w+)/gm, // Rust - /^\+\s*func\s+(\w+)/gm // Go - ]; - - for (const pattern of patterns) { - let match; - while ((match = pattern.exec(diff)) !== null) { - newExports.push(match[2] || match[1]); - } - } - - return newExports; -} -``` - -## Phase 6: Validate Test Quality - -**Critical**: Don't just check if test files exist - verify tests actually exercise the new code. - -```javascript -async function validateTestQuality(sourceFile, testFile, newExports) { - const testContent = await readFile(testFile); - const sourceContent = await readFile(sourceFile); - const issues = []; - - // 1. Check if new exports are actually tested - for (const exportName of newExports) { - const testMentions = testContent.match(new RegExp(exportName, 'g')); - if (!testMentions || testMentions.length === 0) { - issues.push({ - type: 'untested-export', - export: exportName, - message: `New export '${exportName}' is not referenced in test file` - }); - } - } - - // 2. Check for meaningful assertions (not just trivial tests) - const trivialPatterns = [ - /expect\s*\(\s*true\s*\)/, - /expect\s*\(\s*1\s*\)\s*\.toBe\s*\(\s*1\s*\)/, - /assert\s*\(\s*True\s*\)/, - /\.toBeDefined\s*\(\s*\)/ // Only toBeDefined without other checks - ]; - - for (const pattern of trivialPatterns) { - if (pattern.test(testContent)) { - issues.push({ - type: 'trivial-assertion', - message: 'Test contains trivial assertions that don\'t validate behavior' - }); - break; - } - } - - // 3. Check test describes/its match the source functionality - const describeTitles = testContent.match(/describe\s*\(\s*['"`]([^'"`]+)['"`]/g) || []; - const itTitles = testContent.match(/it\s*\(\s*['"`]([^'"`]+)['"`]/g) || []; - - if (describeTitles.length === 0 && itTitles.length === 0) { - issues.push({ - type: 'no-test-structure', - message: 'Test file lacks describe/it blocks - may not be a real test' - }); - } - - // 4. Check for edge case coverage hints - const edgeCasePatterns = ['null', 'undefined', 'empty', 'error', 'invalid', 'edge', 'boundary']; - const hasEdgeCases = edgeCasePatterns.some(p => testContent.toLowerCase().includes(p)); - - if (!hasEdgeCases && newExports.length > 0) { - issues.push({ - type: 'missing-edge-cases', - message: 'Tests may lack edge case coverage (no null/error/boundary tests detected)', - severity: 'warning' - }); - } - - // 5. Check if test actually imports/requires the source - const sourceBasename = sourceFile.split('/').pop().replace(/\.[^.]+$/, ''); - const importPatterns = [ - new RegExp(`from\\s+['"][^'"]*${sourceBasename}['"]`), - new RegExp(`require\\s*\\(\\s*['"][^'"]*${sourceBasename}['"]`), - new RegExp(`import\\s+.*${sourceBasename}`) - ]; - - const importsSource = importPatterns.some(p => p.test(testContent)); - if (!importsSource) { - issues.push({ - type: 'no-source-import', - message: `Test file doesn't appear to import '${sourceBasename}'`, - severity: 'critical' - }); - } - - return { - testFile, - sourceFile, - quality: issues.length === 0 ? 'good' : issues.some(i => i.severity === 'critical') ? 'poor' : 'needs-improvement', - issues - }; -} -``` - -## Phase 7: Analyze Test Coverage Depth - -Check if tests cover the actual logic paths in the new code: - -```javascript -async function analyzeTestDepth(sourceFile, testFile, diff) { - const analysis = { - sourceComplexity: 'unknown', - testCoverage: 'unknown', - suggestions: [] - }; - - // Extract conditionals and branches from new code - const newBranches = []; - const branchPatterns = [ - /^\+.*if\s*\(/gm, - /^\+.*else\s*\{/gm, - /^\+.*\?\s*.*:/gm, // Ternary - /^\+.*switch\s*\(/gm, - /^\+.*case\s+/gm, - /^\+.*catch\s*\(/gm - ]; - - for (const pattern of branchPatterns) { - const matches = diff.match(pattern) || []; - newBranches.push(...matches); - } - - if (newBranches.length > 3) { - analysis.sourceComplexity = 'high'; - analysis.suggestions.push('New code has multiple branches - ensure each path is tested'); - } - - // Check for async/await patterns that need error testing - const hasAsync = /^\+.*async\s+|^\+.*await\s+/m.test(diff); - if (hasAsync) { - const testContent = await readFile(testFile); - const hasAsyncTests = /\.rejects|\.resolves|async.*expect|try.*catch.*expect/i.test(testContent); - - if (!hasAsyncTests) { - analysis.suggestions.push('New async code detected - add tests for promise rejection scenarios'); - } - } - - return analysis; -} -``` - -## Output Format (JSON) - -```json -{ - "scope": "new-work-only", - "coverage": { - "filesAnalyzed": 5, - "filesWithTests": 3, - "filesMissingTests": 2, - "coveragePercent": 60 - }, - "gaps": [ - { - "file": "src/new-feature.ts", - "reason": "No test file found", - "candidates": ["tests/new-feature.test.ts", "__tests__/new-feature.test.ts"], - "newExports": ["handleFeature", "FeatureConfig"] - }, - { - "file": "src/modified.ts", - "reason": "Source modified but test file not updated", - "testFile": "tests/modified.test.ts", - "newExports": ["newFunction"] - } - ], - "qualityIssues": [ - { - "file": "src/api-client.ts", - "testFile": "tests/api-client.test.ts", - "quality": "needs-improvement", - "issues": [ - { - "type": "untested-export", - "export": "handleRetry", - "message": "New export 'handleRetry' is not referenced in test file" - }, - { - "type": "missing-edge-cases", - "message": "Tests may lack edge case coverage", - "severity": "warning" - } - ], - "suggestions": ["New async code detected - add tests for promise rejection scenarios"] - } - ], - "covered": [ - { - "file": "src/utils.ts", - "testFile": "tests/utils.test.ts", - "quality": "good" - } - ], - "riskIssues": [ - { - "file": "src/auth.ts", - "severity": "critical", - "type": "high-risk-untested", - "bugFixRate": 0.45, - "message": "src/auth.ts has 45% bug-fix rate AND no co-changing test file in git history - test coverage is required" - }, - { - "file": "src/parser.ts", - "severity": "warning", - "type": "weak-test-for-risky-file", - "bugFixRate": 0.35, - "testFile": "tests/parser.test.ts", - "message": "src/parser.ts has 35% bug-fix rate but test only has trivial assertions - test quality is critical" - } - ], - "summary": { - "status": "quality-issues-found", - "recommendation": "2 files missing tests, 1 file has tests but doesn't exercise new code", - "riskSummary": "1 critical risk file lacks tests, 1 high-bug-rate file has weak tests" - } -} -``` - -## Report Output - -```markdown -## Test Coverage Report - -### Summary -| Metric | Value | -|--------|-------| -| Files Analyzed | ${filesAnalyzed} | -| Files with Tests | ${filesWithTests} | -| Files Missing Tests | ${filesMissingTests} | -| Tests with Quality Issues | ${qualityIssues.length} | -| Effective Coverage | ${effectiveCoveragePercent}% | - -### Missing Test Files -${gaps.map(g => ` -**${g.file}** -- Reason: ${g.reason} -- New exports: ${g.newExports?.join(', ') || 'N/A'} -${g.candidates ? `- Suggested test location: ${g.candidates[0]}` : ''} -`).join('\n')} - -### Test Quality Issues -${qualityIssues.map(q => ` -**${q.file}** → ${q.testFile} (Quality: ${q.quality}) -${q.issues.map(i => `- [WARN] ${i.message}`).join('\n')} -${q.suggestions?.map(s => `- [TIP] ${s}`).join('\n') || ''} -`).join('\n')} - -### Risk-Weighted Findings (repo-intel) -${riskIssues.length > 0 ? riskIssues.filter(r => r.severity !== 'info').map(r => `- [${r.severity === 'critical' ? 'CRITICAL' : 'WARN'}] ${r.message}`).join('\n') : 'No repo-intel data available or no risk overlaps found'} - -### Well-Covered Files -${covered.filter(c => c.quality === 'good').map(c => `- [OK] ${c.file} -> ${c.testFile}`).join('\n')} - -### Recommendation -${summary.recommendation} -${summary.riskSummary ? `\n**Risk**: ${summary.riskSummary}` : ''} -``` - -## Behavior - -- **Advisory only** - Does NOT block workflow -- Reports coverage gaps to Phase 9 review loop -- Suggestions included in PR description -- Implementation-agent may optionally add tests based on findings - -## Integration Points - -This agent is called: -1. **Before first review round** - In parallel with deslop:deslop-agent -2. Results passed to Phase 9 review loop for context - -## Success Criteria - -- Correctly identifies test file conventions -- Maps source files to test files -- Detects new exports that need testing -- **Validates tests actually exercise the new code** (not just path matching) -- **Flags trivial or meaningless tests** (e.g., `expect(true).toBe(true)`) -- **Checks for edge case coverage** in tests -- **Verifies tests import the source file** they claim to test -- **Fails validation for files in both test-gaps AND bugspots with no new test** (when repo-intel available) -- **Warns on high-bugrate files with only trivial test assertions** (when repo-intel available) -- **Includes risk context** (bug-fix rate, test-gap status) in coverage report -- Works fully without repo-intel - risk checks are additive, not required -- Provides actionable recommendations -- Does NOT block workflow on missing tests - -## Model Choice: Sonnet - -This agent uses **sonnet** because: -- Test quality validation requires understanding code relationships -- Pattern detection needs more than simple matching -- Analyzing test meaningfulness requires moderate reasoning -- Advisory role means occasional misses are acceptable diff --git a/commands/next-task.md b/commands/next-task.md index 57ee3f3..a89fa29 100644 --- a/commands/next-task.md +++ b/commands/next-task.md @@ -577,7 +577,7 @@ workflowState.completePhase({ approved, iterations, remaining }); ## Phase 10: Delivery Validation -**Agent**: `next-task:delivery-validator` (sonnet) +**Agent**: `prepare-delivery:delivery-validator` (sonnet) ```javascript workflowState.startPhase('delivery-validation'); diff --git a/skills/orchestrate-review/SKILL.md b/skills/orchestrate-review/SKILL.md deleted file mode 100644 index 5e3a7b7..0000000 --- a/skills/orchestrate-review/SKILL.md +++ /dev/null @@ -1,362 +0,0 @@ ---- -name: orchestrate-review -version: 5.1.0 -description: "Use when user asks to \"deep review the code\", \"thorough code review\", \"multi-pass review\", or when orchestrating the Phase 9 review loop. Provides review pass definitions (code quality, security, performance, test coverage), signal detection patterns, and iteration algorithms." -metadata: - short-description: "Multi-pass code review orchestration" ---- - -# Orchestrate Review - -Multi-pass code review with parallel Task agents, finding aggregation, and iteration until clean. - -## Scope-Based Specialist Selection - -Select conditional specialists based on the review scope: -- **User request**: Detect signals from content user refers to (files, directory, module) -- **Workflow (Phase 9)**: Detect signals from changed files only -- **Project audit**: Detect signals from project structure as a whole - -## Review Passes - -Spawn parallel `general-purpose` Task agents (model: `sonnet`), one per pass: - -### Core (Always) -```javascript -const corePasses = [ - { id: 'code-quality', role: 'code quality reviewer', - focus: ['Style and consistency', 'Best practices', 'Bugs and logic errors', 'Error handling', 'Maintainability', 'Duplication'] }, - { id: 'security', role: 'security reviewer', - focus: ['Auth/authz flaws', 'Input validation', 'Injection risks', 'Secrets exposure', 'Insecure defaults'] }, - { id: 'performance', role: 'performance reviewer', - focus: ['N+1 queries', 'Blocking operations', 'Hot path inefficiencies', 'Memory leaks'] }, - { id: 'test-coverage', role: 'test coverage reviewer', - focus: ['Missing tests', 'Edge case coverage', 'Test quality', 'Integration needs', 'Mock appropriateness'] } -]; -``` - -### Conditional (Signal-Based) -```javascript -if (signals.hasDb) passes.push({ id: 'database', role: 'database specialist', - focus: ['Query performance', 'Indexes/transactions', 'Migration safety', 'Data integrity'] }); -if (signals.needsArchitecture) passes.push({ id: 'architecture', role: 'architecture reviewer', - focus: ['Module boundaries', 'Dependency direction', 'Cross-layer coupling', 'Pattern consistency'] }); -if (signals.hasApi) passes.push({ id: 'api', role: 'api designer', - focus: ['REST conventions', 'Error/status consistency', 'Pagination/filters', 'Versioning'] }); -if (signals.hasFrontend) passes.push({ id: 'frontend', role: 'frontend specialist', - focus: ['Component boundaries', 'State management', 'Accessibility', 'Render performance'] }); -if (signals.hasBackend) passes.push({ id: 'backend', role: 'backend specialist', - focus: ['Service boundaries', 'Domain logic', 'Concurrency/idempotency', 'Background job safety'] }); -if (signals.hasDevops) passes.push({ id: 'devops', role: 'devops reviewer', - focus: ['CI/CD safety', 'Secrets handling', 'Build/test pipelines', 'Deploy config'] }); -``` - -## Signal Detection - -```javascript -const signals = { - hasDb: files.some(f => /(db|migrations?|schema|prisma|typeorm|sql)/i.test(f)), - hasApi: files.some(f => /(api|routes?|controllers?|handlers?)/i.test(f)), - hasFrontend: files.some(f => /\.(tsx|jsx|vue|svelte)$/.test(f)), - hasBackend: files.some(f => /(server|backend|services?|domain)/i.test(f)), - hasDevops: files.some(f => /(\.github\/workflows|Dockerfile|k8s|terraform)/i.test(f)), - needsArchitecture: files.length > 20 // 20+ files typically indicates cross-module changes -}; -``` - -## File Risk Prioritization - -Before starting review passes, score changed files by composite risk using `diff-risk` from agent-analyzer. This orders files so reviewers focus on highest-risk code first within each pass. - -This step is optional - if repo-intel is unavailable, proceed with the default file ordering. - -### Pre-check: Ensure Repo-Intel - -```javascript -const fs = require('fs'); -const path = require('path'); - -const { getStateDirPath } = require('@agentsys/lib/platform/state-dir'); -const cwd = process.cwd(); -const mapFile = path.join(getStateDirPath(cwd), 'repo-intel.json'); - -if (!fs.existsSync(mapFile)) { - const response = await AskUserQuestion({ - questions: [{ - question: 'Generate repo-intel?', - description: 'No repo-intel map found. Generating one enables risk-scored file ordering for review passes. Takes ~5 seconds.', - options: [ - { label: 'Yes, generate it', value: 'yes' }, - { label: 'Skip', value: 'no' } - ] - }] - }); - - if (response === 'yes' || response?.['Generate repo-intel?'] === 'yes') { - try { - const { binary } = require('@agentsys/lib'); - const output = binary.runAnalyzer(['repo-intel', 'init', cwd]); - const stateDirPath = path.join(cwd, stateDir); - if (!fs.existsSync(stateDirPath)) fs.mkdirSync(stateDirPath, { recursive: true }); - fs.writeFileSync(mapFile, output); - } catch (e) { - // Binary not available - proceed without - } - } -} -``` - -### Scoring Changed Files - -```javascript -const { binary } = require('@agentsys/lib'); - -// stateDir and mapFile already defined above - -let riskScoredFiles = null; - -if (fs.existsSync(mapFile)) { - const fileList = changedFiles.join(','); - try { - const json = binary.runAnalyzer([ - 'repo-intel', 'query', 'diff-risk', - '--files', fileList, - '--map-file', mapFile, - cwd - ]); - riskScoredFiles = JSON.parse(json); - // riskScoredFiles is sorted by riskScore descending - } catch (e) { - // diff-risk failed - proceed with default ordering - console.log('[WARN] diff-risk unavailable, using default file order'); - } -} -``` - -### Applying Risk Order - -When `riskScoredFiles` is available, use it to reorder the file list passed to each review pass: - -```javascript -// Replace the default file list with risk-ordered list -if (riskScoredFiles) { - files = riskScoredFiles.map(r => r.path); -} -``` - -### High-Risk File Scrutiny - -Files with `riskScore > 0.5` should receive extra scrutiny. When passing files to review agents, annotate high-risk files so reviewers know to look more carefully: - -```javascript -function formatFileList(files, riskScoredFiles) { - if (!riskScoredFiles) return files.join('\n'); - const riskMap = new Map(riskScoredFiles.map(r => [r.path, r])); - return files.map(f => { - const risk = riskMap.get(f); - if (risk && risk.riskScore > 0.5) { - return `${f} [HIGH RISK: score=${risk.riskScore.toFixed(2)}, bugFixRate=${risk.bugFixRate.toFixed(2)}, aiRatio=${risk.aiRatio.toFixed(2)}]`; - } - return f; - }).join('\n'); -} -``` - -The formatted file list replaces `${files.join('\n')}` in the task prompt template below. - -## Task Prompt Template - -``` -You are a ${pass.role}. Review these changed files (ordered by risk, highest first): -${formatFileList(files, riskScoredFiles)} - -Files marked [HIGH RISK] have elevated bug-fix rates, single-author ownership, or high AI-contribution ratios. Give these files extra scrutiny. - -Focus: ${pass.focus.map(f => `- ${f}`).join('\n')} - -Return JSON: -{ - "pass": "${pass.id}", - "findings": [{ - "file": "path.ts", - "line": 42, - "severity": "critical|high|medium|low", - "description": "Issue", - "suggestion": "Fix", - "confidence": "high|medium|low", - "falsePositive": false - }] -} - -Example findings (diverse passes and severities): - -// Security - high severity -{ "file": "src/auth/login.ts", "line": 89, "severity": "high", - "description": "Password comparison uses timing-vulnerable string equality", - "suggestion": "Use crypto.timingSafeEqual() instead of ===", - "confidence": "high", "falsePositive": false } - -// Code quality - medium severity -{ "file": "src/utils/helpers.ts", "line": 45, "severity": "medium", - "description": "Duplicated validation logic exists in src/api/validators.ts:23", - "suggestion": "Extract to shared lib/validation.ts", - "confidence": "high", "falsePositive": false } - -// Performance - low severity -{ "file": "src/config.ts", "line": 12, "severity": "low", - "description": "Magic number 3600 should be named constant", - "suggestion": "const CACHE_TTL_SECONDS = 3600;", - "confidence": "medium", "falsePositive": false } - -// False positive example -{ "file": "src/crypto/hash.ts", "line": 78, "severity": "high", - "description": "Non-constant time comparison", - "suggestion": "N/A - intentional for non-secret data", - "confidence": "low", "falsePositive": true } - -Report all issues with confidence >= medium. Empty findings array if clean. -``` - -## Aggregation - -```javascript -function aggregateFindings(results) { - const items = []; - for (const {pass, findings = []} of results) { - for (const f of findings) { - items.push({ - id: `${pass}:${f.file}:${f.line}:${f.description}`, - pass, ...f, - status: f.falsePositive ? 'false-positive' : 'open' - }); - } - } - - // Deduplicate by id - const deduped = [...new Map(items.map(i => [i.id, i])).values()]; - - // Group by severity - const bySeverity = {critical: [], high: [], medium: [], low: []}; - deduped.forEach(i => !i.falsePositive && bySeverity[i.severity || 'low'].push(i)); - - const totals = Object.fromEntries(Object.entries(bySeverity).map(([k, v]) => [k, v.length])); - - return { - items: deduped, - bySeverity, - totals, - openCount: Object.values(totals).reduce((a, b) => a + b, 0) - }; -} -``` - -## Iteration Loop - -**Security Note**: Fixes are applied by the orchestrator using standard Edit tool permissions. Critical/high severity findings should be reviewed before applying - do not blindly apply LLM-suggested fixes to security-sensitive code. The orchestrator validates each fix against the original issue. - -```javascript -// 5 iterations balances thoroughness vs cost; 1 stall (2 consecutive identical-hash iterations) indicates fixes aren't progressing -const MAX_ITERATIONS = 5, MAX_STALLS = 1; -let iteration = 1, stallCount = 0, lastHash = null; - -while (iteration <= MAX_ITERATIONS) { - // 1. Spawn parallel Task agents - const results = await Promise.all(passes.map(pass => Task({ - subagent_type: 'general-purpose', - model: 'sonnet', - prompt: /* see template above */ - }))); - - // 2. Aggregate findings - const findings = aggregateFindings(results); - - // 3. Check if done - if (findings.openCount === 0) { - workflowState.completePhase({ approved: true, iterations: iteration }); - break; - } - - // 4. Fix issues (severity order: critical → high → medium → low) - // Orchestrator reviews each suggestion before applying via Edit tool - for (const issue of [...findings.bySeverity.critical, ...findings.bySeverity.high, - ...findings.bySeverity.medium, ...findings.bySeverity.low]) { - if (!issue.falsePositive) { - // Read file, locate issue.line, validate suggestion, apply via Edit tool - // For complex fixes, use simple-fixer agent pattern - } - } - - // 5. Commit - exec(`git add . && git commit -m "fix: review feedback (iteration ${iteration})"`); - - // 6. Post-iteration deslop - Task({ subagent_type: 'deslop:deslop-agent', model: 'sonnet' }); - - // 7. Stall detection - const hash = crypto.createHash('sha256') - .update(JSON.stringify(findings.items.filter(i => !i.falsePositive))) - .digest('hex'); - stallCount = hash === lastHash ? stallCount + 1 : 0; - lastHash = hash; - - // 8. Check limits - if (stallCount >= MAX_STALLS || iteration >= MAX_ITERATIONS) { - const reason = stallCount >= MAX_STALLS ? 'stall-detected' : 'iteration-limit'; - console.log(`[BLOCKED] Review loop ended: ${reason}. Remaining: ${JSON.stringify(findings.totals)}`); - // Ask the user before advancing - do not silently proceed to delivery-validation - const question = `Review loop blocked (${reason}). Open issues remain. How should we proceed?`; - const response = AskUserQuestion({ - questions: [{ - question, - header: 'Review Blocked', - multiSelect: false, - options: [ - { label: 'Override and proceed', description: 'Advance to delivery-validation with unresolved issues (risky)' }, - { label: 'Abort workflow', description: 'Stop here; open issues must be fixed manually' } - ] - }] - }); - // AskUserQuestion returns { answers: { [questionText]: selectedLabel } } - const choice = response.answers?.[question] ?? response[question]; - if (choice === 'Override and proceed') { - workflowState.completePhase({ - approved: false, blocked: true, overridden: true, - reason, remaining: findings.totals - }); - } else { - workflowState.failPhase(`Review blocked: ${reason}. ${JSON.stringify(findings.totals)} issues remain.`); - } - break; - } - - iteration++; -} -``` - -## Review Queue - -Store state at `{stateDir}/review-queue-{timestamp}.json`: -```json -{ - "status": "open|resolved|blocked", - "scope": { "type": "diff", "files": ["..."] }, - "passes": ["code-quality", "security"], - "items": [], - "iteration": 0, - "stallCount": 0 -} -``` - -Delete when approved. Keep when blocked for orchestrator inspection. - -## Cross-Platform Compatibility - -This skill uses `Task({ subagent_type: ... })` which is Claude Code syntax. For other platforms: - -| Platform | Equivalent Syntax | -|----------|-------------------| -| Claude Code | `Task({ subagent_type: 'general-purpose', model: 'sonnet', prompt: ... })` | -| OpenCode | `spawn_agent({ type: 'general-purpose', model: 'sonnet', prompt: ... })` | -| Codex CLI | `$agent general-purpose --model sonnet --prompt "..."` | - -The aggregation and iteration logic remains the same across platforms - only the agent spawning syntax differs. diff --git a/skills/validate-delivery/SKILL.md b/skills/validate-delivery/SKILL.md deleted file mode 100644 index 828cdda..0000000 --- a/skills/validate-delivery/SKILL.md +++ /dev/null @@ -1,329 +0,0 @@ ---- -name: validate-delivery -description: "Use when user asks to \"validate delivery\", \"check readiness\", or \"verify completion\". Runs tests, build, and requirement checks with pass/fail instructions." -version: 5.2.0 ---- - -# validate-delivery - -Autonomously validate that a task is complete and ready to ship. - -## Validation Checks - -### Check 1: Review Status - -```javascript -function checkReviewStatus(reviewResults) { - if (!reviewResults) return { passed: false, reason: 'No review results' }; - if (reviewResults.approved) return { passed: true }; - if (reviewResults.override) return { passed: true, override: true }; - return { passed: false, reason: 'Review not approved' }; -} -``` - -### Check 2: Tests Pass - -```bash -# Detect test runner and run -if grep -q '"test"' package.json; then - npm test; TEST_EXIT_CODE=$? -elif [ -f "pytest.ini" ]; then - pytest; TEST_EXIT_CODE=$? -elif [ -f "Cargo.toml" ]; then - cargo test; TEST_EXIT_CODE=$? -elif [ -f "go.mod" ]; then - go test ./...; TEST_EXIT_CODE=$? -else - TEST_EXIT_CODE=0 # No tests -fi -``` - -### Check 3: Build Passes - -```bash -if grep -q '"build"' package.json; then - npm run build; BUILD_EXIT_CODE=$? -elif [ -f "Cargo.toml" ]; then - cargo build --release; BUILD_EXIT_CODE=$? -elif [ -f "go.mod" ]; then - go build ./...; BUILD_EXIT_CODE=$? -else - BUILD_EXIT_CODE=0 # No build step -fi -``` - -### Check 4: Requirements Met - -```javascript -async function checkRequirementsMet(task, changedFiles) { - const requirements = extractRequirements(task.description); - const results = []; - - for (const req of requirements) { - const implemented = await verifyRequirement(req, changedFiles); - results.push({ requirement: req, implemented }); - } - - return { - passed: results.every(r => r.implemented), - requirements: results - }; -} - -function extractRequirements(description) { - const reqs = []; - // Extract bullet points: - Item - const bullets = description.match(/^[-*]\s+(.+)$/gm); - if (bullets) reqs.push(...bullets.map(m => m.replace(/^[-*]\s+/, ''))); - // Extract numbered items: 1. Item - const numbered = description.match(/^\d+\.\s+(.+)$/gm); - if (numbered) reqs.push(...numbered.map(m => m.replace(/^\d+\.\s+/, ''))); - return [...new Set(reqs)].slice(0, 10); -} -``` - -### Check 5: No Regressions - -```bash -# Compare test counts before/after changes -git stash -BEFORE=$(npm test 2>&1 | grep -oE '[0-9]+ passing' | grep -oE '[0-9]+') -git stash pop -AFTER=$(npm test 2>&1 | grep -oE '[0-9]+ passing' | grep -oE '[0-9]+') -[ "$AFTER" -lt "$BEFORE" ] && REGRESSION=true || REGRESSION=false -``` - -### Pre-check: Ensure Repo-Intel (before Check 6) - -```javascript -const fs = require('fs'); -const path = require('path'); - -const { getStateDirPath } = require('@agentsys/lib/platform/state-dir'); -const cwd = process.cwd(); -const mapFile = path.join(getStateDirPath(cwd), 'repo-intel.json'); - -if (!fs.existsSync(mapFile)) { - const response = await AskUserQuestion({ - questions: [{ - question: 'Generate repo-intel?', - description: 'No repo-intel map found. Generating one enables diff-risk scoring for changed files. Takes ~5 seconds.', - options: [ - { label: 'Yes, generate it', value: 'yes' }, - { label: 'Skip', value: 'no' } - ] - }] - }); - - if (response === 'yes' || response?.['Generate repo-intel?'] === 'yes') { - try { - const { binary } = require('@agentsys/lib'); - const output = binary.runAnalyzer(['repo-intel', 'init', cwd]); - const stateDirPath = path.join(cwd, stateDir); - if (!fs.existsSync(stateDirPath)) fs.mkdirSync(stateDirPath, { recursive: true }); - fs.writeFileSync(mapFile, output); - } catch (e) { - // Binary not available - proceed without - } - } -} -``` - -### Check 6: Diff Risk (optional, advisory) - -Score changed files by composite risk using repo-intel. This check is advisory - it never causes pass/fail on its own, but raises the bar for high-risk files. - -```javascript -const { binary } = require('@agentsys/lib'); - -// stateDir and mapFile already defined in pre-check above - -function checkDiffRisk(cwd) { - if (!fs.existsSync(mapFile)) { - return { available: false, reason: 'No repo-intel map found' }; - } - - // Get changed files via git - const changedFiles = getChangedFiles(cwd); // git diff --name-only origin/main..HEAD - if (!changedFiles.length) { - return { available: false, reason: 'No changed files' }; - } - const fileList = changedFiles.join(','); - - try { - const json = binary.runAnalyzer([ - 'repo-intel', 'query', 'diff-risk', - '--files', fileList, - '--map-file', mapFile, - cwd - ]); - const risks = JSON.parse(json); - - const elevated = risks.filter(r => r.riskScore > 0.5); - const high = risks.filter(r => r.riskScore > 0.7); - const requiresTestEvidence = risks.filter(r => r.riskScore > 0.6); - const requiresHumanReview = risks.filter(r => r.riskScore > 0.7); - - const parts = []; - if (elevated.length > 0) parts.push(`${elevated.length} files at elevated risk (>0.5)`); - if (high.length > 0) parts.push(`${high.length} files at high risk (>0.7)`); - const summary = parts.length > 0 ? parts.join(', ') : 'All files at normal risk'; - - return { - available: true, - risks, - summary, - requiresTestEvidence: requiresTestEvidence.map(r => r.path), - requiresHumanReview: requiresHumanReview.map(r => r.path), - }; - } catch (e) { - return { available: false, reason: 'diff-risk query failed' }; - } -} -``` - -#### Risk Escalation Rules - -Risk scores modify validation strictness but never override pass/fail: - -- **riskScore > 0.6**: Require explicit test coverage evidence for those files. "Tests pass" alone is insufficient - the validator must confirm that tests specifically exercise the high-risk files, not just that the suite passes. -- **riskScore > 0.7**: Flag for additional human review before shipping. Include `requiresHumanReview` in the output so the orchestrator can prompt for confirmation. - -## Aggregate Results - -```javascript -const diffRisk = checkDiffRisk(cwd); - -const checks = { - reviewClean: checkReviewStatus(reviewResults), - testsPassing: { passed: TEST_EXIT_CODE === 0 }, - buildPassing: { passed: BUILD_EXIT_CODE === 0 }, - requirementsMet: await checkRequirementsMet(task, changedFiles), - noRegressions: { passed: !REGRESSION }, - diffRisk: diffRisk.available - ? { passed: true, advisory: true, summary: diffRisk.summary, - requiresTestEvidence: diffRisk.requiresTestEvidence, - requiresHumanReview: diffRisk.requiresHumanReview } - : { passed: true, advisory: true, skipped: true, reason: diffRisk.reason } -}; - -// diffRisk is always passed:true - it is advisory only and never blocks shipping -const allPassed = Object.values(checks).every(c => c.passed); -const failedChecks = Object.entries(checks) - .filter(([_, v]) => !v.passed) - .map(([k]) => k); -``` - -## Decision and Output - -### If All Pass - -```javascript -const riskSummary = diffRisk.available ? diffRisk.summary : null; - -workflowState.completePhase({ - approved: true, - checks, - summary: 'All validation checks passed', - ...(riskSummary && { riskSummary }) -}); - -return { approved: true, checks, ...(riskSummary && { riskSummary }) }; -``` - -### If Any Fail - -```javascript -const fixInstructions = generateFixInstructions(checks, failedChecks); -const riskSummary = diffRisk.available ? diffRisk.summary : null; - -workflowState.failPhase('Validation failed', { - approved: false, - failedChecks, - fixInstructions, - ...(riskSummary && { riskSummary }) -}); - -return { approved: false, failedChecks, fixInstructions, ...(riskSummary && { riskSummary }) }; -``` - -## Fix Instructions Generator - -```javascript -function generateFixInstructions(checks, failedChecks) { - const instructions = []; - - if (failedChecks.includes('testsPassing')) { - instructions.push({ action: 'Fix failing tests', command: 'npm test' }); - } - if (failedChecks.includes('buildPassing')) { - instructions.push({ action: 'Fix build errors', command: 'npm run build' }); - } - if (failedChecks.includes('requirementsMet')) { - const unmet = checks.requirementsMet.requirements - .filter(r => !r.implemented) - .map(r => r.requirement); - instructions.push({ action: 'Implement missing', details: unmet.join(', ') }); - } - - // Risk-aware instructions (advisory - added alongside other instructions) - if (checks.diffRisk && !checks.diffRisk.skipped) { - if (checks.diffRisk.requiresTestEvidence?.length > 0) { - instructions.push({ - action: 'Verify test coverage for high-risk files', - details: checks.diffRisk.requiresTestEvidence.join(', '), - advisory: true - }); - } - if (checks.diffRisk.requiresHumanReview?.length > 0) { - instructions.push({ - action: 'Flag for human review before shipping', - details: checks.diffRisk.requiresHumanReview.join(', '), - advisory: true - }); - } - } - - return instructions; -} -``` - -## Output Format - -```json -{ - "approved": true|false, - "checks": { - "reviewClean": { "passed": true }, - "testsPassing": { "passed": true }, - "buildPassing": { "passed": true }, - "requirementsMet": { "passed": true }, - "noRegressions": { "passed": true }, - "diffRisk": { - "passed": true, - "advisory": true, - "summary": "3 files at elevated risk (>0.5), 1 file at high risk (>0.7)", - "requiresTestEvidence": ["lib/core.js"], - "requiresHumanReview": ["lib/auth.js"] - } - }, - "failedChecks": [], - "fixInstructions": [], - "riskSummary": "3 files at elevated risk (>0.5), 1 file at high risk (>0.7)" -} -``` - -When repo-intel is unavailable, `diffRisk` shows as skipped: - -```json -{ - "diffRisk": { "passed": true, "advisory": true, "skipped": true, "reason": "No repo-intel map found" } -} -``` - -## Constraints - -- NO human intervention - fully autonomous -- Returns structured JSON for orchestrator -- Generates specific fix instructions on failure -- Workflow retries automatically after fixes From 1010cbd619d0230c597a5d2714e4dc33d974f8de Mon Sep 17 00:00:00 2001 From: Avi Fenesh Date: Wed, 25 Mar 2026 20:11:03 +0200 Subject: [PATCH 4/6] fix: update remaining stale agent references after decoupling Fix bare delivery-validator and test-coverage-checker references missed in the initial decoupling commit. Now all references use the prepare-delivery: prefix consistently. Updated: implementation-agent.md, next-task.md (gates table), README.md, lib/adapter-transforms.js --- README.md | 9 +++++---- agents/implementation-agent.md | 8 ++++---- commands/next-task.md | 4 ++-- lib/adapter-transforms.js | 2 +- 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index e408393..434a2cd 100644 --- a/README.md +++ b/README.md @@ -45,9 +45,9 @@ The workflow has 12 phases. Phases 1-6 involve the user; phases 7-12 run autonom | 5 | Planning | planning-agent | Opus | | 6 | User Approval | - | - | | 7 | Implementation | implementation-agent | Opus | -| 8 | Pre-Review Gates | deslop-agent + test-coverage-checker | Sonnet | +| 8 | Pre-Review Gates | deslop:deslop-agent + prepare-delivery:test-coverage-checker | Sonnet | | 9 | Review Loop | 4+ parallel reviewers | Sonnet | -| 10 | Delivery Validation | delivery-validator | Sonnet | +| 10 | Delivery Validation | prepare-delivery:delivery-validator | Sonnet | | 11 | Docs Update | sync-docs-agent | Sonnet | | 12 | Ship | ship:ship | - | @@ -87,8 +87,9 @@ The state directory is platform-aware: `.claude/`, `.opencode/`, or `.codex/`. | Skill | Purpose | |-------|---------| | discover-tasks | Fetch, filter, score, and present tasks for selection | -| orchestrate-review | Multi-pass parallel review with aggregation and iteration | -| validate-delivery | Autonomous completion checks - tests, build, requirements, regressions | + +Phases 8-11 use skills from the [prepare-delivery](https://github.com/agent-sh/prepare-delivery) plugin: +`orchestrate-review`, `validate-delivery`, `check-test-coverage`. ## Requirements diff --git a/agents/implementation-agent.md b/agents/implementation-agent.md index 761e0c7..2de2e55 100644 --- a/agents/implementation-agent.md +++ b/agents/implementation-agent.md @@ -387,11 +387,11 @@ implementation-agent (YOU ARE HERE) ↓ SubagentStop hook triggers automatically ↓ - Pre-review gates: deslop:deslop-agent + test-coverage-checker + Pre-review gates: deslop:deslop-agent + prepare-delivery:test-coverage-checker ↓ Phase 9 review loop (must approve) ↓ - delivery-validator (must approve) + prepare-delivery:delivery-validator (must approve) ↓ sync-docs:sync-docs-agent ↓ @@ -434,9 +434,9 @@ ${gitLog} --- [STOP] STOPPING HERE - SubagentStop hook will trigger pre-review gates - → deslop:deslop-agent + test-coverage-checker (parallel) + → deslop:deslop-agent + prepare-delivery:test-coverage-checker (parallel) → Phase 9 review loop - → delivery-validator + → prepare-delivery:delivery-validator → sync-docs:sync-docs-agent → ship:ship ``` diff --git a/commands/next-task.md b/commands/next-task.md index a89fa29..7fc62c9 100644 --- a/commands/next-task.md +++ b/commands/next-task.md @@ -70,7 +70,7 @@ Each phase must complete before the next starts: | Gate | Requirement | |------|-------------| | Implementation | Agent completes all plan steps | -| Pre-Review | deslop-agent + test-coverage-checker + /simplify (parallel) | +| Pre-Review | deslop:deslop-agent + prepare-delivery:test-coverage-checker + /simplify (parallel) | | Review Loop | Must approve (no open issues or override) | | Delivery | Tests pass, build passes | | Docs | Documentation updated | @@ -78,7 +78,7 @@ Each phase must complete before the next starts: **Forbidden actions for agents:** - No agent may create PRs or push to remote (only ship:ship) -- No agent may skip Phase 9, delivery-validator, or docs update +- No agent may skip Phase 9, prepare-delivery:delivery-validator, or docs update ## Arguments diff --git a/lib/adapter-transforms.js b/lib/adapter-transforms.js index e62e573..f63e22b 100644 --- a/lib/adapter-transforms.js +++ b/lib/adapter-transforms.js @@ -129,7 +129,7 @@ function transformBodyForOpenCode(content, repoRoot) { const note = ` > **OpenCode Note**: Invoke agents using \`@agent-name\` syntax. > Available agents: task-discoverer, exploration-agent, planning-agent, -> implementation-agent, deslop-agent, delivery-validator, sync-docs-agent, consult-agent +> implementation-agent, deslop-agent, prepare-delivery:delivery-validator, sync-docs-agent, consult-agent > Example: \`@exploration-agent analyze the codebase\` `; From 5cc19680c64e9e530a5b921d46861a51b91c0cfc Mon Sep 17 00:00:00 2001 From: Avi Fenesh Date: Wed, 25 Mar 2026 20:11:40 +0200 Subject: [PATCH 5/6] fix: update delivery-approval.md reference to prepare-delivery:delivery-validator --- commands/delivery-approval.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/commands/delivery-approval.md b/commands/delivery-approval.md index c465157..3c46700 100644 --- a/commands/delivery-approval.md +++ b/commands/delivery-approval.md @@ -9,7 +9,7 @@ model: sonnet # /delivery-approval - Delivery Validation Validate that the current work is complete and ready to ship. -This command runs the same validation as the workflow's delivery-validator agent. +This command runs the same validation as the `prepare-delivery:delivery-validator` agent. ## Arguments From 028d574300bdbe4f5a77ae013fe730c921012e0b Mon Sep 17 00:00:00 2001 From: Avi Fenesh Date: Wed, 25 Mar 2026 20:18:07 +0200 Subject: [PATCH 6/6] docs: sync documentation with code changes --- CHANGELOG.md | 4 ++++ README.md | 1 + 2 files changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5e07c10..aeb04ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,9 @@ ### Changed +- Decouple delivery-validator and test-coverage-checker agents and orchestrate-review and validate-delivery skills to the prepare-delivery plugin - next-task now references these as `prepare-delivery:*` +- Update all internal cross-plugin references from `next-task:delivery-validator` and `next-task:test-coverage-checker` to `prepare-delivery:*` equivalents +- Rename git-map/repo-map plugin references to repo-intel across agent prompts and user-facing suggestions - Integrate repo-intel across /next-task workflow (hotspots, bugspots, diff-risk, test-gaps) - Downgrade exploration-agent from Opus to Sonnet - pre-fetched repo-intel data makes the agent a data curator rather than a deep analyst; planning-agent remains Opus - Replace inline state dir detections with `getStateDirPath()` from agent-core for consistent platform-aware paths @@ -23,6 +26,7 @@ ### Docs +- Add prepare-delivery to Cross-Plugin Dependencies table in README - Upgrade README with 12-phase workflow diagram, agent model table, and task sources reference ([#18](https://github.com/agent-sh/next-task/pull/18)) ## [1.1.0] - 2026-03-14 diff --git a/README.md b/README.md index 434a2cd..2fd7063 100644 --- a/README.md +++ b/README.md @@ -103,6 +103,7 @@ Phases 8-11 use skills from the [prepare-delivery](https://github.com/agent-sh/p | Plugin | Used in | |--------|---------| | [deslop](https://github.com/agent-sh/deslop) | Phase 8 - AI slop cleanup | +| [prepare-delivery](https://github.com/agent-sh/prepare-delivery) | Phases 8-10 - test coverage, review orchestration, delivery validation | | [sync-docs](https://github.com/agent-sh/sync-docs) | Phase 11 - documentation sync | | [ship](https://github.com/agent-sh/ship) | Phase 12 - PR creation, CI, merge |