Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions harness/adf-inspector.ts
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,9 @@ export function printSnapshot(snapshot: AdfSnapshot, previous?: AdfSnapshot): vo
export function detectAccumulationIssues(snapshots: AdfSnapshot[]): string[] {
const issues: string[] = [];
if (snapshots.length < 2) return issues;
const MIN_ABSOLUTE_GROWTH = 10;
const MIN_BASELINE_ITEMS = 3;
const MAX_SECTION_ITEMS = 20;

const first = snapshots[0];
const last = snapshots[snapshots.length - 1];
Expand All @@ -131,13 +134,13 @@ export function detectAccumulationIssues(snapshots: AdfSnapshot[]): string[] {
const growth = mod.totalItems - start.totalItems;
const growthRate = start.totalItems > 0 ? growth / start.totalItems : growth;

if (growthRate > 2) {
if (growth >= MIN_ABSOLUTE_GROWTH && start.totalItems >= MIN_BASELINE_ITEMS && growthRate > 2) {
issues.push(`${mod.module}: grew ${growth} items (${(growthRate * 100).toFixed(0)}% increase) — possible accumulation`);
}

// Check any single section that got very large
for (const sec of mod.sections) {
if (sec.itemCount > 15) {
if (sec.itemCount > MAX_SECTION_ITEMS) {
issues.push(`${mod.module} > ${sec.key}: ${sec.itemCount} items — section may need pruning`);
}
}
Expand Down
138 changes: 138 additions & 0 deletions harness/corpus/sdlc.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
/**
* SDLC-focused scenarios — validate that ADF modules stay updated and portable
* as project guidance evolves from requirements through release.
*/

import type { Scenario } from '../types';

// Static SDLC scenarios. All three scenarios use the same four on-demand modules
// (frontend, backend, infra, qa) with identical trigger lists. Each session injects
// a markdown snippet and declares an `expected` map keyed by module path.
// NOTE(review): the numbers in `expected` are presumably the count of injected
// bullets that should be routed to each module — confirm against the evaluator.
export const sdlcScenarios: Scenario[] = [
// Scenario 1: five sessions, one per SDLC phase (requirements, design,
// implementation, verification, release handoff).
{
id: 'fullstack-sdlc-handoff-portability',
archetype: 'fullstack',
description: 'Rules evolve across SDLC phases while remaining portable through ADF modules',
manifest: {
onDemand: [
{ path: 'frontend.adf', triggers: ['react', 'component', 'ui', 'css', 'vite', 'tsx'] },
{ path: 'backend.adf', triggers: ['api', 'endpoint', 'route', 'handler', 'database', 'auth', 'zod', 'request', 'response'] },
{ path: 'infra.adf', triggers: ['deploy', 'release', 'rollback', 'ci', 'pipeline', 'docker', 'env', 'artifact'] },
{ path: 'qa.adf', triggers: ['test', 'testing', 'playwright', 'contract', 'smoke', 'verification', 'evidence', 'auditability'] },
],
},
sessions: [
{
label: 'session-1: requirements',
inject: `
## API Requirements

- Every API endpoint must publish request and response schemas
- Auth is required for all write endpoints
- Route handlers must return structured error codes
- Database migrations must be reviewed before merge
`,
expected: { 'backend.adf': 4 },
},
{
label: 'session-2: design',
inject: `
## System Design

- React UI components must map one-to-one to approved design tokens
- API handlers must validate all payloads with Zod
- Route naming must stay stable across versions
- Frontend component props must be typed in TSX files
`,
expected: { 'frontend.adf': 2, 'backend.adf': 2 },
},
{
label: 'session-3: implementation',
inject: `
## Implementation Rules

- API route files live under \`app/api/\` and use one handler per endpoint
- Database writes must run inside transactions
- Auth checks execute before any handler business logic
- Build artifacts are generated only in CI pipeline jobs
`,
expected: { 'backend.adf': 3, 'infra.adf': 1 },
},
{
label: 'session-4: verification',
inject: `
## Verification

- CI pipeline must run unit, integration, and Playwright suites on every PR
- API contract tests validate request and response schema compatibility
- Deploy preview environments must run smoke checks before approval
- Test artifacts are uploaded from CI for auditability
`,
expected: { 'qa.adf': 4 },
},
{
label: 'session-5: release and portability handoff',
inject: `
## Release Handoff

- Deploy jobs must consume versioned artifacts from the pipeline only
- Rollback instructions must be validated in staging before production release
- Environment configuration uses env keys defined in the deployment checklist
- Release evidence includes CI run ID, artifact hash, and deployment timestamp
`,
expected: { 'infra.adf': 4 },
},
],
},
// Scenario 2: a single session under the generic heading "Checklist", mixing
// QA-evidence bullets with release/deploy bullets.
{
id: 'fullstack-sdlc-generic-checklist-routing',
archetype: 'fullstack',
description: 'Generic SDLC handoff headings still separate verification evidence from release operations',
manifest: {
onDemand: [
{ path: 'frontend.adf', triggers: ['react', 'component', 'ui', 'css', 'vite', 'tsx'] },
{ path: 'backend.adf', triggers: ['api', 'endpoint', 'route', 'handler', 'database', 'auth', 'zod', 'request', 'response'] },
{ path: 'infra.adf', triggers: ['deploy', 'release', 'rollback', 'ci', 'pipeline', 'docker', 'env', 'artifact'] },
{ path: 'qa.adf', triggers: ['test', 'testing', 'playwright', 'contract', 'smoke', 'verification', 'evidence', 'auditability'] },
],
},
sessions: [
{
label: 'session-1: generic checklist handoff',
inject: `
## Checklist

- Playwright smoke tests must pass before release approval
- Contract test evidence is attached to the deployment record for auditability
- Release artifact hashes are recorded before deploy starts
- Rollback drills must use the staged deploy artifact from the pipeline
`,
expected: { 'qa.adf': 2, 'infra.adf': 2 },
},
],
},
// Scenario 3: a single "Checklist" session whose bullets each contain both
// backend trigger words (api, request, endpoint, handler) and QA trigger words
// (contract, test, smoke, evidence).
{
id: 'fullstack-sdlc-mixed-qa-backend-signals',
archetype: 'fullstack',
description: 'Mixed backend and QA wording in a generic checklist should still route by dominant verification vs API intent',
manifest: {
onDemand: [
{ path: 'frontend.adf', triggers: ['react', 'component', 'ui', 'css', 'vite', 'tsx'] },
{ path: 'backend.adf', triggers: ['api', 'endpoint', 'route', 'handler', 'database', 'auth', 'zod', 'request', 'response'] },
{ path: 'infra.adf', triggers: ['deploy', 'release', 'rollback', 'ci', 'pipeline', 'docker', 'env', 'artifact'] },
{ path: 'qa.adf', triggers: ['test', 'testing', 'playwright', 'contract', 'smoke', 'verification', 'evidence', 'auditability'] },
],
},
sessions: [
{
label: 'session-1: mixed checklist bullets',
inject: `
## Checklist

- API contract test evidence must be attached to the release review for auditability
- Request and response schema contract tests must pass before merging backend changes
- Endpoint smoke tests run in CI before deploy approval
- API handler error responses are verified against contract fixtures
`,
expected: { 'qa.adf': 3, 'backend.adf': 1 },
},
],
},
];
117 changes: 115 additions & 2 deletions harness/runner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ import * as os from 'node:os';
import * as path from 'node:path';
import { execFileSync } from 'node:child_process';

import type { Scenario, TidyOutput, ScenarioResult, HarnessReport } from './types';
import { buildMigrationPlan, parseMarkdownSections, type TriggerMap } from '../packages/adf/src';
import type { Scenario, TidyOutput, ScenarioResult, HarnessReport, StaticSessionAudit, StaticItemRoute } from './types';
import { evaluateSession, printSessionResult } from './evaluator';
import { generateScenarios, getArchetypeManifest } from './ollama';
import { REAL_REPOS } from './corpus/real-repos';
Expand All @@ -31,6 +32,7 @@ import { workerScenarios } from './corpus/worker';
import { backendScenarios } from './corpus/backend';
import { fullstackScenarios } from './corpus/fullstack';
import { edgeCaseScenarios } from './corpus/edge-cases';
import { sdlcScenarios } from './corpus/sdlc';

// ============================================================================
// Config
Expand All @@ -44,6 +46,7 @@ const ALL_STATIC: Scenario[] = [
...backendScenarios,
...fullstackScenarios,
...edgeCaseScenarios,
...sdlcScenarios,
];

const OLLAMA_ARCHETYPES = ['worker', 'backend', 'fullstack'];
Expand Down Expand Up @@ -158,7 +161,12 @@ function runTidy(repoDir: string, dryRun = true): TidyOutput {
function runStaticScenario(scenario: Scenario): ScenarioResult {
const tmp = makeTempRepo(scenario);
const sessionResults = [];
const sessionAudits: StaticSessionAudit[] = [];
const snapshots: AdfSnapshot[] = [];
let prevSnapshot: AdfSnapshot | undefined;
let scenarioPass = true;
const baseClaude = THIN_POINTER.trim();
const aiDir = path.join(tmp, '.ai');

for (const session of scenario.sessions) {
// Each session: inject onto thin pointer, dry-run to evaluate, then apply
Expand All @@ -173,18 +181,123 @@ function runStaticScenario(scenario: Scenario): ScenarioResult {

// Apply tidy (non-dry-run) to route content into ADF modules, restoring
// CLAUDE.md to thin pointer so the next session sees a clean baseline.
runTidy(tmp, false);
const applyOutput = runTidy(tmp, false);

const postClaude = fs.readFileSync(path.join(tmp, 'CLAUDE.md'), 'utf-8').trim();
const claudeRestored = postClaude === baseClaude;
if (!claudeRestored) {
scenarioPass = false;
console.log(' portability warning: CLAUDE.md was not restored to thin pointer state');
}

const snapshot = inspectAdfModules(aiDir, session.label, prevSnapshot);
snapshots.push(snapshot);
prevSnapshot = snapshot;
const itemRoutes = previewItemRoutes(session.inject, scenario);

sessionAudits.push({
sessionLabel: session.label,
dryRunExtracted: tidyOutput.totalExtracted,
appliedModulesModified: applyOutput.modulesModified,
claudeRestored,
adfTotalItems: snapshot.totalItemsAcrossAllModules,
modulesGrew: snapshot.grew,
itemRoutes,
});

if (!sessionResult.pass) {
console.log(' item routing preview:');
for (const item of itemRoutes) {
const matches = item.matchedTriggers.length > 0 ? ` | matches=${item.matchedTriggers.join(', ')} score=${item.matchScore}` : '';
console.log(` [${item.heading || 'preamble'} -> ${item.headingModule}] ${item.targetModule} (${item.targetSection}) :: ${item.content}${matches}`);
}
}
}

const accumulationIssues = detectAccumulationIssues(snapshots);
if (accumulationIssues.length > 0) {
console.log(' accumulation warnings:');
for (const issue of accumulationIssues) console.log(` - ${issue}`);
}

return {
scenarioId: scenario.id,
archetype: scenario.archetype,
description: scenario.description,
sessions: sessionResults,
staticAudit: {
sessions: sessionAudits,
accumulationIssues,
},
pass: scenarioPass,
};
}

/**
 * Builds a per-item routing preview for one session's injected markdown.
 *
 * The scenario's on-demand manifest is turned into a trigger map (trigger words
 * lower-cased, modules with an empty trigger list left out), the markdown is
 * parsed into sections, and a migration plan is built from them. Every plan
 * item is then flattened into a `StaticItemRoute`.
 *
 * @param inject - Markdown text injected for the session.
 * @param scenario - Scenario whose `manifest.onDemand` entries supply the triggers.
 * @returns One route record per item of the migration plan, in plan order.
 */
function previewItemRoutes(inject: string, scenario: Scenario): StaticItemRoute[] {
  const triggerMap: TriggerMap = {};
  scenario.manifest.onDemand.forEach(manifestEntry => {
    if (manifestEntry.triggers.length === 0) return;
    triggerMap[manifestEntry.path] = manifestEntry.triggers.map(word => word.toLowerCase());
  });

  const { items } = buildMigrationPlan(parseMarkdownSections(inject), undefined, triggerMap);

  const routes: StaticItemRoute[] = [];
  for (const planItem of items) {
    const { classification } = planItem;
    routes.push({
      heading: planItem.sourceHeading,
      content: planItem.element.content,
      // Module suggested by the heading text alone, reported next to the planner's choice.
      headingModule: previewHeadingModule(planItem.sourceHeading),
      targetModule: classification.targetModule,
      targetSection: classification.targetSection,
      decision: classification.decision,
      reason: classification.reason,
      // Adds matchedTriggers / matchScore for the item's own text.
      ...scoreItemAgainstTriggers(planItem.element.content, triggerMap),
    });
  }
  return routes;
}

/**
 * Maps a markdown heading to an ADF module name using keyword patterns only.
 *
 * The heading is lower-cased and tested against an ordered list of
 * whole-word patterns; the first pattern that matches decides the module.
 * Order matters: a heading containing both a frontend keyword and a QA keyword
 * resolves to `frontend.adf` because that rule is checked first.
 *
 * @param heading - Heading text (may be empty).
 * @returns The matching module file name, or `'core.adf'` when nothing matches.
 */
function previewHeadingModule(heading: string): string {
  const normalized = heading.toLowerCase();

  // Checked top to bottom; first hit wins.
  const orderedRules: ReadonlyArray<readonly [RegExp, string]> = [
    [
      /\b(design.system|ui|frontend|css|component|react|vue|svelte|next|nextjs|tailwind|shadcn|radix|storybook|vite|vitest|playwright|remix|nuxt|astro)\b/,
      'frontend.adf',
    ],
    [
      /\b(qa|quality|test|testing|verification|validate|validation|contract|smoke|evidence|audit)\b/,
      'qa.adf',
    ],
    [
      /\b(auth|authentication|authorization|security|secret|token|permission|cors|rate.limit|jwt|oauth|clerk|nextauth|lucia|session|cookie|csrf|xss|password|bcrypt)\b/,
      'security.adf',
    ],
    [
      /\b(deploy|deployment|infrastructure|infra|ci|cd|pipeline|config|configuration|environment|env|docker|wrangler|cloudflare|vercel|netlify|railway|fly|render|github.actions|kv|d1|r2|queue|durable.object)\b/,
      'infra.adf',
    ],
    [
      /\b(api|backend|server|database|db|endpoint|query|migration|handler|prisma|drizzle|mongoose|postgres|postgresql|mysql|sqlite|express|fastify|hono|trpc|zod|graphql)\b/,
      'backend.adf',
    ],
  ];

  const hit = orderedRules.find(([pattern]) => pattern.test(normalized));
  return hit === undefined ? 'core.adf' : hit[1];
}

/**
 * Finds the module whose trigger list matches the given text best.
 *
 * For each module in `triggerMap`, every trigger is tested as a whole word
 * (optionally followed by one of the suffixes s/ed/ing/ment/tion/ity/ication)
 * against the lower-cased text. The module with the most matching triggers
 * wins; on a tie the module encountered first is kept.
 *
 * @param text - Item text to score.
 * @param triggerMap - Trigger words keyed by module path.
 * @returns The winning module's matched triggers and their count
 *          (`[]` and `0` when no trigger matches anywhere).
 */
function scoreItemAgainstTriggers(text: string, triggerMap: TriggerMap): Pick<StaticItemRoute, 'matchedTriggers' | 'matchScore'> {
  const haystack = text.toLowerCase();
  let bestHits: string[] = [];

  for (const moduleTriggers of Object.values(triggerMap)) {
    const hits: string[] = [];
    for (const trigger of moduleTriggers) {
      const pattern = new RegExp(`\\b${escapeRegex(trigger)}(?:s|ed|ing|ment|tion|ity|ication)?\\b`, 'i');
      if (pattern.test(haystack)) {
        hits.push(trigger);
      }
    }
    // Strictly greater: an equal count never displaces an earlier module.
    if (hits.length > bestHits.length) {
      bestHits = hits;
    }
  }

  return { matchedTriggers: bestHits, matchScore: bestHits.length };
}

/**
 * Backslash-escapes regular-expression metacharacters so the input can be
 * embedded in a `RegExp` source and matched literally.
 *
 * @param str - Raw text to embed in a pattern.
 * @returns `str` with each of `. * + ? ^ $ { } ( ) | [ ] \` prefixed by a backslash.
 */
function escapeRegex(str: string): string {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  // `$&` re-inserts the matched character after the added backslash.
  const escaped = str.replace(metacharacters, '\\$&');
  return escaped;
}

// ============================================================================
// Ollama Scenario Runner (exploratory — no expected routing)
// ============================================================================
Expand Down
28 changes: 28 additions & 0 deletions harness/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,37 @@ export interface ScenarioResult {
archetype: string;
description: string;
sessions: SessionResult[];
staticAudit?: StaticScenarioAudit;
pass: boolean;
}

/**
 * Audit record captured by the static scenario runner for a single session,
 * after the non-dry-run tidy pass has been applied.
 */
export interface StaticSessionAudit {
/** Label of the session this record describes. */
sessionLabel: string;
/** `totalExtracted` value taken from the session's evaluation tidy output. */
dryRunExtracted: number;
/** `modulesModified` list reported by the applied (non-dry-run) tidy pass. */
appliedModulesModified: string[];
/** True when the trimmed CLAUDE.md content equals the trimmed thin-pointer baseline after apply. */
claudeRestored: boolean;
/** Item total across all ADF modules, taken from the post-apply snapshot. */
adfTotalItems: number;
/** The post-apply snapshot's `grew` list. */
modulesGrew: string[];
/** Per-item routing preview computed from the session's injected markdown. */
itemRoutes: StaticItemRoute[];
}

/** Scenario-level audit attached to a static scenario's result. */
export interface StaticScenarioAudit {
/** One audit record per session, in the order the sessions were run. */
sessions: StaticSessionAudit[];
/** Warning messages produced by the accumulation check over the scenario's snapshots. */
accumulationIssues: string[];
}

/**
 * Routing preview for one item extracted from a session's injected markdown:
 * the migration planner's classification plus two diagnostic signals
 * (heading-keyword module and trigger-match score).
 */
export interface StaticItemRoute {
/** Source heading of the item; the runner prints an empty heading as "preamble". */
heading: string;
/** Content of the extracted item. */
content: string;
/** Module derived from the heading text alone by keyword matching. */
headingModule: string;
/** Module chosen by the planner's classification. */
targetModule: string;
/** Section chosen by the planner's classification. */
targetSection: string;
/** Classification decision for the item. */
decision: 'STAY' | 'MIGRATE';
/** Classification reason reported by the planner. */
reason: string;
/** Triggers of the single manifest module that matched the item text most often. */
matchedTriggers: string[];
/** Number of entries in `matchedTriggers` (0 when nothing matched). */
matchScore: number;
}

// ============================================================================
// Run Report
// ============================================================================
Expand Down
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -42,5 +42,6 @@
"typescript": "~5.8.2",
"vitest": "^4.0.18",
"zod": "^3.24.1"
}
},
"version": "0.8.0"
}
2 changes: 1 addition & 1 deletion packages/adf/package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "@stackbilt/adf",
"sideEffects": false,
"version": "0.7.0",
"version": "0.8.0",
"description": "ADF (Attention-Directed Format) — AST-backed context format for AI agents",
"main": "./dist/index.js",
"types": "./dist/index.d.ts",
Expand Down
Loading
Loading