From 1a0aa4a8158d4d89f51984bf5902240bed0f58c1 Mon Sep 17 00:00:00 2001 From: Raymond Weitekamp Date: Tue, 24 Mar 2026 16:09:48 -0400 Subject: [PATCH] feat: add deterministic linter (tools/lint/) Rust-based linter for OpenProse programs. Supports both legacy .prose files and .md programs. Legacy (.prose): - Structure, syntax, string literals, escape sequences (E002) - Agent definitions and references - Property, permission, skill validation - Prompt hygiene, output validation (E023) - Legacy syntax detection .md programs: - YAML frontmatter validation - Component and contract section parsing - Heading classification (components vs state schemas vs docs) - Multi-file program directory auto-detection - Contract completeness observations Spec discovery: - discover command reports corpus patterns not covered by spec - Role/delegation correlation, API/prohibited overlap, state coherence build.rs reads skills/open-prose/compiler.md for vocabulary extraction. Tests validate against skills/open-prose/examples/. 22 tests, 0 failures. 
--- tools/lint/Cargo.lock | 555 ++++++ tools/lint/Cargo.toml | 27 + tools/lint/README.md | 182 ++ tools/lint/build.rs | 185 ++ tools/lint/fixtures/invalid/mixed.prose | 20 + tools/lint/fixtures/valid/basic.prose | 24 + tools/lint/src/diag.rs | 47 + tools/lint/src/fs.rs | 48 + tools/lint/src/lib.rs | 15 + tools/lint/src/lint.rs | 1597 ++++++++++++++++ tools/lint/src/lint_legacy.rs | 2250 +++++++++++++++++++++++ tools/lint/src/main.rs | 185 ++ tools/lint/src/profile.rs | 31 + tools/lint/src/wasm.rs | 30 + 14 files changed, 5196 insertions(+) create mode 100644 tools/lint/Cargo.lock create mode 100644 tools/lint/Cargo.toml create mode 100644 tools/lint/README.md create mode 100644 tools/lint/build.rs create mode 100644 tools/lint/fixtures/invalid/mixed.prose create mode 100644 tools/lint/fixtures/valid/basic.prose create mode 100644 tools/lint/src/diag.rs create mode 100644 tools/lint/src/fs.rs create mode 100644 tools/lint/src/lib.rs create mode 100644 tools/lint/src/lint.rs create mode 100644 tools/lint/src/lint_legacy.rs create mode 100644 tools/lint/src/main.rs create mode 100644 tools/lint/src/profile.rs create mode 100644 tools/lint/src/wasm.rs diff --git a/tools/lint/Cargo.lock b/tools/lint/Cargo.lock new file mode 100644 index 0000000..21400a6 --- /dev/null +++ b/tools/lint/Cargo.lock @@ -0,0 +1,555 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "anyhow" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" + +[[package]] +name = "bitflags" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" + +[[package]] +name = "bumpalo" +version = "3.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys", +] + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "getrandom" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", + "wasip3", +] + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "foldhash", +] + +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + +[[package]] +name = "indexmap" +version = "2.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +dependencies = [ + "equivalent", + "hashbrown 0.16.1", + "serde", + "serde_core", +] + +[[package]] +name = "itoa" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" + +[[package]] +name = "js-sys" +version = "0.3.91" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + +[[package]] +name = "libc" +version = "0.2.183" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" + +[[package]] +name = "linux-raw-sys" +version = "0.12.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "memchr" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" + +[[package]] +name = "once_cell" +version = "1.21.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" + +[[package]] +name = "openprose-lint" +version = "0.1.0" +dependencies = [ + "anyhow", + "serde", + "serde-wasm-bindgen", + "serde_json", + "tempfile", + "walkdir", + "wasm-bindgen", +] + +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + +[[package]] +name = "rustix" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys", +] + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "semver" +version = "1.0.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde-wasm-bindgen" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8302e169f0eddcc139c70f139d19d6467353af16f9fce27e8c30158036a1e16b" +dependencies = [ + "js-sys", + "serde", + "wasm-bindgen", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tempfile" +version = "3.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" +dependencies = [ + "fastrand", + "getrandom", + "once_cell", + "rustix", + "windows-sys", +] + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "wasip2" +version = "1.0.2+wasi-0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap", + "wasm-encoder", + "wasmparser", +] + +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags", + "hashbrown 0.15.5", + "indexmap", + "semver", +] + +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap", + "prettyplease", + "syn", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + 
"bitflags", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/tools/lint/Cargo.toml b/tools/lint/Cargo.toml new file mode 100644 index 0000000..7b864f2 --- /dev/null +++ b/tools/lint/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "openprose-lint" +version = "0.1.0" +edition = "2024" +description = "Deterministic linter for OpenProse programs (legacy .prose and .md)" +license = "MIT" +publish = false + +[lib] +crate-type = ["lib", "cdylib"] + +[dependencies] +anyhow = "1.0" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" + +# WASM-only deps +[target.'cfg(target_arch = "wasm32")'.dependencies] +wasm-bindgen = "0.2" +serde-wasm-bindgen = "0.6" + +# Native-only deps +[target.'cfg(not(target_arch = "wasm32"))'.dependencies] +walkdir = "2.5" + +[dev-dependencies] +tempfile = "3" diff --git a/tools/lint/README.md b/tools/lint/README.md new file mode 100644 index 0000000..05f52a1 --- /dev/null +++ b/tools/lint/README.md @@ -0,0 +1,182 @@ +# openprose-lint + +Deterministic linter for OpenProse programs. Validates both legacy `.prose` files and `.md` programs against the language spec. + +OpenProse programs are executed by LLMs — the intelligence is in the model. 
The linter checks the **static, spec-driven parts** that should remain deterministic regardless of which model runs the program: + +- Structure and syntax (legacy: indentation, blocks, strings; frontmatter, headings) +- Declaration and reference consistency (agents, nodes, components) +- Property validation (models, permissions, skills) +- Contract quality (hedging in ensures, missing requires/ensures — detects contracts in `## requires` sections, bare top-level `requires:`, and code blocks under `## Contract`) +- Prompt hygiene (empty prompts, overly long prompts) +- Spec/corpus drift detection (`discover` command) + +## Install + +```bash +cd tools/lint +cargo build --release +``` + +The binary is at `target/release/openprose-lint`. + +## Usage + +### Lint legacy programs (.prose files) + +```bash +# Lint a file or directory of .prose files +openprose-lint lint skills/open-prose/examples/ + +# Use strict profile (errors instead of warnings for legacy constructs) +openprose-lint lint --profile strict skills/open-prose/examples/ +``` + +### Lint .md programs (.md files) + +```bash +# Lint individual files or directories +openprose-lint lint-md path/to/program/ + +# Multi-file programs are auto-detected — if a directory contains a +# root file (kind: program), sibling .md files are linted as a unit +openprose-lint lint-md path/to/programs/ +``` + +### Discover spec gaps + +Analyze a corpus of .md programs and report vocabulary not documented in the spec: + +```bash +openprose-lint discover path/to/programs/ +``` + +## Profiles + +| Profile | Behavior | +|---------|----------| +| `compat` (default) | Warnings for legacy/compatibility constructs | +| `strict` | Errors for anything outside the current spec | + +## Exit codes + +| Code | Meaning | +|------|---------| +| 0 | No errors | +| 1 | One or more errors | +| 2 | CLI usage or filesystem error | + +Warnings do not fail the run. 
+ +## Architecture + +``` +tools/lint/ +├── build.rs # Extracts vocabulary from skills/open-prose/compiler.md +├── src/ +│ ├── main.rs # CLI entry point +│ ├── lib.rs # Public API +│ ├── lint_legacy.rs # legacy engine (.prose files) +│ ├── lint.rs # .md program engine (.md programs) +│ ├── diag.rs # Diagnostic types +│ ├── profile.rs # Lint profiles (strict/compat) +│ ├── fs.rs # File collection utilities +│ └── wasm.rs # WASM bindings (for browser/plugin use) +└── fixtures/ # Test fixtures +``` + +The `build.rs` script reads `skills/open-prose/compiler.md` at compile time to extract vocabulary (model names, property names, permission types). If the spec isn't found (e.g., building the crate standalone), hardcoded fallbacks are used. + +## Legacy Rules (.prose) + +### Errors + +| Code | Description | +|------|-------------| +| E001 | Unterminated string literal | +| E003 | Session missing prompt or agent | +| E006 | Duplicate agent definition | +| E007 | Undefined agent reference | +| E008 | Invalid model value | +| E009 | Duplicate property | +| E010 | Duplicate `use` statement | +| E011 | Empty `use` path | +| E012 | Invalid `use` alias/path shape | +| E013 | Skills must be an array | +| E014 | Skill name must be a string | +| E015 | Permissions must be a block | +| E016 | Malformed permission pattern/value | +| E017 | `resume:` requires persistent agent | +| E019 | Duplicate variable declaration | +| E020 | Empty input name | +| E021 | Duplicate input declaration | +| E024 | Duplicate output declaration | +| OPE001 | Tabs used for indentation | +| OPE002 | Gate missing prompt | +| OPE003 | Invalid loop max value | + +### Warnings + +| Code | Description | +|------|-------------| +| W001 | Empty session prompt | +| W002 | Whitespace-only session prompt | +| W003 | Prompt exceeds 10,000 characters | +| W004 | Empty prompt property | +| W005 | Unknown property name | +| W006 | Unknown import source format | +| W008 | Unknown permission type | +| W010 
| Empty skills array | +| OPW001 | Unbounded loop without max iterations | +| OPW002 | Discretion condition may be ambiguous | +| OPW003 | Legacy `import` syntax | +| OPW004 | Legacy labeled session syntax | +| OPW005 | Legacy session block syntax | +| OPW007 | `input` after executable statements | + +## Rules (.md programs) + +### Errors + +| Code | Description | +|------|-------------| +| V2E001 | Missing YAML frontmatter | +| V2E002 | Unterminated YAML frontmatter | +| V2E003 | Duplicate frontmatter key | +| V2E010 | Missing required field: name | +| V2E011 | Missing required field: kind | +| V2E012 | Unknown component kind | +| V2E013 | Program without nodes/services | +| V2E020 | Unterminated fenced code block | +| V2E030 | Duplicate component name | +| V2E040 | Node declared but not defined in body (single-file mode) | +| V2E050 | No root program file in directory | +| V2E051 | Node file missing from program directory | + +### Warnings + +| Code | Description | +|------|-------------| +| V2W001 | Unknown frontmatter key | +| V2W002 | Unknown component role | +| V2W003 | Missing version | +| V2W004 | Component name contains spaces | +| V2W005 | Kind used in corpus but not in spec (strict only) | +| V2W010 | Empty contract clause | +| V2W011 | Hedging language in ensures clause | +| V2W012 | Strategy clause too terse | +| V2W014 | Service/program-node without ensures | +| V2W015 | Program without requires | +| V2W020 | Component without code block | +| V2W021 | Component code block missing role | +| V2W030 | Component in body but not in frontmatter nodes | + +## WASM + +The linter can be compiled to WASM for browser or plugin use: + +```bash +cargo build --target wasm32-unknown-unknown --release +``` + +The `wasm.rs` module exposes `lint_wasm(source: &str) -> JsValue` for use from JavaScript. diff --git a/tools/lint/build.rs b/tools/lint/build.rs new file mode 100644 index 0000000..75c4439 --- /dev/null +++ b/tools/lint/build.rs @@ -0,0 +1,185 @@ +//! 
Parses the compiler spec markdown to generate linter vocabulary. +//! +//! Extracts from the co-located compiler.md spec: +//! - Agent property names and known model values +//! - Permission types and values +//! +//! The generated file is written to OUT_DIR/spec_vocab.rs and included +//! by src/lint.rs at compile time. + +use std::collections::BTreeSet; +use std::env; +use std::fs; +use std::path::Path; + +fn main() { + // Resolve compiler.md relative to this crate's position in the repo: + // tools/lint/build.rs → skills/open-prose/compiler.md + let spec_path = Path::new("../../skills/open-prose/compiler.md"); + + println!("cargo:rerun-if-changed={}", spec_path.display()); + + let spec = match fs::read_to_string(spec_path) { + Ok(s) => s, + Err(_) => { + eprintln!( + "cargo:warning=Spec not found at {}, using fallback vocabulary", + spec_path.display() + ); + write_fallback(); + return; + } + }; + + let models = extract_models(&spec); + let agent_props = extract_agent_properties(&spec); + let permission_types = extract_table_column(&spec, "#### Permission Types", 0); + let permission_values = extract_table_column(&spec, "#### Permission Values", 0) + .into_iter() + .filter(|v| v != "Array") + .collect::>(); + + let out_dir = env::var("OUT_DIR").unwrap(); + let out_path = Path::new(&out_dir).join("spec_vocab.rs"); + + let code = format!( + r#"// Auto-generated from compiler.md — do not edit manually. 
+ +pub const SPEC_MODELS: &[&str] = &[{models}]; +pub const SPEC_AGENT_PROPERTIES: &[&str] = &[{agent_props}]; +pub const SPEC_PERMISSION_TYPES: &[&str] = &[{perm_types}]; +pub const SPEC_PERMISSION_VALUES: &[&str] = &[{perm_values}]; +"#, + models = format_str_slice(&models), + agent_props = format_str_slice(&agent_props), + perm_types = format_str_slice(&permission_types), + perm_values = format_str_slice(&permission_values), + ); + + fs::write(&out_path, code).expect("failed to write spec_vocab.rs"); +} + +fn write_fallback() { + let out_dir = env::var("OUT_DIR").unwrap(); + let out_path = Path::new(&out_dir).join("spec_vocab.rs"); + fs::write( + &out_path, + r#"// Fallback — compiler.md not found. +pub const SPEC_MODELS: &[&str] = &[]; +pub const SPEC_AGENT_PROPERTIES: &[&str] = &[]; +pub const SPEC_PERMISSION_TYPES: &[&str] = &[]; +pub const SPEC_PERMISSION_VALUES: &[&str] = &[]; +"#, + ) + .expect("failed to write fallback spec_vocab.rs"); +} + +fn extract_models(spec: &str) -> BTreeSet { + let mut models = BTreeSet::new(); + for line in spec.lines() { + if line.contains("`model`") && line.contains("identifier") { + let cols: Vec<&str> = line.split('|').collect(); + if cols.len() >= 4 { + for val in cols[3].split(',') { + let val = val.trim().trim_matches('`').trim(); + if !val.is_empty() + && val + .chars() + .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_') + { + models.insert(val.to_string()); + } + } + } + } + } + models +} + +fn extract_agent_properties(spec: &str) -> BTreeSet { + let mut props = BTreeSet::new(); + let mut in_agent_table = false; + + for line in spec.lines() { + if line.contains("| Property") + && line.contains("| Type") + && line.contains("| Values") + { + in_agent_table = true; + continue; + } + if in_agent_table && line.starts_with("| -") { + continue; + } + if in_agent_table && !line.starts_with('|') { + in_agent_table = false; + continue; + } + if in_agent_table { + let cols: Vec<&str> = line.split('|').collect(); + if 
cols.len() >= 2 { + let prop = cols[1].trim().trim_matches('`').trim(); + if !prop.is_empty() && prop != "Property" { + props.insert(prop.to_string()); + } + } + } + } + props +} + +fn extract_table_column(spec: &str, heading: &str, col_index: usize) -> BTreeSet { + let mut values = BTreeSet::new(); + let mut found_heading = false; + let mut in_table = false; + + for line in spec.lines() { + if line.trim() == heading { + found_heading = true; + continue; + } + if !found_heading { + continue; + } + if !in_table { + if line.starts_with('|') { + in_table = true; + if line.starts_with("| -") { + // separator row — skip without extracting values + } + continue; + } else if !line.trim().is_empty() { + continue; + } + } + if in_table { + if line.starts_with("| -") { + continue; + } + if !line.starts_with('|') { + break; + } + let cols: Vec<&str> = line.split('|').collect(); + if cols.len() > col_index + 1 { + let val = cols[col_index + 1].trim().trim_matches('`').trim(); + if !val.is_empty() + && val != "Type" + && val != "Value" + && val + .chars() + .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_') + { + values.insert(val.to_string()); + } + } + } + } + values +} + +fn format_str_slice(set: &BTreeSet) -> String { + set.iter() + .map(|s| format!("\"{s}\"")) + .collect::>() + .join(", ") +} diff --git a/tools/lint/fixtures/invalid/mixed.prose b/tools/lint/fixtures/invalid/mixed.prose new file mode 100644 index 0000000..87775ee --- /dev/null +++ b/tools/lint/fixtures/invalid/mixed.prose @@ -0,0 +1,20 @@ +session "" + +input topic: "too late" + +agent researcher: + model: turbo + prompt: "" + prompt: "duplicate" + permissions: deny + +agent researcher: + model: sonnet + +resume: reviewer + +loop: + session "forever" + +gate deploy: + allow: ["yes", "no"] diff --git a/tools/lint/fixtures/valid/basic.prose b/tools/lint/fixtures/valid/basic.prose new file mode 100644 index 0000000..18a7a22 --- /dev/null +++ b/tools/lint/fixtures/valid/basic.prose @@ -0,0 +1,24 @@ 
+use "@anthropic/web-search" + +input topic: "Topic to research" + +agent researcher: + model: sonnet + prompt: "Research carefully" + persist: project + skills: ["web-search"] + permissions: + read: ["README.md"] + bash: deny + +let notes = session: researcher + prompt: "Research {topic}" + +loop (max: 3): + session "Refine the findings" + context: notes + +resume: researcher + prompt: "Summarize the findings" + +output result = notes diff --git a/tools/lint/src/diag.rs b/tools/lint/src/diag.rs new file mode 100644 index 0000000..3515331 --- /dev/null +++ b/tools/lint/src/diag.rs @@ -0,0 +1,47 @@ +use std::fmt::{Display, Formatter}; +use std::path::PathBuf; + +#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd)] +pub enum Severity { + Error, + Warning, +} + +impl Display for Severity { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + Self::Error => f.write_str("error"), + Self::Warning => f.write_str("warning"), + } + } +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct Diagnostic { + pub code: &'static str, + pub severity: Severity, + pub message: String, + pub path: PathBuf, + pub line: usize, + pub column: usize, +} + +impl Diagnostic { + pub fn new( + path: &std::path::Path, + code: &'static str, + severity: Severity, + message: impl Into, + line: usize, + column: usize, + ) -> Self { + Self { + code, + severity, + message: message.into(), + path: path.to_path_buf(), + line, + column, + } + } +} diff --git a/tools/lint/src/fs.rs b/tools/lint/src/fs.rs new file mode 100644 index 0000000..bdda430 --- /dev/null +++ b/tools/lint/src/fs.rs @@ -0,0 +1,48 @@ +use anyhow::{Context, Result}; +use std::path::{Path, PathBuf}; +use walkdir::WalkDir; + +pub fn collect_prose_files(targets: &[PathBuf]) -> Result> { + let mut files = Vec::new(); + + for target in targets { + if target.is_file() { + if is_prose_file(target) { + files.push( + target + .canonicalize() + .with_context(|| format!("canonicalize {}", target.display()))?, + 
); + } + continue; + } + + if target.is_dir() { + for entry in WalkDir::new(target) + .into_iter() + .filter_map(|entry| entry.ok()) + .filter(|entry| entry.file_type().is_file()) + { + if is_prose_file(entry.path()) { + files.push( + entry + .path() + .canonicalize() + .with_context(|| format!("canonicalize {}", entry.path().display()))?, + ); + } + } + continue; + } + + anyhow::bail!("path does not exist: {}", target.display()); + } + + files.sort(); + files.dedup(); + Ok(files) +} + +pub fn is_prose_file(path: &Path) -> bool { + path.extension().and_then(|ext| ext.to_str()) == Some("prose") +} diff --git a/tools/lint/src/lib.rs b/tools/lint/src/lib.rs new file mode 100644 index 0000000..74307e2 --- /dev/null +++ b/tools/lint/src/lib.rs @@ -0,0 +1,15 @@ +pub mod diag; +#[cfg(not(target_arch = "wasm32"))] +pub mod fs; +pub mod lint; +pub mod lint_legacy; +pub mod profile; + +#[cfg(target_arch = "wasm32")] +pub mod wasm; + +pub use diag::{Diagnostic, Severity}; +pub use lint_legacy::{LintResult as LegacyLintResult, count_diagnostics, lint_source, lint_source_with_profile}; +#[cfg(not(target_arch = "wasm32"))] +pub use lint_legacy::{lint_path, lint_path_with_profile, lint_paths, lint_paths_with_profile}; +pub use profile::LintProfile; diff --git a/tools/lint/src/lint.rs b/tools/lint/src/lint.rs new file mode 100644 index 0000000..2cf544a --- /dev/null +++ b/tools/lint/src/lint.rs @@ -0,0 +1,1597 @@ +use crate::diag::{Diagnostic, Severity}; +use crate::profile::LintProfile; +use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; +use std::path::{Path, PathBuf}; + +#[cfg(not(target_arch = "wasm32"))] +use anyhow::{Context, Result}; + +// ── Lint Result ────────────────────────────────────────────────────────── + +#[derive(Clone, Debug)] +pub struct LintResult { + pub path: PathBuf, + pub diagnostics: Vec, +} + +// ── Frontmatter ────────────────────────────────────────────────────────── + +#[derive(Clone, Debug, Default)] +pub struct Frontmatter { + pub name: 
Option, + pub kind: Option, + pub version: Option, + pub nodes: Vec, + pub role: Option, + pub api: Vec, + pub delegates: Vec, + pub prohibited: Vec, + pub slots: Vec, + pub requires: Vec, + pub ensures: Vec, + pub description: Option, + pub all_keys: HashMap, // key -> line number +} + +// ── Contract Sections (Markdown body) ──────────────────────────────────── + +#[derive(Clone, Debug, Default)] +struct ContractSections { + requires: Vec, + ensures: Vec, + errors: Vec, + invariants: Vec, + strategies: Vec, +} + +#[derive(Clone, Debug)] +struct ContractItem { + text: String, + line: usize, +} + +// ── Heading classification ────────────────────────────────────────────────── + +#[derive(Clone, Debug, PartialEq, Eq)] +enum HeadingKind { + /// Executable component: matches a node name, or kebab-case identifier + Component, + /// State schema: prefixed with & + StateSchema, + /// Documentation/structural heading + Documentation, +} + +#[derive(Clone, Debug)] +struct Heading { + name: String, + line: usize, + #[allow(dead_code)] + level: u8, // 2 for ##, 3 for ### + kind: HeadingKind, + has_code_block: bool, + code_block_fields: HashSet, +} + +// ── Known vocabulary (from spec) ──────────────────────────────────────────── +// These are what the spec explicitly documents. 
+ +const SPEC_FRONTMATTER_KEYS: &[&str] = &[ + "name", "kind", "version", "description", + "nodes", "services", + "role", "api", "state", "shape", + "requires", "ensures", "errors", "invariants", "strategies", + "prohibited", +]; + +const SPEC_KINDS: &[&str] = &[ + "program", "program-node", "service", +]; + +const SPEC_ROLES: &[&str] = &[ + "orchestrator", "coordinator", "leaf", +]; + +const KNOWN_CONTRACT_SECTIONS: &[&str] = &[ + "requires", "ensures", "errors", "invariants", "strategies", +]; + +// ── Extended vocabulary (observed in press corpus, not yet in spec) ────────── + +const CORPUS_FRONTMATTER_KEYS: &[&str] = &[ + // Delegation & state (used by all program-node files) + "delegates", "reads", "writes", "components", "slots", + // Shape sub-keys used at top level + "self", + // Driver/profile keys + "author", "tags", "models", "drivers", + // Code block field keys sometimes in frontmatter + "capability", "principles", "given", + // Misc + "related", "purpose", "glossary", +]; + +const CORPUS_KINDS: &[&str] = &[ + "driver", "profile", +]; + +// ── Rule codes ────────────────────────────────────────────────────────────── +// +// V2E001–V2E009: structural (frontmatter delimiters) +// V2E010–V2E019: required frontmatter fields +// V2E020–V2E029: body structure +// V2E030–V2E039: component validation +// V2E040–V2E049: cross-validation (single-file) +// V2E050–V2E059: cross-validation (multi-file) +// +// V2W001–V2W009: frontmatter vocabulary +// V2W010–V2W019: contract quality +// V2W020–V2W029: component quality +// V2W030–V2W039: cross-validation warnings + +// ── Public API ────────────────────────────────────────────────────────────── + +#[cfg(not(target_arch = "wasm32"))] +pub fn lint_path(path: &Path) -> Result { + lint_path_with_profile(path, LintProfile::Compat) +} + +#[cfg(not(target_arch = "wasm32"))] +pub fn lint_path_with_profile(path: &Path, profile: LintProfile) -> Result { + let source = std::fs::read_to_string(path) + .with_context(|| 
format!("read {}", path.display()))?; + Ok(lint_source_with_profile(path, &source, profile)) +} + +pub fn lint_source(path: &Path, source: &str) -> LintResult { + lint_source_with_profile(path, source, LintProfile::Compat) +} + +pub fn lint_source_with_profile( + path: &Path, + source: &str, + profile: LintProfile, +) -> LintResult { + lint_source_inner(path, source, profile, false) +} + +fn lint_source_inner( + path: &Path, + source: &str, + profile: LintProfile, + multi_file: bool, +) -> LintResult { + let mut diagnostics = Vec::new(); + + let (frontmatter, body_start) = parse_frontmatter(path, source, &mut diagnostics); + validate_frontmatter(path, &frontmatter, profile, &mut diagnostics); + + let body = if body_start < source.lines().count() { + source.lines().skip(body_start).collect::>().join("\n") + } else { + String::new() + }; + + let (headings, contract_sections) = + parse_markdown_body(path, &body, body_start, &frontmatter, &mut diagnostics); + + validate_contracts(path, &frontmatter, &contract_sections, &mut diagnostics); + validate_headings(path, &frontmatter, &headings, &mut diagnostics); + cross_validate(path, &frontmatter, &headings, multi_file, &mut diagnostics); + + diagnostics.sort_by(|a, b| (a.line, a.column, &a.code).cmp(&(b.line, b.column, &b.code))); + + LintResult { + path: path.to_path_buf(), + diagnostics, + } +} + +#[cfg(not(target_arch = "wasm32"))] +pub fn collect_files(targets: &[PathBuf]) -> Result> { + use walkdir::WalkDir; + + let mut files = Vec::new(); + for target in targets { + if target.is_file() { + if is_prose_md_file(target) { + files.push(target.canonicalize() + .with_context(|| format!("canonicalize {}", target.display()))?); + } + continue; + } + if target.is_dir() { + for entry in WalkDir::new(target) + .into_iter() + .filter_map(|e| e.ok()) + .filter(|e| e.file_type().is_file()) + { + if is_prose_md_file(entry.path()) { + files.push(entry.path().canonicalize() + .with_context(|| format!("canonicalize {}", 
entry.path().display()))?); + } + } + } + } + files.sort(); + files.dedup(); + Ok(files) +} + +#[cfg(not(target_arch = "wasm32"))] +pub fn lint_paths_with_profile( + targets: &[PathBuf], + profile: LintProfile, +) -> Result> { + let mut results = Vec::new(); + let mut handled_dirs: HashSet = HashSet::new(); + + for target in targets { + if target.is_dir() { + lint_dir_recursive(target, profile, &mut results, &mut handled_dirs)?; + } else if target.is_file() && is_prose_md_file(target) { + // Single file — check if its parent is a program dir + if let Some(parent) = target.parent() { + if is_program_dir(parent) && handled_dirs.insert(parent.to_path_buf()) { + results.extend(lint_program_dir(parent, profile)?); + } else if !handled_dirs.contains(parent) { + results.push(lint_path_with_profile(target, profile)?); + } + } else { + results.push(lint_path_with_profile(target, profile)?); + } + } + } + + Ok(results) +} + +/// Recursively discover program directories and standalone .md files. +#[cfg(not(target_arch = "wasm32"))] +fn lint_dir_recursive( + dir: &Path, + profile: LintProfile, + results: &mut Vec, + handled_dirs: &mut HashSet, +) -> Result<()> { + if is_program_dir(dir) { + if handled_dirs.insert(dir.to_path_buf()) { + results.extend(lint_program_dir(dir, profile)?); + } + return Ok(()); + } + + // Not a program dir — check subdirectories and standalone files + let mut subdirs = Vec::new(); + let mut loose_files = Vec::new(); + + for entry in std::fs::read_dir(dir)?.flatten() { + let path = entry.path(); + if path.is_dir() && !path.file_name().map(|n| n.to_string_lossy().starts_with('.')).unwrap_or(true) { + subdirs.push(path); + } else if path.is_file() && is_prose_md_file(&path) { + loose_files.push(path); + } + } + + // Recurse into subdirectories + for subdir in subdirs { + lint_dir_recursive(&subdir, profile, results, handled_dirs)?; + } + + // Lint loose .md files in this directory (not part of any program dir) + for file in loose_files { + 
results.push(lint_path_with_profile(&file, profile)?); + } + + Ok(()) +} + +/// Check if a directory is a multi-file program (contains a kind: program root). +#[cfg(not(target_arch = "wasm32"))] +fn is_program_dir(dir: &Path) -> bool { + let Ok(entries) = std::fs::read_dir(dir) else { + return false; + }; + for entry in entries.flatten() { + let path = entry.path(); + if path.extension().and_then(|e| e.to_str()) != Some("md") { + continue; + } + if let Ok(content) = std::fs::read_to_string(&path) + && looks_like_prose_md(&content) + && (content.contains("\nkind: program\n") + || content.contains("\nkind: program\r") + || content.starts_with("---\nkind: program\n")) + { + return true; + } + } + false +} + +// ── Detection ─────────────────────────────────────────────────────────────── + +pub fn is_prose_md_file(path: &Path) -> bool { + let ext = path.extension().and_then(|e| e.to_str()); + if ext != Some("md") { + return false; + } + if let Ok(content) = std::fs::read_to_string(path) { + return looks_like_prose_md(&content); + } + false +} + +pub fn looks_like_prose_md(source: &str) -> bool { + if !source.starts_with("---") { + return false; + } + if let Some(end) = source[3..].find("\n---") { + let frontmatter = &source[3..3 + end]; + frontmatter.lines().any(|line| line.trim().starts_with("kind:")) + } else { + false + } +} + +// ── Frontmatter Parsing ───────────────────────────────────────────────────── + +fn parse_frontmatter( + path: &Path, + source: &str, + diagnostics: &mut Vec, +) -> (Frontmatter, usize) { + let mut fm = Frontmatter::default(); + + if !source.starts_with("---") { + diagnostics.push(Diagnostic::new( + path, "V2E001", Severity::Error, + "Missing YAML frontmatter (file must start with ---)", + 1, 1, + )); + return (fm, 0); + } + + let after_open = &source[3..]; + let Some(end_pos) = after_open.find("\n---") else { + diagnostics.push(Diagnostic::new( + path, "V2E002", Severity::Error, + "Unterminated YAML frontmatter (missing closing ---)", + 1, 
1, + )); + return (fm, source.lines().count()); + }; + + let fm_text = &after_open[1..end_pos]; // skip newline after opening --- + let fm_end_line = fm_text.lines().count() + 2; + let body_start = fm_end_line; + + // Track nesting depth for multi-level YAML + let mut current_top_key: Option = None; + let mut in_list = false; + let mut current_list: Vec = Vec::new(); + + for (idx, line) in fm_text.lines().enumerate() { + let line_num = idx + 2; + let trimmed = line.trim(); + + if trimmed.is_empty() { + continue; + } + + let indent = line.len() - line.trim_start().len(); + + // Nested key (indented under a top-level key like state:) + if indent > 0 { + if let Some(item) = trimmed.strip_prefix("- ") { + // List item + if in_list { + current_list.push(item.trim().to_string()); + } + } + // Sub-keys under state:, shape:, etc. — don't flag as unknown + continue; + } + + // Flush pending list + if in_list { + if let Some(ref key) = current_top_key { + apply_frontmatter_list(&mut fm, key, ¤t_list); + } + current_list.clear(); + in_list = false; + } + + // Top-level key: value + if let Some(colon_pos) = trimmed.find(':') { + let key = trimmed[..colon_pos].trim(); + let value = trimmed[colon_pos + 1..].trim(); + + // Check for unknown top-level keys + if !SPEC_FRONTMATTER_KEYS.contains(&key) + && !CORPUS_FRONTMATTER_KEYS.contains(&key) + && !key.contains(' ') + { + diagnostics.push(Diagnostic::new( + path, "V2W001", Severity::Warning, + format!("Unknown frontmatter key: `{key}`"), + line_num, 1, + )); + } + + // Flag keys in corpus but not in spec (informational in strict mode) + // This is useful for spec discovery but not an error. 
+ + // Check for duplicate top-level keys + if let Some(prev_line) = fm.all_keys.insert(key.to_string(), line_num) { + diagnostics.push(Diagnostic::new( + path, "V2E003", Severity::Error, + format!("Duplicate frontmatter key `{key}` (first at line {prev_line})"), + line_num, 1, + )); + } + + current_top_key = Some(key.to_string()); + + if value.is_empty() { + // Start of nested block or list + in_list = true; + continue; + } + + // Inline array: [a, b, c] + if value.starts_with('[') && value.ends_with(']') { + let items: Vec = value[1..value.len() - 1] + .split(',') + .map(|s| s.trim().to_string()) + .filter(|s| !s.is_empty()) + .collect(); + apply_frontmatter_value(&mut fm, key, value, &items); + } else { + apply_frontmatter_value(&mut fm, key, value, &[]); + } + } + } + + // Flush trailing list + if in_list + && let Some(ref key) = current_top_key { + apply_frontmatter_list(&mut fm, key, ¤t_list); + } + + (fm, body_start) +} + +fn apply_frontmatter_value(fm: &mut Frontmatter, key: &str, value: &str, items: &[String]) { + match key { + "name" => fm.name = Some(value.to_string()), + "kind" => fm.kind = Some(value.to_string()), + "version" => fm.version = Some(value.to_string()), + "description" => fm.description = Some(value.to_string()), + "role" => fm.role = Some(value.to_string()), + "nodes" | "services" => { + if !items.is_empty() { + fm.nodes = items.to_vec(); + } else { + fm.nodes = vec![value.to_string()]; + } + } + "api" => { + if !items.is_empty() { fm.api = items.to_vec(); } + } + "delegates" => { + if !items.is_empty() { fm.delegates = items.to_vec(); } + } + "prohibited" => { + if !items.is_empty() { + fm.prohibited = items.to_vec(); + } else if !value.is_empty() { + fm.prohibited = value.split(',').map(|s| s.trim().to_string()).filter(|s| !s.is_empty()).collect(); + } + } + "slots" => { + if !items.is_empty() { fm.slots = items.to_vec(); } + } + _ => {} + } +} + +fn apply_frontmatter_list(fm: &mut Frontmatter, key: &str, items: &[String]) { + match key 
{ + "nodes" | "services" => fm.nodes = items.to_vec(), + "api" => fm.api = items.to_vec(), + "delegates" => fm.delegates = items.to_vec(), + "prohibited" => fm.prohibited = items.to_vec(), + "slots" => fm.slots = items.to_vec(), + "requires" => fm.requires = items.to_vec(), + "ensures" => fm.ensures = items.to_vec(), + _ => {} + } +} + +// ── Frontmatter Validation ────────────────────────────────────────────────── + +fn validate_frontmatter( + path: &Path, + fm: &Frontmatter, + profile: LintProfile, + diagnostics: &mut Vec, +) { + // V2E010: missing name + if fm.name.is_none() { + diagnostics.push(Diagnostic::new( + path, "V2E010", Severity::Error, + "Missing required frontmatter field: name", + 1, 1, + )); + } + + // V2E011: missing kind + if fm.kind.is_none() { + diagnostics.push(Diagnostic::new( + path, "V2E011", Severity::Error, + "Missing required frontmatter field: kind", + 1, 1, + )); + } + + // V2E012: unknown kind (strict = error, compat = warning for corpus kinds) + if let Some(ref kind) = fm.kind + && !SPEC_KINDS.contains(&kind.as_str()) { + if CORPUS_KINDS.contains(&kind.as_str()) { + // In corpus but not in spec — warn in strict, skip in compat + if profile == LintProfile::Strict { + diagnostics.push(Diagnostic::new( + path, "V2W005", Severity::Warning, + format!("Component kind `{kind}` is used in the Press corpus but not documented in the spec"), + 1, 1, + )); + } + } else { + diagnostics.push(Diagnostic::new( + path, "V2E012", Severity::Error, + format!("Unknown component kind: `{kind}` (spec: {}; corpus: {})", + SPEC_KINDS.join(", "), CORPUS_KINDS.join(", ")), + 1, 1, + )); + } + } + + // V2W002: unknown role + if let Some(ref role) = fm.role + && !SPEC_ROLES.contains(&role.as_str()) { + diagnostics.push(Diagnostic::new( + path, "V2W002", Severity::Warning, + format!("Unknown component role: `{role}` (expected: {})", + SPEC_ROLES.join(", ")), + 1, 1, + )); + } + + // V2E013: program must have nodes/services + if let Some(ref kind) = fm.kind + && 
kind == "program" && fm.nodes.is_empty() { + diagnostics.push(Diagnostic::new( + path, "V2E013", Severity::Error, + "Program must declare `nodes:` or `services:` listing its components", + 1, 1, + )); + } + + // V2W003: version missing + if fm.version.is_none() { + diagnostics.push(Diagnostic::new( + path, "V2W003", Severity::Warning, + "Missing version in frontmatter", + 1, 1, + )); + } + + // V2W004: name contains spaces + if let Some(ref name) = fm.name + && name.contains(' ') { + diagnostics.push(Diagnostic::new( + path, "V2W004", Severity::Warning, + format!("Component name `{name}` contains spaces; prefer kebab-case"), + 1, 1, + )); + } +} + +// ── Markdown Body Parsing ─────────────────────────────────────────────────── + +fn classify_heading(name: &str, fm_nodes: &HashSet) -> HeadingKind { + // &-prefixed = state schema + if name.starts_with('&') { + return HeadingKind::StateSchema; + } + + // Exact match to a declared node = always a component + if fm_nodes.contains(&name.to_lowercase()) { + return HeadingKind::Component; + } + + // Starts with a digit = numbered step (documentation) + if name.starts_with(|c: char| c.is_ascii_digit()) { + return HeadingKind::Documentation; + } + + // Contains spaces = almost certainly documentation + // Exception: single-word PascalCase could be a schema name, but those + // aren't components either (BriefAdherence, CurationAdherence, etc.) 
+ if name.contains(' ') { + return HeadingKind::Documentation; + } + + // PascalCase without hyphens = schema/type name, not a component + // Components use kebab-case (game-solver, level-solver) or lowercase (oha, searcher) + if name.chars().next().map(|c| c.is_ascii_uppercase()).unwrap_or(false) + && !name.contains('-') + && name.chars().any(|c| c.is_ascii_lowercase()) + { + return HeadingKind::Documentation; + } + + // kebab-case or lowercase identifiers = likely component + let looks_like_component = !name.is_empty() + && name.chars().all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_') + && name.chars().next().map(|c| c.is_ascii_lowercase()).unwrap_or(false); + + if looks_like_component { + return HeadingKind::Component; + } + + HeadingKind::Documentation +} + +fn parse_markdown_body( + path: &Path, + body: &str, + body_offset: usize, + fm: &Frontmatter, + diagnostics: &mut Vec, +) -> (Vec, ContractSections) { + let mut headings = Vec::new(); + let mut sections = ContractSections::default(); + let fm_nodes: HashSet = fm.nodes.iter().map(|n| n.to_lowercase()).collect(); + + let mut current_heading: Option = None; + let mut current_section: Option = None; + let mut in_code_block = false; + let mut code_block_content = String::new(); + + for (idx, line) in body.lines().enumerate() { + let line_num = body_offset + idx + 1; + let trimmed = line.trim(); + + // Track fenced code blocks + if trimmed.starts_with("```") { + if in_code_block { + // Closing — parse fields if inside a heading + if let Some(ref mut h) = current_heading { + h.has_code_block = true; + for cb_line in code_block_content.lines() { + let cb_trimmed = cb_line.trim(); + if let Some(colon_pos) = cb_trimmed.find(':') { + let field = cb_trimmed[..colon_pos].trim(); + if !field.is_empty() { + h.code_block_fields.insert(field.to_lowercase()); + } + } + } + } + + // Extract contracts from inside code blocks. + // The corpus puts requires:/ensures: inside ``` blocks + // (often under ## Contract). 
Parse them as contract items. + parse_code_block_contracts( + &code_block_content, + body_offset + idx.saturating_sub(code_block_content.lines().count()), + &mut sections, + ); + + in_code_block = false; + code_block_content.clear(); + } else { + in_code_block = true; + } + continue; + } + + if in_code_block { + code_block_content.push_str(line); + code_block_content.push('\n'); + continue; + } + + // ## heading + if let Some(heading_text) = trimmed.strip_prefix("## ") { + let heading_lower = heading_text.to_lowercase(); + + // Flush pending heading + if let Some(h) = current_heading.take() { + headings.push(h); + } + + if KNOWN_CONTRACT_SECTIONS.contains(&heading_lower.as_str()) { + current_section = Some(heading_lower); + } else { + current_section = None; + // ## headings are structural — don't classify as components + } + continue; + } + + // ### heading + if let Some(heading_text) = trimmed.strip_prefix("### ") { + if let Some(h) = current_heading.take() { + headings.push(h); + } + + let kind = classify_heading(heading_text.trim(), &fm_nodes); + current_heading = Some(Heading { + name: heading_text.trim().to_string(), + line: line_num, + level: 3, + kind, + has_code_block: false, + code_block_fields: HashSet::new(), + }); + current_section = None; + continue; + } + + // Bare contract section markers (e.g., `requires:` at top level without ## heading) + // The test fixtures use this pattern: frontmatter, then `requires:\n- item` + if current_heading.is_none() { + let trimmed_lower = trimmed.to_lowercase(); + if trimmed_lower == "requires:" || trimmed_lower.starts_with("requires:") { + current_section = Some("requires".to_string()); + continue; + } else if trimmed_lower == "ensures:" || trimmed_lower.starts_with("ensures:") { + current_section = Some("ensures".to_string()); + continue; + } else if trimmed_lower == "errors:" || trimmed_lower.starts_with("errors:") { + current_section = Some("errors".to_string()); + continue; + } else if trimmed_lower == 
"invariants:" || trimmed_lower.starts_with("invariants:") { + current_section = Some("invariants".to_string()); + continue; + } else if trimmed_lower == "strategies:" || trimmed_lower.starts_with("strategies:") { + current_section = Some("strategies".to_string()); + continue; + } + } + + // List items in contract sections + if let Some(ref section) = current_section + && let Some(item_text) = trimmed.strip_prefix("- ") { + let item = ContractItem { + text: item_text.to_string(), + line: line_num, + }; + match section.as_str() { + "requires" => sections.requires.push(item), + "ensures" => sections.ensures.push(item), + "errors" => sections.errors.push(item), + "invariants" => sections.invariants.push(item), + "strategies" => sections.strategies.push(item), + _ => {} + } + } + } + + // Flush trailing heading + if let Some(h) = current_heading { + headings.push(h); + } + + if in_code_block { + diagnostics.push(Diagnostic::new( + path, "V2E020", Severity::Error, + "Unterminated fenced code block", + body_offset + body.lines().count(), 1, + )); + } + + (headings, sections) +} + +/// Extract requires:/ensures:/errors:/invariants:/strategies: from inside a +/// fenced code block. The corpus commonly puts contracts inside ``` blocks +/// under ## Contract rather than as ## requires/## ensures headings. 
fn parse_code_block_contracts(
    content: &str,
    base_line: usize,
    sections: &mut ContractSections,
) {
    // Contract-section keys recognized inside fenced code blocks.
    const KEYS: [&str; 5] = ["requires", "ensures", "errors", "invariants", "strategies"];

    let mut current_key: Option<&str> = None;

    for (idx, line) in content.lines().enumerate() {
        let trimmed = line.trim();
        let line_num = base_line + idx + 1;

        // Top-level key (not indented, ends with colon)
        if !trimmed.is_empty() && !line.starts_with(' ') && !line.starts_with('\t') {
            // `starts_with("requires:")` already covers the exact-match case,
            // so no separate equality test is needed. An unrecognized
            // top-level key yields None, which stops collection for the
            // current section.
            current_key = KEYS
                .iter()
                .copied()
                .find(|key| trimmed.starts_with(key) && trimmed[key.len()..].starts_with(':'));
            continue;
        }

        // Indented line under a contract key — treat as a contract item
        if let Some(key) = current_key {
            if let Some(item_text) = trimmed.strip_prefix("- ") {
                let item = ContractItem {
                    text: item_text.to_string(),
                    line: line_num,
                };
                match key {
                    "requires" => sections.requires.push(item),
                    "ensures" => sections.ensures.push(item),
                    "errors" => sections.errors.push(item),
                    "invariants" => sections.invariants.push(item),
                    "strategies" => sections.strategies.push(item),
                    _ => {}
                }
            }
            // Non-list indented lines (continuation of previous item) — skip
        }
    }
}

// ── Contract Validation ─────────────────────────────────────────────────────

/// Validate contract quality: empty clauses, hedging language, terse
/// strategies, and kind-level completeness (V2W010–V2W015).
fn validate_contracts(
    path: &Path,
    fm: &Frontmatter,
    sections: &ContractSections,
    diagnostics: &mut Vec<Diagnostic>,
) {
    for item in &sections.requires {
        if item.text.trim().is_empty() {
diagnostics.push(Diagnostic::new( + path, "V2W010", Severity::Warning, + "Empty requires clause", + item.line, 1, + )); + } + } + + for item in §ions.ensures { + if item.text.trim().is_empty() { + diagnostics.push(Diagnostic::new( + path, "V2W010", Severity::Warning, + "Empty ensures clause", + item.line, 1, + )); + } + } + + // Hedging language in ensures + for item in §ions.ensures { + let lower = item.text.to_lowercase(); + if lower.starts_with("should ") || lower.contains(" should ") || + lower.starts_with("might ") || lower.contains(" might ") || + lower.starts_with("may ") || lower.contains(" may ") { + diagnostics.push(Diagnostic::new( + path, "V2W011", Severity::Warning, + "Ensures clause uses hedging language (should/might/may); ensures are obligations, not suggestions", + item.line, 1, + )); + } + } + + for item in §ions.strategies { + if item.text.trim().len() < 10 { + diagnostics.push(Diagnostic::new( + path, "V2W012", Severity::Warning, + "Strategy clause may be too terse to guide model behavior", + item.line, 1, + )); + } + } + + // V2W014: service/program-node without ensures (a component that guarantees nothing) + let kind = fm.kind.as_deref().unwrap_or(""); + if (kind == "service" || kind == "program-node") + && sections.ensures.is_empty() + && fm.ensures.is_empty() + { + diagnostics.push(Diagnostic::new( + path, "V2W014", Severity::Warning, + format!("Component of kind `{kind}` has no ensures clauses (not found in frontmatter, ## ensures section, bare ensures:, or code block contracts)"), + 1, 1, + )); + } + + // V2W015: program without requires (inputs never specified) + if kind == "program" + && sections.requires.is_empty() + && fm.requires.is_empty() + { + diagnostics.push(Diagnostic::new( + path, "V2W015", Severity::Warning, + "Program has no requires clauses — callers won't know what inputs to provide", + 1, 1, + )); + } +} + +// ── Heading Validation ────────────────────────────────────────────────────── + +fn validate_headings( + path: 
&Path, + _fm: &Frontmatter, + headings: &[Heading], + diagnostics: &mut Vec, +) { + // Duplicate component names + let mut seen: HashMap = HashMap::new(); + for h in headings { + if h.kind != HeadingKind::Component { + continue; + } + let lower = h.name.to_lowercase(); + if let Some(prev_line) = seen.insert(lower, h.line) { + diagnostics.push(Diagnostic::new( + path, "V2E030", Severity::Error, + format!("Duplicate component name `{}` (first at line {})", h.name, prev_line), + h.line, 1, + )); + } + } + + // Component without code block (only for actual components, not docs/state) + for h in headings { + if h.kind == HeadingKind::Component && !h.has_code_block { + diagnostics.push(Diagnostic::new( + path, "V2W020", Severity::Warning, + format!("Component `{}` has no fenced code block defining its contract", h.name), + h.line, 1, + )); + } + } + + // Component code block missing role + for h in headings { + if h.kind == HeadingKind::Component && h.has_code_block && !h.code_block_fields.contains("role") { + diagnostics.push(Diagnostic::new( + path, "V2W021", Severity::Warning, + format!("Component `{}` code block does not declare a role", h.name), + h.line, 1, + )); + } + } +} + +// ── Cross-validation ──────────────────────────────────────────────────────── + +fn cross_validate( + path: &Path, + fm: &Frontmatter, + headings: &[Heading], + multi_file: bool, + diagnostics: &mut Vec, +) { + if fm.kind.as_deref() != Some("program") { + return; + } + + let component_names: HashSet = headings + .iter() + .filter(|h| h.kind == HeadingKind::Component) + .map(|h| h.name.to_lowercase()) + .collect(); + + // V2E040: node declared but not in body (only single-file mode) + if !multi_file { + for node in &fm.nodes { + let lower = node.to_lowercase(); + if !component_names.contains(&lower) { + diagnostics.push(Diagnostic::new( + path, "V2E040", Severity::Error, + format!("Node `{node}` declared in frontmatter but not defined as a ### component in body"), + 1, 1, + )); + } + } + } + 
+ // V2W030: component in body but not in frontmatter nodes + let fm_nodes: HashSet = fm.nodes.iter().map(|n| n.to_lowercase()).collect(); + for h in headings { + if h.kind != HeadingKind::Component { + continue; + } + let lower = h.name.to_lowercase(); + if !fm_nodes.contains(&lower) { + diagnostics.push(Diagnostic::new( + path, "V2W030", Severity::Warning, + format!("Component `{}` defined in body but not listed in frontmatter nodes/services", h.name), + h.line, 1, + )); + } + } +} + +// ── Multi-file Program Directory ──────────────────────────────────────────── + +#[cfg(not(target_arch = "wasm32"))] +pub fn lint_program_dir( + dir: &Path, + profile: LintProfile, +) -> Result> { + let mut results = Vec::new(); + let mut root_path = None; + let mut root_nodes = Vec::new(); + + for entry in std::fs::read_dir(dir)? { + let entry = entry?; + let path = entry.path(); + if path.extension().and_then(|e| e.to_str()) != Some("md") { + continue; + } + if let Ok(content) = std::fs::read_to_string(&path) + && looks_like_prose_md(&content) { + let result = lint_source_inner(&path, &content, profile, true); + + if content.contains("\nkind: program") || content.starts_with("---\nkind: program") { + root_path = Some(path.clone()); + let (fm, _) = parse_frontmatter(&path, &content, &mut Vec::new()); + root_nodes = fm.nodes.clone(); + } + + results.push(result); + } + } + + // V2E050: no root program file + if root_path.is_none() && !results.is_empty() { + let dir_path = dir.to_path_buf(); + results.push(LintResult { + path: dir_path.clone(), + diagnostics: vec![Diagnostic::new( + &dir_path, "V2E050", Severity::Error, + "No root program file found (no file with `kind: program`)", + 1, 1, + )], + }); + } + + // V2E051: node file missing + if let Some(ref rp) = root_path { + let existing_files: HashSet = std::fs::read_dir(dir)? 
+ .filter_map(|e| e.ok()) + .filter_map(|e| { + let p = e.path(); + if p.extension().and_then(|ext| ext.to_str()) == Some("md") { + p.file_stem().map(|s| s.to_string_lossy().to_string()) + } else { + None + } + }) + .collect(); + + for node in &root_nodes { + if !existing_files.contains(node) { + results.push(LintResult { + path: rp.clone(), + diagnostics: vec![Diagnostic::new( + rp, "V2E051", Severity::Error, + format!("Node `{node}` listed in program but no `{node}.md` file found"), + 1, 1, + )], + }); + } + } + } + + Ok(results) +} + +// ── Spec Discovery ────────────────────────────────────────────────────────── + +/// Observation from a corpus of .md program files — patterns for the spec author to consider. +#[derive(Clone, Debug, Default)] +pub struct SpecDiscovery { + /// Frontmatter keys not in SPEC_FRONTMATTER_KEYS, with file count + pub undocumented_keys: BTreeMap>, + /// kind: values not in SPEC_KINDS + pub undocumented_kinds: BTreeMap>, + /// role: values not in SPEC_ROLES + pub undocumented_roles: BTreeMap>, + /// ### heading patterns classified as Documentation (potential spec gap) + pub doc_heading_patterns: BTreeMap>, + /// Contract section names found that aren't in the known set + pub undocumented_sections: BTreeMap>, + /// role + delegates patterns observed (e.g., "leaf with empty delegates", "orchestrator with delegates") + pub role_delegation_patterns: BTreeMap, + /// api/prohibited overlap within the same component + pub api_prohibited_overlaps: Vec<(String, Vec)>, + /// State variables that are read but never written (within a program dir) + pub orphan_state_reads: Vec<(String, String)>, // (program, state_var) + /// State variables that are written but never read + pub orphan_state_writes: Vec<(String, String)>, + /// Total files analyzed + pub file_count: usize, +} + +impl std::fmt::Display for SpecDiscovery { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + writeln!(f, "=== Spec Discovery Report ({} files) ===\n", 
self.file_count)?; + + if !self.undocumented_kinds.is_empty() { + writeln!(f, "## Undocumented `kind:` values\n")?; + writeln!(f, "The spec defines: {}", SPEC_KINDS.join(", "))?; + writeln!(f, "The corpus also uses:\n")?; + for (kind, files) in &self.undocumented_kinds { + writeln!(f, " `{kind}` ({} files): {}", files.len(), + files.iter().take(3).cloned().collect::>().join(", "))?; + } + writeln!(f)?; + } + + if !self.undocumented_keys.is_empty() { + writeln!(f, "## Undocumented frontmatter keys\n")?; + writeln!(f, "The spec defines: {}\n", SPEC_FRONTMATTER_KEYS.join(", "))?; + for (key, files) in &self.undocumented_keys { + writeln!(f, " `{key}` ({} files)", files.len())?; + } + writeln!(f)?; + } + + if !self.undocumented_roles.is_empty() { + writeln!(f, "## Undocumented `role:` values\n")?; + writeln!(f, "The spec defines: {}\n", SPEC_ROLES.join(", "))?; + for (role, files) in &self.undocumented_roles { + writeln!(f, " `{role}` ({} files)", files.len())?; + } + writeln!(f)?; + } + + if !self.role_delegation_patterns.is_empty() { + writeln!(f, "## Role ↔ delegation patterns\n")?; + writeln!(f, "How `role:` correlates with `delegates:` in the corpus:\n")?; + for (pattern, count) in &self.role_delegation_patterns { + writeln!(f, " {pattern}: {count} files")?; + } + writeln!(f)?; + } + + if !self.api_prohibited_overlaps.is_empty() { + writeln!(f, "## API / prohibited overlaps\n")?; + writeln!(f, "Components where the same API appears in both `api:` and `prohibited:`:\n")?; + for (file, apis) in &self.api_prohibited_overlaps { + writeln!(f, " {file}: {}", apis.join(", "))?; + } + writeln!(f)?; + } + + if !self.orphan_state_reads.is_empty() || !self.orphan_state_writes.is_empty() { + writeln!(f, "## State coherence observations\n")?; + if !self.orphan_state_reads.is_empty() { + writeln!(f, "State variables read but never written in the same program:\n")?; + for (prog, var) in &self.orphan_state_reads { + writeln!(f, " {prog}: reads `{var}` — no node writes it")?; + } 
writeln!(f)?;
        }
        if !self.orphan_state_writes.is_empty() {
            writeln!(f, "State variables written but never read in the same program:\n")?;
            for (prog, var) in &self.orphan_state_writes {
                writeln!(f, " {prog}: writes `{var}` — no other node reads it")?;
            }
            writeln!(f)?;
        }
    }

    Ok(())
    }
}

/// Analyze a set of .md program files and report vocabulary not in the spec.
///
/// Collects undocumented frontmatter keys, `kind:`/`role:` values, doc-heading
/// patterns, role↔delegation correlations, api/prohibited overlaps, and
/// per-program state read/write mismatches into a `SpecDiscovery` report.
#[cfg(not(target_arch = "wasm32"))]
pub fn discover_spec_gaps(targets: &[PathBuf]) -> Result<SpecDiscovery> {
    let files = collect_files(targets)?;
    let mut discovery = SpecDiscovery { file_count: files.len(), ..Default::default() };

    for file in &files {
        let content = std::fs::read_to_string(file)
            .with_context(|| format!("read {}", file.display()))?;
        let filename = file.file_name()
            .map(|f| f.to_string_lossy().to_string())
            .unwrap_or_default();

        // Diagnostics are discarded here: discovery only cares about vocabulary.
        let (fm, body_start) = parse_frontmatter(file, &content, &mut Vec::new());

        // Undocumented frontmatter keys
        for key in fm.all_keys.keys() {
            if !SPEC_FRONTMATTER_KEYS.contains(&key.as_str()) {
                discovery.undocumented_keys
                    .entry(key.clone())
                    .or_default()
                    .insert(filename.clone());
            }
        }

        // Undocumented kinds
        if let Some(ref kind) = fm.kind
            && !SPEC_KINDS.contains(&kind.as_str()) {
            discovery.undocumented_kinds
                .entry(kind.clone())
                .or_default()
                .insert(filename.clone());
        }

        // Undocumented roles
        if let Some(ref role) = fm.role
            && !SPEC_ROLES.contains(&role.as_str()) {
            discovery.undocumented_roles
                .entry(role.clone())
                .or_default()
                .insert(filename.clone());
        }

        // Heading patterns: only headings classified as documentation are tallied.
        let body = content.lines().skip(body_start).collect::<Vec<_>>().join("\n");
        let fm_nodes: HashSet<String> = fm.nodes.iter().map(|n| n.to_lowercase()).collect();
        for line in body.lines() {
            let trimmed = line.trim();
            if let Some(heading) = trimmed.strip_prefix("### ") {
                let kind = classify_heading(heading.trim(), &fm_nodes);
                if kind == HeadingKind::Documentation {
                    // Categorize the pattern
                    let pattern = if heading.trim().starts_with(|c: char| c.is_ascii_digit()) {
                        "numbered step".to_string()
                    } else if heading.trim().starts_with('&') {
                        "state schema".to_string()
                    } else {
                        heading.trim().to_string()
                    };
                    discovery.doc_heading_patterns
                        .entry(pattern)
                        .or_default()
                        .insert(filename.clone());
                }
            }
        }

        // Role ↔ delegation pattern
        if let Some(ref role) = fm.role {
            let has_delegates = !fm.delegates.is_empty()
                && fm.delegates.iter().any(|d| d != "[]" && !d.is_empty());
            let pattern = format!("{role} + {}", if has_delegates { "delegates" } else { "no delegates" });
            *discovery.role_delegation_patterns.entry(pattern).or_insert(0) += 1;
        }

        // API / prohibited overlap
        if !fm.api.is_empty() && !fm.prohibited.is_empty() {
            let api_set: HashSet<&str> = fm.api.iter().map(|s| s.as_str()).collect();
            let overlap: Vec<String> = fm.prohibited.iter()
                .filter(|p| api_set.contains(p.as_str()))
                .cloned()
                .collect();
            if !overlap.is_empty() {
                discovery.api_prohibited_overlaps.push((filename.clone(), overlap));
            }
        }
    }

    // State coherence: scan program directories for read/write mismatches
    // Group files by parent directory to find program boundaries
    // NOTE(review): element type inferred from the push below — confirm the
    // frontmatter type name if an explicit annotation is preferred.
    let mut programs: BTreeMap<String, Vec<_>> = BTreeMap::new();
    for file in &files {
        if let Some(parent) = file.parent()
            && is_program_dir(parent) {
            let prog_name = parent.file_name()
                .map(|n| n.to_string_lossy().to_string())
                .unwrap_or_default();
            let content = std::fs::read_to_string(file)?;
            let (fm, _) = parse_frontmatter(file, &content, &mut Vec::new());
            let fname = file.file_name()
                .map(|n| n.to_string_lossy().to_string())
                .unwrap_or_default();
            programs.entry(prog_name).or_default().push((fname, fm));
        }
    }

    for (prog_name, components) in &programs {
        let mut all_reads: HashSet<String> = HashSet::new();
        let mut all_writes: HashSet<String> = HashSet::new();

        for (_fname, fm) in components {
            // Parse state reads/writes from the all_keys (they're nested under state:)
            // We already capture delegates — for state we need to look at the raw keys
            // For now, use the delegates field as a proxy, and check requires/ensures for & refs
            for req in &fm.requires {
                if req.contains('&') {
                    // Extract &VarName patterns
                    for word in req.split_whitespace() {
                        if word.starts_with('&') {
                            all_reads.insert(word.trim_matches(|c: char| !c.is_alphanumeric() && c != '&').to_string());
                        }
                    }
                }
            }
            for ens in &fm.ensures {
                if ens.contains('&') {
                    for word in ens.split_whitespace() {
                        if word.starts_with('&') {
                            all_writes.insert(word.trim_matches(|c: char| !c.is_alphanumeric() && c != '&').to_string());
                        }
                    }
                }
            }
        }

        for var in &all_reads {
            if !all_writes.contains(var) {
                discovery.orphan_state_reads.push((prog_name.clone(), var.clone()));
            }
        }
        for var in &all_writes {
            if !all_reads.contains(var) {
                discovery.orphan_state_writes.push((prog_name.clone(), var.clone()));
            }
        }
    }

    Ok(discovery)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn detects_v2_content() {
        let source = "---\nname: test\nkind: program\nnodes: [a, b]\n---\n# Test\n";
        assert!(looks_like_prose_md(source));
    }

    #[test]
    fn rejects_non_v2_content() {
        assert!(!looks_like_prose_md("agent foo:\n model: sonnet\n"));
        assert!(!looks_like_prose_md("---\nname: test\n---\n")); // no kind:
    }

    #[test]
    fn missing_frontmatter() {
        let source = "# Just a heading\nSome text\n";
        let result = lint_source(Path::new("test.md"), source);
        assert!(result.diagnostics.iter().any(|d| d.code == "V2E001"));
    }

    #[test]
    fn unterminated_frontmatter() {
        let source = "---\nname: test\nkind: program\n";
        let result = lint_source(Path::new("test.md"), source);
        assert!(result.diagnostics.iter().any(|d| d.code == "V2E002"));
    }

    #[test]
    fn missing_name() {
        let source = "---\nkind: program\nnodes: [a]\n---\n# Test\n";
        let result = lint_source(Path::new("test.md"), source);
        assert!(result.diagnostics.iter().any(|d| d.code ==
"V2E010")); + } + + #[test] + fn missing_kind() { + let source = "---\nname: test\n---\n# Test\n"; + let result = lint_source(Path::new("test.md"), source); + assert!(result.diagnostics.iter().any(|d| d.code == "V2E011")); + } + + #[test] + fn unknown_kind_error() { + let source = "---\nname: test\nkind: widget\n---\n# Test\n"; + let result = lint_source(Path::new("test.md"), source); + assert!(result.diagnostics.iter().any(|d| d.code == "V2E012")); + } + + #[test] + fn driver_kind_accepted_in_compat() { + let source = "---\nname: test\nkind: driver\nversion: 0.1.0\n---\n# Test\n"; + let result = lint_source(Path::new("test.md"), source); + // No error in compat mode for corpus kinds + assert!(!result.diagnostics.iter().any(|d| d.code == "V2E012")); + } + + #[test] + fn program_without_nodes() { + let source = "---\nname: test\nkind: program\n---\n# Test\n"; + let result = lint_source(Path::new("test.md"), source); + assert!(result.diagnostics.iter().any(|d| d.code == "V2E013")); + } + + #[test] + fn duplicate_frontmatter_key() { + let source = "---\nname: test\nkind: program\nname: other\nnodes: [a]\n---\n# Test\n"; + let result = lint_source(Path::new("test.md"), source); + assert!(result.diagnostics.iter().any(|d| d.code == "V2E003")); + } + + #[test] + fn nested_yaml_not_flagged_as_unknown() { + let source = "---\nname: test\nkind: program-node\nversion: 0.1.0\nstate:\n reads: [&Foo]\n writes: [&Bar]\n---\n# Test\n"; + let result = lint_source(Path::new("test.md"), source); + // reads/writes should NOT appear as unknown keys (they're nested under state:) + assert!(!result.diagnostics.iter().any(|d| + d.code == "V2W001" && d.message.contains("reads")), + "reads should not be flagged: {:?}", result.diagnostics); + } + + #[test] + fn hedging_in_ensures() { + let source = "---\nname: test\nkind: service\n---\n# Test\n\n## ensures\n\n- result should be correct\n"; + let result = lint_source(Path::new("test.md"), source); + assert!(result.diagnostics.iter().any(|d| 
d.code == "V2W011")); + } + + #[test] + fn state_schema_heading_not_treated_as_component() { + let source = "---\nname: test\nkind: program\nnodes: [solver]\nversion: 0.1.0\n---\n\n### solver\n\n```\nrole: leaf\n```\n\n### &GameState\n\n```\nlevel: number\n```\n"; + let result = lint_source(Path::new("test.md"), source); + // &GameState should not trigger V2W030 (not in nodes) + assert!(!result.diagnostics.iter().any(|d| + d.code == "V2W030" && d.message.contains("GameState")), + "state schema should not be flagged as unlisted component: {:?}", result.diagnostics); + } + + #[test] + fn doc_heading_not_treated_as_component() { + let source = "---\nname: test\nkind: program\nnodes: [solver]\nversion: 0.1.0\n---\n\n### solver\n\n```\nrole: leaf\n```\n\n### When to use direct delegation\n\nSome docs here.\n"; + let result = lint_source(Path::new("test.md"), source); + // Documentation heading should not trigger V2W030 + assert!(!result.diagnostics.iter().any(|d| + d.code == "V2W030" && d.message.contains("When")), + "doc heading should not be flagged: {:?}", result.diagnostics); + } + + #[test] + fn valid_program_no_errors() { + let source = "\ +--- +name: deep-research +kind: program +version: 0.1.0 +nodes: [researcher, critic] +--- + +# Deep Research + +### researcher + +``` +role: leaf +use: \"researcher\" +requires from caller: + - topic to research +produces for caller: + - findings with sources +``` + +### critic + +``` +role: leaf +use: \"critic\" +requires from caller: + - findings to evaluate +produces for caller: + - evaluation with scores +``` +"; + let result = lint_source(Path::new("test.md"), source); + let errors: Vec<_> = result.diagnostics.iter() + .filter(|d| d.severity == Severity::Error) + .collect(); + assert!(errors.is_empty(), "unexpected errors: {:?}", errors); + } + + #[test] + fn node_not_defined_in_body() { + let source = "---\nname: test\nkind: program\nnodes: [a, b, missing]\nversion: 0.1.0\n---\n\n### a\n\n```\nrole: leaf\n```\n\n### 
b\n\n```\nrole: leaf\n```\n"; + let result = lint_source(Path::new("test.md"), source); + assert!(result.diagnostics.iter().any(|d| d.code == "V2E040"), + "expected V2E040, got: {:?}", result.diagnostics); + } + + #[test] + fn duplicate_component_name() { + let source = "---\nname: test\nkind: program\nnodes: [a]\nversion: 0.1.0\n---\n\n### a\n\n```\nrole: leaf\n```\n\n### a\n\n```\nrole: leaf\n```\n"; + let result = lint_source(Path::new("test.md"), source); + assert!(result.diagnostics.iter().any(|d| d.code == "V2E030")); + } + + #[test] + fn contracts_inside_code_blocks_are_parsed() { + // The corpus pattern: ## Contract with requires/ensures inside a code block + let source = "\ +--- +name: critic +kind: program-node +version: 0.1.0 +--- + +# Critic + +## Contract + +``` +requires: + - result: the work product to evaluate + - criteria: what constitutes acceptance + +ensures: + - Return a structured verdict + - Issues are specific and actionable +``` +"; + let result = lint_source(Path::new("test.md"), source); + // Should NOT have V2W014 (missing ensures) because ensures are in the code block + assert!(!result.diagnostics.iter().any(|d| d.code == "V2W014"), + "should not flag missing ensures when they're in a code block: {:?}", + result.diagnostics); + } + + #[test] + fn bare_toplevel_contracts_are_parsed() { + // The fixtures pattern: requires/ensures as bare markdown after frontmatter + let source = "\ +--- +name: uppercaser +kind: service +version: 0.1.0 +--- + +requires: +- text: a piece of text + +ensures: +- uppercased: the text converted to all uppercase +"; + let result = lint_source(Path::new("test.md"), source); + assert!(!result.diagnostics.iter().any(|d| d.code == "V2W014"), + "should not flag missing ensures when they're bare top-level: {:?}", + result.diagnostics); + } + + #[test] + fn service_without_any_ensures_warns() { + // No ensures anywhere — not in frontmatter, not in sections, not in code blocks + let source = "\ +--- +name: bare-service 
+kind: service +version: 0.1.0 +--- + +# Bare Service + +Does stuff but makes no promises. +"; + let result = lint_source(Path::new("test.md"), source); + assert!(result.diagnostics.iter().any(|d| d.code == "V2W014"), + "expected V2W014 for service with no ensures: {:?}", result.diagnostics); + } +} diff --git a/tools/lint/src/lint_legacy.rs b/tools/lint/src/lint_legacy.rs new file mode 100644 index 0000000..cf4013d --- /dev/null +++ b/tools/lint/src/lint_legacy.rs @@ -0,0 +1,2250 @@ +use crate::diag::{Diagnostic, Severity}; +#[cfg(not(target_arch = "wasm32"))] +use crate::fs::collect_prose_files; +use crate::profile::LintProfile; +#[cfg(not(target_arch = "wasm32"))] +use anyhow::{Context, Result}; +use std::collections::{HashMap, HashSet}; +#[cfg(not(target_arch = "wasm32"))] +use std::fs; +use std::path::{Path, PathBuf}; + +// ── Spec-generated vocabulary ─────────────────────────────────────── +// These are extracted from the compiler spec at build time by build.rs. +// Rebuild after spec changes to update. +#[cfg(not(target_arch = "wasm32"))] +mod spec_vocab { + include!(concat!(env!("OUT_DIR"), "/spec_vocab.rs")); +} + +// Merge spec-generated vocabulary with hardcoded compat values. +// Compat values cover fork extensions (gate, exec, web, edit, etc.) +// that may not be in the upstream spec. 
+const COMPAT_MODELS: &[&str] = &["sonnet", "opus", "haiku"]; +const COMPAT_AGENT_PROPERTIES: &[&str] = &[ + "model", "prompt", "persist", "context", "retry", "backoff", "skills", "permissions", +]; +const SESSION_PROPERTIES: &[&str] = &[ + "model", "prompt", "persist", "context", "retry", "backoff", "skills", "permissions", + "timeout", "cwd", "on-fail", "on_fail", +]; +const EXEC_PROPERTIES: &[&str] = &["timeout", "cwd", "on-fail", "on_fail"]; +const GATE_PROPERTIES: &[&str] = &["prompt", "allow", "timeout", "on_reject"]; +const COMPAT_PERMISSION_TYPES: &[&str] = &["read", "write", "execute", "bash", "network", "web", "edit", "exec"]; +const COMPAT_PERMISSION_VALUES: &[&str] = &["allow", "deny", "ask", "prompt"]; + +/// Returns the effective vocabulary, preferring spec-generated values +/// and falling back to compat defaults when spec values are empty. +#[cfg(not(target_arch = "wasm32"))] +fn known_models() -> &'static [&'static str] { + if spec_vocab::SPEC_MODELS.is_empty() { COMPAT_MODELS } else { spec_vocab::SPEC_MODELS } +} + +#[cfg(target_arch = "wasm32")] +fn known_models() -> &'static [&'static str] { COMPAT_MODELS } + +#[cfg(not(target_arch = "wasm32"))] +fn agent_properties() -> &'static [&'static str] { + if spec_vocab::SPEC_AGENT_PROPERTIES.is_empty() { COMPAT_AGENT_PROPERTIES } else { spec_vocab::SPEC_AGENT_PROPERTIES } +} + +#[cfg(target_arch = "wasm32")] +fn agent_properties() -> &'static [&'static str] { COMPAT_AGENT_PROPERTIES } + +#[cfg(not(target_arch = "wasm32"))] +fn permission_types() -> &'static [&'static str] { + if spec_vocab::SPEC_PERMISSION_TYPES.is_empty() { COMPAT_PERMISSION_TYPES } else { spec_vocab::SPEC_PERMISSION_TYPES } +} + +#[cfg(target_arch = "wasm32")] +fn permission_types() -> &'static [&'static str] { COMPAT_PERMISSION_TYPES } + +#[cfg(not(target_arch = "wasm32"))] +fn permission_values() -> &'static [&'static str] { + if spec_vocab::SPEC_PERMISSION_VALUES.is_empty() { COMPAT_PERMISSION_VALUES } else { 
spec_vocab::SPEC_PERMISSION_VALUES } +} + +#[cfg(target_arch = "wasm32")] +fn permission_values() -> &'static [&'static str] { COMPAT_PERMISSION_VALUES } + +#[derive(Clone, Debug)] +pub struct LintResult { + pub path: PathBuf, + pub diagnostics: Vec, +} + +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] +pub struct DiagnosticCounts { + pub errors: usize, + pub warnings: usize, +} + +#[derive(Clone, Debug)] +struct Scope { + variables: HashMap, + outputs: HashMap, +} + +impl Scope { + fn new() -> Self { + Self { + variables: HashMap::new(), + outputs: HashMap::new(), + } + } +} + +#[derive(Clone, Debug)] +struct AgentRecord { + persistent: bool, +} + +#[derive(Clone, Debug)] +struct AgentRef { + name: String, + line: usize, + column: usize, + kind: RefKind, +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +enum RefKind { + Session, + Resume, +} + +#[derive(Clone, Debug)] +struct LogicalLine { + line: usize, + indent: usize, + text: String, +} + +#[derive(Clone, Debug)] +enum PendingLogical { + String { + start_line: usize, + indent: usize, + buffer: String, + state: QuoteState, + }, + Discretion { + start_line: usize, + indent: usize, + buffer: String, + }, + Container { + start_line: usize, + indent: usize, + balance: isize, + buffer: String, + }, +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +enum QuoteState { + Single { escaped: bool }, + Triple, +} + +#[derive(Clone, Debug)] +struct ScanOutcome { + processed: String, + state: Option, +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +enum BlockKind { + Root, + Agent, + Session, + Resume, + Exec, + Gate, + Permissions, + PropertyBag, + Control, + BlockDef, + Object, +} + +#[derive(Clone, Debug)] +struct BlockFrame { + kind: BlockKind, + indent: usize, + line: usize, + column: usize, + name: Option, + creates_scope: bool, + seen_properties: HashSet, + has_prompt: bool, +} + +impl BlockFrame { + fn new(kind: BlockKind, indent: usize, line: usize, column: usize) -> Self { + let creates_scope = 
matches!(kind, BlockKind::Control | BlockKind::BlockDef);
        Self {
            kind,
            indent,
            line,
            column,
            name: None,
            creates_scope,
            seen_properties: HashSet::new(),
            has_prompt: false,
        }
    }
}

/// Mutable parser state threaded through one file's lint pass.
#[derive(Clone, Debug)]
struct ParseState {
    profile: LintProfile,
    diagnostics: Vec<Diagnostic>,
    blocks: Vec<BlockFrame>,
    scopes: Vec<Scope>,
    agents: HashMap<String, AgentRecord>,
    imports: HashSet<String>,
    // input name -> declaring line
    inputs: HashMap<String, usize>,
    pending_refs: Vec<AgentRef>,
    saw_executable: bool,
}

impl ParseState {
    fn new(profile: LintProfile) -> Self {
        Self {
            profile,
            diagnostics: Vec::new(),
            blocks: vec![BlockFrame::new(BlockKind::Root, 0, 1, 1)],
            scopes: vec![Scope::new()],
            agents: HashMap::new(),
            imports: HashSet::new(),
            inputs: HashMap::new(),
            pending_refs: Vec::new(),
            saw_executable: false,
        }
    }

    fn push_block(&mut self, block: BlockFrame) {
        if block.creates_scope {
            self.scopes.push(Scope::new());
        }
        self.blocks.push(block);
    }

    fn pop_block(&mut self) {
        if let Some(block) = self.blocks.pop()
            && block.creates_scope
        {
            let _ = self.scopes.pop();
        }
    }

    fn current_scope_mut(&mut self) -> &mut Scope {
        self.scopes.last_mut().expect("scope stack is never empty")
    }
}

/// Lint all .prose files under `targets` with the default (compat) profile.
#[cfg(not(target_arch = "wasm32"))]
pub fn lint_paths(targets: &[PathBuf]) -> Result<Vec<LintResult>> {
    lint_paths_with_profile(targets, LintProfile::Compat)
}

#[cfg(not(target_arch = "wasm32"))]
pub fn lint_paths_with_profile(
    targets: &[PathBuf],
    profile: LintProfile,
) -> Result<Vec<LintResult>> {
    let files = collect_prose_files(targets)?;
    let mut results = Vec::with_capacity(files.len());

    for file in files {
        results.push(lint_path_with_profile(&file, profile)?);
    }

    Ok(results)
}

#[cfg(not(target_arch = "wasm32"))]
pub fn lint_path(path: &Path) -> Result<LintResult> {
    lint_path_with_profile(path, LintProfile::Compat)
}

#[cfg(not(target_arch = "wasm32"))]
pub fn lint_path_with_profile(path: &Path, profile: LintProfile) -> Result<LintResult> {
    let source = fs::read_to_string(path).with_context(|| format!("read {}", path.display()))?;
    Ok(lint_source_with_profile(path, &source, profile))
}

pub fn lint_source(path: &Path, source: &str) -> LintResult {
    lint_source_with_profile(path, source, LintProfile::Compat)
}

/// Lint `source` in memory: fold logical lines, parse them, then run
/// whole-file checks (unclosed gates, unresolved agent references) and
/// return the sorted diagnostics.
pub fn lint_source_with_profile(path: &Path, source: &str, profile: LintProfile) -> LintResult {
    let (lines, mut diagnostics) = logical_lines(path, source);
    let mut state = ParseState::new(profile);
    state.diagnostics.append(&mut diagnostics);

    for line in lines {
        parse_logical_line(path, &mut state, line);
    }

    // Any gate still on the stack without a prompt is an error.
    for block in &state.blocks {
        if block.kind == BlockKind::Gate && !block.has_prompt {
            push_diag(
                &mut state.diagnostics,
                path,
                "OPE002",
                Severity::Error,
                "Gate missing prompt",
                block.line,
                block.column,
            );
        }
    }

    // Resolve session/resume references against the defined agents.
    for pending in &state.pending_refs {
        let Some(agent) = state.agents.get(&pending.name) else {
            push_diag(
                &mut state.diagnostics,
                path,
                "E007",
                Severity::Error,
                "Undefined agent reference",
                pending.line,
                pending.column,
            );
            continue;
        };

        if pending.kind == RefKind::Resume && !agent.persistent {
            push_diag(
                &mut state.diagnostics,
                path,
                "E017",
                Severity::Error,
                "`resume:` requires persistent agent",
                pending.line,
                pending.column,
            );
        }
    }

    // Deterministic output order. Comparing field-by-field avoids cloning
    // the path/message into a fresh tuple on every comparison, which the
    // previous tuple-based sort did O(n log n) times.
    state.diagnostics.sort_by(|left, right| {
        left.path
            .cmp(&right.path)
            .then_with(|| left.line.cmp(&right.line))
            .then_with(|| left.column.cmp(&right.column))
            .then_with(|| left.severity.cmp(&right.severity))
            .then_with(|| left.code.cmp(&right.code))
            .then_with(|| left.message.cmp(&right.message))
    });

    LintResult {
        path: path.to_path_buf(),
        diagnostics: state.diagnostics,
    }
}

/// Tally error/warning counts across a batch of results.
pub fn count_diagnostics(results: &[LintResult]) -> DiagnosticCounts {
    let mut counts = DiagnosticCounts::default();

    for result in results {
        for diagnostic in &result.diagnostics {
            match diagnostic.severity {
                Severity::Error => counts.errors += 1,
                Severity::Warning => counts.warnings += 1,
            }
        }
    }

    counts
}

fn logical_lines(path: &Path,
source: &str) -> (Vec<LogicalLine>, Vec<Diagnostic>) {
    let mut lines = Vec::new();
    let mut diagnostics = Vec::new();
    let mut pending: Option<PendingLogical> = None;

    for (idx, raw_line) in source.lines().enumerate() {
        let line_number = idx + 1;
        let indent = count_leading_spaces(raw_line);

        if raw_line.starts_with('\t') {
            diagnostics.push(Diagnostic::new(
                path,
                "OPE001",
                Severity::Error,
                "Tabs used for indentation",
                line_number,
                1,
            ));
        }

        // Continue any multi-line construct opened on an earlier line.
        match &mut pending {
            Some(PendingLogical::String {
                start_line,
                indent: start_indent,
                buffer,
                state,
            }) => {
                let outcome = scan_line(raw_line, Some(*state));
                buffer.push('\n');
                buffer.push_str(&outcome.processed);
                if let Some(next_state) = outcome.state {
                    *state = next_state;
                } else {
                    // String closed on this line — emit the folded logical line.
                    let text = trim_first_line_indent(buffer, *start_indent);
                    lines.push(LogicalLine {
                        line: *start_line,
                        indent: *start_indent,
                        text,
                    });
                    pending = None;
                }
                continue;
            }
            Some(PendingLogical::Discretion {
                start_line,
                indent: start_indent,
                buffer,
            }) => {
                let processed = scan_line(raw_line, None).processed;
                let trimmed = processed.trim();
                buffer.push('\n');
                buffer.push_str(trimmed);
                // "***:" terminates a multi-line discretion block.
                if trimmed == "***:" {
                    lines.push(LogicalLine {
                        line: *start_line,
                        indent: *start_indent,
                        text: buffer.clone(),
                    });
                    pending = None;
                }
                continue;
            }
            Some(PendingLogical::Container {
                start_line,
                indent: start_indent,
                balance,
                buffer,
            }) => {
                let processed = scan_line(raw_line, None).processed;
                let trimmed = processed.trim();
                buffer.push('\n');
                buffer.push_str(trimmed);
                // Track open/close delimiter balance until the container closes.
                *balance += delimiter_balance(trimmed);
                if *balance <= 0 {
                    lines.push(LogicalLine {
                        line: *start_line,
                        indent: *start_indent,
                        text: buffer.clone(),
                    });
                    pending = None;
                }
                continue;
            }
            None => {}
        }

        let outcome = scan_line(raw_line, None);
        let processed = outcome.processed;
        let content = trim_first_line_indent(&processed, indent);

        if content.trim().is_empty() && outcome.state.is_none() {
            continue;
        }

        // An unclosed string starts a multi-line string construct.
        if let Some(state) = outcome.state {
            pending = Some(PendingLogical::String {
                start_line: line_number,
                indent,
                buffer: processed,
                state,
            });
            continue;
        }

        let trimmed = content.trim();
        if starts_multiline_discretion(trimmed) {
            pending = Some(PendingLogical::Discretion {
                start_line: line_number,
                indent,
                buffer: trimmed.to_string(),
            });
            continue;
        }

        let balance = delimiter_balance(trimmed);
        if balance > 0 && !is_object_block_start(trimmed) {
            pending = Some(PendingLogical::Container {
                start_line: line_number,
                indent,
                balance,
                buffer: trimmed.to_string(),
            });
            continue;
        }

        if !trimmed.is_empty() {
            lines.push(LogicalLine {
                line: line_number,
                indent,
                text: content,
            });
        }
    }

    // Anything still pending at EOF is unterminated.
    match pending {
        Some(PendingLogical::String { start_line, .. }) => diagnostics.push(Diagnostic::new(
            path,
            "E001",
            Severity::Error,
            "Unterminated string literal",
            start_line,
            1,
        )),
        Some(PendingLogical::Discretion { start_line, .. }) => diagnostics.push(Diagnostic::new(
            path,
            "E005",
            Severity::Error,
            "Invalid syntax: unterminated multi-line discretion block",
            start_line,
            1,
        )),
        Some(PendingLogical::Container { start_line, .. }) => diagnostics.push(Diagnostic::new(
            path,
            "E005",
            Severity::Error,
            "Invalid syntax: unterminated container expression",
            start_line,
            1,
        )),
        None => {}
    }

    (lines, diagnostics)
}

/// Dispatch one logical line: close blocks by indentation, then try
/// property parsing (inside property-bearing blocks) and statement parsing.
fn parse_logical_line(path: &Path, state: &mut ParseState, line: LogicalLine) {
    let trimmed = line.text.trim();
    if trimmed.is_empty() {
        return;
    }

    // A bare `}` at or below the object's indent closes an object block.
    if let Some(top) = state.blocks.last()
        && top.kind == BlockKind::Object
        && trimmed == "}"
        && line.indent <= top.indent
    {
        state.pop_block();
        return;
    }

    // Dedent closes every block whose indent is at or beyond this line's.
    while state.blocks.len() > 1 {
        let should_pop = {
            let top = state.blocks.last().expect("non-empty block stack");
            line.indent <= top.indent
        };
        if !should_pop {
            break;
        }
        state.pop_block();
    }

    // Object interiors are opaque to the linter.
    if let Some(top) = state.blocks.last()
        && top.kind == BlockKind::Object
    {
        return;
    }

    let current_kind = state
        .blocks
        .last()
        .map(|block| block.kind)
        .unwrap_or(BlockKind::Root);
    if line.indent > state.blocks.last().map(|block| block.indent).unwrap_or(0)
        && matches!(
            current_kind,
            BlockKind::Agent
                | BlockKind::Session
                | BlockKind::Resume
                | BlockKind::Exec
                | BlockKind::Gate
                | BlockKind::Permissions
                | BlockKind::PropertyBag
        )
        && parse_property_line(path, state, &line, current_kind)
    {
        return;
    }

    if parse_statement_line(path, state, &line) {
        return;
    }

    push_diag(
        &mut state.diagnostics,
        path,
        "E004",
        Severity::Error,
        "Unexpected token",
        line.line,
        line.indent + 1,
    );
}

/// Parse a `property: value` line inside a property-bearing block.
/// Returns false when the line is not shaped like a property at all.
fn parse_property_line(
    path: &Path,
    state: &mut ParseState,
    line: &LogicalLine,
    current_kind: BlockKind,
) -> bool {
    let trimmed = line.text.trim();
    let Some((property, value)) = split_once_colon(trimmed) else {
        return false;
    };
    let property = property.trim();
    let value = value.trim();

    if current_kind == BlockKind::Permissions {
        validate_permission(
            path,
            &mut state.diagnostics,
            state.profile,
            line.line,
            property,
            value,
        );
        return true;
    }

    if
current_kind == BlockKind::PropertyBag {
        // Property bags accept any key; no validation here.
        return true;
    }

    let top = state.blocks.last_mut().expect("block stack is never empty");
    if !top.seen_properties.insert(property.to_string()) {
        push_diag(
            &mut state.diagnostics,
            path,
            "E009",
            Severity::Error,
            "Duplicate property",
            line.line,
            line.indent + 1,
        );
    }

    let allowed = allowed_properties(current_kind);
    if !allowed.contains(&property) {
        push_diag(
            &mut state.diagnostics,
            path,
            "W005",
            Severity::Warning,
            "Unknown property name",
            line.line,
            line.indent + 1,
        );
    }

    // Property-specific validation and side effects.
    match property {
        "prompt" => {
            top.has_prompt = true;
            validate_prompt_like(
                path,
                &mut state.diagnostics,
                line.line,
                line.indent + 1,
                value,
                true,
            );
        }
        "model" => {
            if !known_models().contains(&value) {
                push_diag(
                    &mut state.diagnostics,
                    path,
                    "E008",
                    Severity::Error,
                    "Invalid model value",
                    line.line,
                    line.indent + 1,
                );
            }
        }
        "persist" => {
            // Mark the enclosing agent persistent (enables `resume:`).
            if current_kind == BlockKind::Agent
                && let Some(name) = &top.name
                && let Some(agent) = state.agents.get_mut(name)
            {
                agent.persistent = !value.is_empty();
            }
        }
        "skills" => validate_skills(
            path,
            &mut state.diagnostics,
            line.line,
            line.indent + 1,
            value,
        ),
        "context" => {
            // A valueless `context:` opens a nested property bag.
            if value.is_empty() {
                let block = BlockFrame::new(
                    BlockKind::PropertyBag,
                    line.indent,
                    line.line,
                    line.indent + 1,
                );
                state.push_block(block);
            }
        }
        "permissions" => {
            if !value.is_empty() {
                push_diag(
                    &mut state.diagnostics,
                    path,
                    "E015",
                    Severity::Error,
                    "Permissions must be a block",
                    line.line,
                    line.indent + 1,
                );
            } else {
                let block = BlockFrame::new(
                    BlockKind::Permissions,
                    line.indent,
                    line.line,
                    line.indent + 1,
                );
                state.push_block(block);
            }
        }
        "allow" => {
            if !looks_like_string_array(value) {
                push_diag(
                    &mut state.diagnostics,
                    path,
                    "E005",
                    Severity::Error,
                    "Invalid syntax",
                    line.line,
                    line.indent + 1,
                );
            }
        }
        _ => {}
    }

    true
}

/// Recognize one top-level statement. Keyword prefixes are tried in order;
/// returns false only when nothing matches (caller reports E004).
/// Most executable forms also set `saw_executable` for ordering checks.
fn parse_statement_line(path: &Path, state: &mut ParseState, line: &LogicalLine) -> bool {
    let trimmed = line.text.trim();

    if let Some(rest) = trimmed.strip_prefix("-> ") {
        return parse_arrow_target(path, state, line, rest.trim());
    }

    if let Some(rest) = trimmed.strip_prefix("use ") {
        return parse_use(path, state, line, rest.trim());
    }

    if let Some(rest) = trimmed.strip_prefix("import ") {
        return parse_import(path, state, line, rest.trim());
    }

    if let Some(rest) = trimmed.strip_prefix("input ") {
        return parse_input(path, state, line, rest.trim());
    }

    if let Some(rest) = trimmed.strip_prefix("output ") {
        return parse_output(path, state, line, rest.trim());
    }

    if let Some(rest) = trimmed.strip_prefix("agent ") {
        return parse_agent(path, state, line, rest.trim());
    }

    if let Some(rest) = trimmed.strip_prefix("block ") {
        return parse_block_def(state, line, rest.trim());
    }

    if let Some(rest) = trimmed.strip_prefix("gate ") {
        return parse_gate(state, line, rest.trim());
    }

    if trimmed.starts_with("session:") {
        state.saw_executable = true;
        parse_session_agent(
            path,
            state,
            line,
            trimmed.trim_start_matches("session:").trim(),
            false,
        );
        return true;
    }

    if let Some(rest) = trimmed.strip_prefix("session ") {
        state.saw_executable = true;
        return parse_session_stmt(path, state, line, rest.trim());
    }

    if trimmed.starts_with("resume:") {
        state.saw_executable = true;
        parse_resume(
            path,
            state,
            line,
            trimmed.trim_start_matches("resume:").trim(),
        );
        return true;
    }

    if let Some(rest) = trimmed.strip_prefix("exec ") {
        state.saw_executable = true;
        parse_exec(path, state, line, rest.trim(), false);
        return true;
    }

    if let Some(rest) = trimmed.strip_prefix("let ") {
        state.saw_executable = true;
        return parse_binding(path, state, line, rest.trim(), BindingKind::Let);
    }

    if let Some(rest) = trimmed.strip_prefix("const ") {
        state.saw_executable = true;
        return parse_binding(path, state, line, rest.trim(), BindingKind::Const);
    }

    if trimmed.starts_with("parallel ")
        || trimmed == "parallel:"
        || trimmed.starts_with("parallel:")
    {
        state.saw_executable = true;
        let block = BlockFrame::new(BlockKind::Control, line.indent, line.line, line.indent + 1);
        state.push_block(block);
        return true;
    }

    // Remaining control-flow keywords share a validated Control frame.
    if trimmed.starts_with("repeat ")
        || trimmed.starts_with("for ")
        || trimmed.starts_with("try:")
        || trimmed.starts_with("catch")
        || trimmed.starts_with("finally:")
        || trimmed.starts_with("choice ")
        || trimmed.starts_with("if ")
        || trimmed.starts_with("elif ")
        || trimmed == "else:"
        || trimmed.starts_with("option ")
        || trimmed == "do:"
        || trimmed.starts_with("parallel for ")
    {
        state.saw_executable = true;
        validate_control_line(path, &mut state.diagnostics, line);
        let block = BlockFrame::new(BlockKind::Control, line.indent, line.line, line.indent + 1);
        state.push_block(block);
        return true;
    }

    if trimmed.starts_with("loop") {
        state.saw_executable = true;
        validate_loop_line(path, &mut state.diagnostics, line);
        let block = BlockFrame::new(BlockKind::Control, line.indent, line.line, line.indent + 1);
        state.push_block(block);
        return true;
    }

    if trimmed.starts_with("do ") || trimmed.starts_with("throw") {
        state.saw_executable = true;
        return true;
    }

    if is_pipeline_line(trimmed) {
        state.saw_executable = true;
        // A trailing colon with no inline body opens a control block.
        if trimmed.ends_with(':') && !has_inline_after_colon(trimmed) {
            let block =
                BlockFrame::new(BlockKind::Control, line.indent, line.line, line.indent + 1);
            state.push_block(block);
        }
        return true;
    }

    if let Some((name, expr)) = split_assignment(trimmed) {
        state.saw_executable = true;
        let _ = name;
        parse_expression(path, state, line, expr.trim());
        return true;
    }

    // A bare identifier or identifier-call counts as an executable statement.
    if parse_identifier(trimmed)
        .map(|(_, tail)| tail.trim().is_empty() || tail.trim_start().starts_with('('))
        .unwrap_or(false)
    {
        state.saw_executable = true;
return true; + } + + false +} + +fn parse_arrow_target( + path: &Path, + state: &mut ParseState, + line: &LogicalLine, + target: &str, +) -> bool { + if target.starts_with("session:") { + parse_session_agent( + path, + state, + line, + target.trim_start_matches("session:").trim(), + false, + ); + return true; + } + if let Some(rest) = target.strip_prefix("session ") { + return parse_session_stmt(path, state, line, rest.trim()); + } + if target.starts_with("resume:") { + parse_resume( + path, + state, + line, + target.trim_start_matches("resume:").trim(), + ); + return true; + } + if let Some(rest) = target.strip_prefix("exec ") { + parse_exec(path, state, line, rest.trim(), false); + return true; + } + true +} + +fn parse_use(path: &Path, state: &mut ParseState, line: &LogicalLine, rest: &str) -> bool { + if let Some(parsed) = parse_string_literal(rest) { + let literal = parsed.content; + let tail = parsed.rest; + let import_key = literal.trim().to_string(); + if import_key.is_empty() { + push_diag( + &mut state.diagnostics, + path, + "E011", + Severity::Error, + "Empty use path", + line.line, + line.indent + 1, + ); + } else if !state.imports.insert(import_key) { + push_diag( + &mut state.diagnostics, + path, + "E010", + Severity::Error, + "Duplicate use statement", + line.line, + line.indent + 1, + ); + } + + if let Some(alias_tail) = tail.trim().strip_prefix("as ") + && parse_identifier(alias_tail).is_none() + { + push_diag( + &mut state.diagnostics, + path, + "E012", + Severity::Error, + "Invalid use path format", + line.line, + line.indent + 1, + ); + } + return true; + } + + push_diag( + &mut state.diagnostics, + path, + "E011", + Severity::Error, + "Empty use path", + line.line, + line.indent + 1, + ); + true +} + +fn parse_import(path: &Path, state: &mut ParseState, line: &LogicalLine, rest: &str) -> bool { + push_diag( + &mut state.diagnostics, + path, + "OPW003", + compatibility_severity(state.profile), + "Legacy import syntax accepted; prefer use 
\"path\" as alias", + line.line, + line.indent + 1, + ); + + let Some(parsed) = parse_string_literal(rest) else { + return true; + }; + let name = parsed.content; + let tail = parsed.rest; + if !tail.trim().starts_with("from ") { + push_diag( + &mut state.diagnostics, + path, + "W006", + Severity::Warning, + "Unknown import source format", + line.line, + line.indent + 1, + ); + return true; + } + + let source = tail.trim().trim_start_matches("from ").trim(); + if let Some(origin) = parse_string_literal(source) { + let key = format!("{}::{}", name.trim(), origin.content.trim()); + state.imports.insert(key); + } + true +} + +fn parse_input(path: &Path, state: &mut ParseState, line: &LogicalLine, rest: &str) -> bool { + let Some((name, tail)) = parse_identifier(rest) else { + push_diag( + &mut state.diagnostics, + path, + "E020", + Severity::Error, + "Empty input name", + line.line, + line.indent + 1, + ); + return true; + }; + + if !tail.trim_start().starts_with(':') { + push_diag( + &mut state.diagnostics, + path, + "E005", + Severity::Error, + "Invalid syntax", + line.line, + line.indent + 1, + ); + return true; + } + + if state.saw_executable { + push_diag( + &mut state.diagnostics, + path, + "OPW007", + compatibility_severity(state.profile), + "Input declaration after executable statement; spec currently treats this as invalid", + line.line, + line.indent + 1, + ); + } + + if state.inputs.insert(name.to_string(), line.line).is_some() { + push_diag( + &mut state.diagnostics, + path, + "E021", + Severity::Error, + "Duplicate input declaration", + line.line, + line.indent + 1, + ); + } + + let value = tail.trim_start().trim_start_matches(':').trim(); + validate_prompt_like( + path, + &mut state.diagnostics, + line.line, + line.indent + 1, + value, + false, + ); + true +} + +fn parse_output(path: &Path, state: &mut ParseState, line: &LogicalLine, rest: &str) -> bool { + if rest.trim().is_empty() { + push_diag( + &mut state.diagnostics, + path, + "E023", + 
Severity::Error, + "Empty output name", + line.line, + line.indent + 1, + ); + return true; + } + + if let Some((name, tail)) = parse_identifier(rest) + && let Some(expr) = tail.trim_start().strip_prefix('=') + { + let scope = state.current_scope_mut(); + if scope.outputs.insert(name.to_string(), line.line).is_some() { + push_diag( + &mut state.diagnostics, + path, + "E024", + Severity::Error, + "Duplicate output declaration", + line.line, + line.indent + 1, + ); + } + + parse_expression(path, state, line, expr.trim()); + return true; + } + + parse_expression(path, state, line, rest.trim()); + true +} + +fn parse_agent(path: &Path, state: &mut ParseState, line: &LogicalLine, rest: &str) -> bool { + let Some((name, tail)) = parse_identifier(rest) else { + return false; + }; + if tail.trim() != ":" { + return false; + } + + if state.agents.contains_key(name) { + push_diag( + &mut state.diagnostics, + path, + "E006", + Severity::Error, + "Duplicate agent definition", + line.line, + line.indent + 1, + ); + } else { + state + .agents + .insert(name.to_string(), AgentRecord { persistent: false }); + } + + let mut block = BlockFrame::new(BlockKind::Agent, line.indent, line.line, line.indent + 1); + block.name = Some(name.to_string()); + state.push_block(block); + true +} + +fn parse_block_def(state: &mut ParseState, line: &LogicalLine, rest: &str) -> bool { + let Some((name, tail)) = parse_identifier(rest) else { + return false; + }; + let tail = tail.trim(); + if !(tail == ":" || (tail.starts_with('(') && tail.ends_with(':'))) { + return false; + } + let mut block = BlockFrame::new(BlockKind::BlockDef, line.indent, line.line, line.indent + 1); + block.name = Some(name.to_string()); + state.push_block(block); + true +} + +fn parse_gate(state: &mut ParseState, line: &LogicalLine, rest: &str) -> bool { + let Some((name, tail)) = parse_identifier(rest) else { + return false; + }; + if tail.trim() != ":" { + return false; + } + let mut block = BlockFrame::new(BlockKind::Gate, 
line.indent, line.line, line.indent + 1); + block.name = Some(name.to_string()); + state.push_block(block); + true +} + +fn parse_session_stmt(path: &Path, state: &mut ParseState, line: &LogicalLine, rest: &str) -> bool { + if let Some(prompt) = parse_string_literal(rest) { + validate_prompt_content( + path, + &mut state.diagnostics, + line.line, + line.indent + 1, + &prompt.content, + true, + ); + let block = BlockFrame::new(BlockKind::Session, line.indent, line.line, line.indent + 1); + state.push_block(block); + return true; + } + + if let Some((label, tail)) = parse_identifier(rest) { + let tail = tail.trim_start(); + if tail == ":" { + push_diag( + &mut state.diagnostics, + path, + "OPW005", + compatibility_severity(state.profile), + "Legacy session block syntax accepted", + line.line, + line.indent + 1, + ); + let mut block = + BlockFrame::new(BlockKind::Session, line.indent, line.line, line.indent + 1); + block.name = Some(label.to_string()); + state.push_block(block); + return true; + } + if let Some(agent_name) = tail.strip_prefix(':').map(str::trim) + && let Some((agent, _)) = parse_identifier(agent_name) + { + push_diag( + &mut state.diagnostics, + path, + "OPW004", + compatibility_severity(state.profile), + "Legacy labeled session syntax accepted", + line.line, + line.indent + 1, + ); + state.pending_refs.push(AgentRef { + name: agent.to_string(), + line: line.line, + column: line.indent + 1, + kind: RefKind::Session, + }); + let mut block = + BlockFrame::new(BlockKind::Session, line.indent, line.line, line.indent + 1); + block.name = Some(label.to_string()); + state.push_block(block); + return true; + } + } + + push_diag( + &mut state.diagnostics, + path, + "E003", + Severity::Error, + "Session missing prompt or agent", + line.line, + line.indent + 1, + ); + true +} + +fn parse_session_agent( + path: &Path, + state: &mut ParseState, + line: &LogicalLine, + rest: &str, + output_like: bool, +) { + if let Some((agent, _tail)) = parse_identifier(rest) { + 
state.pending_refs.push(AgentRef { + name: agent.to_string(), + line: line.line, + column: line.indent + 1, + kind: RefKind::Session, + }); + let block = BlockFrame::new(BlockKind::Session, line.indent, line.line, line.indent + 1); + state.push_block(block); + } else if !output_like { + push_diag( + &mut state.diagnostics, + path, + "E003", + Severity::Error, + "Session missing prompt or agent", + line.line, + line.indent + 1, + ); + } +} + +fn parse_resume(path: &Path, state: &mut ParseState, line: &LogicalLine, rest: &str) { + if let Some((agent, _tail)) = parse_identifier(rest) { + state.pending_refs.push(AgentRef { + name: agent.to_string(), + line: line.line, + column: line.indent + 1, + kind: RefKind::Resume, + }); + let block = BlockFrame::new(BlockKind::Resume, line.indent, line.line, line.indent + 1); + state.push_block(block); + } else { + push_diag( + &mut state.diagnostics, + path, + "E007", + Severity::Error, + "Undefined agent reference", + line.line, + line.indent + 1, + ); + } +} + +fn parse_exec( + path: &Path, + state: &mut ParseState, + line: &LogicalLine, + rest: &str, + _output_like: bool, +) { + validate_prompt_like( + path, + &mut state.diagnostics, + line.line, + line.indent + 1, + rest, + false, + ); + let block = BlockFrame::new(BlockKind::Exec, line.indent, line.line, line.indent + 1); + state.push_block(block); +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +enum BindingKind { + Let, + Const, +} + +fn parse_binding( + path: &Path, + state: &mut ParseState, + line: &LogicalLine, + rest: &str, + _kind: BindingKind, +) -> bool { + if rest.starts_with('{') { + return true; + } + + let Some((name, tail)) = parse_identifier(rest) else { + return false; + }; + let Some(expr) = tail.trim_start().strip_prefix('=') else { + return false; + }; + + register_variable(path, state, line, name); + parse_expression(path, state, line, expr.trim()); + true +} + +fn parse_expression(path: &Path, state: &mut ParseState, line: &LogicalLine, expr: &str) { 
+ if expr == "{" { + let block = BlockFrame::new(BlockKind::Object, line.indent, line.line, line.indent + 1); + state.push_block(block); + return; + } + + if is_pipeline_line(expr) { + if expr.ends_with(':') && !has_inline_after_colon(expr) { + let block = + BlockFrame::new(BlockKind::Control, line.indent, line.line, line.indent + 1); + state.push_block(block); + } + return; + } + + if expr.starts_with("session:") { + parse_session_agent( + path, + state, + line, + expr.trim_start_matches("session:").trim(), + true, + ); + return; + } + + if let Some(rest) = expr.strip_prefix("session ") { + let _ = parse_session_stmt(path, state, line, rest.trim()); + return; + } + + if expr.starts_with("resume:") { + parse_resume(path, state, line, expr.trim_start_matches("resume:").trim()); + return; + } + + if let Some(rest) = expr.strip_prefix("exec ") { + parse_exec(path, state, line, rest.trim(), true); + return; + } + + if expr.starts_with("do ") {} +} + +fn register_variable(path: &Path, state: &mut ParseState, line: &LogicalLine, name: &str) { + let scope = state.current_scope_mut(); + if scope + .variables + .insert(name.to_string(), line.line) + .is_some() + { + push_diag( + &mut state.diagnostics, + path, + "E019", + Severity::Error, + "Duplicate variable name", + line.line, + line.indent + 1, + ); + } +} + +fn validate_permission( + path: &Path, + diagnostics: &mut Vec, + profile: LintProfile, + line: usize, + property: &str, + value: &str, +) { + if !permission_types().contains(&property) { + push_diag( + diagnostics, + path, + "W008", + compatibility_severity(profile), + "Unknown permission type", + line, + 1, + ); + } + + if permission_values().contains(&value) { + return; + } + + if looks_like_value_array(value) { + return; + } + + // Bare identifier that isn't a known permission value + if parse_identifier(value).map(|(_, tail)| tail.trim().is_empty()).unwrap_or(false) { + push_diag( + diagnostics, + path, + "W009", + Severity::Warning, + "Unknown permission 
value", + line, + 1, + ); + return; + } + + push_diag( + diagnostics, + path, + "E016", + Severity::Error, + "Permission pattern must be a string or identifier", + line, + 1, + ); +} + +fn validate_skills( + path: &Path, + diagnostics: &mut Vec, + line: usize, + column: usize, + value: &str, +) { + if !value.starts_with('[') || !value.ends_with(']') { + push_diag( + diagnostics, + path, + "E013", + Severity::Error, + "Skills must be an array", + line, + column, + ); + return; + } + + let inner = &value[1..value.len() - 1]; + if inner.trim().is_empty() { + push_diag( + diagnostics, + path, + "W010", + Severity::Warning, + "Empty skills array", + line, + column, + ); + return; + } + + for item in split_csv_like(inner) { + let trimmed = item.trim(); + if parse_string_literal(trimmed).is_none() { + push_diag( + diagnostics, + path, + "E014", + Severity::Error, + "Skill name must be a string", + line, + column, + ); + return; + } + } +} + +fn validate_string_escapes( + path: &Path, + diagnostics: &mut Vec, + line: usize, + column: usize, + source: &str, +) { + let mut escaped = false; + for ch in source.chars() { + if escaped { + match ch { + '\\' | '"' | 'n' | 't' | '{' | '}' => {} + _ => { + push_diag( + diagnostics, + path, + "E002", + Severity::Error, + format!("Unknown escape sequence: \\{ch}"), + line, + column, + ); + } + } + escaped = false; + continue; + } + if ch == '\\' { + escaped = true; + } + } +} + +fn validate_prompt_like( + path: &Path, + diagnostics: &mut Vec, + line: usize, + column: usize, + source: &str, + session_prompt: bool, +) { + if let Some(literal) = parse_string_literal(source.trim()) { + validate_string_escapes(path, diagnostics, line, column, &literal.content); + validate_prompt_content( + path, + diagnostics, + line, + column, + &literal.content, + session_prompt, + ); + return; + } + + if source.trim().is_empty() { + let (code, message) = if session_prompt { + ("W001", "Empty session prompt") + } else { + ("W004", "Empty prompt 
property") + }; + push_diag( + diagnostics, + path, + code, + Severity::Warning, + message, + line, + column, + ); + } +} + +fn validate_prompt_content( + path: &Path, + diagnostics: &mut Vec, + line: usize, + column: usize, + content: &str, + session_prompt: bool, +) { + if content.is_empty() { + let (code, message) = if session_prompt { + ("W001", "Empty session prompt") + } else { + ("W004", "Empty prompt property") + }; + push_diag( + diagnostics, + path, + code, + Severity::Warning, + message, + line, + column, + ); + return; + } + + if content.trim().is_empty() { + let (code, message) = if session_prompt { + ("W002", "Whitespace-only session prompt") + } else { + ("W004", "Empty prompt property") + }; + push_diag( + diagnostics, + path, + code, + Severity::Warning, + message, + line, + column, + ); + } + + if content.len() > 10_000 { + push_diag( + diagnostics, + path, + "W003", + Severity::Warning, + "Prompt exceeds 10,000 characters", + line, + column, + ); + } +} + +fn validate_loop_line(path: &Path, diagnostics: &mut Vec, line: &LogicalLine) { + let trimmed = line.text.trim(); + if trimmed == "loop:" || trimmed.starts_with("loop:") { + push_diag( + diagnostics, + path, + "OPW001", + Severity::Warning, + "Unbounded loop without max iterations", + line.line, + line.indent + 1, + ); + } + + if (trimmed.starts_with("loop until ") || trimmed.starts_with("loop while ")) + && let Some(condition) = extract_discretion_condition(trimmed) + && condition.trim().len() < 10 + { + push_diag( + diagnostics, + path, + "OPW002", + Severity::Warning, + "Discretion condition may be ambiguous", + line.line, + line.indent + 1, + ); + } + + if let Some(max_text) = extract_loop_max(trimmed) + && max_text + .parse::() + .ok() + .filter(|value| *value > 0) + .is_none() + { + push_diag( + diagnostics, + path, + "OPE003", + Severity::Error, + "Invalid loop max value", + line.line, + line.indent + 1, + ); + } +} + +fn validate_control_line(path: &Path, diagnostics: &mut Vec, line: 
&LogicalLine) { + let trimmed = line.text.trim(); + if (trimmed.starts_with("if ") + || trimmed.starts_with("elif ") + || trimmed.starts_with("choice ")) + && let Some(condition) = extract_discretion_condition(trimmed) + && condition.trim().len() < 10 + { + push_diag( + diagnostics, + path, + "OPW002", + Severity::Warning, + "Discretion condition may be ambiguous", + line.line, + line.indent + 1, + ); + } +} + +fn allowed_properties(kind: BlockKind) -> &'static [&'static str] { + match kind { + BlockKind::Agent => agent_properties(), + BlockKind::Session | BlockKind::Resume => SESSION_PROPERTIES, + BlockKind::Exec => EXEC_PROPERTIES, + BlockKind::Gate => GATE_PROPERTIES, + BlockKind::Permissions + | BlockKind::PropertyBag + | BlockKind::Control + | BlockKind::BlockDef + | BlockKind::Object + | BlockKind::Root => &[], + } +} + +fn push_diag( + diagnostics: &mut Vec, + path: &Path, + code: &'static str, + severity: Severity, + message: impl Into, + line: usize, + column: usize, +) { + diagnostics.push(Diagnostic::new(path, code, severity, message, line, column)); +} + +fn compatibility_severity(profile: LintProfile) -> Severity { + match profile { + LintProfile::Strict => Severity::Error, + LintProfile::Compat => Severity::Warning, + } +} + +fn parse_identifier(input: &str) -> Option<(&str, &str)> { + let mut chars = input.char_indices(); + let (_, first) = chars.next()?; + if !(first.is_ascii_alphabetic() || first == '_') { + return None; + } + + let mut end = first.len_utf8(); + for (idx, ch) in chars { + if ch.is_ascii_alphanumeric() || ch == '_' || ch == '-' { + end = idx + ch.len_utf8(); + } else { + break; + } + } + Some((&input[..end], &input[end..])) +} + +#[derive(Clone, Debug)] +struct ParsedString<'a> { + content: String, + rest: &'a str, +} + +fn parse_string_literal(input: &str) -> Option> { + if let Some(rest) = input.strip_prefix("\"\"\"") { + let end = rest.find("\"\"\"")?; + let content = rest[..end].to_string(); + let tail = &rest[end + 3..]; + 
return Some(ParsedString { + content, + rest: tail, + }); + } + + let rest = input.strip_prefix('"')?; + let mut escaped = false; + for (idx, ch) in rest.char_indices() { + if escaped { + escaped = false; + continue; + } + match ch { + '\\' => escaped = true, + '"' => { + return Some(ParsedString { + content: rest[..idx].to_string(), + rest: &rest[idx + 1..], + }); + } + _ => {} + } + } + + None +} + +fn split_assignment(input: &str) -> Option<(&str, &str)> { + if input.starts_with("output ") || input.starts_with("let ") || input.starts_with("const ") { + return None; + } + + let (name, tail) = parse_identifier(input)?; + let expr = tail.trim_start().strip_prefix('=')?; + Some((name, expr)) +} + +fn split_once_colon(input: &str) -> Option<(&str, &str)> { + let mut quote: Option = None; + let bytes = input.as_bytes(); + let mut idx = 0; + + while idx < bytes.len() { + if let Some(state) = quote { + match state { + QuoteState::Triple => { + if input[idx..].starts_with("\"\"\"") { + quote = None; + idx += 3; + } else { + idx += 1; + } + } + QuoteState::Single { escaped } => { + let ch = input[idx..].chars().next().expect("valid char"); + if escaped { + quote = Some(QuoteState::Single { escaped: false }); + } else if ch == '\\' { + quote = Some(QuoteState::Single { escaped: true }); + } else if ch == '"' { + quote = None; + } + idx += ch.len_utf8(); + } + } + continue; + } + + if input[idx..].starts_with("\"\"\"") { + quote = Some(QuoteState::Triple); + idx += 3; + continue; + } + + let ch = input[idx..].chars().next().expect("valid char"); + if ch == '"' { + quote = Some(QuoteState::Single { escaped: false }); + idx += ch.len_utf8(); + continue; + } + if ch == ':' { + return Some((&input[..idx], &input[idx + 1..])); + } + idx += ch.len_utf8(); + } + + None +} + +fn split_csv_like(input: &str) -> Vec<&str> { + let mut values = Vec::new(); + let mut start = 0; + let mut depth = 0usize; + let mut idx = 0; + let mut quote: Option = None; + + while idx < input.len() { + if 
let Some(state) = quote { + match state { + QuoteState::Triple => { + if input[idx..].starts_with("\"\"\"") { + quote = None; + idx += 3; + } else { + idx += 1; + } + } + QuoteState::Single { escaped } => { + let ch = input[idx..].chars().next().expect("valid char"); + if escaped { + quote = Some(QuoteState::Single { escaped: false }); + } else if ch == '\\' { + quote = Some(QuoteState::Single { escaped: true }); + } else if ch == '"' { + quote = None; + } + idx += ch.len_utf8(); + } + } + continue; + } + + if input[idx..].starts_with("\"\"\"") { + quote = Some(QuoteState::Triple); + idx += 3; + continue; + } + let ch = input[idx..].chars().next().expect("valid char"); + match ch { + '"' => quote = Some(QuoteState::Single { escaped: false }), + '[' | '{' | '(' => depth += 1, + ']' | '}' | ')' => depth = depth.saturating_sub(1), + ',' if depth == 0 => { + values.push(&input[start..idx]); + start = idx + 1; + } + _ => {} + } + idx += ch.len_utf8(); + } + values.push(&input[start..]); + values +} + +fn looks_like_string_array(value: &str) -> bool { + if !value.starts_with('[') || !value.ends_with(']') { + return false; + } + let inner = &value[1..value.len() - 1]; + if inner.trim().is_empty() { + return true; + } + split_csv_like(inner) + .into_iter() + .all(|item| parse_string_literal(item.trim()).is_some()) +} + +fn looks_like_value_array(value: &str) -> bool { + if !value.starts_with('[') || !value.ends_with(']') { + return false; + } + let inner = &value[1..value.len() - 1]; + if inner.trim().is_empty() { + return true; + } + split_csv_like(inner).into_iter().all(|item| { + let trimmed = item.trim(); + parse_string_literal(trimmed).is_some() + || parse_identifier(trimmed) + .map(|(_, tail)| tail.trim().is_empty()) + .unwrap_or(false) + }) +} + +fn extract_discretion_condition(input: &str) -> Option { + if let Some(start) = input.find("***") { + let tail = &input[start + 3..]; + if let Some(end) = tail.rfind("***:") { + return Some(tail[..end].replace('\n', " 
").trim().to_string()); + } + } + + let start = input.find("**")?; + let tail = &input[start + 2..]; + let end = tail.find("**")?; + Some(tail[..end].trim().to_string()) +} + +fn extract_loop_max(input: &str) -> Option { + let start = input.find("(max:")?; + let tail = &input[start + 5..]; + let end = tail.find(')')?; + Some( + tail[..end] + .trim() + .trim_start_matches(':') + .trim() + .to_string(), + ) +} + +fn is_pipeline_line(input: &str) -> bool { + input.starts_with('|') + || input.contains(" | map:") + || input.contains(" | filter:") + || input.contains(" | pmap:") + || input.contains(" | reduce(") +} + +fn is_object_block_start(input: &str) -> bool { + input.starts_with("output ") && input.trim_end().ends_with('{') +} + +fn delimiter_balance(input: &str) -> isize { + let mut balance = 0isize; + let mut idx = 0; + let mut quote: Option = None; + + while idx < input.len() { + if let Some(state) = quote { + match state { + QuoteState::Triple => { + if input[idx..].starts_with("\"\"\"") { + idx += 3; + quote = None; + } else { + idx += input[idx..].chars().next().expect("valid char").len_utf8(); + } + } + QuoteState::Single { escaped } => { + let ch = input[idx..].chars().next().expect("valid char"); + idx += ch.len_utf8(); + if escaped { + quote = Some(QuoteState::Single { escaped: false }); + } else if ch == '\\' { + quote = Some(QuoteState::Single { escaped: true }); + } else if ch == '"' { + quote = None; + } + } + } + continue; + } + + if input[idx..].starts_with("\"\"\"") { + quote = Some(QuoteState::Triple); + idx += 3; + continue; + } + + let ch = input[idx..].chars().next().expect("valid char"); + match ch { + '"' => quote = Some(QuoteState::Single { escaped: false }), + '[' | '(' => balance += 1, + ']' | ')' => balance -= 1, + _ => {} + } + idx += ch.len_utf8(); + } + + balance +} + +fn has_inline_after_colon(input: &str) -> bool { + let Some((_, tail)) = split_once_colon(input) else { + return false; + }; + !tail.trim().is_empty() +} + +fn 
starts_multiline_discretion(input: &str) -> bool { + (input.starts_with("if ***") + || input.starts_with("elif ***") + || input.starts_with("choice ***") + || input.starts_with("loop until ***") + || input.starts_with("loop while ***")) + && !input.contains("***:") +} + +fn count_leading_spaces(input: &str) -> usize { + input.chars().take_while(|ch| *ch == ' ').count() +} + +fn trim_first_line_indent(input: &str, indent: usize) -> String { + let mut lines = input.lines(); + let first = lines.next().unwrap_or_default(); + let mut text = first.chars().skip(indent).collect::(); + for line in lines { + text.push('\n'); + text.push_str(line); + } + text.trim_end().to_string() +} + +fn scan_line(input: &str, initial: Option) -> ScanOutcome { + let mut processed = String::new(); + let mut idx = 0; + let mut state = initial; + + while idx < input.len() { + if let Some(current) = state { + match current { + QuoteState::Triple => { + if input[idx..].starts_with("\"\"\"") { + processed.push_str("\"\"\""); + idx += 3; + state = None; + } else { + let ch = input[idx..].chars().next().expect("valid char"); + processed.push(ch); + idx += ch.len_utf8(); + } + } + QuoteState::Single { escaped } => { + let ch = input[idx..].chars().next().expect("valid char"); + processed.push(ch); + idx += ch.len_utf8(); + if escaped { + state = Some(QuoteState::Single { escaped: false }); + } else if ch == '\\' { + state = Some(QuoteState::Single { escaped: true }); + } else if ch == '"' { + state = None; + } + } + } + continue; + } + + if input[idx..].starts_with("\"\"\"") { + processed.push_str("\"\"\""); + idx += 3; + state = Some(QuoteState::Triple); + continue; + } + + let ch = input[idx..].chars().next().expect("valid char"); + if ch == '"' { + processed.push(ch); + idx += ch.len_utf8(); + state = Some(QuoteState::Single { escaped: false }); + continue; + } + + if ch == '#' { + break; + } + + processed.push(ch); + idx += ch.len_utf8(); + } + + ScanOutcome { processed, state } +} + 
+#[cfg(test)] +mod tests { + use super::{count_diagnostics, lint_paths, lint_source, lint_source_with_profile}; + use crate::profile::LintProfile; + use std::collections::BTreeMap; + use std::path::{Path, PathBuf}; + + /// Resolve the examples directory relative to this crate's position + /// in the prose repo: tools/lint/ → skills/open-prose/examples + fn examples_dir() -> PathBuf { + Path::new("../../skills/open-prose/examples").to_path_buf() + } + + #[test] + fn valid_fixture_has_no_errors() { + let source = std::fs::read_to_string("fixtures/valid/basic.prose").unwrap(); + let result = lint_source(Path::new("fixtures/valid/basic.prose"), &source); + assert!( + result + .diagnostics + .iter() + .all(|diagnostic| diagnostic.severity != crate::diag::Severity::Error) + ); + } + + #[test] + fn invalid_fixture_reports_errors() { + let source = std::fs::read_to_string("fixtures/invalid/mixed.prose").unwrap(); + let result = lint_source(Path::new("fixtures/invalid/mixed.prose"), &source); + let codes = result + .diagnostics + .iter() + .map(|diagnostic| diagnostic.code) + .collect::>(); + assert!(codes.contains(&"E008")); + assert!(codes.contains(&"E009")); + assert!(codes.contains(&"E015")); + } + + #[test] + fn legacy_import_is_error_in_strict_and_warning_in_compat() { + let source = "import \"web-search\" from \"github:anthropic/skills\"\n"; + let strict = lint_source_with_profile( + Path::new("test.prose"), + source, + LintProfile::Strict, + ); + let compat = lint_source_with_profile( + Path::new("test.prose"), + source, + LintProfile::Compat, + ); + assert!(strict.diagnostics.iter().any(|d| d.code == "OPW003" + && d.severity == crate::diag::Severity::Error)); + assert!(compat.diagnostics.iter().any(|d| d.code == "OPW003" + && d.severity == crate::diag::Severity::Warning)); + } + + #[test] + fn runtime_input_is_error_in_strict_and_warning_in_compat() { + let source = "session \"Draft\"\n\ninput approval: \"Approve?\"\n"; + let strict = lint_source_with_profile( + 
Path::new("test.prose"), + source, + LintProfile::Strict, + ); + let compat = lint_source_with_profile( + Path::new("test.prose"), + source, + LintProfile::Compat, + ); + assert!(strict.diagnostics.iter().any(|d| d.code == "OPW007" + && d.severity == crate::diag::Severity::Error)); + assert!(compat.diagnostics.iter().any(|d| d.code == "OPW007" + && d.severity == crate::diag::Severity::Warning)); + } + + #[test] + fn examples_lint_without_errors() { + let examples = examples_dir(); + if !examples.exists() { + eprintln!("Skipping: examples dir not found at {}", examples.display()); + return; + } + let results = lint_paths(&[examples.clone()]).unwrap(); + if results.is_empty() { + // No .prose files found — examples may have been converted to .md + eprintln!("Skipping: no .prose files found in {}", examples.display()); + return; + } + let counts = count_diagnostics(&results); + assert_eq!( + counts.errors, + 0, + "unexpected errors: {:?}", + summarize_errors(&results) + ); + } + + fn summarize_errors(results: &[crate::lint_legacy::LintResult]) -> BTreeMap { + let mut counts = BTreeMap::new(); + for result in results { + for diagnostic in &result.diagnostics { + if diagnostic.severity == crate::diag::Severity::Error { + *counts.entry(diagnostic.code.to_string()).or_insert(0) += 1; + } + } + } + counts + } +} diff --git a/tools/lint/src/main.rs b/tools/lint/src/main.rs new file mode 100644 index 0000000..6da0a7e --- /dev/null +++ b/tools/lint/src/main.rs @@ -0,0 +1,185 @@ +use anyhow::Result; +use openprose_lint::lint_legacy::{count_diagnostics, lint_paths_with_profile}; +use openprose_lint::lint; +use openprose_lint::profile::LintProfile; +use std::path::PathBuf; + +fn main() -> Result<()> { + let code = run(std::env::args().skip(1))?; + std::process::exit(code); +} + +fn run(args: impl IntoIterator) -> Result { + let args: Vec = args.into_iter().collect(); + + let command = if let Some(first) = args.first() { + match first.as_str() { + "lint" | "lint-md" | "discover" 
| "help" | "--help" | "-h" => first.clone(), + _ => "lint".to_string(), + } + } else { + print_usage(); + return Ok(0); + }; + + let rest: Vec = if ["lint", "lint-md", "discover", "help", "--help", "-h"] + .contains(&args.first().map(|s| s.as_str()).unwrap_or("")) + { + args[1..].to_vec() + } else { + args + }; + + match command.as_str() { + "lint" => run_lint(rest), + "lint-md" => run_lint_md(rest), + "discover" => run_discover(rest), + _ => { + print_usage(); + Ok(0) + } + } +} + +fn print_usage() { + eprintln!( + "openprose-lint — deterministic linter for OpenProse programs\n\ + \n\ + Usage:\n \ + openprose-lint lint [--profile strict|compat] [...] v1 .prose files\n \ + openprose-lint lint-md [--profile strict|compat] [...] .md programs\n \ + openprose-lint discover [...] spec gap report\n\ + \n\ + The linter auto-detects multi-file program directories.\n\ + \n\ + Profiles:\n \ + compat (default) Warnings for legacy/compatibility constructs\n \ + strict Errors for anything not in the current spec\n\ + \n\ + Exit codes:\n \ + 0 No errors\n \ + 1 One or more errors\n \ + 2 CLI usage error" + ); +} + +fn parse_lint_args(args: Vec) -> Result<(LintProfile, Vec), i32> { + let mut profile = LintProfile::default(); + let mut targets = Vec::new(); + let mut iter = args.into_iter(); + + while let Some(arg) = iter.next() { + match arg.as_str() { + "--profile" => { + let Some(value) = iter.next() else { + eprintln!("openprose-lint: missing value for --profile"); + return Err(2); + }; + profile = value.parse().map_err(|e| { + eprintln!("openprose-lint: {e}"); + 2 + })?; + } + _ => targets.push(PathBuf::from(arg)), + } + } + + if targets.is_empty() { + return Err(2); + } + + Ok((profile, targets)) +} + +fn run_lint(args: Vec) -> Result { + let (profile, targets) = match parse_lint_args(args) { + Ok(v) => v, + Err(code) => { + eprintln!("Usage: openprose-lint lint [--profile strict|compat] [...]"); + return Ok(code); + } + }; + + let results = lint_paths_with_profile(&targets, 
profile)?; + + if results.is_empty() { + eprintln!("openprose-lint: no .prose files found"); + return Ok(2); + } + + for result in &results { + if result.diagnostics.is_empty() { + continue; + } + for d in &result.diagnostics { + println!( + "{}:{}:{} {} {} {}", + d.path.display(), d.line, d.column, d.severity, d.code, d.message + ); + } + } + + let counts = count_diagnostics(&results); + println!( + "\n{} file(s), {} error(s), {} warning(s) [profile: {}]", + results.len(), counts.errors, counts.warnings, profile + ); + + Ok(if counts.errors > 0 { 1 } else { 0 }) +} + +fn run_lint_md(args: Vec) -> Result { + let (profile, targets) = match parse_lint_args(args) { + Ok(v) => v, + Err(code) => { + eprintln!("Usage: openprose-lint lint-md [--profile strict|compat] [...]"); + return Ok(code); + } + }; + + let results = lint::lint_paths_with_profile(&targets, profile)?; + + if results.is_empty() { + eprintln!("openprose-lint: no .md program files found"); + return Ok(2); + } + + let mut total_errors = 0usize; + let mut total_warnings = 0usize; + + for result in &results { + if result.diagnostics.is_empty() { + continue; + } + for d in &result.diagnostics { + match d.severity { + openprose_lint::Severity::Error => total_errors += 1, + openprose_lint::Severity::Warning => total_warnings += 1, + } + println!( + "{}:{}:{} {} {} {}", + d.path.display(), d.line, d.column, d.severity, d.code, d.message + ); + } + } + + println!( + "\n{} file(s), {} error(s), {} warning(s) [profile: {}]", + results.len(), total_errors, total_warnings, profile + ); + + Ok(if total_errors > 0 { 1 } else { 0 }) +} + +fn run_discover(args: Vec) -> Result { + let targets: Vec = args.into_iter().map(PathBuf::from).collect(); + + if targets.is_empty() { + eprintln!("Usage: openprose-lint discover [...]"); + return Ok(2); + } + + let discovery = lint::discover_spec_gaps(&targets)?; + println!("{discovery}"); + Ok(0) +} diff --git a/tools/lint/src/profile.rs b/tools/lint/src/profile.rs new file mode 
100644
index 0000000..86ba895
--- /dev/null
+++ b/tools/lint/src/profile.rs
@@ -0,0 +1,31 @@
+use anyhow::{Result, bail};
+use std::fmt::{Display, Formatter};
+use std::str::FromStr;
+
+#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
+pub enum LintProfile {
+    Strict,
+    #[default]
+    Compat,
+}
+
+impl FromStr for LintProfile {
+    type Err = anyhow::Error;
+
+    fn from_str(input: &str) -> Result<Self> {
+        match input {
+            "strict" => Ok(Self::Strict),
+            "compat" => Ok(Self::Compat),
+            _ => bail!("unknown lint profile: {input}"),
+        }
+    }
+}
+
+impl Display for LintProfile {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::Strict => f.write_str("strict"),
+            Self::Compat => f.write_str("compat"),
+        }
+    }
+}
diff --git a/tools/lint/src/wasm.rs b/tools/lint/src/wasm.rs
new file mode 100644
index 0000000..5350a71
--- /dev/null
+++ b/tools/lint/src/wasm.rs
@@ -0,0 +1,30 @@
+use std::path::Path;
+use wasm_bindgen::prelude::*;
+
+use crate::lint::lint_source;
+
+#[wasm_bindgen]
+pub fn lint(filename: &str, source: &str) -> JsValue {
+    let result = lint_source(Path::new(filename), source);
+    let diags: Vec<JsDiagnostic> = result
+        .diagnostics
+        .iter()
+        .map(|d| JsDiagnostic {
+            line: d.line,
+            column: d.column,
+            severity: d.severity.to_string(),
+            code: d.code.to_string(),
+            message: d.message.clone(),
+        })
+        .collect();
+    serde_wasm_bindgen::to_value(&diags).unwrap_or(JsValue::NULL)
+}
+
+#[derive(serde::Serialize)]
+struct JsDiagnostic {
+    line: usize,
+    column: usize,
+    severity: String,
+    code: String,
+    message: String,
+}