diff --git a/.custom_wordlist.txt b/.custom_wordlist.txt index e69de29..e464644 100644 --- a/.custom_wordlist.txt +++ b/.custom_wordlist.txt @@ -0,0 +1,2 @@ +cna +hda diff --git a/.exclude_spelling b/.exclude_spelling new file mode 100644 index 0000000..bd5535a --- /dev/null +++ b/.exclude_spelling @@ -0,0 +1 @@ +pnpm-lock.yaml diff --git a/.github/workflows/linkcheck.yml b/.github/workflows/linkcheck.yml index a577dbf..a44f35d 100644 --- a/.github/workflows/linkcheck.yml +++ b/.github/workflows/linkcheck.yml @@ -30,3 +30,5 @@ jobs: - name: Run link check uses: lycheeverse/lychee-action@8646ba30535128ac92d33dfc9133794bfdd9b411 # v2.8.0 + with: + args: docs/**/*.md --config lychee.toml --root-dir "$(pwd)/docs/" diff --git a/.gitignore b/.gitignore index f399ac6..a8304ed 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,9 @@ shell.nix # added by aggregation docs/projects +docs/**/.media +docs/**/_static +docs/**/assets # Section directories - aggregated content is ignored, index.md is tracked docs/contributing/** @@ -19,5 +22,9 @@ docs/how-to/** docs/reference/** !docs/reference/glossary.md !docs/reference/index.md +!docs/reference/supporting_tools/ +docs/reference/supporting_tools/** +!docs/reference/supporting_tools/index.md +!docs/reference/supporting_tools/docs-ng/ docs/tutorials/** !docs/tutorials/index.md diff --git a/Makefile b/Makefile index b449361..3b6bda3 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: help run dev build preview aggregate aggregate-dry aggregate-repo test-aggregate-local clean clean-projects clean-aggregated-git test test-unit test-integration check spelling linkcheck woke +.PHONY: help run dev build preview aggregate aggregate-repo test-aggregate-local clean clean-projects clean-aggregated-git test test-unit test-integration check spelling linkcheck woke help: @echo "Garden Linux Documentation Hub - Available targets:" @@ -9,8 +9,9 @@ help: @echo " preview - Preview production build locally" @echo "" @echo " Testing:" - 
@echo " test - Run full test suite" - @echo " test-unit - Run unit tests only" + @echo " test - Run full test suite (38 tests: unit + integration)" + @echo " test-unit - Run unit tests only (35 tests)" + @echo " test-integration - Run integration tests only (3 tests)" @echo "" @echo " Quality Checks:" @echo " check - Run all quality checks (spelling, links, inclusive language)" @@ -19,11 +20,11 @@ help: @echo " woke - Check inclusive language with woke" @echo "" @echo " Documentation Aggregation:" - @echo " aggregate-local - Aggregate from local repos using relative paths (../gardenlinux ../builder ../python-gardenlinux-lib)" - @echo " test-aggregate-local - Test aggregation with local repos (recommended first)" - @echo " aggregate - Fetch and aggregate docs from all source repos" - @echo " aggregate-dry - Test aggregation without modifying docs/" - @echo " aggregate-repo - Aggregate specific repo (usage: make aggregate-repo REPO=gardenlinux)" + @echo " aggregate-local - Aggregate from local repos (file:// URLs in repos-config.local.json)" + @echo " aggregate - Aggregate from locked commits (repos-config.json)" + @echo " aggregate-update - Fetch latest from remotes and update commit locks" + @echo " aggregate-repo - Aggregate single repo (usage: make aggregate-repo REPO=gardenlinux)" + @echo " aggregate-update-repo - Update single repo to latest (usage: make aggregate-update-repo REPO=gardenlinux)" @echo "" @echo " Utilities:" @echo " clean - Clean aggregated docs and build artifacts" @@ -33,7 +34,7 @@ install: @echo "Installing dependencies..." pnpm install -run: install +dev: install pnpm run docs:dev build: install clean aggregate @@ -43,13 +44,16 @@ preview: install pnpm run docs:preview # Testing -test: install - @echo "Running full test suite..." - @cd scripts/tests && ./run_all.sh +test: test-unit test-integration + @echo "All tests passed!" -test-unit: install +test-unit: @echo "Running unit tests..." 
- @cd scripts/tests && python3 run_tests.py + python3 -m pytest tests/unit/ -v + +test-integration: + @echo "Running integration tests..." + python3 -m pytest tests/integration/ -v # Quality Checks check: spelling linkcheck woke @@ -68,21 +72,17 @@ woke: install @pnpm run docs:woke # Documentation Aggregation -test-aggregate-local: install - @echo "Testing aggregation with local repositories..." - ./scripts/test-local.sh --dry-run - aggregate-local: install @echo "Aggregating from local repositories (relative paths)..." - CONFIG_FILE=scripts/repos-config.local.json ./scripts/aggregate-docs.sh + python3 src/aggregate.py --config repos-config.local.json aggregate: install - @echo "Aggregating documentation from source repositories..." - ./scripts/aggregate-docs.sh + @echo "Aggregating documentation from locked source repositories..." + python3 src/aggregate.py -aggregate-dry: install - @echo "Dry run: Testing aggregation without modifying docs directory..." - ./scripts/aggregate-docs.sh --dry-run +aggregate-update: install + @echo "Aggregating documentation from latest source repositories..." 
+ python3 src/aggregate.py --update-locks aggregate-repo: install @if [ -z "$(REPO)" ]; then \ @@ -90,8 +90,17 @@ aggregate-repo: install echo "Usage: make aggregate-repo REPO=gardenlinux"; \ exit 1; \ fi - @echo "Aggregating documentation for repository: $(REPO)" - ./scripts/aggregate-docs.sh --repo $(REPO) + @echo "Aggregating documentation for locked repository: $(REPO)" + python3 src/aggregate.py --repo $(REPO) + +aggregate-update-repo: install + @if [ -z "$(REPO)" ]; then \ + echo "Error: REPO variable not set"; \ + echo "Usage: make aggregate-update-repo REPO=gardenlinux"; \ + exit 1; \ + fi + @echo "Updating locks and aggregating documentation for repository: $(REPO)" + python3 src/aggregate.py --update-locks --repo $(REPO) # Utilities clean: @@ -101,7 +110,7 @@ clean: rm -rf docs/projects @# Clean aggregated (untracked) content from section directories, preserving git-tracked files @if [ -d .git ]; then \ - git clean -fd docs/contributing/ docs/explanation/ docs/how-to/ docs/reference/ docs/tutorials/ 2>/dev/null || true; \ + git clean -fdX docs/contributing/ docs/explanation/ docs/how-to/ docs/reference/ docs/tutorials/ 2>/dev/null || true; \ else \ rm -rf docs/contributing docs/explanation docs/how-to docs/reference docs/tutorials; \ fi diff --git a/README.md b/README.md deleted file mode 100644 index 6c41264..0000000 --- a/README.md +++ /dev/null @@ -1,355 +0,0 @@ -# docs-ng - -Build the Garden Linux documentation with aggregated content from multiple repositories. - -## Overview - -This project provides a unified documentation hub for Garden Linux that aggregates content from multiple source repositories (gardenlinux, builder, python-gardenlinux-lib) and presents it in a cohesive VitePress site. - -### Documentation Structure - -The system uses a **dual-path approach** for documentation: - -1. **Targeted Documentation** — Files with `github_target_path` frontmatter are copied to specific locations in the main docs tree (e.g., `docs/tutorials/`, `docs/how-to/`) -2. 
**Project Mirror** — All repository documentation is also mirrored under `docs/projects//` for legacy access and comprehensive coverage - -## Quick Start - -```bash -# Run development server -make run - -# Aggregate documentation from repos -make aggregate - -# Run tests -make test -``` - -## Architecture Overview - -``` -┌─────────────────────────────────────────────────────────────┐ -│ Source Repositories │ -│ (gardenlinux, builder, python-gardenlinux-lib) │ -└─────────────────────┬───────────────────────────────────────┘ - │ - │ 1. Fetch (sparse checkout) - │ scripts/fetch-repo-docs.sh - ▼ - ┌───────────────┐ - │ Temp Storage │ - └───────┬───────┘ - │ - │ 2. Transform - │ scripts/transform_content.py - │ • Targeted doc placement (github_target_path) - │ • Directory restructuring - │ • Link rewriting - │ • Frontmatter YAML fixing - ▼ - ┌───────────────────────────────────┐ - │ docs/ directory │ - │ ├── tutorials/ │ - │ ├── how-to/ │ - │ ├── explanation/ │ - │ ├── reference/ │ - │ ├── contributing/ │ - │ └── projects/ │ - │ ├── gardenlinux/ │ - │ ├── builder/ │ - │ └── python-gardenlinux-lib/ │ - └───────────────────────────────────┘ - │ - │ VitePress builds the site - │ using vitepress-sidebar - ▼ - ┌───────────────────────────┐ - │ VitePress Site │ - │ (Development/Production)│ - └───────────────────────────┘ -``` - -## Documentation Aggregation - -The documentation aggregation system pulls content from multiple Git repositories and transforms it for VitePress. The process consists of two main steps orchestrated by `scripts/aggregate-docs.sh`: - -### Step 1: Fetch Documentation - -**Script:** `scripts/fetch-repo-docs.sh` - -Uses sparse Git checkout to efficiently fetch only the documentation directories from source repositories. This minimizes clone size and speeds up the process. 
- -**Configuration:** `scripts/repos-config.json` - -Each repository is defined with: - -- `name` — Repository identifier -- `url` — Git repository URL -- `branch` — Branch to fetch from -- `docs_path` — Path to documentation within the repo (e.g., `docs`) -- `target_path` — Where to place docs in the aggregated site (e.g., `projects/gardenlinux`) -- `github_org` / `github_repo` — Used for "Edit on GitHub" links -- `structure` — How to transform the directory structure (see below) - -**Structure Types:** - -- `flat` — Copy files as-is without transformation -- `sphinx` — Copy Sphinx documentation structure (RST files) -- `{ "dir1": "newdir1", "dir2": "newdir2" }` — Map source directories to target directories (e.g., `{ "tutorials": "tutorials", "how-to": "how-to" }`) - -### Step 2: Transform Content - -**Script:** `scripts/transform_content.py` - -Performs multiple transformations on the fetched documentation: - -#### 2a. Targeted Documentation Placement - -Files with `github_target_path` frontmatter are copied to their specified locations in the main docs tree. This allows documentation from source repos to be integrated directly into the primary documentation structure. - -Example frontmatter: - -```yaml ---- -title: "Tutorials" -github_target_path: "docs/tutorials/index.md" ---- -``` - -This file would be copied to `docs/tutorials/index.md` in addition to being mirrored in `docs/projects/`. - -#### 2b. Directory Structure Transformation - -Transforms the fetched documentation according to the `structure` configuration: - -- Renames numbered directories (e.g., `00_introduction` → `introduction`) -- Applies custom directory mappings -- Handles special files and media directories - -#### 2c. 
Link Rewriting - -Converts repository-relative links to work in the aggregated site: - -- `[link](../01_developers/build.md)` → `[link](/projects/gardenlinux/developers/build)` -- `[link](./intro.md)` → `[link](/projects/gardenlinux/introduction/intro)` -- Links to files outside `docs/` → GitHub URLs -- Handles `.media/` directories correctly - -#### 2d. Frontmatter YAML Fixing - -- Fixes YAML formatting in existing frontmatter -- Quotes YAML values with special characters (e.g., titles containing `:` or `#`) -- Ensures proper frontmatter structure - -#### 2e. Content Sanitization - -- Escapes angle brackets that aren't HTML tags (e.g., ``) -- Preserves code blocks and inline code -- Handles README.md → index.md conversion - -## Sidebar Menu Construction - -**File:** `docs/.vitepress/sidebar.ts` - -The documentation sidebar uses the `vitepress-sidebar` library with automatic generation: - -```typescript -generateSidebar({ - documentRootPath: "docs", - scanStartPath: "", - resolvePath: "/", - collapsed: true, - useTitleFromFileHeading: true, - useTitleFromFrontmatter: true, - useFolderLinkFromIndexFile: true, - useFolderTitleFromIndexFile: true, - excludePattern: ["projects"], - sortMenusByFrontmatterOrder: true, - frontmatterOrderDefaultValue: 999, -}); -``` - -**Key features:** - -- Automatically scans the `docs/` directory -- Excludes `docs/projects/` (legacy content, will be removed) -- Uses frontmatter `order` field for sorting (lower numbers appear first) -- Falls back to file/folder names for titles if not in frontmatter -- Respects `index.md` files for folder titles and links - -## Frontmatter Fields - -Frontmatter fields control how pages are displayed, organized, and linked. 
Here's a comprehensive reference: - -### Core Fields - -| Field | Purpose | Used By | Example | -| ------------- | --------------------- | ------------------------------- | ---------------------------------- | -| `title` | Page title | VitePress, sidebar, browser tab | `title: "Getting Started"` | -| `description` | Page meta description | SEO, social sharing | `description: "Quick start guide"` | -| `order` | Sidebar sort order | `vitepress-sidebar` | `order: 10` | - -### GitHub Integration - -| Field | Purpose | Used By | Example | -| -------------------- | -------------------------- | -------------------- | --------------------------------------------- | -| `github_org` | GitHub organization | Edit link generation | `github_org: gardenlinux` | -| `github_repo` | Repository name | Edit link generation | `github_repo: gardenlinux` | -| `github_branch` | Branch name | Edit link generation | `github_branch: main` | -| `github_source_path` | Original file path in repo | Edit link generation | `github_source_path: docs/tutorials/index.md` | - -### Aggregation & Targeting - -| Field | Purpose | Used By | Example | -| -------------------- | --------------------------------- | ------------------ | ----------------------------------------------- | -| `github_target_path` | Target location in main docs tree | Aggregation system | `github_target_path: "docs/tutorials/index.md"` | - -**Note:** Files with `github_target_path` or `target` are copied to the specified path in addition to being mirrored under `docs/projects/`. This enables documentation from source repos to appear in the primary documentation structure. 
- -### Migration Tracking - -| Field | Purpose | Used By | Example | -| ----------------------- | ------------------------ | ------------------ | --------------------------------------- | -| `migration_status` | Migration state | Documentation team | `migration_status: "new"` | -| `migration_source` | Original source location | Documentation team | `migration_source: "old-docs/guide.md"` | -| `migration_issue` | Related GitHub issue | Documentation team | `migration_issue: "#123"` | -| `migration_stakeholder` | Responsible person/team | Documentation team | `migration_stakeholder: "@username"` | -| `migration_approved` | Approval status | Documentation team | `migration_approved: false` | - -These fields help track the documentation reorganization effort and are not used by VitePress itself. - -### How Frontmatter Fields Are Used - -#### Sidebar Ordering - -The `order` field controls the position of pages in the sidebar: - -- Lower numbers appear first (e.g., `order: 10` before `order: 20`) -- Default order is `999` (via `frontmatterOrderDefaultValue`) -- Works with `vitepress-sidebar`'s `sortMenusByFrontmatterOrder: true` - -#### "Edit on GitHub" Links - -The VitePress config uses GitHub metadata to generate edit links: - -```typescript -editLink: { - pattern: ({ filePath, frontmatter }) => { - // If page has GitHub metadata from aggregated content - if ( - frontmatter.github_org && - frontmatter.github_repo && - frontmatter.github_source_path - ) { - const branch = frontmatter.github_branch || "main"; - return `https://github.com/${frontmatter.github_org}/${frontmatter.github_repo}/edit/${branch}/${frontmatter.github_source_path}`; - } - // Fallback for native docs-ng pages - return `https://github.com/gardenlinux/docs-ng/edit/main/docs/${filePath}`; - }; -} -``` - -This ensures that users editing aggregated documentation are directed to the correct source repository. 
- -#### Targeted Documentation Placement - -When a file includes `github_target_path` or `target`, the aggregation system copies it to that specific location: - -```yaml ---- -title: "Tutorials" -github_target_path: "docs/tutorials/index.md" ---- -``` - -This file will be placed at `docs/tutorials/index.md` (in addition to `docs/projects//tutorials/index.md`). - -## Testing - -Run the test suite to verify scripts work correctly: - -```bash -make test # Run all tests -make test-unit # Run unit tests only -make test-integration # Run integration tests only -``` - -See `scripts/tests/README.md` for more details. - -## Available Commands - -Run `make help` for all available commands: - -```bash -# Development -make run # Run docs development server -make build # Build documentation for production -make preview # Preview production build locally - -# Testing -make test # Run full test suite -make test-unit # Run unit tests only - -# Documentation Aggregation -make aggregate # Fetch and aggregate docs from all source repos -make aggregate-dry # Test aggregation without modifying docs/ -make aggregate-repo REPO= # Aggregate specific repo only -make test-aggregate-local # Test with local repos (for development) - -# Utilities -make clean # Clean aggregated docs and build artifacts -make clean-projects # Remove only aggregated project docs -make clean-aggregated-git # Remove uncommitted aggregated docs -``` - -## Repository Configuration - -The `scripts/repos-config.json` file defines which repositories to aggregate and how to transform them: - -```json -{ - "repos": [ - { - "name": "gardenlinux", - "url": "https://github.com/gardenlinux/gardenlinux", - "github_org": "gardenlinux", - "github_repo": "gardenlinux", - "docs_path": "docs", - "target_path": "projects/gardenlinux", - "branch": "docs-ng", - "structure": { - "tutorials": "tutorials", - "how-to": "how-to", - "explanation": "explanation", - "reference": "reference", - "contributing": "contributing" - }, - 
"special_files": {}, - "media_directories": [".media"] - } - ] -} -``` - -### Configuration Fields - -- `name` — Repository identifier used in paths -- `url` — Git repository URL for cloning -- `github_org` / `github_repo` — Used for edit links -- `docs_path` — Path to documentation within the repo -- `target_path` — Destination in aggregated site (e.g., `projects/gardenlinux`) -- `branch` — Git branch to fetch -- `structure` — How to transform directory structure: - - Object: Directory mapping (e.g., `{"old-name": "new-name"}`) - - `"flat"`: Copy as-is - - `"sphinx"`: Sphinx documentation structure -- `special_files` — Files to place in specific locations -- `media_directories` — Directories to copy (e.g., `.media`) - -## Additional Resources - -- **VitePress Documentation**: https://vitepress.dev/ -- **vitepress-sidebar Documentation**: https://vitepress-sidebar.cdget.com/ -- **Garden Linux Main Repository**: https://github.com/gardenlinux/gardenlinux diff --git a/README.md b/README.md new file mode 120000 index 0000000..d0c1cc7 --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +docs/reference/supporting_tools/docs-ng/overview/index.md \ No newline at end of file diff --git a/docs/reference/glossary.md b/docs/reference/glossary.md index e540e29..a1a48c3 100644 --- a/docs/reference/glossary.md +++ b/docs/reference/glossary.md @@ -23,11 +23,11 @@ The processor architecture for which a Garden Linux image is built. Supported ar ### AWS -Amazon Web Services. One of the major cloud platforms supported by Garden Linux. Garden Linux provides AWS-specific images through the [`aws`](https://github.com/gardenlinux/gardenlinux/blob/main/features/aws/README.md) platform feature with cloud-init integration and AWS-specific kernel modules. See [AWS platform guide](../how-to/platform-specific/aws.md) and [AWS first boot tutorial](../tutorials/first-boot-aws.md) for usage details. +Amazon Web Services. One of the major cloud platforms supported by Garden Linux. 
Garden Linux provides AWS-specific images through the [`aws`](https://github.com/gardenlinux/gardenlinux/blob/main/features/aws/README.md) platform feature with cloud-init integration and AWS-specific kernel modules. See [AWS platform guide](../how-to/platform-specific/aws.md) and [AWS first boot tutorial](../tutorials/cloud/first-boot-aws.md) for usage details. ### Azure -Microsoft Azure. A major cloud platform supported by Garden Linux through the [`azure`](https://github.com/gardenlinux/gardenlinux/blob/main/features/azure/README.md) platform feature with platform-specific image configurations and optimizations. See [Azure platform guide](../how-to/platform-specific/azure.md) and [Azure first boot tutorial](../tutorials/first-boot-azure.md) for usage details. +Microsoft Azure. A major cloud platform supported by Garden Linux through the [`azure`](https://github.com/gardenlinux/gardenlinux/blob/main/features/azure/README.md) platform feature with platform-specific image configurations and optimizations. See [Azure platform guide](../how-to/platform-specific/azure.md) and [Azure first boot tutorial](../tutorials/cloud/first-boot-azure.md) for usage details. --- @@ -35,7 +35,7 @@ Microsoft Azure. A major cloud platform supported by Garden Linux through the [` ### Bare Metal -A platform target for Garden Linux images designed to run directly on physical hardware without a hypervisor through the [`baremetal`](https://github.com/gardenlinux/gardenlinux/blob/main/features/baremetal/README.md) platform feature. Also referred to as [`metal`](https://github.com/gardenlinux/gardenlinux/blob/main/features/metal/README.md) in build configurations. See [Bare Metal platform guide](../how-to/platform-specific/bare-metal.md) and [Bare Metal first boot tutorial](../tutorials/first-boot-bare-metal.md) for usage details. 
+A platform target for Garden Linux images designed to run directly on physical hardware without a hypervisor through the [`baremetal`](https://github.com/gardenlinux/gardenlinux/blob/main/features/baremetal/README.md) platform feature. Also referred to as [`metal`](https://github.com/gardenlinux/gardenlinux/blob/main/features/metal/README.md) in build configurations. See [Bare Metal platform guide](../how-to/platform-specific/bare-metal.md) and [Bare Metal first boot tutorial](../tutorials/on-premises/first-boot-bare-metal.md) for usage details. ### Builder @@ -127,7 +127,7 @@ A Debian GNU/Linux derivative designed to provide small, auditable Linux images ### GCP -Google Cloud Platform. A major cloud platform supported by Garden Linux through the [`gcp`](https://github.com/gardenlinux/gardenlinux/blob/main/features/gcp/README.md) platform feature with platform-specific configurations. See [GCP platform guide](../how-to/platform-specific/gcp.md) and [GCP first boot tutorial](../tutorials/first-boot-gcp.md) for usage details. +Google Cloud Platform. A major cloud platform supported by Garden Linux through the [`gcp`](https://github.com/gardenlinux/gardenlinux/blob/main/features/gcp/README.md) platform feature with platform-specific configurations. See [GCP platform guide](../how-to/platform-specific/gcp.md) and [GCP first boot tutorial](../tutorials/cloud/first-boot-gcp.md) for usage details. 
### GitHub Actions diff --git a/docs/reference/supporting_tools/docs-ng/explanation/architecture.md b/docs/reference/supporting_tools/docs-ng/explanation/architecture.md new file mode 100644 index 0000000..5a8617b --- /dev/null +++ b/docs/reference/supporting_tools/docs-ng/explanation/architecture.md @@ -0,0 +1,288 @@ +--- +title: "docs-ng Architecture" +description: "Deep dive into how the docs-ng documentation aggregation system works" +github_org: gardenlinux +github_repo: docs-ng +github_source_path: docs/reference/supporting_tools/docs-ng/explanation/architecture.md +--- + +# docs-ng Architecture + +Deep dive into the design and implementation of the docs-ng documentation aggregation system. + +> **Source Repository:** [gardenlinux/docs-ng](https://github.com/gardenlinux/docs-ng) + +## System Overview + +docs-ng is a documentation aggregation pipeline that combines content from multiple source repositories into a unified VitePress documentation site. + +``` +┌─────────────────┐ +│ Source Repos │ +│ - gardenlinux │ +│ - builder │ +│ - python-gl-lib │ +└────────┬────────┘ + │ + ▼ +┌─────────────────┐ +│ Fetch Stage │ +│ Git sparse │ +│ checkout or │ +│ local copy │ +└────────┬────────┘ + │ + ▼ +┌─────────────────┐ +│ Transform Stage │ +│ Rewrite links │ +│ Fix frontmatter │ +└────────┬────────┘ + │ + ▼ +┌─────────────────┐ +│ Structure Stage │ +│ Reorganize dirs │ +│ Copy media │ +└────────┬────────┘ + │ + ▼ +┌─────────────────┐ +│ docs/ output │ +│ VitePress build │ +└─────────────────┘ +``` + +## Core Components + +### 1. 
Fetch Stage (`fetcher.py`) + +**Purpose:** Retrieve documentation from source repositories + +**Mechanisms:** + +- **Git Sparse Checkout:** For remote repositories, uses sparse checkout to fetch only the `docs/` directory, minimizing clone size +- **Local Copy:** For `file://` URLs, performs direct filesystem copy without git operations +- **Commit Resolution:** Records the resolved commit hash for locking + +**Key Features:** + +- Supports both remote (git) and local (file) sources +- Handles root files separately from docs directory +- Provides commit hash for reproducible builds + +### 2. Transform Stage (`transformer.py`) + +**Purpose:** Modify content to work in the aggregated site + +**Transformations:** + +1. **Link Rewriting:** Transform relative links to work across repository boundaries + + - Intra-repo links: Maintained relative to project mirror + - Cross-repo links: Rewritten to absolute paths + - External links: Preserved as-is + +2. **Frontmatter Handling:** Ensure all documents have proper frontmatter + + - Add missing frontmatter blocks + - Quote YAML values safely + - Preserve existing metadata + +3. **Project Link Validation:** Fix broken links to project mirrors + +### 3. Structure Stage (`structure.py`) + +**Purpose:** Organize documentation into the final directory structure + +**Operations:** + +1. **Targeted Documentation:** Copy files with `github_target_path` to specified locations +2. **Directory Mapping:** Transform source directories according to `structure` config +3. **Media Copying:** Discover and copy media directories +4. 
**Markdown Processing:** Apply transformations to all markdown files + +**Structure Types:** + +- **Flat:** Copy all files as-is +- **Sphinx:** Handle Sphinx documentation structure +- **Custom Mapping:** Map source directories to Diataxis categories + +## Key Mechanisms + +### Targeted Documentation + +Files with `github_target_path` frontmatter are copied directly to their specified location: + +```yaml +--- +github_target_path: "docs/how-to/example.md" +--- +``` + +**Flow:** + +1. Scan all markdown files for `github_target_path` +2. Create target directory structure +3. Copy file to exact specified location +4. Apply markdown transformations + +This allows fine-grained control over where content appears in the final site. + +### Project Mirrors + +In addition to targeted docs, the entire `docs/` directory from each repo is mirrored under `docs/projects/<repo-name>/`: + +**Purpose:** + +- Preserve complete repository documentation +- Provide fallback for untargeted content +- Enable browsing of raw source structure + +### Media Directory Handling + +Media directories are automatically discovered and copied: + +**Nested Media:** + +- Location: `tutorials/assets/` +- Copied to: `docs/tutorials/assets/` +- Rationale: Preserve relative paths for tutorial-specific media + +**Root-Level Media:** + +- Location: `_static/`, `.media/` +- Copied to: Common ancestor of all targeted files +- Rationale: Shared media available to all documents + +### Commit Locking + +For reproducible builds, commits can be locked: + +```json +{ + "name": "repo", + "ref": "main", + "commit": "abc123..." +} +``` + +**Benefits:** + +- Reproducible documentation builds +- Stable CI/CD pipelines +- Version control for aggregated docs + +**Update Process:** + +```bash +make aggregate-update +``` + +This fetches the latest from `ref` and updates commit locks. + +## Design Decisions + +### Why Git Sparse Checkout?
+ +- **Efficiency:** Only fetches docs directory, not entire repository +- **Speed:** Faster than full clone, especially for large repos +- **Minimal Disk Usage:** Reduces storage requirements + +### Why Frontmatter-Based Targeting? + +- **Flexibility:** Authors control where their docs appear +- **Decentralization:** No central mapping file to maintain +- **Explicit:** Clear indication in source files of their destination + +### Why Separate Fetch/Transform/Structure? + +- **Modularity:** Each stage has single responsibility +- **Testability:** Easy to test individual stages +- **Extensibility:** New transformations added without affecting fetch/structure + +### Why Project Mirrors? + +- **Completeness:** No documentation is lost +- **Development:** Easier to debug and understand source structure +- **Backwards Compatibility:** Existing links to source repos still work + +## Data Flow + +### Repository → Temporary Directory + +``` +Source Repo Temp Directory +├── docs/ → /tmp/xyz/repo-name/ +│ ├── tutorials/ ├── tutorials/ +│ ├── how-to/ ├── how-to/ +│ └── reference/ └── reference/ +├── README.md → README.md (if in root_files) +└── src/ (not copied) +``` + +### Temporary Directory → Docs Output + +``` +Temp Directory Docs Output +/tmp/xyz/repo-name/ → +├── tutorials/ docs/ +│ └── guide.md ├── tutorials/ +│ (github_target_path) │ └── guide.md (targeted) +├── how-to/ ├── how-to/ +└── reference/ └── projects/repo-name/ + ├── tutorials/ (mirror) + ├── how-to/ (mirror) + └── reference/ (mirror) +``` + +## Performance Characteristics + +### Fetch Stage + +- **Git sparse:** O(docs_size) + network latency +- **Local copy:** O(docs_size) filesystem I/O + +### Transform Stage + +- **Link rewriting:** O(n \* m) where n = files, m = avg file size +- **Frontmatter:** O(n) single pass through files + +### Structure Stage + +- **Targeted copy:** O(n) where n = files with github_target_path +- **Directory mapping:** O(n) where n = total files +- **Media copy:** O(m) where m = 
media files + +### Overall + +- Dominated by git network operations for remote repos +- Filesystem I/O bound for local repos +- Typically completes in seconds for typical documentation repos + +## Error Handling + +### Fetch Failures + +- Invalid git URL → Clear error message with URL +- Network issues → Retry with exponential backoff +- Missing docs_path → Warning, skip repository + +### Transform Failures + +- Invalid frontmatter → Add default frontmatter, log warning +- Broken links → Log warning, preserve original link +- Invalid markdown → Process as best-effort, log error + +### Structure Failures + +- Missing target directory → Create automatically +- Conflicting file paths → Error with clear message +- Media directory not found → Log warning, continue + +## See Also + +- [Technical Reference](/reference/supporting_tools/docs-ng/reference/technical) — Module and API documentation +- [Configuration Reference](/reference/supporting_tools/docs-ng/reference/configuration) — Complete configuration field reference +- [Getting Started](/reference/supporting_tools/docs-ng/tutorials/getting_started) — Setup guide +- [Adding Repositories](/reference/supporting_tools/docs-ng/how-to/adding-repos) — How to add new repos diff --git a/docs/reference/supporting_tools/docs-ng/how-to/adding-repos.md b/docs/reference/supporting_tools/docs-ng/how-to/adding-repos.md new file mode 100644 index 0000000..ac7357a --- /dev/null +++ b/docs/reference/supporting_tools/docs-ng/how-to/adding-repos.md @@ -0,0 +1,256 @@ +--- +title: "How to Add Repositories to docs-ng" +description: "Guide for adding new repositories to the documentation aggregation system" +github_org: gardenlinux +github_repo: docs-ng +github_source_path: docs/reference/supporting_tools/docs-ng/how-to/adding-repos.md +--- + +# How to Add Repositories to docs-ng + +This guide explains how to add a new repository to the docs-ng aggregation system. 
+ +> **Source Repository:** [gardenlinux/docs-ng](https://github.com/gardenlinux/docs-ng) + +## Prerequisites + +- Access to the repository you want to add +- Understanding of the repository's documentation structure +- Edit access to `repos-config.json` + +## Step 1: Add Repository Configuration + +Edit `repos-config.json` and add a new entry to the `repos` array: + +```json +{ + "name": "new-repo", + "url": "https://github.com/gardenlinux/new-repo", + "docs_path": "docs", + "target_path": "projects/new-repo", + "ref": "main", + "structure": "flat" +} +``` + +### Required Fields + +- **`name`**: Unique identifier for the repository +- **`url`**: Git URL or `file://` path for local development +- **`docs_path`**: Path to docs directory within the repository +- **`target_path`**: Where to place mirrored docs (usually `projects/`) +- **`ref`**: Git branch or tag to fetch from + +### Optional Fields + +- **`commit`**: Lock to a specific commit hash for reproducibility +- **`root_files`**: List of root-level files to copy (e.g., `["README.md", "CONTRIBUTING.md"]`) +- **`structure`**: Directory mapping strategy (see below) +- **`media_directories`**: List of media directories to copy (e.g., `[".media", "_static", "assets"]`) + +## Step 2: Choose a Structure Strategy + +### Flat Structure + +Copy all files as-is without reorganization: + +```json +"structure": "flat" +``` + +### Sphinx Structure + +For Sphinx-generated documentation: + +```json +"structure": "sphinx" +``` + +### Custom Mapping + +Map source directories to Diataxis categories: + +```json +"structure": { + "tutorials": "tutorials", + "guides": "how-to", + "concepts": "explanation", + "api": "reference" +} +``` + +## Step 3: Configure Targeted Documentation + +To have files automatically placed into the main Diataxis structure, add `github_target_path` frontmatter to markdown files in the source repository: + +```markdown +--- +title: "Example Guide" +github_target_path: "docs/how-to/example-guide.md" +--- 
+ +# Example Guide + +Content here... +``` + +Files with `github_target_path` will be copied to that exact location, not to `projects/<repo-name>/`. + +## Step 4: Test with Local Configuration + +Create or edit `repos-config.local.json` for local testing: + +```json +{ + "repos": [ + { + "name": "new-repo", + "url": "file://../new-repo", + "docs_path": "docs", + "target_path": "projects/new-repo", + "structure": "flat" + } + ] +} +``` + +Then test aggregation: + +```bash +make aggregate-local +``` + +## Step 5: Verify the Output + +Check that files are in the correct locations: + +```bash +ls -la docs/projects/new-repo/ +``` + +If using `github_target_path`, verify targeted files: + +```bash +ls -la docs/tutorials/ +ls -la docs/how-to/ +``` + +## Step 6: Lock the Commit (Production) + +For production, lock to a specific commit: + +```bash +# This fetches the latest and updates repos-config.json +make aggregate-update-repo REPO=new-repo +``` + +Or manually add the commit hash: + +```json +{ + "name": "new-repo", + "url": "https://github.com/gardenlinux/new-repo", + "docs_path": "docs", + "target_path": "projects/new-repo", + "ref": "main", + "commit": "abc123def456...", + "structure": "flat" +} +``` + +## Advanced Configuration + +### Media Directories + +Automatically copy media directories alongside documentation: + +```json +{ + "name": "new-repo", + "media_directories": [".media", "assets", "_static"] +} +``` + +The system will: + +- Find all instances of these directories recursively +- Copy nested media dirs (e.g., `tutorials/assets/`) to the same relative path +- Copy root-level media dirs (e.g., `_static/`) to the common ancestor of targeted files + +### Root Files + +Copy root-level files (like README.md or CONTRIBUTING.md): + +```json +{ + "name": "new-repo", + "root_files": ["README.md", "CONTRIBUTING.md", "LICENSE"] +} +``` + +These files can also have `github_target_path` frontmatter for targeted placement. 
+ +### Special Files + +Handle non-standard files: + +```json +{ + "name": "new-repo", + "special_files": { + "GUIDE.md": "how-to", + "CONCEPTS.md": "explanation" + } +} +``` + +## Complete Example + +Here's a complete configuration: + +```json +{ + "name": "example-tool", + "url": "https://github.com/gardenlinux/example-tool", + "docs_path": "documentation", + "target_path": "projects/example-tool", + "ref": "docs-ng", + "commit": "1234567890abcdef", + "root_files": ["README.md"], + "structure": { + "getting-started": "tutorials", + "guides": "how-to", + "concepts": "explanation", + "api-reference": "reference" + }, + "media_directories": [".media", "images"], + "special_files": { + "CHANGELOG.md": "reference" + } +} +``` + +## Troubleshooting + +### Files Not Appearing + +- Verify `docs_path` points to the correct directory +- Check that the repository has a `docs-ng` branch or adjust `ref` +- Ensure `github_target_path` frontmatter is correct + +### Media Not Copied + +- Add media directory names to `media_directories` +- Check that media dirs exist in the source repository + +### Links Broken + +- The transformer attempts to rewrite links automatically +- Check that relative links in source docs are correct +- Review `src/aggregation/transformer.py` for link rewriting logic + +## See Also + +- [Configuration Reference](/reference/supporting_tools/docs-ng/reference/configuration) — Complete field documentation +- [Architecture Explanation](/reference/supporting_tools/docs-ng/explanation/architecture) — How aggregation works +- [Technical Reference](/reference/supporting_tools/docs-ng/reference/technical) — Source code documentation diff --git a/docs/reference/supporting_tools/docs-ng/overview/index.md b/docs/reference/supporting_tools/docs-ng/overview/index.md new file mode 100644 index 0000000..1748327 --- /dev/null +++ b/docs/reference/supporting_tools/docs-ng/overview/index.md @@ -0,0 +1,88 @@ +--- +title: "docs-ng Documentation Hub" +description: 
"Documentation aggregation system for Garden Linux - combines docs from multiple repositories into a unified VitePress site" +github_org: gardenlinux +github_repo: docs-ng +github_source_path: docs/reference/supporting_tools/docs-ng/overview/index.md +--- + +# docs-ng: Garden Linux Documentation Hub + +Build unified documentation from multiple Garden Linux repositories. + +> **Source Repository:** [gardenlinux/docs-ng](https://github.com/gardenlinux/docs-ng) + +## Overview + +docs-ng is the documentation aggregation system that powers the unified Garden Linux documentation site. It aggregates content from multiple source repositories (gardenlinux, builder, python-gardenlinux-lib) into a cohesive VitePress site. + +### Key Features + +- **Targeted Documentation**: Files with `github_target_path` frontmatter are automatically placed into the correct Diataxis categories +- **Project Mirroring**: Complete repository documentation mirrored under `docs/projects/<repo-name>/` +- **Commit Locking**: Reproducible builds with locked commit hashes +- **Media Handling**: Automatic discovery and copying of media directories +- **Link Rewriting**: Automatic link transformation for cross-repository references + +### Documentation Paths + +The system supports two complementary documentation paths: + +1. **Targeted Documentation** — Files with `github_target_path` frontmatter → `docs/tutorials/`, `docs/how-to/`, etc. +2. **Project Mirror** — All repo docs mirrored under `docs/projects/<repo-name>/` + +## Quick Start + +```bash +# Aggregate documentation from repos +make aggregate + +# Run development server +make dev + +# Build production site +make build +``` + +## Architecture Overview + +``` +Source Repos → Fetch (git/local) → Transform → docs/ → VitePress +``` + +The aggregation pipeline consists of four main stages: + +1. **Fetch** — `src/aggregation/fetcher.py` pulls docs via git sparse checkout or local copy +2. 
**Transform** — `src/aggregation/transformer.py` rewrites links, fixes frontmatter +3. **Structure** — `src/aggregation/structure.py` reorganizes directories and copies media +4. **Output** — VitePress builds the site + +## Project Structure + +``` +docs-ng/ +├── repos-config.json # Repository configuration +├── repos-config.local.json # Local development config +├── src/ # Source code +│ ├── aggregate.py # CLI entry point +│ └── aggregation/ # Core package +├── tests/ # Test suite +└── docs/ # Generated documentation + ├── projects/ # Mirrored repository docs + ├── tutorials/ # Aggregated tutorials + ├── how-to/ # Aggregated guides + ├── explanation/ # Aggregated explanations + └── reference/ # Aggregated reference docs +``` + +## Further Reading + +- [Getting Started Tutorial](/reference/supporting_tools/docs-ng/tutorials/getting_started) — Step-by-step guide to using docs-ng +- [Adding Repositories](/reference/supporting_tools/docs-ng/how-to/adding-repos) — How to add new repositories to the aggregation +- [Technical Reference](/reference/supporting_tools/docs-ng/reference/technical) — Source code and API documentation +- [Configuration Reference](/reference/supporting_tools/docs-ng/reference/configuration) — Complete configuration options +- [Architecture Explanation](/reference/supporting_tools/docs-ng/explanation/architecture) — Deep dive into how docs-ng works + +## Contributing + +See the [docs-ng repository](https://github.com/gardenlinux/docs-ng) for contribution guidelines. 
diff --git a/docs/reference/supporting_tools/docs-ng/reference/configuration.md b/docs/reference/supporting_tools/docs-ng/reference/configuration.md new file mode 100644 index 0000000..f3b4db2 --- /dev/null +++ b/docs/reference/supporting_tools/docs-ng/reference/configuration.md @@ -0,0 +1,303 @@ +--- +title: "docs-ng Configuration Reference" +description: "Complete reference for repos-config.json and repos-config.local.json configuration options" +github_org: gardenlinux +github_repo: docs-ng +github_source_path: docs/reference/supporting_tools/docs-ng/reference/configuration.md +--- + +# docs-ng Configuration Reference + +Complete reference for configuring the docs-ng aggregation system. + +> **Source Repository:** [gardenlinux/docs-ng](https://github.com/gardenlinux/docs-ng) + +## Configuration Files + +### `repos-config.json` + +Main configuration file for production aggregation. Uses git URLs and commit locks. + +**Location:** Project root + +### `repos-config.local.json` + +Development configuration file for local testing. Uses `file://` URLs to avoid git operations. 
+ +**Location:** Project root + +## Configuration Structure + +```json +{ + "repos": [ + { + "name": "repository-name", + "url": "https://github.com/org/repo", + "docs_path": "docs", + "target_path": "projects/repository-name", + "ref": "main", + "commit": "abc123...", + "root_files": ["README.md"], + "structure": "flat", + "media_directories": [".media", "assets"], + "special_files": { + "GUIDE.md": "how-to" + } + } + ] +} +``` + +## Field Reference + +### Required Fields + +#### `name` +- **Type:** String +- **Description:** Unique identifier for the repository +- **Example:** `"gardenlinux"`, `"builder"`, `"python-gardenlinux-lib"` +- **Notes:** Used in generated paths and logging + +#### `url` +- **Type:** String (URL or file path) +- **Description:** Repository location +- **Examples:** + - Git: `"https://github.com/gardenlinux/gardenlinux"` + - Local: `"file://../gardenlinux"` +- **Notes:** For local development, use `file://` URLs in `repos-config.local.json` + +#### `docs_path` +- **Type:** String +- **Description:** Path to documentation directory within the repository +- **Examples:** `"docs"`, `"documentation"`, `"."` (for root) +- **Notes:** Relative to repository root; content of this directory is copied + +#### `target_path` +- **Type:** String +- **Description:** Destination path in the docs directory +- **Example:** `"projects/gardenlinux"` +- **Notes:** Usually `projects/` for project mirrors + +#### `ref` +- **Type:** String +- **Description:** Git reference to fetch (branch, tag, or commit) +- **Examples:** `"main"`, `"docs-ng"`, `"v1.0.0"` +- **Notes:** Required for git URLs; ignored for `file://` URLs + +### Optional Fields + +#### `commit` +- **Type:** String (commit hash) +- **Description:** Lock to a specific commit for reproducible builds +- **Example:** `"abc123def456..."` +- **Default:** Not used (fetches from `ref`) +- **Notes:** Generated automatically with `make aggregate-update` + +#### `root_files` +- **Type:** Array of strings +- 
**Description:** Root-level files to copy (e.g., README.md, CONTRIBUTING.md) +- **Example:** `["README.md", "CONTRIBUTING.md", "LICENSE"]` +- **Default:** `[]` (no root files copied) +- **Notes:** Files can have `github_target_path` frontmatter for targeted placement + +#### `structure` +- **Type:** String or Object +- **Description:** How to reorganize directory structure +- **Options:** + - `"flat"` — Copy all files as-is + - `"sphinx"` — Sphinx documentation structure + - Object — Custom directory mapping (see below) +- **Default:** `"flat"` + +**Custom Structure Example:** +```json +"structure": { + "tutorials": "tutorials", + "guides": "how-to", + "concepts": "explanation", + "api-reference": "reference" +} +``` + +This maps source directories to Diataxis categories. + +#### `media_directories` +- **Type:** Array of strings +- **Description:** Directory names to treat as media/assets +- **Example:** `[".media", "assets", "_static", "images"]` +- **Default:** `[]` +- **Notes:** + - Searched recursively in source repository + - Nested media dirs (e.g., `tutorials/assets/`) copied to same relative path + - Root-level media dirs (e.g., `_static/`) copied to common ancestor of targeted files + +#### `special_files` +- **Type:** Object (filename → category mapping) +- **Description:** Map non-standard files to Diataxis categories +- **Example:** + ```json + { + "GUIDE.md": "how-to", + "CONCEPTS.md": "explanation", + "CHANGELOG.md": "reference" + } + ``` +- **Default:** `{}` +- **Notes:** Used when files don't follow standard naming conventions + +## Complete Example + +```json +{ + "repos": [ + { + "name": "gardenlinux", + "url": "https://github.com/gardenlinux/gardenlinux", + "docs_path": "docs", + "target_path": "projects/gardenlinux", + "ref": "docs-ng", + "commit": "c4b1d8d7f878fcb3e779315d28e35fcb19ae4dfb", + "root_files": [ + "CONTRIBUTING.md", + "SECURITY.md" + ], + "structure": { + "tutorials": "tutorials", + "how-to": "how-to", + "explanation": 
"explanation", + "reference": "reference", + "contributing": "contributing" + }, + "media_directories": [ + ".media", + "assets", + "_static" + ] + }, + { + "name": "builder", + "url": "https://github.com/gardenlinux/builder", + "docs_path": "docs", + "target_path": "projects/builder", + "ref": "docs-ng", + "commit": "b10476ad8c46130f310e36daa42c6e2c14fb51a9", + "structure": "flat", + "media_directories": [ + ".media", + "assets", + "_static" + ] + }, + { + "name": "python-gardenlinux-lib", + "url": "https://github.com/gardenlinux/python-gardenlinux-lib", + "docs_path": "docs", + "target_path": "projects/python-gardenlinux-lib", + "ref": "docs-ng", + "commit": "9142fccc3d83ab51759db7d328fa19166bc1df63", + "structure": "sphinx", + "media_directories": [ + ".media", + "assets", + "_static" + ] + } + ] +} +``` + +## Environment-Specific Configuration + +### Production (`repos-config.json`) + +- Use HTTPS git URLs +- Include `commit` locks for reproducibility +- Use `docs-ng` or stable branches for `ref` + +### Development (`repos-config.local.json`) + +- Use `file://` URLs for local repos +- Omit `commit` field (not used for local) +- Omit `ref` field (not needed for file://) +- Keep structure and other settings consistent with production + +**Example local config:** +```json +{ + "repos": [ + { + "name": "gardenlinux", + "url": "file://../gardenlinux", + "docs_path": "docs", + "target_path": "projects/gardenlinux", + "root_files": ["CONTRIBUTING.md", "SECURITY.md"], + "structure": { + "tutorials": "tutorials", + "how-to": "how-to", + "explanation": "explanation", + "reference": "reference" + }, + "media_directories": [".media", "assets"] + } + ] +} +``` + +## Common Configuration Patterns + +### Minimal Configuration + +Simplest configuration for a flat repository: + +```json +{ + "name": "my-repo", + "url": "https://github.com/org/my-repo", + "docs_path": "docs", + "target_path": "projects/my-repo", + "ref": "main", + "structure": "flat" +} +``` + +### With Targeted 
Documentation + +Repository using `github_target_path` frontmatter: + +```json +{ + "name": "my-repo", + "url": "https://github.com/org/my-repo", + "docs_path": "docs", + "target_path": "projects/my-repo", + "ref": "main", + "structure": "flat", + "media_directories": ["assets", "_static"] +} +``` + +Then in your markdown files: +```yaml +--- +title: "My Tutorial" +github_target_path: "docs/tutorials/my-tutorial.md" +--- +``` + +## Frontmatter Fields + +When using `github_target_path`, you can include additional metadata: + +- **`github_org`**: Organization name (e.g., `"gardenlinux"`) +- **`github_repo`**: Repository name (e.g., `"docs-ng"`) +- **`github_source_path`**: Original file path in source repo (e.g., `"docs/tutorial.md"`) + +These help create source links in the documentation. + +## See Also + +- [Getting Started](/reference/supporting_tools/docs-ng/tutorials/getting_started) — Initial setup guide +- [Adding Repositories](/reference/supporting_tools/docs-ng/how-to/adding-repos) — How to add new repos +- [Technical Reference](/reference/supporting_tools/docs-ng/reference/technical) — Source code documentation +- [Architecture](/reference/supporting_tools/docs-ng/explanation/architecture) — System design diff --git a/docs/reference/supporting_tools/docs-ng/reference/technical.md b/docs/reference/supporting_tools/docs-ng/reference/technical.md new file mode 100644 index 0000000..ab2caf7 --- /dev/null +++ b/docs/reference/supporting_tools/docs-ng/reference/technical.md @@ -0,0 +1,181 @@ +--- +title: "docs-ng Technical Reference" +description: "Source code documentation for the docs-ng aggregation system - modules, APIs, and implementation details" +github_org: gardenlinux +github_repo: docs-ng +github_source_path: docs/reference/supporting_tools/docs-ng/reference/technical.md +--- + +# docs-ng Technical Reference + +Source code documentation for the docs-ng aggregation system. 
 + +> **Source Repository:** [gardenlinux/docs-ng](https://github.com/gardenlinux/docs-ng) +> **Source File:** [src/README.md](https://github.com/gardenlinux/docs-ng/blob/main/src/README.md) + +## Source Code Structure + +``` +src/ +├── aggregate.py # CLI entry point +├── migration_tracker.py # Standalone utility +└── aggregation/ # Core package + ├── __init__.py + ├── models.py # Data classes + ├── config.py # Config I/O + ├── fetcher.py # Git + local fetch + ├── transformer.py # Content transforms + └── structure.py # Directory transforms +``` + +## Module Reference + +### `aggregation/models.py` + +Data classes for type safety: + +- **`RepoConfig`** — Repository configuration data class +- **`AggregateResult`** — Fetch result with commit hash + +### `aggregation/config.py` + +Configuration file handling: + +- **`load_config()`** — Parse repos-config.json +- **`save_config()`** — Write updated config (commit locks) + +### `aggregation/fetcher.py` + +Repository fetching: + +**`DocsFetcher`** — Main fetcher class + +Methods: + +- **`__init__(project_root, update_locks=False)`** — Initialize with optional commit lock updating +- **`fetch()`** — Fetch repository and return result with commit hash +- **`_fetch_remote()`** — Git sparse checkout from remote repository +- **`_fetch_local()`** — Filesystem copy from local repository +- **`_copy_docs()`** — Static method to copy docs directory +- **`_copy_root_files()`** — Static method to copy root-level files (e.g., CONTRIBUTING.md) + +### `aggregation/transformer.py` + +Content transformation: + +- **`rewrite_links()`** — Fix markdown links for cross-repository references +- **`quote_yaml_value()`** — YAML safety for frontmatter values +- **`ensure_frontmatter()`** — Add or fix frontmatter in markdown files +- **`parse_frontmatter()`** — Extract metadata from markdown frontmatter +- **`fix_broken_project_links()`** — Validate and fix links to project mirrors + +### `aggregation/structure.py` + +Directory operations: + +- 
**`transform_directory_structure()`** — Restructure docs based on config mapping +- **`copy_targeted_docs(source_dir, docs_dir, repo_name, media_dirs=None, root_files=None)`** — Copy files with `github_target_path` frontmatter to specified locations + - Handles nested media dirs (e.g., `tutorials/assets/`) by copying to same relative path + - Handles root-level media dirs (e.g., `_static/`) by copying to common ancestor of targeted files + - Supports scanning root_files for targeted placement +- **`process_markdown_file()`** — Transform single markdown file (links, frontmatter) +- **`process_all_markdown()`** — Batch process all markdown files in directory + +### `aggregate.py` + +CLI orchestration — Combines all modules into the complete aggregation workflow. + +## Usage Example + +Basic programmatic usage: + +```python +from aggregation import load_config, DocsFetcher, process_all_markdown + +# Load configuration +repos = load_config("repos-config.json") + +# Initialize fetcher +fetcher = DocsFetcher(project_root) + +# Fetch documentation +result = fetcher.fetch(repo, output_dir) + +# Transform markdown files +process_all_markdown(target_dir, repo_name) +``` + +## Key Concepts + +### Targeted Documentation + +Files with `github_target_path` in their frontmatter are automatically placed at that exact path: + +```yaml +--- +github_target_path: "docs/tutorials/example.md" +--- +``` + +The `copy_targeted_docs()` function scans all markdown files and copies those with this frontmatter to their specified locations. + +### Link Rewriting + +The `rewrite_links()` function transforms markdown links to work in the aggregated site: + +- Relative links within the same repo are maintained +- Cross-repository links are rewritten to point to the correct locations +- Links to project mirrors are validated + +### Media Handling + +Media directories specified in `media_directories` configuration are: + +1. Discovered recursively in the source repository +2. 
Copied alongside their associated documentation +3. Placed according to whether they're nested (same relative path) or root-level (common ancestor) + +### Commit Locking + +When `update_locks=True` is passed to `DocsFetcher.__init__()`, the system: + +1. Fetches from the `ref` (branch/tag) +2. Records the resolved commit hash +3. Updates `repos-config.json` with the lock + +This ensures reproducible builds. + +## Development + +### Running Tests + +See [Testing Reference](/reference/supporting_tools/docs-ng/reference/testing) for details on the test suite. + +### Adding New Transformation + +To add a new transformation: + +1. Add function to `transformer.py` +2. Call it from `process_markdown_file()` or `process_all_markdown()` +3. Add tests in `tests/unit/test_transformer.py` + +### Adding New Structure Type + +To add a new structure mapping type: + +1. Update `transform_directory_structure()` in `structure.py` +2. Add corresponding structure key handling +3. Update configuration documentation + +## Architecture Decisions + +Key architectural decisions are documented in the source repository: + +- Sparse git checkout for efficiency +- Frontmatter-based targeting for flexibility +- Separate fetch/transform/structure stages for modularity + +## See Also + +- [Testing Reference](/reference/supporting_tools/docs-ng/reference/testing) — Test suite documentation +- [Configuration Reference](/reference/supporting_tools/docs-ng/reference/configuration) — Complete configuration field reference +- [Architecture Explanation](/reference/supporting_tools/docs-ng/explanation/architecture) — How the system works diff --git a/docs/reference/supporting_tools/docs-ng/reference/testing.md b/docs/reference/supporting_tools/docs-ng/reference/testing.md new file mode 100644 index 0000000..b89bd43 --- /dev/null +++ b/docs/reference/supporting_tools/docs-ng/reference/testing.md @@ -0,0 +1,232 @@ +--- +title: "docs-ng Testing Guide" +description: "Test suite documentation for docs-ng - 
unit tests, integration tests, and testing best practices" +github_org: gardenlinux +github_repo: docs-ng +github_source_path: docs/reference/supporting_tools/docs-ng/reference/testing.md +--- + +# docs-ng Testing Guide + +Test suite documentation for the docs-ng aggregation system. + +> **Source Repository:** [gardenlinux/docs-ng](https://github.com/gardenlinux/docs-ng) +> **Source File:** [tests/README.md](https://github.com/gardenlinux/docs-ng/blob/main/tests/README.md) + +## Test Structure + +``` +tests/ +├── conftest.py # pytest configuration +├── fixtures/ # Test data +├── unit/ # Unit tests (pure functions) +│ ├── test_config.py +│ ├── test_models.py +│ └── test_transformer.py +└── integration/ # Integration tests (filesystem) + └── test_aggregation.py +``` + +## Running Tests + +### All Tests + +```bash +make test +``` + +### Unit Tests Only + +```bash +make test-unit +``` + +### Integration Tests Only + +```bash +make test-integration +``` + +### Direct pytest + +For more control, use pytest directly: + +```bash +# Run specific test file +python3 -m pytest tests/unit/test_transformer.py -v + +# Run specific test function +python3 -m pytest tests/unit/test_transformer.py::test_rewrite_links -v + +# Run with coverage +python3 -m pytest tests/ --cov=src/aggregation --cov-report=html +``` + +## Test Types + +### Unit Tests + +Test pure functions with no I/O side effects: + +- **Link rewriting** (`rewrite_links`) — Transform markdown links +- **YAML quoting** (`quote_yaml_value`) — Safely quote YAML values +- **Frontmatter handling** (`ensure_frontmatter`, `parse_frontmatter`) — Parse and manipulate frontmatter +- **Config loading/saving** — Parse and write configuration files +- **Model validation** — Data class validation and serialization + +Unit tests are fast, isolated, and don't touch the filesystem. 
+ +### Integration Tests + +Test filesystem operations and the full aggregation workflow: + +- **Local repository fetching** — Copy docs from local repos +- **Markdown file processing** — Transform files in place +- **Directory transformation** — Restructure directory trees +- **End-to-end aggregation** — Complete workflow testing + +Integration tests are slower and require temporary directories. + +## Test Fixtures + +Located in `tests/fixtures/`, these provide: + +- Sample markdown files +- Example frontmatter configurations +- Mock repository structures +- Configuration file examples + +## Adding Tests + +### Adding a Unit Test + +1. Create or update a test file in `tests/unit/` +2. Use pytest conventions (`test_*` functions, `Test*` classes) +3. Use `assert` statements for validation + +Example: + +```python +def test_rewrite_links(): + """Test that links are properly rewritten.""" + content = "[link](../other/file.md)" + result = rewrite_links(content, "repo-name", "path/to/file.md") + assert "[link](/projects/repo-name/other/file.md)" in result +``` + +### Adding an Integration Test + +1. Create or update a test file in `tests/integration/` +2. Use pytest fixtures for temporary directories +3. 
Clean up resources in teardown + +Example: + +```python +def test_fetch_local(tmp_path): + """Test fetching from local repository.""" + source = tmp_path / "source" + source.mkdir() + (source / "docs").mkdir() + (source / "docs" / "test.md").write_text("# Test") + + fetcher = DocsFetcher(tmp_path) + result = fetcher.fetch(config, tmp_path / "output") + + assert result.success + assert (tmp_path / "output" / "test.md").exists() +``` + +## Test Coverage + +Check test coverage with: + +```bash +python3 -m pytest tests/ --cov=src/aggregation --cov-report=term-missing +``` + +Target coverage levels: + +- **Unit tests**: >90% coverage of pure functions +- **Integration tests**: Key workflows covered +- **Overall**: >80% code coverage + +## Best Practices + +### Do + +- Test one thing per test function +- Use descriptive test names that explain what is being tested +- Use fixtures for common setup +- Keep tests fast and isolated +- Use parametrize for testing multiple inputs +- Assert specific outcomes, not just absence of errors + +### Don't + +- Test implementation details +- Make tests dependent on each other +- Use time-based assertions (use mocks instead) +- Leave temporary files after tests +- Test third-party library behavior + +## Continuous Integration + +Tests run automatically on: + +- Pull requests +- Pushes to main/docs-ng branches +- Scheduled nightly builds + +## Debugging Tests + +### Run with verbose output + +```bash +python3 -m pytest tests/ -vv +``` + +### Stop on first failure + +```bash +python3 -m pytest tests/ -x +``` + +### Run failed tests only + +```bash +python3 -m pytest tests/ --lf +``` + +### Use pdb debugger + +```bash +python3 -m pytest tests/ --pdb +``` + +## Common Issues + +### ImportError + +Ensure you're in the project root and Python can find the `src` directory: + +```bash +export PYTHONPATH="${PYTHONPATH}:$(pwd)" +``` + +### Fixture Not Found + +Check that `conftest.py` is in the correct location and properly defines fixtures. 
+ +### Integration Tests Failing + +Integration tests may fail if: + +- Insufficient disk space +- Permission issues with temp directories +- Git not available in PATH + +## See Also + +- [Technical Reference](/reference/supporting_tools/docs-ng/reference/technical) — Source code documentation +- [Configuration Reference](/reference/supporting_tools/docs-ng/reference/configuration) — Configuration options +- [Architecture Explanation](/reference/supporting_tools/docs-ng/explanation/architecture) — System design diff --git a/docs/reference/supporting_tools/docs-ng/tutorials/getting_started.md b/docs/reference/supporting_tools/docs-ng/tutorials/getting_started.md new file mode 100644 index 0000000..6f79f4e --- /dev/null +++ b/docs/reference/supporting_tools/docs-ng/tutorials/getting_started.md @@ -0,0 +1,152 @@ +--- +title: "Getting Started with docs-ng" +description: "Step-by-step tutorial for setting up and using the docs-ng documentation aggregation system" +github_org: gardenlinux +github_repo: docs-ng +github_source_path: docs/reference/supporting_tools/docs-ng/tutorials/getting_started.md +--- + +# Getting Started with docs-ng + +This tutorial will walk you through setting up and using docs-ng to aggregate documentation from multiple repositories. + +> **Source Repository:** [gardenlinux/docs-ng](https://github.com/gardenlinux/docs-ng) + +## Prerequisites + +- Python 3.x +- pnpm (for VitePress) +- Git + +## Step 1: Clone the Repository + +```bash +git clone https://github.com/gardenlinux/docs-ng.git +cd docs-ng +``` + +## Step 2: Install Dependencies + +```bash +pnpm install +``` + +This installs VitePress and other Node.js dependencies needed to build the documentation site. + +## Step 3: Aggregate Documentation + +### From Remote Repositories (Production) + +Aggregate from locked commits in `repos-config.json`: + +```bash +make aggregate +``` + +This fetches documentation from the configured repositories at their locked commit hashes. 
+ +### From Local Repositories (Development) + +For local development, use `repos-config.local.json` with `file://` URLs: + +```bash +make aggregate-local +``` + +This copies documentation from local repositories without using git. + +## Step 4: Start the Development Server + +```bash +make dev +``` + +The documentation site will be available at `http://localhost:5173`. + +## Step 5: Make Changes + +As you work on documentation in source repositories: + +1. Make changes to markdown files in source repos +2. Run `make aggregate-local` to update the aggregated docs +3. The dev server will hot-reload automatically + +## Step 6: Build for Production + +When ready to deploy: + +```bash +make build +``` + +This creates a production build in `docs/.vitepress/dist/`. + +## Common Workflows + +### Updating a Single Repository + +```bash +make aggregate-repo REPO=gardenlinux +``` + +### Updating Lock Files + +To fetch the latest commits and update `repos-config.json`: + +```bash +make aggregate-update +``` + +### Running Tests + +```bash +make test +``` + +## Project Structure + +After aggregation, your docs directory will look like: + +``` +docs/ +├── projects/ # Mirrored repository docs +│ ├── gardenlinux/ +│ ├── builder/ +│ └── python-gardenlinux-lib/ +├── tutorials/ # Aggregated tutorials +├── how-to/ # Aggregated guides +├── explanation/ # Aggregated explanations +├── reference/ # Aggregated reference +└── contributing/ # Aggregated contributing docs +``` + +## Next Steps + +- Learn how to [add new repositories](/reference/supporting_tools/docs-ng/how-to/adding-repos) +- Understand the [architecture](/reference/supporting_tools/docs-ng/explanation/architecture) +- Review the [configuration reference](/reference/supporting_tools/docs-ng/reference/configuration) + +## Troubleshooting + +### Clean Build + +If you encounter issues, try a clean build: + +```bash +make clean +make aggregate +make dev +``` + +### Check Dependencies + +Ensure all dependencies are installed: + 
+```bash +pnpm install +python3 --version # Should be 3.x +``` + +### Verify Configuration + +Check that `repos-config.json` or `repos-config.local.json` is properly configured. See the [configuration reference](/reference/supporting_tools/docs-ng/reference/configuration) for details. diff --git a/docs/reference/supporting_tools/index.md b/docs/reference/supporting_tools/index.md new file mode 100644 index 0000000..385db0f --- /dev/null +++ b/docs/reference/supporting_tools/index.md @@ -0,0 +1,11 @@ +--- +title: Supporting Tools +description: Documentation of several Garden Linux related Tools +order: 1 +--- + +# Supporting Tools + + + +--- diff --git a/lychee.toml b/lychee.toml index 84ea75d..f0e5da1 100644 --- a/lychee.toml +++ b/lychee.toml @@ -1,13 +1,31 @@ # lychee link checker configuration # https://lychee.cli.rs/ +# Verbosity level: 'error', 'warn', 'info', 'debug', 'trace' +# Setting to 'error' will hide [WARN] messages +verbose = "error" + # Exclude patterns exclude = [ - "^https://github\\.com/.*/edit/", - "^https://github\\.com/.*/new/", - "^mailto:", - "^tel:", - "^javascript:", + "^/", + "^file://", + # Legacy docs that reference non-existent GitHub paths + "^https://github\\.com/gardenlinux/gardenlinux/blob/main/legacy/", + "^https://github\\.com/gardenlinux/gardenlinux/blob/main/tests/README($|#)", + "^https://github\\.com/gardenlinux/gardenlinux/blob/main/features/.*/README\\.md$", + "^https://github\\.com/gardenlinux/gardenlinux/blob/main/features/.*/file\\.include/", + "^https://github\\.com/gardenlinux/gardenlinux/blob/main/assets/", + "^https://github\\.com/gardenlinux/gardenlinux/blob/main/reference/", + "^https://github\\.com/gardenlinux/gardenlinux/blob/main/how-to/", + "^https://github\\.com/gardenlinux/gardenlinux/blob/main/tutorials/", + "^https://github\\.com/gardenlinux/gardenlinux/blob/main/contributing/", + "^https://github\\.com/gardenlinux/gardenlinux/blob/main/explanation/", + 
"^https://github\\.com/gardenlinux/gardenlinux/blob/main/mailto:", + "^https://github\\.com/gardenlinux/builder/blob/main/reference/features$", + "^https://github\\.com/gardenlinux/gardenlinux/tree/main/tests/platformSetup/", + "^https://github\\.com/gardenlinux/python-gardenlinux-lib/blob/main/_static/", + "^https://github\\.com/gardenlinux/docs-ng/blob/main/src/README\\.md$", + "^https://github\\.com/gardenlinux/docs-ng/blob/main/tests/README\\.md$", ] # Accept these status codes as valid diff --git a/package.json b/package.json index 1e4e5f4..904ef5b 100644 --- a/package.json +++ b/package.json @@ -4,8 +4,8 @@ "docs:dev": "vitepress dev docs", "docs:build": "vitepress build docs", "docs:preview": "vitepress preview docs", - "docs:linkcheck": "lychee \"docs/**/*.md\" --config lychee.toml", - "docs:spelling": "codespell --ignore-words .custom_wordlist.txt docs", + "docs:linkcheck": "lychee \"docs/**/*.md\" --config lychee.toml --root-dir \"$(pwd)/docs/\"", + "docs:spelling": "codespell --ignore-words .custom_wordlist.txt docs --exclude-file $(cat .exclude_spelling)", "docs:woke": "woke docs/" }, "dependencies": { diff --git a/scripts/repos-config.json b/repos-config.json similarity index 56% rename from scripts/repos-config.json rename to repos-config.json index 1b5ef3c..da5a508 100644 --- a/scripts/repos-config.json +++ b/repos-config.json @@ -3,15 +3,14 @@ { "name": "gardenlinux", "url": "https://github.com/gardenlinux/gardenlinux", - "github_org": "gardenlinux", - "github_repo": "gardenlinux", "docs_path": "docs", + "target_path": "projects/gardenlinux", + "ref": "docs-ng", + "commit": "421e067d71fd118f58ec5126faedd028e623ad8d", "root_files": [ "CONTRIBUTING.md", "SECURITY.md" ], - "target_path": "projects/gardenlinux", - "branch": "docs-ng", "structure": { "tutorials": "tutorials", "how-to": "how-to", @@ -19,28 +18,38 @@ "reference": "reference", "contributing": "contributing" }, - "special_files": {}, "media_directories": [ - ".media" + ".media", + "assets", 
+ "_static" ] }, { "name": "builder", "url": "https://github.com/gardenlinux/builder", - "github_org": "gardenlinux", - "github_repo": "builder", "docs_path": "docs", - "branch": "docs-ng", - "structure": "flat" + "target_path": "projects/builder", + "ref": "docs-ng", + "commit": "086e74ff032c5f2a05989aef4f20ba69f94bdbf9", + "media_directories": [ + ".media", + "assets", + "_static" + ] }, { "name": "python-gardenlinux-lib", "url": "https://github.com/gardenlinux/python-gardenlinux-lib", - "github_org": "gardenlinux", - "github_repo": "python-gardenlinux-lib", "docs_path": "docs", - "branch": "docs-ng", - "structure": "sphinx" + "target_path": "projects/python-gardenlinux-lib", + "ref": "docs-ng", + "commit": "9142fccc3d83ab51759db7d328fa19166bc1df63", + "structure": "sphinx", + "media_directories": [ + ".media", + "assets", + "_static" + ] } ] } diff --git a/scripts/repos-config.local.json b/repos-config.local.json similarity index 77% rename from scripts/repos-config.local.json rename to repos-config.local.json index 6768ac1..f312178 100644 --- a/scripts/repos-config.local.json +++ b/repos-config.local.json @@ -6,7 +6,6 @@ "docs_path": "docs", "root_files": ["CONTRIBUTING.md", "SECURITY.md"], "target_path": "projects/gardenlinux", - "branch": "docs-ng", "structure": { "tutorials": "tutorials", "how-to": "how-to", @@ -15,23 +14,23 @@ "contributing": "contributing" }, "special_files": {}, - "media_directories": [".media"] + "media_directories": [".media", "assets", "_static"] }, { "name": "builder", "url": "file://../builder", "docs_path": "docs", "target_path": "projects/builder", - "branch": "main", - "structure": "flat" + "structure": "flat", + "media_directories": [".media", "assets", "_static"] }, { "name": "python-gardenlinux-lib", "url": "file://../python-gardenlinux-lib", "docs_path": "docs", "target_path": "projects/python-gardenlinux-lib", - "branch": "main", - "structure": "sphinx" + "structure": "sphinx", + "media_directories": [".media", "assets", 
"_static"] } ] } diff --git a/requirements.txt b/requirements.txt index 7c541de..a6cec20 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ codespell==2.4.2 +pytest diff --git a/scripts/README.md b/scripts/README.md deleted file mode 100644 index 3f04a56..0000000 --- a/scripts/README.md +++ /dev/null @@ -1,295 +0,0 @@ -# Documentation Aggregation Scripts - -This directory contains scripts for aggregating documentation from multiple -Garden Linux repositories into this centralized documentation hub. - -## Overview - -The aggregation system fetches documentation from source repositories, -transforms them to work with VitePress, and integrates them into the docs hub. - -## Architecture - -``` -Source Repos docs-ng (this repo) -┌─────────────┐ ┌──────────────────┐ -│ gardenlinux │ │ │ -│ /docs/ │───┐ │ docs/projects/ │ -└─────────────┘ │ │ ├─ gardenlinux/ │ - │ │ ├─ builder/ │ -┌─────────────┐ │ Fetch & │ └─ python-lib/ │ -│ builder │ ├─ Transform │ │ -│ /docs/ │───┤ ─────────>│ VitePress Site │ -└─────────────┘ │ │ │ - │ └──────────────────┘ -┌─────────────┐ │ -│ python-lib │ │ -│ /docs/ │───┘ -└─────────────┘ -``` - -## Configuration - -### repos-config.json - -Repository configuration with the following parameters: - -- `name`: Repository name -- `url`: Git repository URL -- `docs_path`: Path to docs within the repository -- `target_path`: Where to place docs in this project (relative to docs/) -- `branch`: Git branch to fetch from -- `structure`: Directory structure mapping or copy mode - - Object with mappings like `{"00_introduction": "introduction"}` for - structured repos - - `"flat"` to copy all files as-is - - `"sphinx"` for Sphinx documentation -- `special_files` (optional): Map of files/directories to move to specific - locations - - Example: `{"boot_modes.md": "introduction", "architecture": "introduction"}` - - Files are moved during transformation, useful for organizing root-level - content -- `media_directories` (optional): List of 
directories to copy (including hidden - directories) - - Example: `[".media"]` - - These directories are copied as-is to preserve media assets - -### Example Configuration - -```json -{ - "name": "gardenlinux", - "url": "https://github.com/gardenlinux/gardenlinux", - "docs_path": "docs", - "target_path": "projects/gardenlinux", - "branch": "main", - "structure": { - "00_introduction": "introduction", - "01_developers": "developers", - "02_operators": "operators" - }, - "special_files": { - "boot_modes.md": "introduction", - "architecture": "introduction" - }, - "media_directories": [".media"] -} -``` - -## Scripts - -### `repos-config.json` - -Configuration file defining which repositories to aggregate from. - -It also maps the apparent structure of the docs file into their own sections. -This is an example for the docs located in the main Gardenlinux repository. - -**Structure:** - -```json -{ - "repos": [ - { - "name": "gardenlinux", - "url": "https://github.com/gardenlinux/gardenlinux.git", - "docs_path": "docs", - "target_path": "projects/gardenlinux", - "branch": "main", - "structure": { - "00_introduction": "introduction", - "01_developers": "developers", - "02_operators": "operators" - } - } - ] -} -``` - -### `fetch-repo-docs.sh` - -Fetches documentation from a repository using git sparse checkout. 
- -**Usage:** - -```bash -./fetch-repo-docs.sh -``` - -**Example:** - -```bash -./fetch-repo-docs.sh https://github.com/gardenlinux/gardenlinux.git main docs /tmp/gl-docs -``` - -### `transform_content.py` - -Transforms documentation content to work with VitePress: - -- Renames numbered directories (e.g., `00_introduction` → `introduction`) -- Rewrites internal links to work with new structure -- Adds/fixes frontmatter -- Handles different documentation structures - -**Usage:** - -```bash -./transform_content.py --config repos-config.json \ - --docs-dir ../docs \ - --temp-dir /tmp/fetched-docs -``` - -### `aggregate-docs.sh` - -Main orchestration script that runs the entire aggregation pipeline. - -**Usage:** - -```bash -# Aggregate all repositories -./aggregate-docs.sh - -# Dry run (don't modify docs directory) -./aggregate-docs.sh --dry-run - -# Aggregate specific repository -./aggregate-docs.sh --repo gardenlinux -``` - -## Makefile Targets - -For convenience, use these Makefile targets: - -```bash -# Test aggregation without modifying docs/ -make aggregate-dry - -# Aggregate all repositories -make aggregate - -# Aggregate specific repository -make aggregate-repo REPO=gardenlinux - -# Clean aggregated docs -make clean-projects -``` - -## Workflow - -1. **Fetch**: Use sparse checkout to clone only the `docs/` directory from - source repos -2. **Transform**: - - Restructure directories according to `structure` mapping - - Rewrite internal links to work with new paths - - Add frontmatter to markdown files -3. **Build**: VitePress builds the unified documentation site - -## Testing Locally - -```bash -# 1. Run dry-run to test without modifying docs/ -make aggregate-dry - -# 2. If successful, run actual aggregation -make aggregate - -# 3. Preview the documentation -make run - -# 4. 
Visit http://localhost:5173 to see aggregated docs -``` - -## CI/CD Integration - -The aggregation runs automatically via GitHub Actions: - -- **Schedule**: Daily at 2 AM UTC -- **Manual**: Via workflow dispatch in GitHub UI -- **Webhook**: Can be triggered by source repositories - -See `.github/workflows/aggregate-docs.yml` for details. - -## Adding New Repositories - -1. Add repository configuration to `repos-config.json`: - -```json -{ - "name": "new-repo", - "url": "https://github.com/gardenlinux/new-repo.git", - "docs_path": "docs", - "target_path": "projects/new-repo", - "branch": "main", - "structure": "flat" -} -``` - -> [!IMPORTANT] -> When the `docs/` directory contains subdirectories, mirror this structure in -> the `repos-config.json`. - -```json -{ - "name": "new-repo", - "url": "https://github.com/gardenlinux/new-repo.git", - "docs_path": "docs", - "target_path": "projects/new-repo", - "branch": "main", - "structure": { - "00_introduction": "introduction", - "01_developers": "developers", - "02_operators": "operators" - } -} -``` - -2. Test aggregation: - -```bash -make aggregate-repo REPO=new-repo -``` - -3. Verify in development server: - -```bash -make run -``` - -## Testsuite - -Run the test suite before making changes: - -```bash -cd scripts/tests -./run_all.sh -``` - -See `tests/README.md` for more details. 
- -## Troubleshooting - -### Links are broken after aggregation - -- Check the `rewrite_links()` function in `transform-content.py` -- Verify link patterns in source documentation -- Test with: `make aggregate-dry` - -### Directory structure not matching - -- Review `structure` configuration in `repos-config.json` -- Check `transform_directory_structure()` in `transform-content.py` - -### Fetch failing - -- Verify repository URL and branch in `repos-config.json` -- Check network connectivity -- Ensure sparse checkout is working: `git config core.sparseCheckout true` - -## Notes - -- **Local repos**: If testing with local repos, you can use `file://` URLs in - config -- **Authentication**: For private repos, ensure GitHub token has appropriate - permissions -- **Large docs**: To keep fetching sizes small, the sparse checkout only fetches - `docs/`. Any additional documentation should be comitted here directly. diff --git a/scripts/aggregate-docs.sh b/scripts/aggregate-docs.sh deleted file mode 100755 index 932351a..0000000 --- a/scripts/aggregate-docs.sh +++ /dev/null @@ -1,157 +0,0 @@ -#!/usr/bin/env bash -# Main script to aggregate documentation from multiple repositories -# This orchestrates the entire process: fetch -> transform -> update config - -set -e - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_ROOT="$(dirname "$SCRIPT_DIR")" -DOCS_DIR="$PROJECT_ROOT/docs" -TEMP_DIR=$(mktemp -d) -CONFIG_FILE="${CONFIG_FILE:-$SCRIPT_DIR/repos-config.json}" - -echo "==============================================================" -echo " Garden Linux Documentation Aggregation" -echo "==============================================================" -echo "" - -cleanup() { - echo "Cleaning up temporary files..." 
- rm -rf "$TEMP_DIR" -} - -trap cleanup EXIT -trap cleanup SIGINT -trap cleanup SIGTERM - -DRY_RUN=false -REPO_FILTER="" - -while [[ $# -gt 0 ]]; do - case $1 in - --dry-run) - DRY_RUN=true - shift - ;; - --repo) - REPO_FILTER="$2" - shift 2 - ;; - --help) - echo "Usage: $0 [OPTIONS]" - echo "" - echo "Options:" - echo " --dry-run Fetch and transform but don't update docs directory" - echo " --repo Only process specific repository" - echo " --help Show this help message" - exit 0 - ;; - *) - echo "Unknown option: $1" - exit 1 - ;; - esac -done - -echo "Configuration:" -echo " Config file: $CONFIG_FILE" -echo " Docs directory: $DOCS_DIR" -echo " Temp directory: $TEMP_DIR" -echo " Dry run: $DRY_RUN" -if [ -n "$REPO_FILTER" ]; then - echo " Repository filter: $REPO_FILTER" -fi -echo "" - -if [ ! -f "$CONFIG_FILE" ]; then - echo "Error: Config file not found: $CONFIG_FILE" - exit 1 -fi - -# Check if required scripts exist -FETCH_SCRIPT="$SCRIPT_DIR/fetch-repo-docs.sh" -TRANSFORM_SCRIPT="$SCRIPT_DIR/transform_content.py" - -for script in "$FETCH_SCRIPT" "$TRANSFORM_SCRIPT"; do - if [ ! 
-f "$script" ]; then - echo "Error: Required script not found: $script" - exit 1 - fi -done - -echo "Step 1: Fetching documentation from repositories" -echo "-------------------------------------------------------------" - -repos=$(python3 -c " -import json -with open('$CONFIG_FILE') as f: - config = json.load(f) - for repo in config['repos']: - if '$REPO_FILTER' and repo['name'] != '$REPO_FILTER': - continue - root_files = repo.get('root_files', []) - root_files_str = ' '.join(root_files) if root_files else '' - print(f\"{repo['name']}|{repo['url']}|{repo['branch']}|{repo['docs_path']}|{root_files_str}\") -") - -if [ -z "$repos" ]; then - echo "Error: No repositories to process" - exit 1 -fi - -while IFS='|' read -r name url branch docs_path root_files; do - echo "" - echo "Repository: $name" - - repo_temp_dir="$TEMP_DIR/$name" - mkdir -p "$repo_temp_dir" - - # Fetch docs using sparse checkout - # shellcheck disable=SC2086 - if ! "$FETCH_SCRIPT" "$url" "$branch" "$docs_path" "$repo_temp_dir" $root_files; then - echo "Warning: Failed to fetch docs for $name" - continue - fi -done <<<"$repos" - -echo "" -echo "Fetch complete!" -echo "" - -echo "Step 2: Transforming documentation content" -echo "-------------------------------------------------------------" - -if [ "$DRY_RUN" = true ]; then - echo "Dry run mode: Transforming to temporary location" - TRANSFORM_TARGET="$TEMP_DIR/transformed-docs" - mkdir -p "$TRANSFORM_TARGET" -else - TRANSFORM_TARGET="$DOCS_DIR" -fi - -transform_args="--config $CONFIG_FILE --docs-dir $TRANSFORM_TARGET --temp-dir $TEMP_DIR" - -if [ -n "$REPO_FILTER" ]; then - transform_args="$transform_args --repo $REPO_FILTER" -fi - -# shellcheck disable=SC2086 -if ! python3 "$TRANSFORM_SCRIPT" $transform_args; then - echo "Error: Transformation failed" - exit 1 -fi - -echo "" -echo "==============================================================" -echo " Documentation aggregation complete!" 
-echo "==============================================================" -echo "" - -if [ "$DRY_RUN" = true ]; then - echo "To apply changes, run without --dry-run flag" -else - echo "Next steps:" - echo " 1. Review the changes in docs/projects/" - echo " 2. Run 'make dev' or 'pnpm run docs:dev' to preview" - echo " 3. Commit the changes if satisfied" -fi diff --git a/scripts/fetch-repo-docs.sh b/scripts/fetch-repo-docs.sh deleted file mode 100755 index eb60e69..0000000 --- a/scripts/fetch-repo-docs.sh +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/env bash -# Script to fetch documentation from remote repositories using sparse checkout -# Usage: ./fetch-repo-docs.sh [root_files...] - -set -e - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -REPO_URL="$1" -BRANCH="${2:-main}" -DOCS_PATH="${3:-docs}" -OUTPUT_DIR="$4" -shift 4 -ROOT_FILES=("$@") -TEMP_DIR=$(mktemp -d) - -if [ -z "$REPO_URL" ] || [ -z "$OUTPUT_DIR" ]; then - echo "Usage: $0 [root_files...]" - echo "Example: $0 https://github.com/gardenlinux/gardenlinux.git main docs /tmp/output CONTRIBUTING.md SECURITY.md" - exit 1 -fi - -# Convert relative file:// URLs to absolute paths -if [[ "$REPO_URL" == file://../* ]]; then - RELATIVE_PATH="${REPO_URL#file://}" - ABSOLUTE_PATH="$(cd "$SCRIPT_DIR/.." && cd "$RELATIVE_PATH" && pwd)" - REPO_URL="file://$ABSOLUTE_PATH" -fi - -echo "Fetching docs from: $REPO_URL" -echo " Branch: $BRANCH" -echo " Docs path: $DOCS_PATH" -if [ ${#ROOT_FILES[@]} -gt 0 ]; then - echo " Root files: ${ROOT_FILES[*]}" -fi -echo " Output: $OUTPUT_DIR" - -# Initialize sparse checkout -cd "$TEMP_DIR" -git init -git remote add origin "$REPO_URL" -git config core.sparseCheckout true - -echo "$DOCS_PATH/*" >> .git/info/sparse-checkout - -# Add root files to sparse checkout if specified -for root_file in "${ROOT_FILES[@]}"; do - if [ -n "$root_file" ]; then - echo "$root_file" >> .git/info/sparse-checkout - fi -done - -echo "Cloning (sparse checkout)..." 
-git fetch --depth=1 origin "$BRANCH" -git checkout "$BRANCH" - -if [ -d "$DOCS_PATH" ]; then - echo "Copying docs to $OUTPUT_DIR" - mkdir -p "$OUTPUT_DIR" - cp -r "$DOCS_PATH"/* "$OUTPUT_DIR/" 2>/dev/null || true - # Handle hidden directories for media - shopt -s dotglob - for item in "$DOCS_PATH"/.*; do - if [ -e "$item" ] && [ "$(basename "$item")" != "." ] && [ "$(basename "$item")" != ".." ]; then - cp -r "$item" "$OUTPUT_DIR/" 2>/dev/null || true - fi - done - shopt -u dotglob -else - echo "Warning: $DOCS_PATH directory not found in repository" -fi - -# Copy root files if specified -if [ ${#ROOT_FILES[@]} -gt 0 ]; then - echo "Copying root files to $OUTPUT_DIR" - for root_file in "${ROOT_FILES[@]}"; do - if [ -f "$root_file" ]; then - cp "$root_file" "$OUTPUT_DIR/" - echo " Copied: $root_file" - else - echo " Warning: $root_file not found" - fi - done -fi - -echo "Fetch complete!" - -# Cleanup -cd - > /dev/null -rm -rf "$TEMP_DIR" diff --git a/scripts/test-local.sh b/scripts/test-local.sh deleted file mode 100755 index 36cde2d..0000000 --- a/scripts/test-local.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/env bash -# Convenience script for testing aggregation with local repositories -# This uses repos-config.local.json which points to local file:// paths - -set -e - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" - -echo "Testing aggregation with local repositories..." -echo "" - -# Check if local repos exist -REPOS=( - "/home/$USER/*/gardenlinux/gardenlinux" - "/home/$USER/*/gardenlinux/builder" - "/home/$USER/*/gardenlinux/python-gardenlinux-lib" -) - -for repo in "${REPOS[@]}"; do - if [ ! 
-d "$repo" ]; then - echo "Error: Local repo not found: $repo" - exit 1 - fi -done - -echo "All local repositories found" -echo "" - -# Backup original config -if [ -f "$SCRIPT_DIR/repos-config.json" ]; then - cp "$SCRIPT_DIR/repos-config.json" "$SCRIPT_DIR/repos-config.json.bak" - echo "Backed up repos-config.json to repos-config.json.bak" -fi - -cp "$SCRIPT_DIR/repos-config.local.json" "$SCRIPT_DIR/repos-config.json" -echo "Using local configuration" -echo "" - -"$SCRIPT_DIR/aggregate-docs.sh" "$@" - -# Restore original config -if [ -f "$SCRIPT_DIR/repos-config.json.bak" ]; then - mv "$SCRIPT_DIR/repos-config.json.bak" "$SCRIPT_DIR/repos-config.json" - echo "" - echo "Restored original repos-config.json" -fi diff --git a/scripts/tests/README.md b/scripts/tests/README.md deleted file mode 100644 index eac9f55..0000000 --- a/scripts/tests/README.md +++ /dev/null @@ -1,97 +0,0 @@ -# Test Suite - -Test suite for the documentation aggregation scripts. - -## Running Tests - -### Unit Tests - -Test individual functions in the transformation scripts: - -```bash -cd scripts/tests -python3 run_tests.py -``` - -Run a specific test: - -```bash -python3 run_tests.py test_escape_angle_brackets_in_text -``` - -### Integration Tests - -Test overall script functionality: - -```bash -cd scripts/tests -./test_integration.sh -``` - -## Test Coverage - -### Unit Tests (run_tests.py) - -Tests for `transform-content.py` functions: - -- Angle bracket escaping -- HTML tag preservation -- Code block handling -- Link rewriting -- YAML frontmatter quoting -- Directory name transformations - -### Integration Tests (test_integration.sh) - -- Configuration file existence -- Script executability -- Syntax validation (bash and python) -- Basic script functionality - -## Adding Tests - -### Adding Unit Tests - -Edit `run_tests.py` and add a new test function: - -```python -def test_my_new_feature(runner): - """Test description""" - result = my_function("input") - runner.assert_equal(result, 
"expected", "Error message") -``` - -Then add it to the `test_functions` list in `main()`. - -### Adding Integration Tests - -Edit `test_integration.sh` and add a new test block: - -```bash -echo -n "My new test... " -if my_command; then - echo "OK" && ((TESTS_PASSED++)) -else - echo "FAIL" && ((TESTS_FAILED++)) -fi -``` - -## Test Fixtures - -Sample markdown files for testing are in `fixtures/`: - -- `test_doc.md` - Various markdown features -- `colon_title.md` - YAML frontmatter with colons -- `with_frontmatter.md` - Existing frontmatter - -## Continuous Integration - -Tests should be run in CI before merging. Add to `.github/workflows/`: - -```yaml -- name: Run tests - run: | - cd scripts/tests - python3 run_tests.py - ./test_integration.sh -``` diff --git a/scripts/tests/run_all.sh b/scripts/tests/run_all.sh deleted file mode 100755 index f2870ea..0000000 --- a/scripts/tests/run_all.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/usr/bin/env bash -# Run all tests - -set -e - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" - -echo "Running unit tests..." 
-python3 "$SCRIPT_DIR/run_tests.py" - -echo "" -echo "All tests passed" diff --git a/scripts/tests/run_tests.py b/scripts/tests/run_tests.py deleted file mode 100755 index b28b4ff..0000000 --- a/scripts/tests/run_tests.py +++ /dev/null @@ -1,276 +0,0 @@ -#!/usr/bin/env python3 -""" -Test suite for documentation aggregation scripts - -Run all tests: - python3 run_tests.py - -Run specific test: - python3 run_tests.py test_escape_angle_brackets -""" - -import json -import os -import shutil -import sys -import tempfile -from pathlib import Path - -# Add parent directory to path to import modules -sys.path.insert(0, str(Path(__file__).parent.parent)) - -from transform_content import ( # type: ignore - ensure_frontmatter, - escape_angle_brackets, - escape_text_angle_brackets, - quote_yaml_value, - rewrite_links, -) - - -class TestRunner: - def __init__(self): - self.tests_passed = 0 - self.tests_failed = 0 - self.failures = [] - - def assert_equal(self, actual, expected, message=""): - if actual == expected: - self.tests_passed += 1 - return True - else: - self.tests_failed += 1 - error = ( - f"FAIL: {message}\n Expected: {repr(expected)}\n Got: {repr(actual)}" - ) - self.failures.append(error) - print(error) - return False - - def assert_contains(self, text, substring, message=""): - if substring in text: - self.tests_passed += 1 - return True - else: - self.tests_failed += 1 - error = f"FAIL: {message}\n Expected to find: {repr(substring)}\n In: {repr(text)}" - self.failures.append(error) - print(error) - return False - - def assert_not_contains(self, text, substring, message=""): - if substring not in text: - self.tests_passed += 1 - return True - else: - self.tests_failed += 1 - error = f"FAIL: {message}\n Expected NOT to find: {repr(substring)}\n In: {repr(text)}" - self.failures.append(error) - print(error) - return False - - def run_test(self, test_func): - test_name = test_func.__name__ - print(f"Running {test_name}...", end=" ") - try: - test_func(self) - 
print("OK") - except Exception as e: - self.tests_failed += 1 - error = f"FAIL: {test_name} raised exception: {e}" - self.failures.append(error) - print(error) - - def summary(self): - total = self.tests_passed + self.tests_failed - print(f"\n{'='*60}") - print(f"Tests run: {total}") - print(f"Passed: {self.tests_passed}") - print(f"Failed: {self.tests_failed}") - print(f"{'='*60}") - - if self.tests_failed > 0: - print("\nFailed tests:") - for failure in self.failures: - print(failure) - return 1 - return 0 - - -def test_escape_angle_brackets_in_text(runner): - """Test that angle brackets in plain text are escaped""" - content = "This is that needs escaping." - result = escape_angle_brackets(content) - runner.assert_contains( - result, "<placeholder text>", "Angle brackets in text should be escaped" - ) - - -def test_escape_angle_brackets_with_spaces(runner): - """Test that angle brackets with spaces inside are escaped""" - content = "Multiple should be escaped." - result = escape_angle_brackets(content) - runner.assert_contains( - result, "<words here>", "Angle brackets with spaces should be escaped" - ) - - -def test_preserve_html_tags(runner): - """Test that valid HTML tags are preserved""" - content = "This

is HTML

and should not be escaped." - result = escape_angle_brackets(content) - runner.assert_contains(result, "

", "HTML

tag should be preserved") - runner.assert_contains(result, "

", "HTML

tag should be preserved") - - -def test_preserve_code_blocks(runner): - """Test that code blocks are not escaped""" - content = """```python -x = "" -```""" - result = escape_angle_brackets(content) - runner.assert_contains( - result, "", "Code in triple backticks should not be escaped" - ) - - -def test_preserve_inline_code(runner): - """Test that inline code is not escaped""" - content = "Inline code like `` should not be escaped." - result = escape_angle_brackets(content) - runner.assert_contains(result, "``", "Inline code should not be escaped") - - -def test_rewrite_relative_links(runner): - """Test that relative links are rewritten correctly""" - content = "[Link](./other.md)" - result = rewrite_links(content, "gardenlinux", "introduction/index.md") - runner.assert_contains( - result, - "/projects/gardenlinux/introduction/other", - "Relative link should be rewritten", - ) - - -def test_rewrite_numbered_directory_links(runner): - """Test that numbered directories in links are transformed""" - content = "[Link](../01_developers/guide.md)" - result = rewrite_links(content, "gardenlinux", "introduction/index.md") - runner.assert_contains( - result, "developers/guide", "Numbered directory in link should be transformed" - ) - - -def test_preserve_external_links(runner): - """Test that external links are not modified""" - content = "[External](https://github.com/gardenlinux/gardenlinux)" - result = rewrite_links(content, "gardenlinux", "") - runner.assert_equal(result, content, "External links should not be modified") - - -def test_preserve_anchor_links(runner): - """Test that anchor links are preserved""" - content = "[Anchor](#section)" - result = rewrite_links(content, "gardenlinux", "") - runner.assert_equal(result, content, "Anchor links should not be modified") - - -def test_quote_yaml_value_with_colon(runner): - """Test that YAML values with colons are quoted""" - value = "Getting Started: Creating Images" - result = quote_yaml_value(value) - 
runner.assert_contains(result, '"', "Value with colon should be quoted") - runner.assert_contains( - result, "Getting Started: Creating Images", "Original value should be preserved" - ) - - -def test_quote_yaml_value_without_special_chars(runner): - """Test that simple YAML values are not quoted""" - value = "Simple Title" - result = quote_yaml_value(value) - runner.assert_equal(result, "Simple Title", "Simple value should not be quoted") - - -def test_ensure_frontmatter_no_change_when_missing(runner): - """Test that content without frontmatter is returned unchanged""" - content = "# Test Title\n\nContent here." - result = ensure_frontmatter(content) - runner.assert_equal(result, content, "Content without frontmatter should be unchanged") - - -def test_ensure_frontmatter_preserves_existing(runner): - """Test that existing frontmatter is preserved""" - content = "---\ntitle: Existing\n---\n\nContent" - result = ensure_frontmatter(content) - runner.assert_contains( - result, "title: Existing", "Existing frontmatter should be preserved" - ) - - -def test_ensure_frontmatter_fixes_colons(runner): - """Test that colons in existing frontmatter are quoted""" - content = "---\ntitle: Test: Example\n---\n\nContent" - result = ensure_frontmatter(content) - runner.assert_contains( - result, '"Test: Example"', "Colon in frontmatter should be quoted" - ) - - -def test_escape_text_angle_brackets_preserves_html(runner): - """Test that known HTML tags are preserved""" - text = "
content
" - result = escape_text_angle_brackets(text) - runner.assert_contains(result, "
", "div tag should be preserved") - - -def test_escape_text_angle_brackets_escapes_placeholders(runner): - """Test that placeholder text is escaped""" - text = "" - result = escape_text_angle_brackets(text) - runner.assert_contains( - result, "<placeholder>", "Placeholder should be escaped" - ) - - -def main(): - runner = TestRunner() - - # Get test to run from command line, or run all - if len(sys.argv) > 1: - test_name = sys.argv[1] - test_func = globals().get(test_name) - if test_func and callable(test_func): - runner.run_test(test_func) - else: - print(f"Test '{test_name}' not found") - return 1 - else: - # Run all tests - test_functions = [ - test_escape_angle_brackets_in_text, - test_escape_angle_brackets_with_spaces, - test_preserve_html_tags, - test_preserve_code_blocks, - test_preserve_inline_code, - test_rewrite_relative_links, - test_rewrite_numbered_directory_links, - test_preserve_external_links, - test_preserve_anchor_links, - test_quote_yaml_value_with_colon, - test_quote_yaml_value_without_special_chars, - test_ensure_frontmatter_no_change_when_missing, - test_ensure_frontmatter_preserves_existing, - test_ensure_frontmatter_fixes_colons, - test_escape_text_angle_brackets_preserves_html, - test_escape_text_angle_brackets_escapes_placeholders, - ] - - for test_func in test_functions: - runner.run_test(test_func) - - return runner.summary() - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/scripts/transform_content.py b/scripts/transform_content.py deleted file mode 100755 index 24eec1f..0000000 --- a/scripts/transform_content.py +++ /dev/null @@ -1,750 +0,0 @@ -#!/usr/bin/env python3 -""" -Transform documentation content for VitePress integration -- Renames numbered directories (00_introduction -> introduction) -- Rewrites internal links -- Adds/fixes frontmatter -- Converts RST to Markdown (if needed) -""" - -import argparse -import json -import os -import re -import shutil -from pathlib import Path - - -def load_config(config_path): - 
with open(config_path, "r") as f: - return json.load(f) - - -def transform_directory_structure( - source_dir, target_dir, structure_map, special_files=None, media_dirs=None -): - """ - Transform directory structure based on mapping - e.g., 00_introduction -> introduction - """ - source_path = Path(source_dir) - target_path = Path(target_dir) - target_path.mkdir(parents=True, exist_ok=True) - - special_files = special_files or {} - media_dirs = media_dirs or [] - - if isinstance(structure_map, dict): - # Structured transformation (e.g. with subdirectories specified in JSON file) - for old_name, new_name in structure_map.items(): - old_path = source_path / old_name - new_path = target_path / new_name - - if old_path.exists(): - print(f" Transforming: {old_name} -> {new_name}") - shutil.copytree(old_path, new_path, dirs_exist_ok=True) - - for item in source_path.iterdir(): - if item.name in structure_map: - continue - - if item.name in special_files: - target_subdir = target_path / special_files[item.name] - target_subdir.mkdir(parents=True, exist_ok=True) - if item.is_file(): - print(f" Moving {item.name} to {special_files[item.name]}") - shutil.copy2(item, target_subdir / item.name) - elif item.is_dir(): - print(f" Moving {item.name} to {special_files[item.name]}") - shutil.copytree(item, target_subdir / item.name, dirs_exist_ok=True) - elif item.name in media_dirs: - print(f" Copying media directory: {item.name}") - shutil.copytree(item, target_path / item.name, dirs_exist_ok=True) - elif item.is_file() and not item.name.startswith("_"): - shutil.copy2(item, target_path / item.name) - elif ( - item.is_dir() - and not item.name.startswith("_") - and not item.name.startswith(".") - ): - shutil.copytree(item, target_path / item.name, dirs_exist_ok=True) - elif structure_map == "flat": - # Flat structure. Only copy. 
- print(f" Copying flat structure") - for item in source_path.glob("*"): - if item.is_file(): - shutil.copy2(item, target_path / item.name) - elif item.is_dir(): - shutil.copytree(item, target_path / item.name, dirs_exist_ok=True) - elif structure_map == "sphinx": - # Sphinx structure. Copy and convert later - print(f" Copying Sphinx structure (RST files)") - for item in source_path.glob("*"): - target_item = target_path / item.name - if item.is_file(): - shutil.copy2(item, target_item) - elif item.is_dir(): - shutil.copytree(item, target_item, dirs_exist_ok=True) - else: - # Default: simply copy - shutil.copytree(source_path, target_path, dirs_exist_ok=True) - - -def rewrite_links( - content, - repo_name, - file_rel_path="", - base_path="/projects", - github_base="https://github.com/gardenlinux", -): - """ - Rewrite internal markdown links to work with VitePress structure - - Args: - content: The markdown content - repo_name: Name of the repository (e.g., "gardenlinux") - file_rel_path: Relative path of the file within the repo (e.g., "introduction/index.md") - base_path: Base path for projects (default: "/projects") - github_base: Base URL for GitHub organization (default: "https://github.com/gardenlinux") - - Examples: - [link](../01_developers/build.md) -> [link](/projects/gardenlinux/developers/build) - [link](./intro.md) -> [link](/projects/gardenlinux/introduction/intro) - [link](kernel.md) -> [link](/projects/gardenlinux/introduction/kernel) (when in introduction/) - [link](/SECURITY.md) -> [link](https://github.com/gardenlinux/gardenlinux/blob/main/SECURITY.md) - """ - - file_dir = str(Path(file_rel_path).parent) if file_rel_path else "" - if file_dir == ".": - file_dir = "" - - def replace_link(match): - text = match.group(1) - link = match.group(2) - - if link.startswith("http://") or link.startswith("https://"): - return match.group(0) - - if link.startswith("#"): - return match.group(0) - - # Skip if already a /projects/ link - if 
link.startswith(f"{base_path}/"): - return match.group(0) - - # handle relative paths for .media directory - if ".media/" in link: - media_part = link - while media_part.startswith("../"): - media_part = media_part[3:] - media_part = media_part.replace("./", "") - new_link = f"{base_path}/{repo_name}/{media_part}" - return f"[{text}]({new_link})" - - if link.startswith("../") or link.startswith("./"): - stripped_link = link.replace(".md", "") - - # For ./ links (same directory), use the file's directory - if link.startswith("./"): - stripped_link = stripped_link.replace("./", "") - if file_dir: - new_link = f"{base_path}/{repo_name}/{file_dir}/{stripped_link}" - else: - new_link = f"{base_path}/{repo_name}/{stripped_link}" - else: - # For ../ links, check if they go outside docs/ - # Count how many levels up we go - levels_up = link.count("../") - stripped_link = stripped_link.replace("../", "") - - # Do we go outside docs/ ? - if file_dir: - dir_depth = len(file_dir.split("/")) - if levels_up > dir_depth: - # Link to GitHub - new_link = f"{github_base}/{repo_name}/blob/main/{file_dir}" - return f"[{text}]({new_link})" - - stripped_link = re.sub(r"\d+_(\w+)", r"\1", stripped_link) - new_link = f"{base_path}/{repo_name}/{stripped_link}" - - return f"[{text}]({new_link})" - - # Handle absolute paths from root - if link.startswith("/"): - # If it's already pointing to /projects/, leave it - if link.startswith(f"{base_path}/"): - return match.group(0) - # Otherwise, this is a link to a file outside docs/ - point to GitHub - stripped_link = link.lstrip("/") - new_link = f"{github_base}/{repo_name}/blob/main/{stripped_link}" - return f"[{text}]({new_link})" - - # Handle simple filenames (same directory) - if "/" not in link: - stripped_link = link.replace(".md", "") - # If we know the file's directory, use it - if file_dir: - new_link = f"{base_path}/{repo_name}/{file_dir}/{stripped_link}" - else: - new_link = f"{base_path}/{repo_name}/{stripped_link}" - return 
f"[{text}]({new_link})" - - return match.group(0) - - # Apply transform - content = re.sub(r"\[([^\]]+)\]\(([^)]+)\)", replace_link, content) - - def replace_html_media_link(match): - attr_name = match.group(1) - link = match.group(2) - - if link.startswith(f"{base_path}/"): - return match.group(0) - if ".media/" in link: - media_part = link - while media_part.startswith("../"): - media_part = media_part[3:] - media_part = media_part.replace("./", "") - new_link = f"{base_path}/{repo_name}/{media_part}" - return f'{attr_name}="{new_link}"' - return match.group(0) - - content = re.sub( - r'(src|srcset)="([^"]*\.media/[^"]*)"', replace_html_media_link, content - ) - - return content - - -# def escape_angle_brackets(content): -# """ -# Escape angle brackets that are not part of HTML tags. -# -# This is needed for content like "" which should be -# displayed as text, not parsed as an HTML tag. -# -# Skip escaping inside: -# - Code blocks (``` or indented) -# - Inline code (``) -# """ -# # Split content by code blocks and inline code to process only text parts -# lines = content.split("\n") -# result_lines = [] -# in_code_block = False -# -# for line in lines: -# if line.strip().startswith("```"): -# in_code_block = not in_code_block -# result_lines.append(line) -# continue -# -# if in_code_block: -# result_lines.append(line) -# continue -# -# if line.startswith(" ") or line.startswith("\t"): -# result_lines.append(line) -# continue -# -# parts = [] -# in_inline_code = False -# current = "" -# i = 0 -# -# while i < len(line): -# if line[i] == "`": -# if current: -# if in_inline_code: -# parts.append(current) -# else: -# parts.append(escape_text_angle_brackets(current)) -# current = "" -# parts.append("`") -# in_inline_code = not in_inline_code -# i += 1 -# else: -# current += line[i] -# i += 1 -# -# if current: -# if in_inline_code: -# parts.append(current) -# else: -# parts.append(escape_text_angle_brackets(current)) -# -# result_lines.append("".join(parts)) -# -# 
return "\n".join(result_lines) -# -# -# def escape_text_angle_brackets(text): -# """ -# Escape angle brackets in plain text (not in code). -# Only escape if they look like placeholders, not HTML tags. -# """ -# import re -# -# known_html_tags = { -# "a", -# "b", -# "i", -# "u", -# "p", -# "div", -# "span", -# "br", -# "hr", -# "img", -# "picture", -# "source", -# "table", -# "tr", -# "td", -# "th", -# "ul", -# "ol", -# "li", -# "h1", -# "h2", -# "h3", -# "h4", -# "h5", -# "h6", -# "code", -# "pre", -# "blockquote", -# "em", -# "strong", -# "del", -# "ins", -# "sub", -# "sup", -# "html", -# "head", -# "body", -# "title", -# "link", -# "meta", -# "script", -# "style", -# "nav", -# "header", -# "footer", -# "section", -# "article", -# "aside", -# "main", -# "figure", -# "figcaption", -# "details", -# "summary", -# "video", -# "audio", -# "iframe", -# "canvas", -# "svg", -# "path", -# "form", -# "input", -# "button", -# "select", -# "option", -# "textarea", -# "label", -# } -# -# def replace_bracket(match): -# content = match.group(1) -# -# tag_content = content.strip() -# if tag_content.startswith("/"): -# tag_content = tag_content[1:] -# -# tag_name = ( -# tag_content.split()[0].lower() -# if " " in tag_content -# else tag_content.lower() -# ) -# -# if tag_name in known_html_tags: -# return f"<{content}>" -# -# return f"<{content}>" -# -# text = re.sub(r"<([^>]+)>", replace_bracket, text) -# -# return text - - -def ensure_frontmatter(content): - """ - Ensure frontmatter exists and fix YAML formatting. - Only fixes existing frontmatter - does not inject new fields. 
- - Args: - content: The markdown content - """ - # Check if frontmatter already exists - if content.startswith("---\n"): - try: - end_match = re.search(r"\n---\n", content[4:]) - if end_match: - frontmatter_content = content[4 : 4 + end_match.start()] - rest_content = content[4 + end_match.end() :] - - # Parse and fix the frontmatter - fixed_frontmatter = fix_yaml_frontmatter(frontmatter_content) - - return f"---\n{fixed_frontmatter}\n---\n\n{rest_content}" - except Exception: - print(f"[Warning] Frontmatter: Couldn't parse existing frontmatter!") - pass - - return content - - -def quote_yaml_value(value): - """Quote YAML value if needed, handling already-quoted values.""" - # If value is already properly quoted, return as-is - if value.startswith('"') and value.endswith('"'): - # Check if it's properly quoted (not escaped quotes) - if not value.startswith('"\\"'): - return value - - if value.startswith("'") and value.endswith("'"): - return value - - special_chars = [ - ":", - "#", - "@", - "`", - "|", - ">", - "*", - "&", - "!", - "%", - "[", - "]", - "{", - "}", - ",", - "?", - ] - - needs_quoting = any(char in value for char in special_chars) - - if value and (value[0] in ['"', "'", " "] or value[-1] in [" "]): - needs_quoting = True - - if needs_quoting: - # Don't escape quotes that are already inside the value - # Just wrap in quotes - if '"' not in value: - return f'"{value}"' - elif "'" not in value: - return f"'{value}'" - else: - # If both quote types exist, escape double quotes and use them - escaped_value = value.replace('"', '\\"') - return f'"{escaped_value}"' - - return value - - -def fix_yaml_frontmatter(frontmatter_text): - lines = frontmatter_text.split("\n") - fixed_lines = [] - - for line in lines: - if not line.strip(): - fixed_lines.append(line) - continue - - # Check if line contains a key-value pair - if ":" in line: - parts = line.split(":", 1) - if len(parts) == 2: - key = parts[0].strip() - value = parts[1].strip() - - quoted_value = 
quote_yaml_value(value) - fixed_lines.append(f"{key}: {quoted_value}") - continue - - fixed_lines.append(line) - - return "\n".join(fixed_lines) - - -def fix_broken_project_links( - content, - repo_name, - target_dir, - base_path="/projects", - github_base="https://github.com/gardenlinux", -): - """ - In case a link in /projects/ points to a file that doesn't exist, - replace it with a GitHub link. - """ - target_path = Path(target_dir) - - def check_and_fix_link(match): - text = match.group(1) - link = match.group(2) - - # Only process /projects/{repo}/ links - if not link.startswith(f"{base_path}/{repo_name}/"): - return match.group(0) - - # Extract the path after /projects/{repo}/ - rel_path = link[len(f"{base_path}/{repo_name}/") :] - - potential_file = target_path / f"{rel_path}.md" - potential_index = target_path / rel_path / "index.md" - potential_dir = target_path / rel_path - - # If file exists, or directory exists with index.md, keep the link - if ( - potential_file.exists() - or potential_index.exists() - or (potential_dir.exists() and potential_dir.is_dir() and (potential_dir / "index.md").exists()) - ): - return match.group(0) - - github_link = f"{github_base}/{repo_name}/blob/main/{rel_path}" - return f"[{text}]({github_link})" - - content = re.sub(r"\[([^\]]+)\]\(([^)]+)\)", check_and_fix_link, content) - - return content - - -def process_markdown_file(file_path, repo_name, target_dir, base_path="/projects"): - """ - Process a single markdown file: - - Escape angle brackets - - Rewrite links - - Fix broken project links - - Fix frontmatter YAML formatting - - Args: - file_path: Path to the markdown file - repo_name: Name of the repository - target_dir: Target directory where files are being processed - base_path: Base path for projects (default: "/projects") - """ - try: - with open(file_path, "r", encoding="utf-8") as f: - content = f.read() - - # Calculate relative path from target_dir - file_path_obj = Path(file_path) - target_path_obj = 
Path(target_dir) - try: - file_rel_path = str(file_path_obj.relative_to(target_path_obj)) - except ValueError: - file_rel_path = "" - - # content = escape_angle_brackets(content) - content = rewrite_links(content, repo_name, file_rel_path, base_path) - content = fix_broken_project_links(content, repo_name, target_dir, base_path) - content = ensure_frontmatter(content) - - with open(file_path, "w", encoding="utf-8") as f: - f.write(content) - - return True - except Exception as e: - print(f" [Warning] Error processing {file_path}: {e}") - return False - - -def process_all_markdown(target_dir, repo_name): - """ - Process all markdown files in target directory - - Args: - target_dir: Target directory containing markdown files - repo_name: Name of the repository - """ - target_path = Path(target_dir) - - # Rename all README.md to index.md for VitePress - readme_files = list(target_path.rglob("README.md")) - for readme in readme_files: - index_file = readme.parent / "index.md" - if not index_file.exists(): - readme.rename(index_file) - print(f" Renamed {readme.relative_to(target_path)} to index.md") - - md_files = list(target_path.rglob("*.md")) - - print(f" Processing {len(md_files)} markdown files...") - - success_count = 0 - for md_file in md_files: - if process_markdown_file(md_file, repo_name, target_dir): - success_count += 1 - - print(f" [Success] Processed {success_count}/{len(md_files)} files successfully") - - -def parse_frontmatter(content): - """ - Parse YAML frontmatter from markdown content. - Returns (frontmatter_dict, content_without_frontmatter) or (None, original_content) - - Uses simple key: value parsing (no external YAML library required). - Handles the subset of YAML used in frontmatter: simple string key-value pairs. 
- """ - if not content.startswith("---\n"): - return None, content - - try: - end_match = re.search(r"\n---\n", content[4:]) - if not end_match: - return None, content - - frontmatter_text = content[4 : 4 + end_match.start()] - rest_content = content[4 + end_match.end() :] - - frontmatter_dict = {} - for line in frontmatter_text.split("\n"): - line = line.strip() - if not line: - continue - if ":" in line: - key, value = line.split(":", 1) - key = key.strip() - value = value.strip().strip("\"'") - frontmatter_dict[key] = value - - return frontmatter_dict, rest_content - except Exception as e: - print(f" [Warning] Failed to parse frontmatter: {e}") - return None, content - - -def copy_targeted_docs(source_dir, docs_dir, repo_name): - """ - Copy markdown files with 'github_target_path:' frontmatter to their specified locations. - - Args: - source_dir: Source directory containing fetched docs (e.g., /tmp/xxx/gardenlinux) - docs_dir: Target docs directory (e.g., /path/to/docs-ng/docs) - repo_name: Name of the repository for logging - """ - source_path = Path(source_dir) - docs_path = Path(docs_dir) - - if not source_path.exists(): - print(f" [Warning] Source directory not found: {source_dir}") - return - - # Find all markdown files - md_files = list(source_path.rglob("*.md")) - targeted_files = [] - - print(f" Scanning {len(md_files)} files for 'github_target_path:' frontmatter...") - - for md_file in md_files: - try: - with open(md_file, "r", encoding="utf-8") as f: - content = f.read() - - frontmatter, _ = parse_frontmatter(content) - - # Check for 'github_target_path' in frontmatter - if frontmatter and ("github_target_path" in frontmatter): - target_path = frontmatter.get("github_target_path") or frontmatter.get("target") - - # Strip leading 'docs/' if present - if target_path.startswith("docs/"): - target_path = target_path[5:] - - target_file = docs_path / target_path - - # Create parent directories if needed - target_file.parent.mkdir(parents=True, 
exist_ok=True) - - # Copy the file - shutil.copy2(md_file, target_file) - - # Apply markdown processing (but not project-specific link rewriting) - # These files live in main docs tree, not under /projects/ - # content = escape_angle_brackets(content) - content = ensure_frontmatter(content) - - with open(target_file, "w", encoding="utf-8") as f: - f.write(content) - - targeted_files.append((md_file.relative_to(source_path), target_path)) - print(f" ✓ Copied: {md_file.name} → {target_path}") - - except Exception as e: - print(f" [Warning] Error processing {md_file.name}: {e}") - - if targeted_files: - print(f" [Success] Copied {len(targeted_files)} targeted file(s)") - else: - print(f" No files with 'github_target_path:' frontmatter found") - - -def transform_repo_docs(repo_config, docs_dir, temp_dir): - """ - Transform documentation for a single repository - """ - repo_name = repo_config["name"] - print(f"\nTransforming docs for: {repo_name}") - - source_dir = os.path.join(temp_dir, repo_name) - target_dir = os.path.join(docs_dir, repo_config["target_path"]) - - structure = repo_config.get("structure", "flat") - special_files = repo_config.get("special_files", {}) - media_dirs = repo_config.get("media_directories", []) - - # First, copy files with 'target:' frontmatter to their specified locations - print(f"\n Step 2a: Processing targeted files...") - copy_targeted_docs(source_dir, docs_dir, repo_name) - - # Then, do the standard structure transformation to projects/ directory - print(f"\n Step 2b: Transforming project structure...") - transform_directory_structure( - source_dir, target_dir, structure, special_files, media_dirs - ) - process_all_markdown(target_dir, repo_name) - - print(f"[Complete] Transformation complete for {repo_name}") - - -def main(): - parser = argparse.ArgumentParser(description="Transform documentation content") - parser.add_argument("--config", required=True, help="Path to repos-config.json") - parser.add_argument("--docs-dir", 
required=True, help="Path to docs directory") - parser.add_argument( - "--temp-dir", - required=True, - help="Path to temporary directory with fetched docs", - ) - parser.add_argument("--repo", help="Only transform specific repo (optional)") - - args = parser.parse_args() - - config = load_config(args.config) - - for repo in config["repos"]: - if args.repo and repo["name"] != args.repo: - continue - - transform_repo_docs(repo, args.docs_dir, args.temp_dir) - - print("\n[Complete] All transformations complete!") - - -if __name__ == "__main__": - main() diff --git a/scripts/update_config.py.backup.202603250900 b/scripts/update_config.py.backup.202603250900 deleted file mode 100755 index c148ef9..0000000 --- a/scripts/update_config.py.backup.202603250900 +++ /dev/null @@ -1,694 +0,0 @@ -#!/usr/bin/env python3 -""" -Update VitePress configuration with dynamically generated sidebars -for aggregated documentation from multiple repositories defined in repos-config.json. -""" - -import argparse -import json -import re -from pathlib import Path - - -def load_config(config_path): - with open(config_path, "r") as f: - return json.load(f) - - -def get_section_priority(section, priority_map): - section_name = section.get("text", "").lower() - for key, priority in priority_map.items(): - if key in section_name: - return priority - return 999 - - -def get_directory_structure(path, docs_dir=None): - """ - Scan directory and build sidebar structure - Returns list of sidebar items - """ - items = [] - path = Path(path) - - if not path.exists(): - return items - - # If docs_dir not provided, use path.parent for backward compatibility - if docs_dir is None: - docs_dir = path.parent - - # Get all markdown files and directories - # Sort with index or README files first, then alphabetically - def sort_key(entry): - if entry.name.lower() in ["index.md", "readme.md"]: - return (0, entry.name) - else: - return (1, entry.name) - - entries = sorted(path.iterdir(), key=sort_key) - - # Track 
added index files for project - index_added = False - - for entry in entries: - if entry.name.startswith(".") or entry.name.startswith("_"): - continue - - if entry.is_file() and entry.suffix == ".md": - title = get_title_from_file(entry) - if entry.name == "README.md" or entry.name == "index.md": - # Add index files (prefer index.md over README.md) - if not index_added: - link = "/" + str(entry.parent.relative_to(docs_dir)) - if not link.endswith("/"): - link += "/" - items.append( - { - "text": title - or entry.parent.name.replace("-", " ") - .replace("_", " ") - .title(), - "link": link, - } - ) - index_added = True - else: - link = "/" + str(entry.relative_to(docs_dir)).replace(".md", "") - items.append( - { - "text": title - or entry.stem.replace("-", " ").replace("_", " ").title(), - "link": link, - } - ) - - elif entry.is_dir(): - sub_items = get_directory_structure(entry, docs_dir) - - if sub_items: - dir_item = { - "text": entry.name.replace("-", " ").replace("_", " ").title(), - "collapsed": True, - "items": sub_items, - } - items.append(dir_item) - - return items - - -def get_title_from_file(file_path): - """ - Extract title from markdown file (frontmatter or first heading) - """ - try: - with open(file_path, "r", encoding="utf-8") as f: - content = f.read() - - frontmatter_match = re.search( - r"^---\s*\ntitle:\s*(.+?)\s*\n", content, re.MULTILINE - ) - if frontmatter_match: - return frontmatter_match.group(1).strip() - - heading_match = re.search(r"^#\s+(.+)$", content, re.MULTILINE) - if heading_match: - return heading_match.group(1).strip() - except Exception: - pass - - return None - - -def find_important_guides(repo_docs_path, docs_dir): - """ - Find important guides like installation, quickstart, getting started, etc. 
- Returns a dict with guide type as important and link as value - """ - important_guides = {} - - guide_keywords = { - "getting_started": [ - "getting_started", - "getting-started", - "gettingstarted", - "get_started", - "get-started", - ], - "quickstart": ["quickstart", "quick_start", "quick-start"], - "installation": ["installation", "installing", "setup"], - } - - for md_file in repo_docs_path.rglob("*.md"): - if md_file.name.startswith(".") or md_file.name.startswith("_"): - continue - - filename = md_file.stem.lower() - - for guide_type, keywords in guide_keywords.items(): - if guide_type not in important_guides: - for keyword in keywords: - if keyword in filename: - rel_path = md_file.relative_to(Path(docs_dir)) - link = "/" + str(rel_path).replace(".md", "") - - title = get_title_from_file(md_file) - if not title: - title = ( - md_file.stem.replace("-", " ").replace("_", " ").title() - ) - - important_guides[guide_type] = {"link": link, "title": title} - break - - return important_guides - - -def create_missing_index_files(docs_dir, repos): - """ - Create index.md files for directories that don't have them. - This prevents dead links when linking to directory paths. - Also fixes links in existing markdown files to add trailing slashes. 
- """ - created_files = [] - directories_with_new_indexes = set() - - for repo in repos: - target_path = repo["target_path"] - repo_docs_path = Path(docs_dir) / target_path - - if not repo_docs_path.exists(): - continue - - for dirpath in repo_docs_path.rglob("*"): - if not dirpath.is_dir(): - continue - - if dirpath.name.startswith(".") or dirpath.name.startswith("_"): - continue - - has_index = (dirpath / "index.md").exists() or ( - dirpath / "README.md" - ).exists() - - if not has_index: - md_files = sorted([f for f in dirpath.glob("*.md") if f.is_file()]) - - if md_files: - index_path = dirpath / "index.md" - - dir_name = dirpath.name.replace("-", " ").replace("_", " ").title() - - content = f"# {dir_name}\n\n" - content += f"This section contains the following guides:\n\n" - - for md_file in md_files: - title = get_title_from_file(md_file) - if not title: - title = ( - md_file.stem.replace("-", " ").replace("_", " ").title() - ) - - link = md_file.stem - content += f"- [{title}](./{link})\n" - - with open(index_path, "w", encoding="utf-8") as f: - f.write(content) - - created_files.append(str(index_path.relative_to(docs_dir))) - dir_path_str = "/" + str(dirpath.relative_to(docs_dir)) - directories_with_new_indexes.add(dir_path_str) - print( - f" [Success] Created index for: {dirpath.relative_to(docs_dir)}" - ) - - if directories_with_new_indexes: - print("\n Fixing links to newly indexed directories...") - for repo in repos: - target_path = repo["target_path"] - repo_docs_path = Path(docs_dir) / target_path - - if not repo_docs_path.exists(): - continue - - for md_file in repo_docs_path.rglob("*.md"): - if not md_file.is_file(): - continue - - try: - with open(md_file, "r", encoding="utf-8") as f: - content = f.read() - - modified = False - for dir_path in directories_with_new_indexes: - # Look for links to this directory without trailing slash - # Pattern: ](/path/to/dir) or ](/path/to/dir "title") - import re - - pattern = 
re.compile(f"\\]\\({re.escape(dir_path)}(\\)|\\s)") - if pattern.search(content): - content = pattern.sub(f"]({dir_path}/\\1", content) - modified = True - - if modified: - with open(md_file, "w", encoding="utf-8") as f: - f.write(content) - print( - f" [Success] Fixed links in: {md_file.relative_to(docs_dir)}" - ) - - except Exception as e: - print(f" [Warning] Could not process {md_file}: {e}") - - return created_files - - -def generate_sidebar_config(repo_config, docs_dir, section_priorities): - """ - Generate sidebar configuration for a repository - """ - repo_name = repo_config["name"] - target_path = repo_config["target_path"] - - repo_docs_path = Path(docs_dir) / target_path - - if not repo_docs_path.exists(): - print(f" [Warning] Docs path not found for {repo_name}: {repo_docs_path}") - return None - - print(f" Generating sidebar for: {repo_name}") - - items = get_directory_structure(repo_docs_path, Path(docs_dir)) - - if not items: - print(f" [Warning] No items found for {repo_name}") - return None - - items = sorted(items, key=lambda s: get_section_priority(s, section_priorities)) - - sidebar_path = f"/projects/{repo_name}/" - - key_guides = find_important_guides(repo_docs_path, docs_dir) - print(f" Found key guides: {list(key_guides.keys())}") - - has_overview = (repo_docs_path / "index.md").exists() or ( - repo_docs_path / "README.md" - ).exists() - print(f" Has overview page: {has_overview}") - - return { - "path": sidebar_path, - "items": items, - "key_guides": key_guides, - "has_overview": has_overview, - } - - -def generate_nav_items(repos, sidebars): - """ - Generate navigation dropdown items for projects - For nav, we use simple links (not nested) since VitePress nav only supports 2 levels - """ - nav_items = [] - - # Create a map of repo name to sidebar for quick lookup - sidebar_map = {s["path"].strip("/").split("/")[-1]: s for s in sidebars if s} - - for repo in repos: - repo_name = repo["name"] - # Use display name if configured, otherwise use 
repo name - display_name = repo.get("display_name", repo_name.replace("-", " ").title()) - - sidebar = sidebar_map.get(repo_name) - - link = None - - if sidebar: - # Try to use one of the important guides as first link ("getting started", "quickstart", etc.) - important_guides = sidebar.get("important_guides", {}) - for guide_type in ["getting_started", "quickstart", "installation"]: - if guide_type in important_guides: - link = important_guides[guide_type]["link"] - break - - # If there is no important guide, check if there's an overview - if not link and sidebar.get("has_overview"): - link = f"/projects/{repo_name}/" - - # If still no link, use first section's first item - if not link and sidebar.get("items") and len(sidebar["items"]) > 0: - first_item = sidebar["items"][0] - if "items" in first_item and len(first_item["items"]) > 0: - link = first_item["items"][0].get("link") - elif "link" in first_item: - link = first_item["link"] - - if not link: - link = f"/projects/{repo_name}/" - - nav_items.append({"text": display_name, "link": link}) - - return nav_items - - -def generate_technical_docs_sidebar_items(repos, sidebars): - """ - Generate expandable sidebar items for the Technical Documentation section. - Uses the full sidebar structure with proper expandable sections. 
def filter_section_items(section, exclude_links):
    """Recursively drop entries whose link appears in *exclude_links*.

    Returns a pruned copy of *section*, or None when nothing survives
    the filtering.
    """
    if "items" in section:
        # Branch node: prune children first, keep the node only if any remain.
        kept = []
        for child in section["items"]:
            pruned = filter_section_items(child, exclude_links)
            if pruned:
                kept.append(pruned)
        if not kept:
            return None
        return {
            "text": section["text"],
            "collapsed": section.get("collapsed", True),
            "items": kept,
        }
    if "link" in section:
        # Leaf node: drop it when its link is excluded.
        if section["link"] in exclude_links:
            return None
        return {"text": section["text"], "link": section["link"]}
    # Unknown shape — pass it through untouched.
    return section


def format_items_as_typescript(items, indent_level=3):
    """Render a list of nav/sidebar item dicts as TypeScript source.

    Args:
        items: List of item dictionaries ({'text', 'link'} leaves or
            {'text', 'items', ...} sections).
        indent_level: Indentation depth, four spaces per level
            (3 = 12 spaces for nav alignment, 4 = sidebar alignment).
    """
    pad = "    " * indent_level
    out = []

    for entry in items:
        if "items" in entry:
            # Expandable section with nested entries.
            out.append(f"{pad}{{")
            out.append(f"{pad}  text: '{entry['text']}',")
            if "collapsed" in entry:
                flag = "true" if entry["collapsed"] else "false"
                out.append(f"{pad}  collapsed: {flag},")
            out.append(f"{pad}  items: [")
            for child in entry["items"]:
                if "items" in child:
                    # Nested section: recurse two levels deeper.
                    out.append(format_items_as_typescript([child], indent_level + 2))
                else:
                    # Simple link: strip stray quotes, escape apostrophes.
                    label = child["text"].strip('"').replace("'", "\\'")
                    out.append(f"{pad}    {{ text: '{label}', link: '{child['link']}' }},")
            out.append(f"{pad}  ]")
            out.append(f"{pad}}},")
        else:
            label = entry["text"].strip('"').replace("'", "\\'")
            out.append(f"{pad}{{ text: '{label}', link: '{entry['link']}' }},")

    return "\n".join(out)


def update_vitepress_config(config_path, sidebars, nav_items, technical_docs_items):
    """Rewrite the Technical Documentation nav/sidebar blocks and the
    per-project sidebars inside a VitePress config.mts, in place.

    Also dumps the generated structures next to the config as
    ``*.generated.json`` for inspection.

    Returns:
        True on success, False when the file is missing, no section was
        found, or the write failed.
    """
    print(f"\nUpdating VitePress config: {config_path}")

    try:
        with open(config_path, "r", encoding="utf-8") as fh:
            lines = fh.readlines()
    except FileNotFoundError:
        print(f"  [ERROR] Config file not found: {config_path}")
        return False

    updated = 0
    pos = 0

    # Pass 1: replace every "Technical Documentation" items array.
    while pos < len(lines):
        if "text: 'Technical Documentation'" not in lines[pos]:
            pos += 1
            continue

        # Locate the opening `items: [` of this section.
        start = pos + 1
        while start < len(lines) and "items: [" not in lines[start]:
            start += 1
        if start >= len(lines):
            pos += 1
            continue

        # Walk forward until the matching closing bracket.
        end = start + 1
        depth = 1
        while end < len(lines) and depth > 0:
            depth += lines[end].count("[") - lines[end].count("]")
            if depth == 0:
                break
            end += 1
        if end >= len(lines):
            pos += 1
            continue

        # Anything before the first `sidebar:` marker belongs to the nav.
        in_nav = not any("sidebar:" in prev for prev in lines[:pos])
        if in_nav:
            body = format_items_as_typescript(nav_items, indent_level=7)
        else:
            body = format_items_as_typescript(technical_docs_items, indent_level=6)

        lines = lines[: start + 1] + [body + "\n"] + lines[end:]
        updated += 1
        print(
            f"  [Success] Updated Technical Documentation ({'nav' if in_nav else 'sidebar'} section)"
        )
        pos = start + 2

    if updated == 0:
        print("  [Warning] Could not find any Technical Documentation sections")
        return False

    # Pass 2: replace each project-specific sidebar array.
    print("\n  Updating project-specific sidebars...")
    for sidebar in sidebars:
        if not sidebar:
            continue

        proj_path = sidebar["path"]
        proj_items = sidebar["items"]

        pos = 0
        while pos < len(lines):
            if (
                f"'{proj_path}': [" not in lines[pos]
                and f'"{proj_path}": [' not in lines[pos]
            ):
                pos += 1
                continue

            start = pos
            while start < len(lines) and "[" not in lines[start]:
                start += 1
            if start >= len(lines):
                pos += 1
                continue

            end = start + 1
            depth = 1
            while end < len(lines) and depth > 0:
                depth += lines[end].count("[") - lines[end].count("]")
                if depth == 0:
                    break
                end += 1
            if end >= len(lines):
                pos += 1
                continue

            body = format_items_as_typescript(proj_items, indent_level=4)
            lines = lines[: start + 1] + [body + "\n"] + lines[end:]
            updated += 1
            print(f"  [Success] Updated {proj_path} sidebar")
            break

    try:
        with open(config_path, "w", encoding="utf-8") as fh:
            fh.writelines(lines)
        print(f"  [Success] Successfully updated {config_path}")
    except Exception as exc:
        print(f"  [ERROR] Error writing config file: {exc}")
        return False

    # Sidecar JSON dump of everything that was generated.
    sidecar = config_path.replace(".mts", ".generated.json")
    with open(sidecar, "w", encoding="utf-8") as fh:
        json.dump(
            {
                "sidebars": sidebars,
                "nav_items": nav_items,
                "technical_docs_sidebar_items": technical_docs_items,
            },
            fh,
            indent=2,
        )

    print(f"\n  Generated config also saved to: {sidecar}")
    print("\n  Summary:")
    print(f"    - Project sidebars: {len([s for s in sidebars if s])}")
    print(f"    - Nav items: {len(nav_items)}")
    print(f"    - Technical docs sidebar items: {len(technical_docs_items)}")
    print(f"    - Sections updated: {updated}")

    return True


def main():
    """CLI entry point: regenerate sidebars/nav and patch the VitePress config."""
    parser = argparse.ArgumentParser(description="Update VitePress configuration")
    parser.add_argument("--config", required=True, help="Path to repos-config.json")
    parser.add_argument("--docs-dir", required=True, help="Path to docs directory")
    parser.add_argument(
        "--vitepress-config", required=True, help="Path to VitePress config.mts"
    )

    args = parser.parse_args()

    print("Generating VitePress configuration...")

    config = load_config(args.config)
    section_priorities = config.get("section_priorities", {})

    print("\nChecking for directories without index files...")
    created = create_missing_index_files(args.docs_dir, config["repos"])
    if created:
        print(f"  Created {len(created)} index file(s)")
    else:
        print("  All directories have index files")

    sidebars = []
    for repo in config["repos"]:
        sidebar = generate_sidebar_config(repo, args.docs_dir, section_priorities)
        if sidebar:
            sidebars.append(sidebar)

    nav_items = generate_nav_items(config["repos"], sidebars)
    technical_docs_items = generate_technical_docs_sidebar_items(
        config["repos"], sidebars
    )

    update_vitepress_config(
        args.vitepress_config, sidebars, nav_items, technical_docs_items
    )

    print("\n[Completed] Configuration update complete!")


if __name__ == "__main__":
    main()
# ===== src/aggregate.py (CLI orchestrator) =====

import argparse
import sys
import tempfile
from pathlib import Path

from aggregation import (
    load_config,
    save_config,
    DocsFetcher,
    transform_directory_structure,
    copy_targeted_docs,
    process_all_markdown,
)


def transform_repo_docs(repo, docs_dir: Path, temp_dir: Path) -> bool:
    """Run the three-step transform pipeline for one fetched repository.

    Args:
        repo: RepoConfig describing the repository.
        docs_dir: Root of the docs/ tree to write into.
        temp_dir: Scratch directory containing the fetched sources
            (one subdirectory per repo name).

    Returns:
        True; the individual steps report their own warnings.
    """
    repo_name = repo.name
    print(f"\n{'='*60}")
    print(f"Transforming docs for: {repo_name}")
    print(f"{'='*60}")

    source_dir = temp_dir / repo_name
    target_dir = docs_dir / repo.target_path

    # Step 1: files declaring 'github_target_path:' frontmatter are copied
    # straight to their declared location under docs/.
    print("\nStep 1: Processing targeted files...")
    copy_targeted_docs(
        str(source_dir), str(docs_dir), repo_name, repo.media_directories, repo.root_files
    )

    # Step 2: remap the repository layout into the docs/projects/ layout.
    print("\nStep 2: Transforming project structure...")
    transform_directory_structure(
        str(source_dir),
        str(target_dir),
        repo.structure,
        repo.special_files,
        repo.media_directories,
    )

    # Step 3: rewrite links and normalize frontmatter in place.
    print("\nStep 3: Processing markdown files...")
    process_all_markdown(str(target_dir), repo_name)

    print(f"\n✓ Transformation complete for {repo_name}")
    return True


def aggregate_repo(repo, docs_dir: Path, temp_dir: Path, fetcher: DocsFetcher) -> tuple:
    """
    Aggregate documentation for a single repository.

    Fetches the repo into temp_dir/<name> and, on success, runs the
    transform pipeline against it.

    Returns:
        Tuple of (success, resolved_commit_hash or None).
    """
    print(f"\n{'='*60}")
    print(f"Aggregating: {repo.name}")
    print(f"{'='*60}")

    # Create output directory for this repo
    repo_output_dir = temp_dir / repo.name
    repo_output_dir.mkdir(parents=True, exist_ok=True)

    result = fetcher.fetch(repo, repo_output_dir)
    if not result.success:
        print(f"✗ Failed to fetch {repo.name}")
        return False, result.resolved_commit

    if not transform_repo_docs(repo, docs_dir, temp_dir):
        print(f"✗ Failed to transform {repo.name}")
        return False, result.resolved_commit

    return True, result.resolved_commit


def main() -> int:
    """CLI entry point.

    Returns:
        Process exit code: 0 when every selected repository aggregated
        cleanly, 1 otherwise.
    """
    parser = argparse.ArgumentParser(
        description="Aggregate documentation from multiple repositories",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Aggregate all repositories
  %(prog)s

  # Aggregate with local config (file:// URLs, no git)
  %(prog)s --config repos-config.local.json

  # Aggregate specific repository
  %(prog)s --repo gardenlinux

  # Update commit locks (fetch and update config with resolved commit hashes)
  %(prog)s --update-locks
""",
    )
    parser.add_argument(
        "--config",
        default="repos-config.json",
        help="Path to repos-config.json (default: repos-config.json)",
    )
    parser.add_argument(
        "--docs-dir",
        default="docs",
        help="Path to docs directory (default: docs)",
    )
    parser.add_argument(
        "--repo",
        help="Only aggregate specific repository",
    )
    parser.add_argument(
        "--update-locks",
        action="store_true",
        help="Update commit locks: fetch and update config with resolved commit hashes",
    )

    args = parser.parse_args()

    # Config files live in the project root (parent of src/), so resolve
    # relative paths against it rather than the current working directory.
    script_dir = Path(__file__).parent.resolve()
    project_root = script_dir.parent

    config_path = Path(args.config)
    if not config_path.is_absolute():
        config_path = project_root / config_path

    docs_dir = Path(args.docs_dir)
    if not docs_dir.is_absolute():
        docs_dir = project_root / docs_dir

    print(f"{'='*60}")
    print("Garden Linux Documentation Aggregation")
    print(f"{'='*60}\n")
    print(f"Configuration: {config_path}")
    print(f"Docs directory: {docs_dir}")
    if args.repo:
        print(f"Repository filter: {args.repo}")
    if args.update_locks:
        print("Update commit locks: ENABLED")
    print()

    repos = load_config(str(config_path))

    # Bug fix: fail fast on an unknown --repo instead of silently looping
    # over nothing and reporting success with 0/0 repositories.
    if args.repo and not any(r.name == args.repo for r in repos):
        print(
            f"Error: repository '{args.repo}' not found in {config_path}",
            file=sys.stderr,
        )
        return 1

    resolved_commits = {}
    success_count = 0
    fail_count = 0

    # All fetched sources live in one throwaway directory.
    with tempfile.TemporaryDirectory() as temp_dir_str:
        temp_dir = Path(temp_dir_str)
        print(f"Temporary directory: {temp_dir}\n")

        fetcher = DocsFetcher(project_root, update_locks=args.update_locks)

        for repo in repos:
            if args.repo and repo.name != args.repo:
                continue

            success, resolved_commit = aggregate_repo(repo, docs_dir, temp_dir, fetcher)
            if success:
                success_count += 1
                if resolved_commit:
                    resolved_commits[repo.name] = resolved_commit
            else:
                fail_count += 1

    # Persist the resolved commit hashes when lock-updating was requested.
    if args.update_locks and resolved_commits:
        print(f"\n{'='*60}")
        print("Updating config with resolved commits...")
        print(f"{'='*60}\n")

        for repo in repos:
            if repo.name in resolved_commits:
                repo.commit = resolved_commits[repo.name]
                print(f"  {repo.name}: {resolved_commits[repo.name]}")

        save_config(str(config_path), repos)
        print(f"\n✓ Config updated: {config_path}")

    print(f"\n{'='*60}")
    print("Documentation aggregation complete!")
    print(f"{'='*60}\n")
    print(f"Successful: {success_count}")
    print(f"Failed: {fail_count}")
    print("\nNext steps:")
    print("  1. Review the changes in docs/projects/")
    print("  2. Run 'make dev' or 'pnpm run docs:dev' to preview")
    print("  3. Commit the changes if satisfied")

    return 0 if fail_count == 0 else 1


if __name__ == "__main__":
    sys.exit(main())


# ===== src/aggregation/__init__.py =====
"""Aggregation package for docs-ng documentation aggregation."""

# Re-export commonly used functions for backward compatibility with tests.
from .transformer import (
    rewrite_links,
    ensure_frontmatter,
    quote_yaml_value,
    parse_frontmatter,
)
from .models import RepoConfig, AggregateResult
from .config import load_config, save_config
from .fetcher import DocsFetcher
from .structure import (
    transform_directory_structure,
    copy_targeted_docs,
    process_all_markdown,
)

__all__ = [
    # Models
    "RepoConfig",
    "AggregateResult",
    # Config
    "load_config",
    "save_config",
    # Fetcher
    "DocsFetcher",
    # Transformer (for tests)
    "rewrite_links",
    "ensure_frontmatter",
    "quote_yaml_value",
    "parse_frontmatter",
    # Structure
    "transform_directory_structure",
    "copy_targeted_docs",
    "process_all_markdown",
]


# ===== src/aggregation/config.py =====
"""Configuration loading and saving for documentation aggregation."""

import json
from typing import Dict, List

from .models import RepoConfig


def load_config(config_path: str) -> List[RepoConfig]:
    """
    Load and validate the repository configuration.

    Args:
        config_path: Path to the JSON configuration file.

    Returns:
        List of validated RepoConfig objects.

    Exits the process (status 1) on malformed JSON or validation errors —
    this is CLI-facing code, not library code.
    """
    try:
        with open(config_path, "r", encoding="utf-8") as f:
            config = json.load(f)

        if "repos" not in config:
            raise ValueError("Configuration must have 'repos' array")

        repos = []
        for repo_dict in config["repos"]:
            repo = RepoConfig.from_dict(repo_dict)
            repo.validate()
            repos.append(repo)
        return repos
    except json.JSONDecodeError as e:
        print(f"Error: Invalid JSON in config file: {e}", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        print(f"Error loading config: {e}", file=sys.stderr)
        sys.exit(1)


def save_config(config_path: str, repos: List[RepoConfig]) -> None:
    """
    Write the repository configuration back to JSON.

    Optional fields are omitted when they hold their defaults so the file
    stays minimal and diff-friendly (e.g. structure == "flat" is implied).

    Args:
        config_path: Path to the JSON configuration file.
        repos: RepoConfig objects to serialize.
    """
    config = {
        "repos": [
            {
                "name": repo.name,
                "url": repo.url,
                "docs_path": repo.docs_path,
                "target_path": repo.target_path,
                **({"ref": repo.ref} if repo.ref else {}),
                **({"commit": repo.commit} if repo.commit else {}),
                **({"root_files": repo.root_files} if repo.root_files else {}),
                **({"structure": repo.structure} if repo.structure != "flat" else {}),
                **({"special_files": repo.special_files} if repo.special_files else {}),
                **({"media_directories": repo.media_directories} if repo.media_directories else {}),
            }
            for repo in repos
        ]
    }

    with open(config_path, "w", encoding="utf-8") as f:
        json.dump(config, f, indent=2)
        f.write("\n")  # trailing newline keeps POSIX tools and diffs happy
# ===== src/aggregation/models.py =====
"""Data models for documentation aggregation."""

from dataclasses import dataclass, field
from typing import Dict, List, Optional, Union


@dataclass
class RepoConfig:
    """Configuration for a single repository (one entry of repos-config.json)."""

    name: str                                    # repo short name; also folder name under docs/projects/
    url: str                                     # https:// remote or file:// local checkout
    docs_path: str                               # path of the docs tree inside the repo
    target_path: str                             # destination path under docs/
    ref: Optional[str] = None                    # branch/tag to fetch (required for remotes)
    commit: Optional[str] = None                 # optional commit lock to verify against
    root_files: List[str] = field(default_factory=list)        # extra root-level files to pull in
    structure: Union[str, Dict[str, str]] = "flat"             # "flat"/"sphinx" or dir-rename map
    special_files: Dict[str, str] = field(default_factory=dict)  # entry -> target subdir relocations
    media_directories: List[str] = field(default_factory=list)   # media dir names to preserve

    @property
    def is_local(self) -> bool:
        """True for file:// URLs (local working copies)."""
        return self.url.startswith("file://")

    @property
    def is_remote(self) -> bool:
        """True for https:// URLs."""
        return self.url.startswith("https://")

    @property
    def local_path(self) -> str:
        """Filesystem path with the 'file://' prefix stripped ('' for remotes)."""
        return self.url[len("file://"):] if self.is_local else ""

    def validate(self) -> None:
        """Raise ValueError on unsupported URL schemes or a remote without a ref."""
        if not (self.is_local or self.is_remote):
            raise ValueError(f"Invalid URL scheme for {self.name}: {self.url}")

        if self.is_remote and not self.ref:
            raise ValueError(f"Remote repository {self.name} must have 'ref' field")

    @classmethod
    def from_dict(cls, data: Dict) -> "RepoConfig":
        """Build a RepoConfig from one repos-config.json entry dict."""
        return cls(
            name=data["name"],
            url=data["url"],
            docs_path=data["docs_path"],
            target_path=data["target_path"],
            ref=data.get("ref"),
            commit=data.get("commit"),
            root_files=data.get("root_files", []),
            structure=data.get("structure", "flat"),
            special_files=data.get("special_files", {}),
            media_directories=data.get("media_directories", []),
        )


@dataclass
class AggregateResult:
    """Outcome of aggregating a single repository."""

    repo_name: str
    success: bool
    resolved_commit: Optional[str] = None  # commit hash actually checked out (remotes only)


# ===== src/aggregation/fetcher.py =====
"""Repository fetching for documentation aggregation."""

import shutil
import subprocess
import sys
import tempfile
from pathlib import Path
from typing import Tuple, Optional

# NOTE: in the real package layout these come from the sibling module:
#   from .models import RepoConfig, AggregateResult


class DocsFetcher:
    """Handles fetching documentation from remote or local repositories."""

    def __init__(self, project_root: Path, update_locks: bool = False):
        """
        Initialize fetcher.

        Args:
            project_root: Root directory of the docs-ng project; relative
                file:// paths are resolved against it.
            update_locks: Whether we're in update-locks mode (commit
                mismatches are then expected, not errors).
        """
        self.project_root = project_root
        self.update_locks = update_locks

    def fetch(self, repo: RepoConfig, output_dir: Path) -> AggregateResult:
        """
        Fetch documentation for a repository.

        Dispatches to a filesystem copy for file:// URLs and a git sparse
        checkout for https:// URLs.

        Args:
            repo: Repository configuration.
            output_dir: Where to copy fetched files.

        Returns:
            AggregateResult with success status and resolved commit
            (commit is always None for local repos).
        """
        if repo.is_local:
            success = self._fetch_local(repo, output_dir)
            return AggregateResult(repo.name, success, None)
        success, commit = self._fetch_remote(repo, output_dir)
        return AggregateResult(repo.name, success, commit)

    def _fetch_remote(
        self,
        repo: RepoConfig,
        output_dir: Path,
    ) -> Tuple[bool, Optional[str]]:
        """Fetch from a remote repository using a shallow git sparse checkout.

        Returns:
            (success, resolved_commit); resolved_commit is None when git
            itself failed before HEAD could be resolved.
        """
        temp_dir = Path(tempfile.mkdtemp())

        try:
            print(f"  Fetching from: {repo.url}")
            print(f"  Ref: {repo.ref}")
            if repo.root_files:
                print(f"  Root files: {', '.join(repo.root_files)}")
            print(f"  Output: {output_dir}")

            # Empty repo + sparse checkout limits the transfer to docs_path
            # and the explicitly listed root files.
            subprocess.run(["git", "init"], check=True, capture_output=True, cwd=temp_dir)
            subprocess.run(
                ["git", "remote", "add", "origin", repo.url],
                check=True,
                capture_output=True,
                cwd=temp_dir,
            )
            subprocess.run(
                ["git", "config", "core.sparseCheckout", "true"],
                check=True,
                capture_output=True,
                cwd=temp_dir,
            )

            sparse_checkout_file = temp_dir / ".git" / "info" / "sparse-checkout"
            with open(sparse_checkout_file, "w") as f:
                f.write(f"{repo.docs_path}/*\n")
                for root_file in repo.root_files:
                    f.write(f"{root_file}\n")

            print("  Cloning (sparse checkout)...")
            subprocess.run(
                ["git", "fetch", "--depth=1", "origin", repo.ref],
                check=True,
                capture_output=True,
                cwd=temp_dir,
            )
            subprocess.run(
                ["git", "checkout", repo.ref],
                check=True,
                capture_output=True,
                cwd=temp_dir,
            )

            rev = subprocess.run(
                ["git", "rev-parse", "HEAD"],
                check=True,
                capture_output=True,
                text=True,
                cwd=temp_dir,
            )
            resolved_commit = rev.stdout.strip()
            print(f"  Resolved commit: {resolved_commit}")

            # Enforce the commit lock unless we're deliberately updating it.
            if repo.commit and resolved_commit != repo.commit:
                if self.update_locks:
                    print(f"  Updating lock: {repo.commit[:8]} → {resolved_commit[:8]}")
                else:
                    print("  Warning: Commit mismatch!", file=sys.stderr)
                    print(f"    Expected: {repo.commit}", file=sys.stderr)
                    print(f"    Got:      {resolved_commit}", file=sys.stderr)
                    return False, resolved_commit
            elif repo.commit:
                print("  ✓ Commit lock verified")

            docs_source = temp_dir / repo.docs_path
            if docs_source.exists():
                print(f"  Copying docs to {output_dir}")
                self._copy_docs(docs_source, output_dir)
            else:
                print(f"  Warning: docs_path '{repo.docs_path}' not found in repository")

            self._copy_root_files(temp_dir, repo.root_files, output_dir)

            print("  ✓ Fetch complete")
            return True, resolved_commit

        except subprocess.CalledProcessError as e:
            print(f"  Error: Git command failed: {e}", file=sys.stderr)
            if e.stderr:
                print(f"    {e.stderr.decode()}", file=sys.stderr)
            return False, None
        except Exception as e:
            print(f"  Error: {e}", file=sys.stderr)
            return False, None
        finally:
            # Always discard the scratch clone.
            shutil.rmtree(temp_dir, ignore_errors=True)

    def _fetch_local(
        self,
        repo: RepoConfig,
        output_dir: Path,
    ) -> bool:
        """Fetch from a local repository via direct filesystem copy."""
        try:
            # Relative file:// paths are resolved against the project root.
            repo_path = Path(repo.local_path)
            if repo_path.is_absolute():
                repo_abs_path = repo_path.resolve()
            else:
                repo_abs_path = (self.project_root / repo_path).resolve()

            print(f"  Copying from: {repo_abs_path}")
            if repo.root_files:
                print(f"  Root files: {', '.join(repo.root_files)}")
            print(f"  Output: {output_dir}")

            if not repo_abs_path.exists():
                print(f"  Error: Local repository not found: {repo_abs_path}", file=sys.stderr)
                return False

            docs_source = repo_abs_path / repo.docs_path
            if docs_source.exists():
                print(f"  Copying docs from {repo.docs_path}/")
                self._copy_docs(docs_source, output_dir)
            else:
                print(f"  Warning: docs_path '{repo.docs_path}' not found in local repository")

            self._copy_root_files(repo_abs_path, repo.root_files, output_dir)

            print("  ✓ Copy complete")
            return True

        except Exception as e:
            print(f"  Error: {e}", file=sys.stderr)
            return False

    @staticmethod
    def _copy_docs(source: Path, dest: Path) -> None:
        """
        Copy the documentation directory contents into dest.

        Path.iterdir() already yields hidden entries (e.g. '.media'), so a
        single pass covers them too — the previous extra glob(".*") pass
        re-copied hidden directories redundantly and has been removed.

        Args:
            source: Source docs directory.
            dest: Destination directory (created if missing).
        """
        dest.mkdir(parents=True, exist_ok=True)

        for item in source.iterdir():
            target = dest / item.name
            if item.is_file():
                shutil.copy2(item, target)
            elif item.is_dir():
                shutil.copytree(item, target, dirs_exist_ok=True)

    @staticmethod
    def _copy_root_files(repo_root: Path, root_files: list, dest: Path) -> None:
        """
        Copy the listed root-level files from the repository into dest.

        Files are flattened to their basename (e.g. 'src/README.md' lands
        as 'README.md'); downstream consumers account for this.

        Args:
            repo_root: Root directory of the repository checkout.
            root_files: Filenames (relative to repo_root) to copy.
            dest: Destination directory.
        """
        if not root_files:
            return

        print("  Copying root files")
        for filename in root_files:
            src = repo_root / filename
            if src.exists():
                shutil.copy2(src, dest / src.name)
                # Bug fix: report the actual filename instead of the
                # garbled "(unknown)" placeholder.
                print(f"    ✓ {filename}")
            else:
                print(f"    Warning: {filename} not found")
# ===== src/aggregation/structure.py =====
"""Directory structure transformation and markdown processing."""

import shutil
from pathlib import Path
from typing import Dict, List, Optional

# NOTE: transformer helpers (parse_frontmatter, ensure_frontmatter,
# rewrite_links, fix_broken_project_links) are imported lazily inside the
# functions that need them so this module stays importable on its own.


def transform_directory_structure(
    source_dir: str,
    target_dir: str,
    structure_map,
    special_files: Optional[Dict] = None,
    media_dirs: Optional[List[str]] = None,
) -> None:
    """
    Copy fetched docs into the target layout.

    When structure_map is a dict it renames the listed top-level
    directories (old -> new) and then copies everything else, honoring
    special_files relocations and media_dirs. Any other value (e.g.
    "flat" or "sphinx") means a plain copy of the source tree.

    Args:
        source_dir: Source directory with fetched docs.
        target_dir: Target directory under docs/projects/.
        structure_map: Dir-rename mapping, or a copy-mode string.
        special_files: Map of entry name -> target subdirectory.
        media_dirs: Media directory names to preserve as-is.
    """
    source_path = Path(source_dir)
    target_path = Path(target_dir)
    target_path.mkdir(parents=True, exist_ok=True)

    special_files = special_files or {}
    media_dirs = media_dirs or []

    if isinstance(structure_map, dict):
        # Rename mapped subdirectories first.
        for old_name, new_name in structure_map.items():
            old_path = source_path / old_name
            if old_path.exists():
                print(f"  Transforming: {old_name} -> {new_name}")
                shutil.copytree(old_path, target_path / new_name, dirs_exist_ok=True)

        # Then copy the remaining entries.
        for item in source_path.iterdir():
            if item.name in structure_map:
                continue  # already handled by the rename pass above

            if item.name in special_files:
                # Relocate into the configured subdirectory.
                target_subdir = target_path / special_files[item.name]
                target_subdir.mkdir(parents=True, exist_ok=True)
                print(f"  Moving {item.name} to {special_files[item.name]}")
                if item.is_file():
                    shutil.copy2(item, target_subdir / item.name)
                elif item.is_dir():
                    shutil.copytree(item, target_subdir / item.name, dirs_exist_ok=True)
            elif item.name in media_dirs:
                print(f"  Copying media directory: {item.name}")
                shutil.copytree(item, target_path / item.name, dirs_exist_ok=True)
            elif item.is_file() and not item.name.startswith("_"):
                shutil.copy2(item, target_path / item.name)
            elif (
                item.is_dir()
                and not item.name.startswith("_")
                and not item.name.startswith(".")
            ):
                shutil.copytree(item, target_path / item.name, dirs_exist_ok=True)
    else:
        # Flat/sphinx structure: copy the tree as-is.
        # NOTE(review): glob("*") skips hidden entries (e.g. '.media'),
        # unlike the dict branch's iterdir() — confirm this asymmetry is
        # intended before changing it.
        print(f"  Copying {structure_map} structure")
        for item in source_path.glob("*"):
            target_item = target_path / item.name
            if item.is_file():
                shutil.copy2(item, target_item)
            elif item.is_dir():
                shutil.copytree(item, target_item, dirs_exist_ok=True)


def copy_targeted_docs(
    source_dir: str,
    docs_dir: str,
    repo_name: str,
    media_dirs: Optional[List[str]] = None,
    root_files: Optional[List[str]] = None,
) -> None:
    """
    Copy markdown files declaring 'github_target_path:' frontmatter to
    their declared location under docs/, then copy media directories so
    relative image links keep working.

    Args:
        source_dir: Source directory with fetched docs.
        docs_dir: Docs root directory.
        repo_name: Repository name (kept for call-site symmetry).
        media_dirs: Media directory names to copy alongside targeted files.
        root_files: Root-level files (e.g. README.md) to scan as well.
    """
    # Lazy import keeps this module importable without the transformer.
    from .transformer import ensure_frontmatter, parse_frontmatter

    source_path = Path(source_dir)
    docs_path = Path(docs_dir)

    if not source_path.exists():
        print(f"  [Warning] Source directory not found: {source_dir}")
        return

    md_files = list(source_path.rglob("*.md"))

    # root_files may have been flattened by the fetcher
    # (e.g. src/README.md -> README.md), so fall back to the basename.
    if root_files:
        for root_file in root_files:
            candidate = source_path / root_file
            if not candidate.exists():
                candidate = source_path / Path(root_file).name
            if (
                root_file.endswith(".md")
                and candidate.is_file()
                and candidate not in md_files
            ):
                md_files.append(candidate)

    targeted_files = []
    print(f"  Scanning {len(md_files)} files for 'github_target_path:' frontmatter...")

    for md_file in md_files:
        try:
            with open(md_file, "r", encoding="utf-8") as f:
                content = f.read()

            frontmatter, _ = parse_frontmatter(content)
            if not frontmatter or "github_target_path" not in frontmatter:
                continue

            # 'target' is accepted as a legacy fallback key.
            rel_target = frontmatter.get("github_target_path") or frontmatter.get("target")
            if not rel_target:
                # Bug fix: a present-but-empty key used to crash on
                # None.startswith(); skip it instead.
                continue

            # Paths are declared relative to the repo's docs/ root.
            if rel_target.startswith("docs/"):
                rel_target = rel_target[5:]

            target_file = docs_path / rel_target
            target_file.parent.mkdir(parents=True, exist_ok=True)

            # Write the processed content directly; the previous
            # copy-then-overwrite did the same work twice.
            with open(target_file, "w", encoding="utf-8") as f:
                f.write(ensure_frontmatter(content))

            targeted_files.append((md_file.relative_to(source_path), rel_target))
            print(f"    ✓ Copied: {md_file.name} → {rel_target}")

        except Exception as e:
            print(f"  [Warning] Error processing {md_file.name}: {e}")

    if not targeted_files:
        print("  No files with 'github_target_path:' frontmatter found")
        return

    print(f"  ✓ Copied {len(targeted_files)} targeted file(s)")

    if not media_dirs:
        return

    print("  Copying media directories recursively...")

    # Deepest common ancestor of all target paths; root-level media is
    # placed there so relative links from the targeted files resolve.
    target_paths = [Path(p) for _, p in targeted_files]
    common_parent = None
    all_parents = [list(p.parents) for p in target_paths]
    for candidate in target_paths[0].parents:
        if all(candidate in parents for parents in all_parents):
            common_parent = candidate
            break

    for media_dir_name in media_dirs:
        for media_dir in source_path.rglob(media_dir_name):
            if not media_dir.is_dir():
                continue
            rel_path = media_dir.relative_to(source_path)
            if len(rel_path.parts) == 1:
                # Root-level media directory: copy next to the targeted files.
                if common_parent and common_parent != Path("."):
                    target_media = docs_path / common_parent / media_dir_name
                    target_media.parent.mkdir(parents=True, exist_ok=True)
                    shutil.copytree(media_dir, target_media, dirs_exist_ok=True)
                    print(f"    ✓ Copied media: {common_parent / media_dir_name}")
            else:
                # Nested media directory: mirror its relative location.
                target_media = docs_path / rel_path
                target_media.parent.mkdir(parents=True, exist_ok=True)
                shutil.copytree(media_dir, target_media, dirs_exist_ok=True)
                print(f"    ✓ Copied media: {rel_path}")


def process_markdown_file(
    file_path: Path,
    repo_name: str,
    target_dir: str,
    base_path: str = "/projects",
) -> bool:
    """
    Post-process a single markdown file in place: rewrite links and
    ensure it carries frontmatter.

    Args:
        file_path: Path to the markdown file.
        repo_name: Repository name (used in rewritten link targets).
        target_dir: Target directory the file lives under.
        base_path: URL base for aggregated projects.

    Returns:
        True on success, False when processing failed (warning printed).
    """
    from .transformer import (
        ensure_frontmatter,
        fix_broken_project_links,
        rewrite_links,
    )

    try:
        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read()

        # Path of the file relative to target_dir; "" when outside it.
        try:
            file_rel_path = str(Path(file_path).relative_to(Path(target_dir)))
        except ValueError:
            file_rel_path = ""

        content = rewrite_links(content, repo_name, file_rel_path, base_path)
        content = fix_broken_project_links(content, repo_name, target_dir, base_path)
        content = ensure_frontmatter(content)

        with open(file_path, "w", encoding="utf-8") as f:
            f.write(content)

        return True
    except Exception as e:
        print(f"  [Warning] Error processing {file_path}: {e}")
        return False


def process_all_markdown(target_dir: str, repo_name: str) -> None:
    """
    Rename README.md files to index.md (VitePress convention) and
    post-process every markdown file under target_dir.

    Args:
        target_dir: Target directory path.
        repo_name: Repository name.
    """
    target_path = Path(target_dir)

    for readme in target_path.rglob("README.md"):
        index_file = readme.parent / "index.md"
        if not index_file.exists():  # never clobber an existing index.md
            readme.rename(index_file)
            print(f"  Renamed {readme.relative_to(target_path)} to index.md")

    md_files = list(target_path.rglob("*.md"))
    print(f"  Processing {len(md_files)} markdown files...")

    ok = sum(
        1
        for md_file in md_files
        if process_markdown_file(md_file, repo_name, target_dir)
    )
    print(f"  ✓ Processed {ok}/{len(md_files)} files successfully")
+ if ":" in link and not link.startswith("/") and not link.startswith("./") and not link.startswith("../"): + return match.group(0) + + # Skip anchors + if link.startswith("#"): + return match.group(0) + + # Skip if already a /projects/ link + if link.startswith(f"{base_path}/"): + return match.group(0) + + # Handle relative paths for .media directory + if ".media/" in link: + media_part = link + while media_part.startswith("../"): + media_part = media_part[3:] + media_part = media_part.replace("./", "") + new_link = f"{base_path}/{repo_name}/{media_part}" + return f"[{text}]({new_link})" + + # Handle relative links + if link.startswith("../") or link.startswith("./"): + stripped_link = link.replace(".md", "") + + # For ./ links (same directory) + if link.startswith("./"): + stripped_link = stripped_link.replace("./", "") + if file_dir: + new_link = f"{base_path}/{repo_name}/{file_dir}/{stripped_link}" + else: + new_link = f"{base_path}/{repo_name}/{stripped_link}" + else: + # For ../ links, check if they go outside docs/ + levels_up = link.count("../") + stripped_link = stripped_link.replace("../", "") + + # Check if we go outside docs/ + if file_dir: + dir_depth = len(file_dir.split("/")) + if levels_up > dir_depth: + # Link to GitHub + new_link = f"{github_base}/{repo_name}/blob/main/{stripped_link}" + return f"[{text}]({new_link})" + + # Remove numbered prefixes + stripped_link = re.sub(r"\d+_(\w+)", r"\1", stripped_link) + new_link = f"{base_path}/{repo_name}/{stripped_link}" + + return f"[{text}]({new_link})" + + # Handle absolute paths from root + if link.startswith("/"): + if link.startswith(f"{base_path}/"): + return match.group(0) + # Link to file outside docs/ - point to GitHub + stripped_link = link.lstrip("/") + new_link = f"{github_base}/{repo_name}/blob/main/{stripped_link}" + return f"[{text}]({new_link})" + + # Handle simple filenames (same directory) + if "/" not in link: + stripped_link = link.replace(".md", "") + if file_dir: + new_link = 
f"{base_path}/{repo_name}/{file_dir}/{stripped_link}" + else: + new_link = f"{base_path}/{repo_name}/{stripped_link}" + return f"[{text}]({new_link})" + + return match.group(0) + + # Apply transform to markdown links + content = re.sub(r"\[([^\]]+)\]\(([^)]+)\)", replace_link, content) + + # Handle HTML media links + def replace_html_media_link(match): + attr_name = match.group(1) + link = match.group(2) + + if link.startswith(f"{base_path}/"): + return match.group(0) + if ".media/" in link: + media_part = link + while media_part.startswith("../"): + media_part = media_part[3:] + media_part = media_part.replace("./", "") + new_link = f"{base_path}/{repo_name}/{media_part}" + return f'{attr_name}="{new_link}"' + return match.group(0) + + content = re.sub( + r'(src|srcset)="([^"]*\.media/[^"]*)"', + replace_html_media_link, + content, + ) + + return content + + +def quote_yaml_value(value: str) -> str: + """ + Quote YAML value if needed, handling already-quoted values. + + Args: + value: YAML value to potentially quote + + Returns: + Quoted value if needed, otherwise original value + """ + # If value is already properly quoted, return as-is + if value.startswith('"') and value.endswith('"'): + if not value.startswith('"\\"'): + return value + + if value.startswith("'") and value.endswith("'"): + return value + + special_chars = [ + ":", "#", "@", "`", "|", ">", "*", "&", "!", + "%", "[", "]", "{", "}", ",", "?", + ] + + needs_quoting = any(char in value for char in special_chars) + + if value and (value[0] in ['"', "'", " "] or value[-1] in [" "]): + needs_quoting = True + + if needs_quoting: + if '"' not in value: + return f'"{value}"' + elif "'" not in value: + return f"'{value}'" + else: + escaped_value = value.replace('"', '\\"') + return f'"{escaped_value}"' + + return value + + +def parse_frontmatter(content: str) -> Tuple[Optional[Dict[str, str]], str]: + """ + Parse YAML frontmatter from markdown content. 
+ + Args: + content: Markdown content potentially with frontmatter + + Returns: + Tuple of (frontmatter_dict, content_without_frontmatter) + or (None, original_content) if no frontmatter found. + """ + if not content.startswith("---\n"): + return None, content + + try: + end_match = re.search(r"\n---\n", content[4:]) + if not end_match: + return None, content + + frontmatter_text = content[4 : 4 + end_match.start()] + rest_content = content[4 + end_match.end() :] + + frontmatter_dict = {} + for line in frontmatter_text.split("\n"): + line = line.strip() + if not line or ":" not in line: + continue + + key, value = line.split(":", 1) + key = key.strip() + value = value.strip().strip("\"'") + frontmatter_dict[key] = value + + return frontmatter_dict, rest_content + except Exception as e: + print(f" [Warning] Failed to parse frontmatter: {e}") + return None, content + + +def fix_yaml_frontmatter(frontmatter_text: str) -> str: + """ + Fix YAML frontmatter formatting. + + Args: + frontmatter_text: Frontmatter content (without --- markers) + + Returns: + Fixed frontmatter text + """ + lines = frontmatter_text.split("\n") + fixed_lines = [] + + for line in lines: + if not line.strip(): + fixed_lines.append(line) + continue + + if ":" in line: + parts = line.split(":", 1) + if len(parts) == 2: + key = parts[0].strip() + value = parts[1].strip() + quoted_value = quote_yaml_value(value) + fixed_lines.append(f"{key}: {quoted_value}") + continue + + fixed_lines.append(line) + + return "\n".join(fixed_lines) + + +def ensure_frontmatter(content: str) -> str: + """ + Ensure frontmatter exists and fix YAML formatting. 
+ + Args: + content: Markdown content + + Returns: + Content with fixed frontmatter + """ + if content.startswith("---\n"): + try: + end_match = re.search(r"\n---\n", content[4:]) + if end_match: + frontmatter_content = content[4 : 4 + end_match.start()] + rest_content = content[4 + end_match.end() :] + + # Parse and fix the frontmatter + fixed_frontmatter = fix_yaml_frontmatter(frontmatter_content) + + return f"---\n{fixed_frontmatter}\n---\n\n{rest_content}" + except Exception: + print(" [Warning] Couldn't parse existing frontmatter!") + + return content + + +def fix_broken_project_links( + content: str, + repo_name: str, + target_dir: str, + base_path: str = "/projects", + github_base: str = "https://github.com/gardenlinux", +) -> str: + """ + Fix links in /projects/ that point to non-existent files. + Replace with GitHub links. + + Args: + content: Markdown content + repo_name: Repository name + target_dir: Target directory path + base_path: Base path for projects + github_base: GitHub base URL + + Returns: + Content with fixed links + """ + target_path = Path(target_dir) + + def check_and_fix_link(match): + text = match.group(1) + link = match.group(2) + + # Only process /projects/{repo}/ links + if not link.startswith(f"{base_path}/{repo_name}/"): + return match.group(0) + + # Extract the path after /projects/{repo}/ + rel_path = link[len(f"{base_path}/{repo_name}/") :] + + potential_file = target_path / f"{rel_path}.md" + potential_index = target_path / rel_path / "index.md" + potential_dir = target_path / rel_path + + # If file exists, or directory exists with index.md, keep the link + if ( + potential_file.exists() + or potential_index.exists() + or ( + potential_dir.exists() + and potential_dir.is_dir() + and (potential_dir / "index.md").exists() + ) + ): + return match.group(0) + + github_link = f"{github_base}/{repo_name}/blob/main/{rel_path}" + return f"[{text}]({github_link})" + + content = re.sub(r"\[([^\]]+)\]\(([^)]+)\)", check_and_fix_link, 
content)
+ return content
diff --git a/scripts/migration_tracker.py b/src/migration_tracker.py similarity index 100% rename from scripts/migration_tracker.py rename to src/migration_tracker.py diff --git a/tests/README.md b/tests/README.md new file mode 120000 index 0000000..ece7573 --- /dev/null +++ b/tests/README.md @@ -0,0 +1 @@ +../docs/reference/supporting_tools/docs-ng/reference/testing.md \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..9cf1019 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,8 @@ +"""Pytest configuration and shared fixtures.""" + +import sys +from pathlib import Path + +# Put src/ on sys.path so tests can import the aggregation package +project_root = Path(__file__).parent.parent +sys.path.insert(0, str(project_root / "src")) diff --git a/scripts/tests/fixtures/colon_title.md b/tests/fixtures/colon_title.md similarity index 100% rename from scripts/tests/fixtures/colon_title.md rename to tests/fixtures/colon_title.md diff --git a/scripts/tests/fixtures/test_doc.md b/tests/fixtures/test_doc.md similarity index 100% rename from scripts/tests/fixtures/test_doc.md rename to tests/fixtures/test_doc.md diff --git a/scripts/tests/fixtures/with_frontmatter.md b/tests/fixtures/with_frontmatter.md similarity index 100% rename from scripts/tests/fixtures/with_frontmatter.md rename to tests/fixtures/with_frontmatter.md diff --git a/tests/integration/test_aggregation.py b/tests/integration/test_aggregation.py new file mode 100644 index 0000000..41a8b0d --- /dev/null +++ b/tests/integration/test_aggregation.py @@ -0,0 +1,106 @@ +"""Integration tests for documentation aggregation.""" + +import tempfile +from pathlib import Path + +import pytest +from aggregation import DocsFetcher, RepoConfig, process_all_markdown + + +class TestDocsFetcher: + """Integration tests for DocsFetcher.""" + + def test_fetch_local_with_temp_dir(self, tmp_path): + """Test fetching from a local directory structure.""" + # 
Create a mock local repository + repo_path = tmp_path / "mock-repo" + docs_path = repo_path / "docs" + docs_path.mkdir(parents=True) + + # Create some test files + (docs_path / "index.md").write_text("# Test Documentation\n\nContent here.") + (docs_path / "guide.md").write_text("# Guide\n\nSome guide content.") + + subdir = docs_path / "tutorials" + subdir.mkdir() + (subdir / "tutorial1.md").write_text("# Tutorial 1\n\nTutorial content.") + + # Create repo config + repo = RepoConfig( + name="test-repo", + url=f"file://{repo_path}", + docs_path="docs", + target_path="projects/test-repo", + ) + + # Fetch the docs + output_dir = tmp_path / "output" + output_dir.mkdir() + + fetcher = DocsFetcher(tmp_path) + result = fetcher.fetch(repo, output_dir) + + # Verify success + assert result.success is True + assert result.resolved_commit is None # Local repos don't have commits + + # Verify files were copied + assert (output_dir / "index.md").exists() + assert (output_dir / "guide.md").exists() + assert (output_dir / "tutorials" / "tutorial1.md").exists() + + # Verify content + assert "Test Documentation" in (output_dir / "index.md").read_text() + + +class TestMarkdownProcessing: + """Integration tests for markdown processing.""" + + def test_process_all_markdown(self, tmp_path): + """Test processing markdown files in a directory.""" + # Create test directory structure + target_dir = tmp_path / "target" + target_dir.mkdir() + + # Create test markdown files + (target_dir / "README.md").write_text( + "# README\n\n[Link](./guide.md)\n[External](https://example.com)" + ) + (target_dir / "index.md").write_text("# Index\n\nContent") + + subdir = target_dir / "docs" + subdir.mkdir() + (subdir / "guide.md").write_text("# Guide\n\n[Back](../README.md)") + + # Process the markdown + process_all_markdown(str(target_dir), "test-repo") + + # Verify README.md is left in place: an index.md already exists, and + # process_all_markdown only renames README.md when index.md is absent + assert 
(target_dir / "README.md").exists() + + # Verify links were rewritten in index.md (which was already there) + index_content = (target_dir / "index.md").read_text() + assert "# Index" in index_content + + # Verify guide links were rewritten + guide_content = (subdir / "guide.md").read_text() + assert "/projects/test-repo" in guide_content or "README" in guide_content + + def test_process_markdown_with_frontmatter(self, tmp_path): + """Test that frontmatter is properly handled.""" + target_dir = tmp_path / "target" + target_dir.mkdir() + + # Create markdown with problematic frontmatter + (target_dir / "test.md").write_text( + "---\ntitle: Test: Example\ntags: tag1, tag2\n---\n\n# Content" + ) + + # Process + process_all_markdown(str(target_dir), "test-repo") + + # Verify frontmatter was fixed + content = (target_dir / "test.md").read_text() + assert '"Test: Example"' in content # Colon should be quoted + assert '"tag1, tag2"' in content # Comma should be quoted \ No newline at end of file diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py new file mode 100644 index 0000000..d6b6d64 --- /dev/null +++ b/tests/unit/test_config.py @@ -0,0 +1,225 @@ +"""Unit tests for aggregation.config module.""" + +import json +import tempfile +from pathlib import Path + +import pytest +from aggregation import load_config, save_config, RepoConfig + + +class TestLoadConfig: + """Tests for load_config function.""" + + def test_load_valid_config(self, tmp_path): + """Test loading valid configuration.""" + config_data = { + "repos": [ + { + "name": "test-repo", + "url": "https://github.com/test/repo", + "docs_path": "docs", + "target_path": "projects/test", + "ref": "main", + } + ] + } + config_file = tmp_path / "config.json" + config_file.write_text(json.dumps(config_data)) + + repos = load_config(str(config_file)) + assert len(repos) == 1 + assert repos[0].name == "test-repo" + assert repos[0].url == "https://github.com/test/repo" + + def test_load_multiple_repos(self, 
tmp_path): + """Test loading config with multiple repositories.""" + config_data = { + "repos": [ + { + "name": "repo1", + "url": "https://github.com/test/repo1", + "docs_path": "docs", + "target_path": "projects/repo1", + "ref": "main", + }, + { + "name": "repo2", + "url": "file://../repo2", + "docs_path": "docs", + "target_path": "projects/repo2", + }, + ] + } + config_file = tmp_path / "config.json" + config_file.write_text(json.dumps(config_data)) + + repos = load_config(str(config_file)) + assert len(repos) == 2 + assert repos[0].name == "repo1" + assert repos[1].name == "repo2" + + def test_load_config_with_optional_fields(self, tmp_path): + """Test loading config with optional fields.""" + config_data = { + "repos": [ + { + "name": "test-repo", + "url": "https://github.com/test/repo", + "docs_path": "docs", + "target_path": "projects/test", + "ref": "main", + "commit": "abc123", + "root_files": ["README.md"], + "structure": {"old": "new"}, + } + ] + } + config_file = tmp_path / "config.json" + config_file.write_text(json.dumps(config_data)) + + repos = load_config(str(config_file)) + assert repos[0].commit == "abc123" + assert repos[0].root_files == ["README.md"] + assert repos[0].structure == {"old": "new"} + + def test_load_invalid_json(self, tmp_path): + """Test that invalid JSON causes exit.""" + config_file = tmp_path / "config.json" + config_file.write_text("{ invalid json") + + with pytest.raises(SystemExit): + load_config(str(config_file)) + + def test_load_missing_repos_key(self, tmp_path): + """Test that missing 'repos' key causes exit.""" + config_data = {"other": "data"} + config_file = tmp_path / "config.json" + config_file.write_text(json.dumps(config_data)) + + with pytest.raises(SystemExit): + load_config(str(config_file)) + + +class TestSaveConfig: + """Tests for save_config function.""" + + def test_save_single_repo(self, tmp_path): + """Test saving configuration with single repository.""" + repos = [ + RepoConfig( + name="test-repo", + 
url="https://github.com/test/repo", + docs_path="docs", + target_path="projects/test", + ref="main", + ) + ] + config_file = tmp_path / "config.json" + + save_config(str(config_file), repos) + + # Verify file was created and contains correct data + assert config_file.exists() + with open(config_file) as f: + data = json.load(f) + + assert "repos" in data + assert len(data["repos"]) == 1 + assert data["repos"][0]["name"] == "test-repo" + assert data["repos"][0]["ref"] == "main" + + def test_save_multiple_repos(self, tmp_path): + """Test saving configuration with multiple repositories.""" + repos = [ + RepoConfig( + name="repo1", + url="https://github.com/test/repo1", + docs_path="docs", + target_path="projects/repo1", + ref="main", + ), + RepoConfig( + name="repo2", + url="file://../repo2", + docs_path="docs", + target_path="projects/repo2", + ), + ] + config_file = tmp_path / "config.json" + + save_config(str(config_file), repos) + + with open(config_file) as f: + data = json.load(f) + + assert len(data["repos"]) == 2 + + def test_save_with_commit_lock(self, tmp_path): + """Test saving configuration with commit field.""" + repos = [ + RepoConfig( + name="test-repo", + url="https://github.com/test/repo", + docs_path="docs", + target_path="projects/test", + ref="main", + commit="abc123", + ) + ] + config_file = tmp_path / "config.json" + + save_config(str(config_file), repos) + + with open(config_file) as f: + data = json.load(f) + + assert data["repos"][0]["commit"] == "abc123" + + def test_save_omits_empty_optional_fields(self, tmp_path): + """Test that empty optional fields are omitted.""" + repos = [ + RepoConfig( + name="test-repo", + url="https://github.com/test/repo", + docs_path="docs", + target_path="projects/test", + ref="main", + ) + ] + config_file = tmp_path / "config.json" + + save_config(str(config_file), repos) + + with open(config_file) as f: + data = json.load(f) + + # Should not have empty optional fields + assert "commit" not in data["repos"][0] + 
assert "root_files" not in data["repos"][0] + + def test_round_trip(self, tmp_path): + """Test that load/save round-trip preserves data.""" + original_repos = [ + RepoConfig( + name="test-repo", + url="https://github.com/test/repo", + docs_path="docs", + target_path="projects/test", + ref="main", + commit="abc123", + root_files=["README.md"], + ) + ] + config_file = tmp_path / "config.json" + + # Save and load + save_config(str(config_file), original_repos) + loaded_repos = load_config(str(config_file)) + + # Compare + assert len(loaded_repos) == 1 + assert loaded_repos[0].name == original_repos[0].name + assert loaded_repos[0].url == original_repos[0].url + assert loaded_repos[0].commit == original_repos[0].commit + assert loaded_repos[0].root_files == original_repos[0].root_files diff --git a/tests/unit/test_models.py b/tests/unit/test_models.py new file mode 100644 index 0000000..437a738 --- /dev/null +++ b/tests/unit/test_models.py @@ -0,0 +1,131 @@ +"""Unit tests for aggregation.models module.""" + +import pytest +from aggregation import RepoConfig, AggregateResult + + +class TestRepoConfig: + """Tests for RepoConfig dataclass.""" + + def test_from_dict_minimal(self): + """Test creating RepoConfig from minimal valid dict.""" + data = { + "name": "test-repo", + "url": "https://github.com/test/repo", + "docs_path": "docs", + "target_path": "projects/test-repo", + "ref": "main", + } + repo = RepoConfig.from_dict(data) + assert repo.name == "test-repo" + assert repo.url == "https://github.com/test/repo" + assert repo.docs_path == "docs" + assert repo.target_path == "projects/test-repo" + assert repo.ref == "main" + assert repo.commit is None + assert repo.root_files == [] + assert repo.structure == "flat" + + def test_from_dict_full(self): + """Test creating RepoConfig from dict with all fields.""" + data = { + "name": "test-repo", + "url": "https://github.com/test/repo", + "docs_path": "docs", + "target_path": "projects/test-repo", + "ref": "main", + "commit": 
"abc123", + "root_files": ["README.md", "LICENSE"], + "structure": {"old": "new"}, + "special_files": {"file.md": "special/"}, + "media_directories": [".media"], + } + repo = RepoConfig.from_dict(data) + assert repo.commit == "abc123" + assert repo.root_files == ["README.md", "LICENSE"] + assert repo.structure == {"old": "new"} + assert repo.special_files == {"file.md": "special/"} + assert repo.media_directories == [".media"] + + def test_is_local_file_url(self): + """Test is_local property with file:// URL.""" + repo = RepoConfig( + name="local", + url="file://../gardenlinux", + docs_path="docs", + target_path="projects/gardenlinux", + ) + assert repo.is_local is True + assert repo.is_remote is False + + def test_is_remote_https_url(self): + """Test is_remote property with https:// URL.""" + repo = RepoConfig( + name="remote", + url="https://github.com/test/repo", + docs_path="docs", + target_path="projects/test", + ref="main", + ) + assert repo.is_remote is True + assert repo.is_local is False + + def test_local_path_property(self): + """Test local_path property strips file:// prefix.""" + repo = RepoConfig( + name="local", + url="file://../gardenlinux", + docs_path="docs", + target_path="projects/gardenlinux", + ) + assert repo.local_path == "../gardenlinux" + + def test_validate_local_without_ref(self): + """Test that local repos don't require ref.""" + repo = RepoConfig( + name="local", + url="file://../gardenlinux", + docs_path="docs", + target_path="projects/gardenlinux", + ) + repo.validate() # Should not raise + + def test_validate_remote_requires_ref(self): + """Test that remote repos must have ref.""" + repo = RepoConfig( + name="remote", + url="https://github.com/test/repo", + docs_path="docs", + target_path="projects/test", + ) + with pytest.raises(ValueError, match="must have 'ref' field"): + repo.validate() + + def test_validate_invalid_url_scheme(self): + """Test that invalid URL schemes are rejected.""" + repo = RepoConfig( + name="invalid", + 
url="ftp://example.com/repo", + docs_path="docs", + target_path="projects/test", + ) + with pytest.raises(ValueError, match="Invalid URL scheme"): + repo.validate() + + +class TestAggregateResult: + """Tests for AggregateResult dataclass.""" + + def test_success_result(self): + """Test creating success result.""" + result = AggregateResult("test-repo", True, "abc123") + assert result.repo_name == "test-repo" + assert result.success is True + assert result.resolved_commit == "abc123" + + def test_failure_result(self): + """Test creating failure result.""" + result = AggregateResult("test-repo", False, None) + assert result.repo_name == "test-repo" + assert result.success is False + assert result.resolved_commit is None \ No newline at end of file diff --git a/tests/unit/test_transformer.py b/tests/unit/test_transformer.py new file mode 100644 index 0000000..ace21e6 --- /dev/null +++ b/tests/unit/test_transformer.py @@ -0,0 +1,115 @@ +"""Unit tests for aggregation.transformer module.""" + +import pytest +from aggregation import ( + rewrite_links, + quote_yaml_value, + ensure_frontmatter, +) + + +class TestRewriteLinks: + """Tests for rewrite_links function.""" + + def test_relative_links(self): + """Test that relative links are rewritten correctly.""" + content = "[Link](./other.md)" + result = rewrite_links(content, "gardenlinux", "introduction/index.md") + assert "/projects/gardenlinux/introduction/other" in result + + def test_numbered_directory_links(self): + """Test that numbered directories in links are transformed.""" + content = "[Link](../01_developers/guide.md)" + result = rewrite_links(content, "gardenlinux", "introduction/index.md") + assert "developers/guide" in result + + def test_preserve_external_links(self): + """Test that external links are not modified.""" + content = "[External](https://github.com/gardenlinux/gardenlinux)" + result = rewrite_links(content, "gardenlinux", "") + assert result == content + + def test_preserve_anchor_links(self): + 
"""Test that anchor links are preserved.""" + content = "[Anchor](#section)" + result = rewrite_links(content, "gardenlinux", "") + assert result == content + + def test_media_links(self): + """Test that .media/ links are rewritten correctly.""" + content = "[Image](../.media/image.png)" + result = rewrite_links(content, "gardenlinux", "introduction/index.md") + assert "/projects/gardenlinux/.media/image.png" in result + + def test_absolute_paths_to_github(self): + """Test that absolute paths outside docs/ link to GitHub.""" + content = "[File](/README.md)" + result = rewrite_links(content, "gardenlinux", "") + assert "https://github.com/gardenlinux/gardenlinux/blob/main/README.md" in result + + +class TestQuoteYamlValue: + """Tests for quote_yaml_value function.""" + + def test_quote_value_with_colon(self): + """Test that YAML values with colons are quoted.""" + value = "Getting Started: Creating Images" + result = quote_yaml_value(value) + assert '"' in result + assert "Getting Started: Creating Images" in result + + def test_simple_value_not_quoted(self): + """Test that simple YAML values are not quoted.""" + value = "Simple Title" + result = quote_yaml_value(value) + assert result == "Simple Title" + + def test_already_quoted_value(self): + """Test that already-quoted values are not double-quoted.""" + value = '"Already Quoted"' + result = quote_yaml_value(value) + assert result == '"Already Quoted"' + + def test_value_with_hash(self): + """Test that values with # are quoted.""" + value = "Title #1" + result = quote_yaml_value(value) + assert '"' in result + + def test_value_with_special_chars(self): + """Test various special characters that require quoting.""" + special_chars = [":", "#", "@", "*", "&", "!"] + for char in special_chars: + value = f"Text {char} more" + result = quote_yaml_value(value) + assert '"' in result or "'" in result + + +class TestEnsureFrontmatter: + """Tests for ensure_frontmatter function.""" + + def 
test_no_change_when_missing(self): + """Test that content without frontmatter is returned unchanged.""" + content = "# Test Title\n\nContent here." + result = ensure_frontmatter(content) + assert result == content + + def test_preserve_existing(self): + """Test that existing frontmatter is preserved.""" + content = "---\ntitle: Existing\n---\n\nContent" + result = ensure_frontmatter(content) + assert "title: Existing" in result + + def test_fix_colons(self): + """Test that colons in existing frontmatter are quoted.""" + content = "---\ntitle: Test: Example\n---\n\nContent" + result = ensure_frontmatter(content) + assert '"Test: Example"' in result + + def test_fix_multiple_fields(self): + """Test that multiple frontmatter fields are fixed.""" + content = "---\ntitle: Test: Example\nauthor: John Doe\ntags: tag1, tag2\n---\n\nContent" + result = ensure_frontmatter(content) + assert '"Test: Example"' in result + assert "John Doe" in result + assert '"tag1, tag2"' in result \ No newline at end of file