From 2671824dac09b091b322a5b59dee365a5fb30187 Mon Sep 17 00:00:00 2001
From: Eike Waldt
Date: Wed, 1 Apr 2026 11:53:16 +0200
Subject: [PATCH 1/5] feat: refactor bash scripts into src/aggregate.py
Signed-off-by: Eike Waldt
On-behalf-of: SAP
---
.gitignore | 3 +
Makefile | 63 +-
README.md | 432 +++-------
docs/reference/glossary.md | 8 +-
.../repos-config.json => repos-config.json | 36 +-
...nfig.local.json => repos-config.local.json | 11 +-
requirements.txt | 1 +
scripts/README.md | 295 -------
scripts/aggregate-docs.sh | 157 ----
scripts/fetch-repo-docs.sh | 89 ---
scripts/test-local.sh | 46 --
scripts/tests/README.md | 97 ---
scripts/tests/run_all.sh | 12 -
scripts/tests/run_tests.py | 276 -------
scripts/transform_content.py | 750 ------------------
scripts/update_config.py.backup.202603250900 | 694 ----------------
src/README.md | 73 ++
src/aggregate.py | 233 ++++++
src/aggregation/__init__.py | 38 +
src/aggregation/config.py | 71 ++
src/aggregation/fetcher.py | 236 ++++++
src/aggregation/models.py | 68 ++
src/aggregation/structure.py | 259 ++++++
src/aggregation/transformer.py | 327 ++++++++
{scripts => src}/migration_tracker.py | 0
tests/README.md | 59 ++
tests/conftest.py | 8 +
.../tests => tests}/fixtures/colon_title.md | 0
{scripts/tests => tests}/fixtures/test_doc.md | 0
.../fixtures/with_frontmatter.md | 0
tests/integration/test_aggregation.py | 106 +++
tests/unit/test_config.py | 225 ++++++
tests/unit/test_models.py | 131 +++
tests/unit/test_transformer.py | 115 +++
34 files changed, 2154 insertions(+), 2765 deletions(-)
rename scripts/repos-config.json => repos-config.json (55%)
rename scripts/repos-config.local.json => repos-config.local.json (77%)
delete mode 100644 scripts/README.md
delete mode 100755 scripts/aggregate-docs.sh
delete mode 100755 scripts/fetch-repo-docs.sh
delete mode 100755 scripts/test-local.sh
delete mode 100644 scripts/tests/README.md
delete mode 100755 scripts/tests/run_all.sh
delete mode 100755 scripts/tests/run_tests.py
delete mode 100755 scripts/transform_content.py
delete mode 100755 scripts/update_config.py.backup.202603250900
create mode 100644 src/README.md
create mode 100755 src/aggregate.py
create mode 100644 src/aggregation/__init__.py
create mode 100644 src/aggregation/config.py
create mode 100644 src/aggregation/fetcher.py
create mode 100644 src/aggregation/models.py
create mode 100644 src/aggregation/structure.py
create mode 100644 src/aggregation/transformer.py
rename {scripts => src}/migration_tracker.py (100%)
create mode 100644 tests/README.md
create mode 100644 tests/conftest.py
rename {scripts/tests => tests}/fixtures/colon_title.md (100%)
rename {scripts/tests => tests}/fixtures/test_doc.md (100%)
rename {scripts/tests => tests}/fixtures/with_frontmatter.md (100%)
create mode 100644 tests/integration/test_aggregation.py
create mode 100644 tests/unit/test_config.py
create mode 100644 tests/unit/test_models.py
create mode 100644 tests/unit/test_transformer.py
diff --git a/.gitignore b/.gitignore
index f399ac6..a45231e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,6 +8,9 @@ shell.nix
# added by aggregation
docs/projects
+docs/**/.media
+docs/**/_static
+docs/**/assets
# Section directories - aggregated content is ignored, index.md is tracked
docs/contributing/**
diff --git a/Makefile b/Makefile
index b449361..3b6bda3 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-.PHONY: help run dev build preview aggregate aggregate-dry aggregate-repo test-aggregate-local clean clean-projects clean-aggregated-git test test-unit test-integration check spelling linkcheck woke
+.PHONY: help install dev build preview aggregate aggregate-local aggregate-update aggregate-repo aggregate-update-repo clean clean-projects clean-aggregated-git test test-unit test-integration check spelling linkcheck woke
help:
@echo "Garden Linux Documentation Hub - Available targets:"
@@ -9,8 +9,9 @@ help:
@echo " preview - Preview production build locally"
@echo ""
@echo " Testing:"
- @echo " test - Run full test suite"
- @echo " test-unit - Run unit tests only"
+ @echo " test - Run full test suite (38 tests: unit + integration)"
+ @echo " test-unit - Run unit tests only (35 tests)"
+ @echo " test-integration - Run integration tests only (3 tests)"
@echo ""
@echo " Quality Checks:"
@echo " check - Run all quality checks (spelling, links, inclusive language)"
@@ -19,11 +20,11 @@ help:
@echo " woke - Check inclusive language with woke"
@echo ""
@echo " Documentation Aggregation:"
- @echo " aggregate-local - Aggregate from local repos using relative paths (../gardenlinux ../builder ../python-gardenlinux-lib)"
- @echo " test-aggregate-local - Test aggregation with local repos (recommended first)"
- @echo " aggregate - Fetch and aggregate docs from all source repos"
- @echo " aggregate-dry - Test aggregation without modifying docs/"
- @echo " aggregate-repo - Aggregate specific repo (usage: make aggregate-repo REPO=gardenlinux)"
+ @echo " aggregate-local - Aggregate from local repos (file:// URLs in repos-config.local.json)"
+ @echo " aggregate - Aggregate from locked commits (repos-config.json)"
+ @echo " aggregate-update - Fetch latest from remotes and update commit locks"
+ @echo " aggregate-repo - Aggregate single repo (usage: make aggregate-repo REPO=gardenlinux)"
+ @echo " aggregate-update-repo - Update single repo to latest (usage: make aggregate-update-repo REPO=gardenlinux)"
@echo ""
@echo " Utilities:"
@echo " clean - Clean aggregated docs and build artifacts"
@@ -33,7 +34,7 @@ install:
@echo "Installing dependencies..."
pnpm install
-run: install
+dev: install
pnpm run docs:dev
build: install clean aggregate
@@ -43,13 +44,16 @@ preview: install
pnpm run docs:preview
# Testing
-test: install
- @echo "Running full test suite..."
- @cd scripts/tests && ./run_all.sh
+test: test-unit test-integration
+ @echo "All tests passed!"
-test-unit: install
+test-unit:
@echo "Running unit tests..."
- @cd scripts/tests && python3 run_tests.py
+ python3 -m pytest tests/unit/ -v
+
+test-integration:
+ @echo "Running integration tests..."
+ python3 -m pytest tests/integration/ -v
# Quality Checks
check: spelling linkcheck woke
@@ -68,21 +72,17 @@ woke: install
@pnpm run docs:woke
# Documentation Aggregation
-test-aggregate-local: install
- @echo "Testing aggregation with local repositories..."
- ./scripts/test-local.sh --dry-run
-
aggregate-local: install
@echo "Aggregating from local repositories (relative paths)..."
- CONFIG_FILE=scripts/repos-config.local.json ./scripts/aggregate-docs.sh
+ python3 src/aggregate.py --config repos-config.local.json
aggregate: install
- @echo "Aggregating documentation from source repositories..."
- ./scripts/aggregate-docs.sh
+ @echo "Aggregating documentation from locked source repositories..."
+ python3 src/aggregate.py
-aggregate-dry: install
- @echo "Dry run: Testing aggregation without modifying docs directory..."
- ./scripts/aggregate-docs.sh --dry-run
+aggregate-update: install
+ @echo "Aggregating documentation from latest source repositories..."
+ python3 src/aggregate.py --update-locks
aggregate-repo: install
@if [ -z "$(REPO)" ]; then \
@@ -90,8 +90,17 @@ aggregate-repo: install
echo "Usage: make aggregate-repo REPO=gardenlinux"; \
exit 1; \
fi
- @echo "Aggregating documentation for repository: $(REPO)"
- ./scripts/aggregate-docs.sh --repo $(REPO)
+ @echo "Aggregating documentation for locked repository: $(REPO)"
+ python3 src/aggregate.py --repo $(REPO)
+
+aggregate-update-repo: install
+ @if [ -z "$(REPO)" ]; then \
+ echo "Error: REPO variable not set"; \
+ echo "Usage: make aggregate-update-repo REPO=gardenlinux"; \
+ exit 1; \
+ fi
+ @echo "Updating commit lock and aggregating documentation for repository: $(REPO)"
+ python3 src/aggregate.py --update-locks --repo $(REPO)
# Utilities
clean:
@@ -101,7 +110,7 @@ clean:
rm -rf docs/projects
@# Clean aggregated (untracked) content from section directories, preserving git-tracked files
@if [ -d .git ]; then \
- git clean -fd docs/contributing/ docs/explanation/ docs/how-to/ docs/reference/ docs/tutorials/ 2>/dev/null || true; \
+ git clean -fdX docs/contributing/ docs/explanation/ docs/how-to/ docs/reference/ docs/tutorials/ 2>/dev/null || true; \
else \
rm -rf docs/contributing docs/explanation docs/how-to docs/reference docs/tutorials; \
fi
diff --git a/README.md b/README.md
index 6c41264..a183681 100644
--- a/README.md
+++ b/README.md
@@ -1,355 +1,185 @@
-# docs-ng
+# Garden Linux Documentation Hub
-Build the Garden Linux documentation with aggregated content from multiple repositories.
-
-## Overview
-
-This project provides a unified documentation hub for Garden Linux that aggregates content from multiple source repositories (gardenlinux, builder, python-gardenlinux-lib) and presents it in a cohesive VitePress site.
-
-### Documentation Structure
-
-The system uses a **dual-path approach** for documentation:
-
-1. **Targeted Documentation** — Files with `github_target_path` frontmatter are copied to specific locations in the main docs tree (e.g., `docs/tutorials/`, `docs/how-to/`)
-2. **Project Mirror** — All repository documentation is also mirrored under `docs/projects//` for legacy access and comprehensive coverage
+Build unified documentation from multiple Garden Linux repositories.
## Quick Start
```bash
-# Run development server
-make run
-
# Aggregate documentation from repos
make aggregate
-# Run tests
-make test
-```
-
-## Architecture Overview
-
-```
-┌─────────────────────────────────────────────────────────────┐
-│ Source Repositories │
-│ (gardenlinux, builder, python-gardenlinux-lib) │
-└─────────────────────┬───────────────────────────────────────┘
- │
- │ 1. Fetch (sparse checkout)
- │ scripts/fetch-repo-docs.sh
- ▼
- ┌───────────────┐
- │ Temp Storage │
- └───────┬───────┘
- │
- │ 2. Transform
- │ scripts/transform_content.py
- │ • Targeted doc placement (github_target_path)
- │ • Directory restructuring
- │ • Link rewriting
- │ • Frontmatter YAML fixing
- ▼
- ┌───────────────────────────────────┐
- │ docs/ directory │
- │ ├── tutorials/ │
- │ ├── how-to/ │
- │ ├── explanation/ │
- │ ├── reference/ │
- │ ├── contributing/ │
- │ └── projects/ │
- │ ├── gardenlinux/ │
- │ ├── builder/ │
- │ └── python-gardenlinux-lib/ │
- └───────────────────────────────────┘
- │
- │ VitePress builds the site
- │ using vitepress-sidebar
- ▼
- ┌───────────────────────────┐
- │ VitePress Site │
- │ (Development/Production)│
- └───────────────────────────┘
+# Run development server
+make dev
```
-## Documentation Aggregation
-
-The documentation aggregation system pulls content from multiple Git repositories and transforms it for VitePress. The process consists of two main steps orchestrated by `scripts/aggregate-docs.sh`:
-
-### Step 1: Fetch Documentation
-
-**Script:** `scripts/fetch-repo-docs.sh`
-
-Uses sparse Git checkout to efficiently fetch only the documentation directories from source repositories. This minimizes clone size and speeds up the process.
-
-**Configuration:** `scripts/repos-config.json`
-
-Each repository is defined with:
-
-- `name` — Repository identifier
-- `url` — Git repository URL
-- `branch` — Branch to fetch from
-- `docs_path` — Path to documentation within the repo (e.g., `docs`)
-- `target_path` — Where to place docs in the aggregated site (e.g., `projects/gardenlinux`)
-- `github_org` / `github_repo` — Used for "Edit on GitHub" links
-- `structure` — How to transform the directory structure (see below)
-
-**Structure Types:**
-
-- `flat` — Copy files as-is without transformation
-- `sphinx` — Copy Sphinx documentation structure (RST files)
-- `{ "dir1": "newdir1", "dir2": "newdir2" }` — Map source directories to target directories (e.g., `{ "tutorials": "tutorials", "how-to": "how-to" }`)
-
-### Step 2: Transform Content
-
-**Script:** `scripts/transform_content.py`
+## Overview
-Performs multiple transformations on the fetched documentation:
+This project aggregates content from multiple source repositories (gardenlinux, builder, python-gardenlinux-lib) into a cohesive VitePress site.
-#### 2a. Targeted Documentation Placement
+### Documentation Paths
-Files with `github_target_path` frontmatter are copied to their specified locations in the main docs tree. This allows documentation from source repos to be integrated directly into the primary documentation structure.
+1. **Targeted Documentation** — Files with `github_target_path` frontmatter → `docs/tutorials/`, `docs/how-to/`, etc.
+2. **Project Mirror** — All repo docs mirrored under `docs/projects/<repo-name>/`
-Example frontmatter:
+## Architecture
-```yaml
----
-title: "Tutorials"
-github_target_path: "docs/tutorials/index.md"
----
```
-
-This file would be copied to `docs/tutorials/index.md` in addition to being mirrored in `docs/projects/`.
-
-#### 2b. Directory Structure Transformation
-
-Transforms the fetched documentation according to the `structure` configuration:
-
-- Renames numbered directories (e.g., `00_introduction` → `introduction`)
-- Applies custom directory mappings
-- Handles special files and media directories
-
-#### 2c. Link Rewriting
-
-Converts repository-relative links to work in the aggregated site:
-
-- `[link](../01_developers/build.md)` → `[link](/projects/gardenlinux/developers/build)`
-- `[link](./intro.md)` → `[link](/projects/gardenlinux/introduction/intro)`
-- Links to files outside `docs/` → GitHub URLs
-- Handles `.media/` directories correctly
-
-#### 2d. Frontmatter YAML Fixing
-
-- Fixes YAML formatting in existing frontmatter
-- Quotes YAML values with special characters (e.g., titles containing `:` or `#`)
-- Ensures proper frontmatter structure
-
-#### 2e. Content Sanitization
-
-- Escapes angle brackets that aren't HTML tags (e.g., ``)
-- Preserves code blocks and inline code
-- Handles README.md → index.md conversion
-
-## Sidebar Menu Construction
-
-**File:** `docs/.vitepress/sidebar.ts`
-
-The documentation sidebar uses the `vitepress-sidebar` library with automatic generation:
-
-```typescript
-generateSidebar({
- documentRootPath: "docs",
- scanStartPath: "",
- resolvePath: "/",
- collapsed: true,
- useTitleFromFileHeading: true,
- useTitleFromFrontmatter: true,
- useFolderLinkFromIndexFile: true,
- useFolderTitleFromIndexFile: true,
- excludePattern: ["projects"],
- sortMenusByFrontmatterOrder: true,
- frontmatterOrderDefaultValue: 999,
-});
+Source Repos → Fetch (git/local) → Transform → docs/ → VitePress
```
-**Key features:**
-
-- Automatically scans the `docs/` directory
-- Excludes `docs/projects/` (legacy content, will be removed)
-- Uses frontmatter `order` field for sorting (lower numbers appear first)
-- Falls back to file/folder names for titles if not in frontmatter
-- Respects `index.md` files for folder titles and links
-
-## Frontmatter Fields
-
-Frontmatter fields control how pages are displayed, organized, and linked. Here's a comprehensive reference:
-
-### Core Fields
-
-| Field | Purpose | Used By | Example |
-| ------------- | --------------------- | ------------------------------- | ---------------------------------- |
-| `title` | Page title | VitePress, sidebar, browser tab | `title: "Getting Started"` |
-| `description` | Page meta description | SEO, social sharing | `description: "Quick start guide"` |
-| `order` | Sidebar sort order | `vitepress-sidebar` | `order: 10` |
-
-### GitHub Integration
-
-| Field | Purpose | Used By | Example |
-| -------------------- | -------------------------- | -------------------- | --------------------------------------------- |
-| `github_org` | GitHub organization | Edit link generation | `github_org: gardenlinux` |
-| `github_repo` | Repository name | Edit link generation | `github_repo: gardenlinux` |
-| `github_branch` | Branch name | Edit link generation | `github_branch: main` |
-| `github_source_path` | Original file path in repo | Edit link generation | `github_source_path: docs/tutorials/index.md` |
+**Aggregation Pipeline:**
-### Aggregation & Targeting
+1. **Fetch** — `src/aggregation/fetcher.py` pulls docs via git sparse checkout or local copy
+2. **Transform** — `src/aggregation/transformer.py` rewrites links, fixes frontmatter
+3. **Structure** — `src/aggregation/structure.py` reorganizes directories and copies media
+4. **Output** — VitePress builds the site
-| Field | Purpose | Used By | Example |
-| -------------------- | --------------------------------- | ------------------ | ----------------------------------------------- |
-| `github_target_path` | Target location in main docs tree | Aggregation system | `github_target_path: "docs/tutorials/index.md"` |
+**Key Mechanisms:**
-**Note:** Files with `github_target_path` or `target` are copied to the specified path in addition to being mirrored under `docs/projects/`. This enables documentation from source repos to appear in the primary documentation structure.
+- **Targeted Documentation**: Files with `github_target_path` frontmatter are copied directly to specified paths (e.g., `docs/tutorials/cloud/first-boot-aws.md`). This is the primary mechanism for aggregating content from source repos into the unified documentation structure.
+
+- **Media Directories**: Directories listed in `media_directories` (e.g., `_static`, `assets`) are automatically discovered and copied. For nested media dirs (like `tutorials/assets`), they're copied to the same relative path. For root-level media dirs (like `_static`), they're copied to the common ancestor of all targeted files.
-### Migration Tracking
+- **Commit Locking**: The `commit` field in `repos-config.json` locks to a specific commit for reproducibility. Use `make aggregate-update` to fetch the latest commits and update the locks automatically.
-| Field | Purpose | Used By | Example |
-| ----------------------- | ------------------------ | ------------------ | --------------------------------------- |
-| `migration_status` | Migration state | Documentation team | `migration_status: "new"` |
-| `migration_source` | Original source location | Documentation team | `migration_source: "old-docs/guide.md"` |
-| `migration_issue` | Related GitHub issue | Documentation team | `migration_issue: "#123"` |
-| `migration_stakeholder` | Responsible person/team | Documentation team | `migration_stakeholder: "@username"` |
-| `migration_approved` | Approval status | Documentation team | `migration_approved: false` |
+- **Project Mirror**: In addition to targeted docs, the entire `docs/` directory from each repo is mirrored under `docs/projects/<repo-name>/` for reference.
-These fields help track the documentation reorganization effort and are not used by VitePress itself.
+## Configuration
-### How Frontmatter Fields Are Used
+### repos-config.json
-#### Sidebar Ordering
+Located at project root. Defines repositories to aggregate:
-The `order` field controls the position of pages in the sidebar:
-
-- Lower numbers appear first (e.g., `order: 10` before `order: 20`)
-- Default order is `999` (via `frontmatterOrderDefaultValue`)
-- Works with `vitepress-sidebar`'s `sortMenusByFrontmatterOrder: true`
-
-#### "Edit on GitHub" Links
-
-The VitePress config uses GitHub metadata to generate edit links:
-
-```typescript
-editLink: {
- pattern: ({ filePath, frontmatter }) => {
- // If page has GitHub metadata from aggregated content
- if (
- frontmatter.github_org &&
- frontmatter.github_repo &&
- frontmatter.github_source_path
- ) {
- const branch = frontmatter.github_branch || "main";
- return `https://github.com/${frontmatter.github_org}/${frontmatter.github_repo}/edit/${branch}/${frontmatter.github_source_path}`;
+```json
+{
+ "repos": [
+ {
+ "name": "gardenlinux",
+ "url": "https://github.com/gardenlinux/gardenlinux",
+ "docs_path": "docs",
+ "target_path": "projects/gardenlinux",
+ "ref": "docs-ng",
+ "commit": "abc123...",
+ "root_files": ["CONTRIBUTING.md", "SECURITY.md"],
+ "structure": {
+ "tutorials": "tutorials",
+ "how-to": "how-to",
+ "explanation": "explanation",
+ "reference": "reference"
+ },
+ "media_directories": [".media", "assets", "_static"]
+ },
+ {
+ "name": "python-gardenlinux-lib",
+ "url": "https://github.com/gardenlinux/python-gardenlinux-lib",
+ "docs_path": "docs",
+ "target_path": "projects/python-gardenlinux-lib",
+ "ref": "docs-ng",
+ "commit": "def456...",
+ "structure": "sphinx",
+ "media_directories": ["_static"]
}
- // Fallback for native docs-ng pages
- return `https://github.com/gardenlinux/docs-ng/edit/main/docs/${filePath}`;
- };
+ ]
}
```
-This ensures that users editing aggregated documentation are directed to the correct source repository.
+**Key fields:**
-#### Targeted Documentation Placement
+- `ref` — branch/tag to fetch
+- `commit` — (optional) commit lock for reproducibility; use `--update-locks` to update
+- `root_files` — (optional) root-level files to copy (e.g., CONTRIBUTING.md)
+- `structure` — directory mapping, `"flat"` for as-is copy, or `"sphinx"` for Sphinx docs
+- `media_directories` — (optional) list of media directories to copy (relative paths searched recursively)
-When a file includes `github_target_path` or `target`, the aggregation system copies it to that specific location:
+For local testing, use `repos-config.local.json` with `file://` URLs.
-```yaml
----
-title: "Tutorials"
-github_target_path: "docs/tutorials/index.md"
----
-```
+## Commands
-This file will be placed at `docs/tutorials/index.md` (in addition to `docs/projects//tutorials/index.md`).
+### Development
-## Testing
+```bash
+make dev # Start dev server
+make build # Production build
+make preview # Preview production build
+```
-Run the test suite to verify scripts work correctly:
+### Testing
```bash
-make test # Run all tests
-make test-unit # Run unit tests only
-make test-integration # Run integration tests only
+make test # Run all tests
+make test-unit # Unit tests
+make test-integration # Integration tests
```
-See `scripts/tests/README.md` for more details.
+### Aggregation
-## Available Commands
+```bash
+make aggregate-local # From local repos (file://)
+make aggregate # From remote repos (locked commits)
+make aggregate-repo REPO=... # Single repository (locked commit)
+make aggregate-update # Fetch latest + update commit locks
+make aggregate-update-repo REPO=... # Single repository: fetch latest + update its commit lock
+```
-Run `make help` for all available commands:
+### Utilities
```bash
-# Development
-make run # Run docs development server
-make build # Build documentation for production
-make preview # Preview production build locally
-
-# Testing
-make test # Run full test suite
-make test-unit # Run unit tests only
-
-# Documentation Aggregation
-make aggregate # Fetch and aggregate docs from all source repos
-make aggregate-dry # Test aggregation without modifying docs/
-make aggregate-repo REPO= # Aggregate specific repo only
-make test-aggregate-local # Test with local repos (for development)
-
-# Utilities
-make clean # Clean aggregated docs and build artifacts
-make clean-projects # Remove only aggregated project docs
-make clean-aggregated-git # Remove uncommitted aggregated docs
+make clean # Clean build artifacts and aggregated docs
+```
+
+## Project Structure
+
+```
+docs-ng/
+├── repos-config.json # Repository configuration
+├── repos-config.local.json # Local development config
+├── src/ # Source code
+│ ├── aggregate.py # CLI entry point
+│ ├── aggregation/ # Core package
+│ │ ├── models.py # Data classes
+│ │ ├── config.py # Config I/O
+│ │ ├── fetcher.py # Repository fetching
+│ │ ├── transformer.py # Content transformation
+│ │ └── structure.py # Directory operations
+│ └── README.md # Technical documentation
+├── tests/ # Test suite
+│ ├── unit/ # Unit tests (pure functions)
+│ ├── integration/ # Integration tests (filesystem)
+│ └── README.md # Test documentation
+└── docs/ # Generated documentation
+ ├── projects/ # Mirrored repository docs
+ ├── tutorials/ # Aggregated tutorials (via github_target_path)
+ ├── how-to/ # Aggregated guides (via github_target_path)
+ ├── explanation/ # Aggregated explanations (via github_target_path)
+ ├── reference/ # Aggregated reference docs (via github_target_path)
+ │ └── supporting_tools/ # Builder, python-gardenlinux-lib docs
+ └── contributing/ # Aggregated contributing docs (via github_target_path)
```
-## Repository Configuration
+## Adding Repositories
-The `scripts/repos-config.json` file defines which repositories to aggregate and how to transform them:
+1. Add to `repos-config.json`:
```json
{
- "repos": [
- {
- "name": "gardenlinux",
- "url": "https://github.com/gardenlinux/gardenlinux",
- "github_org": "gardenlinux",
- "github_repo": "gardenlinux",
- "docs_path": "docs",
- "target_path": "projects/gardenlinux",
- "branch": "docs-ng",
- "structure": {
- "tutorials": "tutorials",
- "how-to": "how-to",
- "explanation": "explanation",
- "reference": "reference",
- "contributing": "contributing"
- },
- "special_files": {},
- "media_directories": [".media"]
- }
- ]
+ "name": "new-repo",
+ "url": "https://github.com/gardenlinux/new-repo",
+ "docs_path": "docs",
+ "target_path": "projects/new-repo",
+ "ref": "main",
+ "structure": "flat"
}
```
-### Configuration Fields
-
-- `name` — Repository identifier used in paths
-- `url` — Git repository URL for cloning
-- `github_org` / `github_repo` — Used for edit links
-- `docs_path` — Path to documentation within the repo
-- `target_path` — Destination in aggregated site (e.g., `projects/gardenlinux`)
-- `branch` — Git branch to fetch
-- `structure` — How to transform directory structure:
- - Object: Directory mapping (e.g., `{"old-name": "new-name"}`)
- - `"flat"`: Copy as-is
- - `"sphinx"`: Sphinx documentation structure
-- `special_files` — Files to place in specific locations
-- `media_directories` — Directories to copy (e.g., `.media`)
-
-## Additional Resources
-
-- **VitePress Documentation**: https://vitepress.dev/
-- **vitepress-sidebar Documentation**: https://vitepress-sidebar.cdget.com/
-- **Garden Linux Main Repository**: https://github.com/gardenlinux/gardenlinux
+2. Test: `make aggregate-repo REPO=new-repo`
+3. Preview: `make dev`
+
+## Documentation
+
+- **User Guide**: This README
+- **Technical Docs**: `src/README.md`
+- **Test Docs**: `tests/README.md`
+
+## Contributing
+
+See `CONTRIBUTING.md` for development guidelines.
diff --git a/docs/reference/glossary.md b/docs/reference/glossary.md
index e540e29..a1a48c3 100644
--- a/docs/reference/glossary.md
+++ b/docs/reference/glossary.md
@@ -23,11 +23,11 @@ The processor architecture for which a Garden Linux image is built. Supported ar
### AWS
-Amazon Web Services. One of the major cloud platforms supported by Garden Linux. Garden Linux provides AWS-specific images through the [`aws`](https://github.com/gardenlinux/gardenlinux/blob/main/features/aws/README.md) platform feature with cloud-init integration and AWS-specific kernel modules. See [AWS platform guide](../how-to/platform-specific/aws.md) and [AWS first boot tutorial](../tutorials/first-boot-aws.md) for usage details.
+Amazon Web Services. One of the major cloud platforms supported by Garden Linux. Garden Linux provides AWS-specific images through the [`aws`](https://github.com/gardenlinux/gardenlinux/blob/main/features/aws/README.md) platform feature with cloud-init integration and AWS-specific kernel modules. See [AWS platform guide](../how-to/platform-specific/aws.md) and [AWS first boot tutorial](../tutorials/cloud/first-boot-aws.md) for usage details.
### Azure
-Microsoft Azure. A major cloud platform supported by Garden Linux through the [`azure`](https://github.com/gardenlinux/gardenlinux/blob/main/features/azure/README.md) platform feature with platform-specific image configurations and optimizations. See [Azure platform guide](../how-to/platform-specific/azure.md) and [Azure first boot tutorial](../tutorials/first-boot-azure.md) for usage details.
+Microsoft Azure. A major cloud platform supported by Garden Linux through the [`azure`](https://github.com/gardenlinux/gardenlinux/blob/main/features/azure/README.md) platform feature with platform-specific image configurations and optimizations. See [Azure platform guide](../how-to/platform-specific/azure.md) and [Azure first boot tutorial](../tutorials/cloud/first-boot-azure.md) for usage details.
---
@@ -35,7 +35,7 @@ Microsoft Azure. A major cloud platform supported by Garden Linux through the [`
### Bare Metal
-A platform target for Garden Linux images designed to run directly on physical hardware without a hypervisor through the [`baremetal`](https://github.com/gardenlinux/gardenlinux/blob/main/features/baremetal/README.md) platform feature. Also referred to as [`metal`](https://github.com/gardenlinux/gardenlinux/blob/main/features/metal/README.md) in build configurations. See [Bare Metal platform guide](../how-to/platform-specific/bare-metal.md) and [Bare Metal first boot tutorial](../tutorials/first-boot-bare-metal.md) for usage details.
+A platform target for Garden Linux images designed to run directly on physical hardware without a hypervisor through the [`baremetal`](https://github.com/gardenlinux/gardenlinux/blob/main/features/baremetal/README.md) platform feature. Also referred to as [`metal`](https://github.com/gardenlinux/gardenlinux/blob/main/features/metal/README.md) in build configurations. See [Bare Metal platform guide](../how-to/platform-specific/bare-metal.md) and [Bare Metal first boot tutorial](../tutorials/on-premises/first-boot-bare-metal.md) for usage details.
### Builder
@@ -127,7 +127,7 @@ A Debian GNU/Linux derivative designed to provide small, auditable Linux images
### GCP
-Google Cloud Platform. A major cloud platform supported by Garden Linux through the [`gcp`](https://github.com/gardenlinux/gardenlinux/blob/main/features/gcp/README.md) platform feature with platform-specific configurations. See [GCP platform guide](../how-to/platform-specific/gcp.md) and [GCP first boot tutorial](../tutorials/first-boot-gcp.md) for usage details.
+Google Cloud Platform. A major cloud platform supported by Garden Linux through the [`gcp`](https://github.com/gardenlinux/gardenlinux/blob/main/features/gcp/README.md) platform feature with platform-specific configurations. See [GCP platform guide](../how-to/platform-specific/gcp.md) and [GCP first boot tutorial](../tutorials/cloud/first-boot-gcp.md) for usage details.
### GitHub Actions
diff --git a/scripts/repos-config.json b/repos-config.json
similarity index 55%
rename from scripts/repos-config.json
rename to repos-config.json
index 1b5ef3c..617041c 100644
--- a/scripts/repos-config.json
+++ b/repos-config.json
@@ -3,15 +3,14 @@
{
"name": "gardenlinux",
"url": "https://github.com/gardenlinux/gardenlinux",
- "github_org": "gardenlinux",
- "github_repo": "gardenlinux",
"docs_path": "docs",
"root_files": [
"CONTRIBUTING.md",
"SECURITY.md"
],
"target_path": "projects/gardenlinux",
- "branch": "docs-ng",
+ "ref": "docs-ng",
+ "commit": "c2cb572a8773779031dd5aaac75442caf9ee9f32",
"structure": {
"tutorials": "tutorials",
"how-to": "how-to",
@@ -22,25 +25,36 @@
"special_files": {},
"media_directories": [
- ".media"
+ ".media",
+ "assets",
+ "_static"
]
},
{
"name": "builder",
"url": "https://github.com/gardenlinux/builder",
- "github_org": "gardenlinux",
- "github_repo": "builder",
"docs_path": "docs",
- "branch": "docs-ng",
- "structure": "flat"
+ "target_path": "projects/builder",
+ "ref": "docs-ng",
+ "commit": "b10476ad8c46130f310e36daa42c6e2c14fb51a9",
+ "media_directories": [
+ ".media",
+ "assets",
+ "_static"
+ ]
},
{
"name": "python-gardenlinux-lib",
"url": "https://github.com/gardenlinux/python-gardenlinux-lib",
- "github_org": "gardenlinux",
- "github_repo": "python-gardenlinux-lib",
"docs_path": "docs",
- "branch": "docs-ng",
- "structure": "sphinx"
+ "target_path": "projects/python-gardenlinux-lib",
+ "ref": "docs-ng",
+ "commit": "9142fccc3d83ab51759db7d328fa19166bc1df63",
+ "structure": "sphinx",
+ "media_directories": [
+ ".media",
+ "assets",
+ "_static"
+ ]
}
]
}
diff --git a/scripts/repos-config.local.json b/repos-config.local.json
similarity index 77%
rename from scripts/repos-config.local.json
rename to repos-config.local.json
index 6768ac1..f312178 100644
--- a/scripts/repos-config.local.json
+++ b/repos-config.local.json
@@ -6,7 +6,6 @@
"docs_path": "docs",
"root_files": ["CONTRIBUTING.md", "SECURITY.md"],
"target_path": "projects/gardenlinux",
- "branch": "docs-ng",
"structure": {
"tutorials": "tutorials",
"how-to": "how-to",
@@ -15,23 +14,23 @@
"contributing": "contributing"
},
"special_files": {},
- "media_directories": [".media"]
+ "media_directories": [".media", "assets", "_static"]
},
{
"name": "builder",
"url": "file://../builder",
"docs_path": "docs",
"target_path": "projects/builder",
- "branch": "main",
- "structure": "flat"
+ "structure": "flat",
+ "media_directories": [".media", "assets", "_static"]
},
{
"name": "python-gardenlinux-lib",
"url": "file://../python-gardenlinux-lib",
"docs_path": "docs",
"target_path": "projects/python-gardenlinux-lib",
- "branch": "main",
- "structure": "sphinx"
+ "structure": "sphinx",
+ "media_directories": [".media", "assets", "_static"]
}
]
}
diff --git a/requirements.txt b/requirements.txt
index 7c541de..a6cec20 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1,2 @@
codespell==2.4.2
+pytest
diff --git a/scripts/README.md b/scripts/README.md
deleted file mode 100644
index 3f04a56..0000000
--- a/scripts/README.md
+++ /dev/null
@@ -1,295 +0,0 @@
-# Documentation Aggregation Scripts
-
-This directory contains scripts for aggregating documentation from multiple
-Garden Linux repositories into this centralized documentation hub.
-
-## Overview
-
-The aggregation system fetches documentation from source repositories,
-transforms them to work with VitePress, and integrates them into the docs hub.
-
-## Architecture
-
-```
-Source Repos docs-ng (this repo)
-┌─────────────┐ ┌──────────────────┐
-│ gardenlinux │ │ │
-│ /docs/ │───┐ │ docs/projects/ │
-└─────────────┘ │ │ ├─ gardenlinux/ │
- │ │ ├─ builder/ │
-┌─────────────┐ │ Fetch & │ └─ python-lib/ │
-│ builder │ ├─ Transform │ │
-│ /docs/ │───┤ ─────────>│ VitePress Site │
-└─────────────┘ │ │ │
- │ └──────────────────┘
-┌─────────────┐ │
-│ python-lib │ │
-│ /docs/ │───┘
-└─────────────┘
-```
-
-## Configuration
-
-### repos-config.json
-
-Repository configuration with the following parameters:
-
-- `name`: Repository name
-- `url`: Git repository URL
-- `docs_path`: Path to docs within the repository
-- `target_path`: Where to place docs in this project (relative to docs/)
-- `branch`: Git branch to fetch from
-- `structure`: Directory structure mapping or copy mode
- - Object with mappings like `{"00_introduction": "introduction"}` for
- structured repos
- - `"flat"` to copy all files as-is
- - `"sphinx"` for Sphinx documentation
-- `special_files` (optional): Map of files/directories to move to specific
- locations
- - Example: `{"boot_modes.md": "introduction", "architecture": "introduction"}`
- - Files are moved during transformation, useful for organizing root-level
- content
-- `media_directories` (optional): List of directories to copy (including hidden
- directories)
- - Example: `[".media"]`
- - These directories are copied as-is to preserve media assets
-
-### Example Configuration
-
-```json
-{
- "name": "gardenlinux",
- "url": "https://github.com/gardenlinux/gardenlinux",
- "docs_path": "docs",
- "target_path": "projects/gardenlinux",
- "branch": "main",
- "structure": {
- "00_introduction": "introduction",
- "01_developers": "developers",
- "02_operators": "operators"
- },
- "special_files": {
- "boot_modes.md": "introduction",
- "architecture": "introduction"
- },
- "media_directories": [".media"]
-}
-```
-
-## Scripts
-
-### `repos-config.json`
-
-Configuration file defining which repositories to aggregate from.
-
-It also maps the apparent structure of the docs file into their own sections.
-This is an example for the docs located in the main Gardenlinux repository.
-
-**Structure:**
-
-```json
-{
- "repos": [
- {
- "name": "gardenlinux",
- "url": "https://github.com/gardenlinux/gardenlinux.git",
- "docs_path": "docs",
- "target_path": "projects/gardenlinux",
- "branch": "main",
- "structure": {
- "00_introduction": "introduction",
- "01_developers": "developers",
- "02_operators": "operators"
- }
- }
- ]
-}
-```
-
-### `fetch-repo-docs.sh`
-
-Fetches documentation from a repository using git sparse checkout.
-
-**Usage:**
-
-```bash
-./fetch-repo-docs.sh
-```
-
-**Example:**
-
-```bash
-./fetch-repo-docs.sh https://github.com/gardenlinux/gardenlinux.git main docs /tmp/gl-docs
-```
-
-### `transform_content.py`
-
-Transforms documentation content to work with VitePress:
-
-- Renames numbered directories (e.g., `00_introduction` → `introduction`)
-- Rewrites internal links to work with new structure
-- Adds/fixes frontmatter
-- Handles different documentation structures
-
-**Usage:**
-
-```bash
-./transform_content.py --config repos-config.json \
- --docs-dir ../docs \
- --temp-dir /tmp/fetched-docs
-```
-
-### `aggregate-docs.sh`
-
-Main orchestration script that runs the entire aggregation pipeline.
-
-**Usage:**
-
-```bash
-# Aggregate all repositories
-./aggregate-docs.sh
-
-# Dry run (don't modify docs directory)
-./aggregate-docs.sh --dry-run
-
-# Aggregate specific repository
-./aggregate-docs.sh --repo gardenlinux
-```
-
-## Makefile Targets
-
-For convenience, use these Makefile targets:
-
-```bash
-# Test aggregation without modifying docs/
-make aggregate-dry
-
-# Aggregate all repositories
-make aggregate
-
-# Aggregate specific repository
-make aggregate-repo REPO=gardenlinux
-
-# Clean aggregated docs
-make clean-projects
-```
-
-## Workflow
-
-1. **Fetch**: Use sparse checkout to clone only the `docs/` directory from
- source repos
-2. **Transform**:
- - Restructure directories according to `structure` mapping
- - Rewrite internal links to work with new paths
- - Add frontmatter to markdown files
-3. **Build**: VitePress builds the unified documentation site
-
-## Testing Locally
-
-```bash
-# 1. Run dry-run to test without modifying docs/
-make aggregate-dry
-
-# 2. If successful, run actual aggregation
-make aggregate
-
-# 3. Preview the documentation
-make run
-
-# 4. Visit http://localhost:5173 to see aggregated docs
-```
-
-## CI/CD Integration
-
-The aggregation runs automatically via GitHub Actions:
-
-- **Schedule**: Daily at 2 AM UTC
-- **Manual**: Via workflow dispatch in GitHub UI
-- **Webhook**: Can be triggered by source repositories
-
-See `.github/workflows/aggregate-docs.yml` for details.
-
-## Adding New Repositories
-
-1. Add repository configuration to `repos-config.json`:
-
-```json
-{
- "name": "new-repo",
- "url": "https://github.com/gardenlinux/new-repo.git",
- "docs_path": "docs",
- "target_path": "projects/new-repo",
- "branch": "main",
- "structure": "flat"
-}
-```
-
-> [!IMPORTANT]
-> When the `docs/` directory contains subdirectories, mirror this structure in
-> the `repos-config.json`.
-
-```json
-{
- "name": "new-repo",
- "url": "https://github.com/gardenlinux/new-repo.git",
- "docs_path": "docs",
- "target_path": "projects/new-repo",
- "branch": "main",
- "structure": {
- "00_introduction": "introduction",
- "01_developers": "developers",
- "02_operators": "operators"
- }
-}
-```
-
-2. Test aggregation:
-
-```bash
-make aggregate-repo REPO=new-repo
-```
-
-3. Verify in development server:
-
-```bash
-make run
-```
-
-## Testsuite
-
-Run the test suite before making changes:
-
-```bash
-cd scripts/tests
-./run_all.sh
-```
-
-See `tests/README.md` for more details.
-
-## Troubleshooting
-
-### Links are broken after aggregation
-
-- Check the `rewrite_links()` function in `transform-content.py`
-- Verify link patterns in source documentation
-- Test with: `make aggregate-dry`
-
-### Directory structure not matching
-
-- Review `structure` configuration in `repos-config.json`
-- Check `transform_directory_structure()` in `transform-content.py`
-
-### Fetch failing
-
-- Verify repository URL and branch in `repos-config.json`
-- Check network connectivity
-- Ensure sparse checkout is working: `git config core.sparseCheckout true`
-
-## Notes
-
-- **Local repos**: If testing with local repos, you can use `file://` URLs in
- config
-- **Authentication**: For private repos, ensure GitHub token has appropriate
- permissions
-- **Large docs**: To keep fetching sizes small, the sparse checkout only fetches
- `docs/`. Any additional documentation should be comitted here directly.
diff --git a/scripts/aggregate-docs.sh b/scripts/aggregate-docs.sh
deleted file mode 100755
index 932351a..0000000
--- a/scripts/aggregate-docs.sh
+++ /dev/null
@@ -1,157 +0,0 @@
-#!/usr/bin/env bash
-# Main script to aggregate documentation from multiple repositories
-# This orchestrates the entire process: fetch -> transform -> update config
-
-set -e
-
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
-DOCS_DIR="$PROJECT_ROOT/docs"
-TEMP_DIR=$(mktemp -d)
-CONFIG_FILE="${CONFIG_FILE:-$SCRIPT_DIR/repos-config.json}"
-
-echo "=============================================================="
-echo " Garden Linux Documentation Aggregation"
-echo "=============================================================="
-echo ""
-
-cleanup() {
- echo "Cleaning up temporary files..."
- rm -rf "$TEMP_DIR"
-}
-
-trap cleanup EXIT
-trap cleanup SIGINT
-trap cleanup SIGTERM
-
-DRY_RUN=false
-REPO_FILTER=""
-
-while [[ $# -gt 0 ]]; do
- case $1 in
- --dry-run)
- DRY_RUN=true
- shift
- ;;
- --repo)
- REPO_FILTER="$2"
- shift 2
- ;;
- --help)
- echo "Usage: $0 [OPTIONS]"
- echo ""
- echo "Options:"
- echo " --dry-run Fetch and transform but don't update docs directory"
- echo " --repo Only process specific repository"
- echo " --help Show this help message"
- exit 0
- ;;
- *)
- echo "Unknown option: $1"
- exit 1
- ;;
- esac
-done
-
-echo "Configuration:"
-echo " Config file: $CONFIG_FILE"
-echo " Docs directory: $DOCS_DIR"
-echo " Temp directory: $TEMP_DIR"
-echo " Dry run: $DRY_RUN"
-if [ -n "$REPO_FILTER" ]; then
- echo " Repository filter: $REPO_FILTER"
-fi
-echo ""
-
-if [ ! -f "$CONFIG_FILE" ]; then
- echo "Error: Config file not found: $CONFIG_FILE"
- exit 1
-fi
-
-# Check if required scripts exist
-FETCH_SCRIPT="$SCRIPT_DIR/fetch-repo-docs.sh"
-TRANSFORM_SCRIPT="$SCRIPT_DIR/transform_content.py"
-
-for script in "$FETCH_SCRIPT" "$TRANSFORM_SCRIPT"; do
- if [ ! -f "$script" ]; then
- echo "Error: Required script not found: $script"
- exit 1
- fi
-done
-
-echo "Step 1: Fetching documentation from repositories"
-echo "-------------------------------------------------------------"
-
-repos=$(python3 -c "
-import json
-with open('$CONFIG_FILE') as f:
- config = json.load(f)
- for repo in config['repos']:
- if '$REPO_FILTER' and repo['name'] != '$REPO_FILTER':
- continue
- root_files = repo.get('root_files', [])
- root_files_str = ' '.join(root_files) if root_files else ''
- print(f\"{repo['name']}|{repo['url']}|{repo['branch']}|{repo['docs_path']}|{root_files_str}\")
-")
-
-if [ -z "$repos" ]; then
- echo "Error: No repositories to process"
- exit 1
-fi
-
-while IFS='|' read -r name url branch docs_path root_files; do
- echo ""
- echo "Repository: $name"
-
- repo_temp_dir="$TEMP_DIR/$name"
- mkdir -p "$repo_temp_dir"
-
- # Fetch docs using sparse checkout
- # shellcheck disable=SC2086
- if ! "$FETCH_SCRIPT" "$url" "$branch" "$docs_path" "$repo_temp_dir" $root_files; then
- echo "Warning: Failed to fetch docs for $name"
- continue
- fi
-done <<<"$repos"
-
-echo ""
-echo "Fetch complete!"
-echo ""
-
-echo "Step 2: Transforming documentation content"
-echo "-------------------------------------------------------------"
-
-if [ "$DRY_RUN" = true ]; then
- echo "Dry run mode: Transforming to temporary location"
- TRANSFORM_TARGET="$TEMP_DIR/transformed-docs"
- mkdir -p "$TRANSFORM_TARGET"
-else
- TRANSFORM_TARGET="$DOCS_DIR"
-fi
-
-transform_args="--config $CONFIG_FILE --docs-dir $TRANSFORM_TARGET --temp-dir $TEMP_DIR"
-
-if [ -n "$REPO_FILTER" ]; then
- transform_args="$transform_args --repo $REPO_FILTER"
-fi
-
-# shellcheck disable=SC2086
-if ! python3 "$TRANSFORM_SCRIPT" $transform_args; then
- echo "Error: Transformation failed"
- exit 1
-fi
-
-echo ""
-echo "=============================================================="
-echo " Documentation aggregation complete!"
-echo "=============================================================="
-echo ""
-
-if [ "$DRY_RUN" = true ]; then
- echo "To apply changes, run without --dry-run flag"
-else
- echo "Next steps:"
- echo " 1. Review the changes in docs/projects/"
- echo " 2. Run 'make dev' or 'pnpm run docs:dev' to preview"
- echo " 3. Commit the changes if satisfied"
-fi
diff --git a/scripts/fetch-repo-docs.sh b/scripts/fetch-repo-docs.sh
deleted file mode 100755
index eb60e69..0000000
--- a/scripts/fetch-repo-docs.sh
+++ /dev/null
@@ -1,89 +0,0 @@
-#!/usr/bin/env bash
-# Script to fetch documentation from remote repositories using sparse checkout
-# Usage: ./fetch-repo-docs.sh [root_files...]
-
-set -e
-
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-REPO_URL="$1"
-BRANCH="${2:-main}"
-DOCS_PATH="${3:-docs}"
-OUTPUT_DIR="$4"
-shift 4
-ROOT_FILES=("$@")
-TEMP_DIR=$(mktemp -d)
-
-if [ -z "$REPO_URL" ] || [ -z "$OUTPUT_DIR" ]; then
- echo "Usage: $0 [root_files...]"
- echo "Example: $0 https://github.com/gardenlinux/gardenlinux.git main docs /tmp/output CONTRIBUTING.md SECURITY.md"
- exit 1
-fi
-
-# Convert relative file:// URLs to absolute paths
-if [[ "$REPO_URL" == file://../* ]]; then
- RELATIVE_PATH="${REPO_URL#file://}"
- ABSOLUTE_PATH="$(cd "$SCRIPT_DIR/.." && cd "$RELATIVE_PATH" && pwd)"
- REPO_URL="file://$ABSOLUTE_PATH"
-fi
-
-echo "Fetching docs from: $REPO_URL"
-echo " Branch: $BRANCH"
-echo " Docs path: $DOCS_PATH"
-if [ ${#ROOT_FILES[@]} -gt 0 ]; then
- echo " Root files: ${ROOT_FILES[*]}"
-fi
-echo " Output: $OUTPUT_DIR"
-
-# Initialize sparse checkout
-cd "$TEMP_DIR"
-git init
-git remote add origin "$REPO_URL"
-git config core.sparseCheckout true
-
-echo "$DOCS_PATH/*" >> .git/info/sparse-checkout
-
-# Add root files to sparse checkout if specified
-for root_file in "${ROOT_FILES[@]}"; do
- if [ -n "$root_file" ]; then
- echo "$root_file" >> .git/info/sparse-checkout
- fi
-done
-
-echo "Cloning (sparse checkout)..."
-git fetch --depth=1 origin "$BRANCH"
-git checkout "$BRANCH"
-
-if [ -d "$DOCS_PATH" ]; then
- echo "Copying docs to $OUTPUT_DIR"
- mkdir -p "$OUTPUT_DIR"
- cp -r "$DOCS_PATH"/* "$OUTPUT_DIR/" 2>/dev/null || true
- # Handle hidden directories for media
- shopt -s dotglob
- for item in "$DOCS_PATH"/.*; do
- if [ -e "$item" ] && [ "$(basename "$item")" != "." ] && [ "$(basename "$item")" != ".." ]; then
- cp -r "$item" "$OUTPUT_DIR/" 2>/dev/null || true
- fi
- done
- shopt -u dotglob
-else
- echo "Warning: $DOCS_PATH directory not found in repository"
-fi
-
-# Copy root files if specified
-if [ ${#ROOT_FILES[@]} -gt 0 ]; then
- echo "Copying root files to $OUTPUT_DIR"
- for root_file in "${ROOT_FILES[@]}"; do
- if [ -f "$root_file" ]; then
- cp "$root_file" "$OUTPUT_DIR/"
- echo " Copied: $root_file"
- else
- echo " Warning: $root_file not found"
- fi
- done
-fi
-
-echo "Fetch complete!"
-
-# Cleanup
-cd - > /dev/null
-rm -rf "$TEMP_DIR"
diff --git a/scripts/test-local.sh b/scripts/test-local.sh
deleted file mode 100755
index 36cde2d..0000000
--- a/scripts/test-local.sh
+++ /dev/null
@@ -1,46 +0,0 @@
-#!/usr/bin/env bash
-# Convenience script for testing aggregation with local repositories
-# This uses repos-config.local.json which points to local file:// paths
-
-set -e
-
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-
-echo "Testing aggregation with local repositories..."
-echo ""
-
-# Check if local repos exist
-REPOS=(
- "/home/$USER/*/gardenlinux/gardenlinux"
- "/home/$USER/*/gardenlinux/builder"
- "/home/$USER/*/gardenlinux/python-gardenlinux-lib"
-)
-
-for repo in "${REPOS[@]}"; do
- if [ ! -d "$repo" ]; then
- echo "Error: Local repo not found: $repo"
- exit 1
- fi
-done
-
-echo "All local repositories found"
-echo ""
-
-# Backup original config
-if [ -f "$SCRIPT_DIR/repos-config.json" ]; then
- cp "$SCRIPT_DIR/repos-config.json" "$SCRIPT_DIR/repos-config.json.bak"
- echo "Backed up repos-config.json to repos-config.json.bak"
-fi
-
-cp "$SCRIPT_DIR/repos-config.local.json" "$SCRIPT_DIR/repos-config.json"
-echo "Using local configuration"
-echo ""
-
-"$SCRIPT_DIR/aggregate-docs.sh" "$@"
-
-# Restore original config
-if [ -f "$SCRIPT_DIR/repos-config.json.bak" ]; then
- mv "$SCRIPT_DIR/repos-config.json.bak" "$SCRIPT_DIR/repos-config.json"
- echo ""
- echo "Restored original repos-config.json"
-fi
diff --git a/scripts/tests/README.md b/scripts/tests/README.md
deleted file mode 100644
index eac9f55..0000000
--- a/scripts/tests/README.md
+++ /dev/null
@@ -1,97 +0,0 @@
-# Test Suite
-
-Test suite for the documentation aggregation scripts.
-
-## Running Tests
-
-### Unit Tests
-
-Test individual functions in the transformation scripts:
-
-```bash
-cd scripts/tests
-python3 run_tests.py
-```
-
-Run a specific test:
-
-```bash
-python3 run_tests.py test_escape_angle_brackets_in_text
-```
-
-### Integration Tests
-
-Test overall script functionality:
-
-```bash
-cd scripts/tests
-./test_integration.sh
-```
-
-## Test Coverage
-
-### Unit Tests (run_tests.py)
-
-Tests for `transform-content.py` functions:
-
-- Angle bracket escaping
-- HTML tag preservation
-- Code block handling
-- Link rewriting
-- YAML frontmatter quoting
-- Directory name transformations
-
-### Integration Tests (test_integration.sh)
-
-- Configuration file existence
-- Script executability
-- Syntax validation (bash and python)
-- Basic script functionality
-
-## Adding Tests
-
-### Adding Unit Tests
-
-Edit `run_tests.py` and add a new test function:
-
-```python
-def test_my_new_feature(runner):
- """Test description"""
- result = my_function("input")
- runner.assert_equal(result, "expected", "Error message")
-```
-
-Then add it to the `test_functions` list in `main()`.
-
-### Adding Integration Tests
-
-Edit `test_integration.sh` and add a new test block:
-
-```bash
-echo -n "My new test... "
-if my_command; then
- echo "OK" && ((TESTS_PASSED++))
-else
- echo "FAIL" && ((TESTS_FAILED++))
-fi
-```
-
-## Test Fixtures
-
-Sample markdown files for testing are in `fixtures/`:
-
-- `test_doc.md` - Various markdown features
-- `colon_title.md` - YAML frontmatter with colons
-- `with_frontmatter.md` - Existing frontmatter
-
-## Continuous Integration
-
-Tests should be run in CI before merging. Add to `.github/workflows/`:
-
-```yaml
-- name: Run tests
- run: |
- cd scripts/tests
- python3 run_tests.py
- ./test_integration.sh
-```
diff --git a/scripts/tests/run_all.sh b/scripts/tests/run_all.sh
deleted file mode 100755
index f2870ea..0000000
--- a/scripts/tests/run_all.sh
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/usr/bin/env bash
-# Run all tests
-
-set -e
-
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-
-echo "Running unit tests..."
-python3 "$SCRIPT_DIR/run_tests.py"
-
-echo ""
-echo "All tests passed"
diff --git a/scripts/tests/run_tests.py b/scripts/tests/run_tests.py
deleted file mode 100755
index b28b4ff..0000000
--- a/scripts/tests/run_tests.py
+++ /dev/null
@@ -1,276 +0,0 @@
-#!/usr/bin/env python3
-"""
-Test suite for documentation aggregation scripts
-
-Run all tests:
- python3 run_tests.py
-
-Run specific test:
- python3 run_tests.py test_escape_angle_brackets
-"""
-
-import json
-import os
-import shutil
-import sys
-import tempfile
-from pathlib import Path
-
-# Add parent directory to path to import modules
-sys.path.insert(0, str(Path(__file__).parent.parent))
-
-from transform_content import ( # type: ignore
- ensure_frontmatter,
- escape_angle_brackets,
- escape_text_angle_brackets,
- quote_yaml_value,
- rewrite_links,
-)
-
-
-class TestRunner:
- def __init__(self):
- self.tests_passed = 0
- self.tests_failed = 0
- self.failures = []
-
- def assert_equal(self, actual, expected, message=""):
- if actual == expected:
- self.tests_passed += 1
- return True
- else:
- self.tests_failed += 1
- error = (
- f"FAIL: {message}\n Expected: {repr(expected)}\n Got: {repr(actual)}"
- )
- self.failures.append(error)
- print(error)
- return False
-
- def assert_contains(self, text, substring, message=""):
- if substring in text:
- self.tests_passed += 1
- return True
- else:
- self.tests_failed += 1
- error = f"FAIL: {message}\n Expected to find: {repr(substring)}\n In: {repr(text)}"
- self.failures.append(error)
- print(error)
- return False
-
- def assert_not_contains(self, text, substring, message=""):
- if substring not in text:
- self.tests_passed += 1
- return True
- else:
- self.tests_failed += 1
- error = f"FAIL: {message}\n Expected NOT to find: {repr(substring)}\n In: {repr(text)}"
- self.failures.append(error)
- print(error)
- return False
-
- def run_test(self, test_func):
- test_name = test_func.__name__
- print(f"Running {test_name}...", end=" ")
- try:
- test_func(self)
- print("OK")
- except Exception as e:
- self.tests_failed += 1
- error = f"FAIL: {test_name} raised exception: {e}"
- self.failures.append(error)
- print(error)
-
- def summary(self):
- total = self.tests_passed + self.tests_failed
- print(f"\n{'='*60}")
- print(f"Tests run: {total}")
- print(f"Passed: {self.tests_passed}")
- print(f"Failed: {self.tests_failed}")
- print(f"{'='*60}")
-
- if self.tests_failed > 0:
- print("\nFailed tests:")
- for failure in self.failures:
- print(failure)
- return 1
- return 0
-
-
-def test_escape_angle_brackets_in_text(runner):
- """Test that angle brackets in plain text are escaped"""
- content = "This is <placeholder text> that needs escaping."
- result = escape_angle_brackets(content)
- runner.assert_contains(
- result, "&lt;placeholder text&gt;", "Angle brackets in text should be escaped"
- )
-
-
-def test_escape_angle_brackets_with_spaces(runner):
- """Test that angle brackets with spaces inside are escaped"""
- content = "Multiple <words here> should be escaped."
- result = escape_angle_brackets(content)
- runner.assert_contains(
- result, "&lt;words here&gt;", "Angle brackets with spaces should be escaped"
- )
-
-
-def test_preserve_html_tags(runner):
- """Test that valid HTML tags are preserved"""
- content = "This is HTML
and should not be escaped."
- result = escape_angle_brackets(content)
- runner.assert_contains(result, "", "HTML
tag should be preserved")
- runner.assert_contains(result, "
", "HTML
tag should be preserved")
-
-
-def test_preserve_code_blocks(runner):
- """Test that code blocks are not escaped"""
- content = """```python
-x = ""
-```"""
- result = escape_angle_brackets(content)
- runner.assert_contains(
- result, "", "Code in triple backticks should not be escaped"
- )
-
-
-def test_preserve_inline_code(runner):
- """Test that inline code is not escaped"""
- content = "Inline code like `` should not be escaped."
- result = escape_angle_brackets(content)
- runner.assert_contains(result, "``", "Inline code should not be escaped")
-
-
-def test_rewrite_relative_links(runner):
- """Test that relative links are rewritten correctly"""
- content = "[Link](./other.md)"
- result = rewrite_links(content, "gardenlinux", "introduction/index.md")
- runner.assert_contains(
- result,
- "/projects/gardenlinux/introduction/other",
- "Relative link should be rewritten",
- )
-
-
-def test_rewrite_numbered_directory_links(runner):
- """Test that numbered directories in links are transformed"""
- content = "[Link](../01_developers/guide.md)"
- result = rewrite_links(content, "gardenlinux", "introduction/index.md")
- runner.assert_contains(
- result, "developers/guide", "Numbered directory in link should be transformed"
- )
-
-
-def test_preserve_external_links(runner):
- """Test that external links are not modified"""
- content = "[External](https://github.com/gardenlinux/gardenlinux)"
- result = rewrite_links(content, "gardenlinux", "")
- runner.assert_equal(result, content, "External links should not be modified")
-
-
-def test_preserve_anchor_links(runner):
- """Test that anchor links are preserved"""
- content = "[Anchor](#section)"
- result = rewrite_links(content, "gardenlinux", "")
- runner.assert_equal(result, content, "Anchor links should not be modified")
-
-
-def test_quote_yaml_value_with_colon(runner):
- """Test that YAML values with colons are quoted"""
- value = "Getting Started: Creating Images"
- result = quote_yaml_value(value)
- runner.assert_contains(result, '"', "Value with colon should be quoted")
- runner.assert_contains(
- result, "Getting Started: Creating Images", "Original value should be preserved"
- )
-
-
-def test_quote_yaml_value_without_special_chars(runner):
- """Test that simple YAML values are not quoted"""
- value = "Simple Title"
- result = quote_yaml_value(value)
- runner.assert_equal(result, "Simple Title", "Simple value should not be quoted")
-
-
-def test_ensure_frontmatter_no_change_when_missing(runner):
- """Test that content without frontmatter is returned unchanged"""
- content = "# Test Title\n\nContent here."
- result = ensure_frontmatter(content)
- runner.assert_equal(result, content, "Content without frontmatter should be unchanged")
-
-
-def test_ensure_frontmatter_preserves_existing(runner):
- """Test that existing frontmatter is preserved"""
- content = "---\ntitle: Existing\n---\n\nContent"
- result = ensure_frontmatter(content)
- runner.assert_contains(
- result, "title: Existing", "Existing frontmatter should be preserved"
- )
-
-
-def test_ensure_frontmatter_fixes_colons(runner):
- """Test that colons in existing frontmatter are quoted"""
- content = "---\ntitle: Test: Example\n---\n\nContent"
- result = ensure_frontmatter(content)
- runner.assert_contains(
- result, '"Test: Example"', "Colon in frontmatter should be quoted"
- )
-
-
-def test_escape_text_angle_brackets_preserves_html(runner):
- """Test that known HTML tags are preserved"""
- text = "content
"
- result = escape_text_angle_brackets(text)
- runner.assert_contains(result, "", "div tag should be preserved")
-
-
-def test_escape_text_angle_brackets_escapes_placeholders(runner):
- """Test that placeholder text is escaped"""
- text = "
"
- result = escape_text_angle_brackets(text)
- runner.assert_contains(
- result, "<placeholder>", "Placeholder should be escaped"
- )
-
-
-def main():
- runner = TestRunner()
-
- # Get test to run from command line, or run all
- if len(sys.argv) > 1:
- test_name = sys.argv[1]
- test_func = globals().get(test_name)
- if test_func and callable(test_func):
- runner.run_test(test_func)
- else:
- print(f"Test '{test_name}' not found")
- return 1
- else:
- # Run all tests
- test_functions = [
- test_escape_angle_brackets_in_text,
- test_escape_angle_brackets_with_spaces,
- test_preserve_html_tags,
- test_preserve_code_blocks,
- test_preserve_inline_code,
- test_rewrite_relative_links,
- test_rewrite_numbered_directory_links,
- test_preserve_external_links,
- test_preserve_anchor_links,
- test_quote_yaml_value_with_colon,
- test_quote_yaml_value_without_special_chars,
- test_ensure_frontmatter_no_change_when_missing,
- test_ensure_frontmatter_preserves_existing,
- test_ensure_frontmatter_fixes_colons,
- test_escape_text_angle_brackets_preserves_html,
- test_escape_text_angle_brackets_escapes_placeholders,
- ]
-
- for test_func in test_functions:
- runner.run_test(test_func)
-
- return runner.summary()
-
-
-if __name__ == "__main__":
- sys.exit(main())
diff --git a/scripts/transform_content.py b/scripts/transform_content.py
deleted file mode 100755
index 24eec1f..0000000
--- a/scripts/transform_content.py
+++ /dev/null
@@ -1,750 +0,0 @@
-#!/usr/bin/env python3
-"""
-Transform documentation content for VitePress integration
-- Renames numbered directories (00_introduction -> introduction)
-- Rewrites internal links
-- Adds/fixes frontmatter
-- Converts RST to Markdown (if needed)
-"""
-
-import argparse
-import json
-import os
-import re
-import shutil
-from pathlib import Path
-
-
-def load_config(config_path):
- with open(config_path, "r") as f:
- return json.load(f)
-
-
-def transform_directory_structure(
- source_dir, target_dir, structure_map, special_files=None, media_dirs=None
-):
- """
- Transform directory structure based on mapping
- e.g., 00_introduction -> introduction
- """
- source_path = Path(source_dir)
- target_path = Path(target_dir)
- target_path.mkdir(parents=True, exist_ok=True)
-
- special_files = special_files or {}
- media_dirs = media_dirs or []
-
- if isinstance(structure_map, dict):
- # Structured transformation (e.g. with subdirectories specified in JSON file)
- for old_name, new_name in structure_map.items():
- old_path = source_path / old_name
- new_path = target_path / new_name
-
- if old_path.exists():
- print(f" Transforming: {old_name} -> {new_name}")
- shutil.copytree(old_path, new_path, dirs_exist_ok=True)
-
- for item in source_path.iterdir():
- if item.name in structure_map:
- continue
-
- if item.name in special_files:
- target_subdir = target_path / special_files[item.name]
- target_subdir.mkdir(parents=True, exist_ok=True)
- if item.is_file():
- print(f" Moving {item.name} to {special_files[item.name]}")
- shutil.copy2(item, target_subdir / item.name)
- elif item.is_dir():
- print(f" Moving {item.name} to {special_files[item.name]}")
- shutil.copytree(item, target_subdir / item.name, dirs_exist_ok=True)
- elif item.name in media_dirs:
- print(f" Copying media directory: {item.name}")
- shutil.copytree(item, target_path / item.name, dirs_exist_ok=True)
- elif item.is_file() and not item.name.startswith("_"):
- shutil.copy2(item, target_path / item.name)
- elif (
- item.is_dir()
- and not item.name.startswith("_")
- and not item.name.startswith(".")
- ):
- shutil.copytree(item, target_path / item.name, dirs_exist_ok=True)
- elif structure_map == "flat":
- # Flat structure. Only copy.
- print(f" Copying flat structure")
- for item in source_path.glob("*"):
- if item.is_file():
- shutil.copy2(item, target_path / item.name)
- elif item.is_dir():
- shutil.copytree(item, target_path / item.name, dirs_exist_ok=True)
- elif structure_map == "sphinx":
- # Sphinx structure. Copy and convert later
- print(f" Copying Sphinx structure (RST files)")
- for item in source_path.glob("*"):
- target_item = target_path / item.name
- if item.is_file():
- shutil.copy2(item, target_item)
- elif item.is_dir():
- shutil.copytree(item, target_item, dirs_exist_ok=True)
- else:
- # Default: simply copy
- shutil.copytree(source_path, target_path, dirs_exist_ok=True)
-
-
-def rewrite_links(
- content,
- repo_name,
- file_rel_path="",
- base_path="/projects",
- github_base="https://github.com/gardenlinux",
-):
- """
- Rewrite internal markdown links to work with VitePress structure
-
- Args:
- content: The markdown content
- repo_name: Name of the repository (e.g., "gardenlinux")
- file_rel_path: Relative path of the file within the repo (e.g., "introduction/index.md")
- base_path: Base path for projects (default: "/projects")
- github_base: Base URL for GitHub organization (default: "https://github.com/gardenlinux")
-
- Examples:
- [link](../01_developers/build.md) -> [link](/projects/gardenlinux/developers/build)
- [link](./intro.md) -> [link](/projects/gardenlinux/introduction/intro)
- [link](kernel.md) -> [link](/projects/gardenlinux/introduction/kernel) (when in introduction/)
- [link](/SECURITY.md) -> [link](https://github.com/gardenlinux/gardenlinux/blob/main/SECURITY.md)
- """
-
- file_dir = str(Path(file_rel_path).parent) if file_rel_path else ""
- if file_dir == ".":
- file_dir = ""
-
- def replace_link(match):
- text = match.group(1)
- link = match.group(2)
-
- if link.startswith("http://") or link.startswith("https://"):
- return match.group(0)
-
- if link.startswith("#"):
- return match.group(0)
-
- # Skip if already a /projects/ link
- if link.startswith(f"{base_path}/"):
- return match.group(0)
-
- # handle relative paths for .media directory
- if ".media/" in link:
- media_part = link
- while media_part.startswith("../"):
- media_part = media_part[3:]
- media_part = media_part.replace("./", "")
- new_link = f"{base_path}/{repo_name}/{media_part}"
- return f"[{text}]({new_link})"
-
- if link.startswith("../") or link.startswith("./"):
- stripped_link = link.replace(".md", "")
-
- # For ./ links (same directory), use the file's directory
- if link.startswith("./"):
- stripped_link = stripped_link.replace("./", "")
- if file_dir:
- new_link = f"{base_path}/{repo_name}/{file_dir}/{stripped_link}"
- else:
- new_link = f"{base_path}/{repo_name}/{stripped_link}"
- else:
- # For ../ links, check if they go outside docs/
- # Count how many levels up we go
- levels_up = link.count("../")
- stripped_link = stripped_link.replace("../", "")
-
- # Do we go outside docs/ ?
- if file_dir:
- dir_depth = len(file_dir.split("/"))
- if levels_up > dir_depth:
- # Link to GitHub
- new_link = f"{github_base}/{repo_name}/blob/main/{file_dir}"
- return f"[{text}]({new_link})"
-
- stripped_link = re.sub(r"\d+_(\w+)", r"\1", stripped_link)
- new_link = f"{base_path}/{repo_name}/{stripped_link}"
-
- return f"[{text}]({new_link})"
-
- # Handle absolute paths from root
- if link.startswith("/"):
- # If it's already pointing to /projects/, leave it
- if link.startswith(f"{base_path}/"):
- return match.group(0)
- # Otherwise, this is a link to a file outside docs/ - point to GitHub
- stripped_link = link.lstrip("/")
- new_link = f"{github_base}/{repo_name}/blob/main/{stripped_link}"
- return f"[{text}]({new_link})"
-
- # Handle simple filenames (same directory)
- if "/" not in link:
- stripped_link = link.replace(".md", "")
- # If we know the file's directory, use it
- if file_dir:
- new_link = f"{base_path}/{repo_name}/{file_dir}/{stripped_link}"
- else:
- new_link = f"{base_path}/{repo_name}/{stripped_link}"
- return f"[{text}]({new_link})"
-
- return match.group(0)
-
- # Apply transform
- content = re.sub(r"\[([^\]]+)\]\(([^)]+)\)", replace_link, content)
-
- def replace_html_media_link(match):
- attr_name = match.group(1)
- link = match.group(2)
-
- if link.startswith(f"{base_path}/"):
- return match.group(0)
- if ".media/" in link:
- media_part = link
- while media_part.startswith("../"):
- media_part = media_part[3:]
- media_part = media_part.replace("./", "")
- new_link = f"{base_path}/{repo_name}/{media_part}"
- return f'{attr_name}="{new_link}"'
- return match.group(0)
-
- content = re.sub(
- r'(src|srcset)="([^"]*\.media/[^"]*)"', replace_html_media_link, content
- )
-
- return content
-
-
-# def escape_angle_brackets(content):
-# """
-# Escape angle brackets that are not part of HTML tags.
-#
-# This is needed for content like "" which should be
-# displayed as text, not parsed as an HTML tag.
-#
-# Skip escaping inside:
-# - Code blocks (``` or indented)
-# - Inline code (``)
-# """
-# # Split content by code blocks and inline code to process only text parts
-# lines = content.split("\n")
-# result_lines = []
-# in_code_block = False
-#
-# for line in lines:
-# if line.strip().startswith("```"):
-# in_code_block = not in_code_block
-# result_lines.append(line)
-# continue
-#
-# if in_code_block:
-# result_lines.append(line)
-# continue
-#
-# if line.startswith(" ") or line.startswith("\t"):
-# result_lines.append(line)
-# continue
-#
-# parts = []
-# in_inline_code = False
-# current = ""
-# i = 0
-#
-# while i < len(line):
-# if line[i] == "`":
-# if current:
-# if in_inline_code:
-# parts.append(current)
-# else:
-# parts.append(escape_text_angle_brackets(current))
-# current = ""
-# parts.append("`")
-# in_inline_code = not in_inline_code
-# i += 1
-# else:
-# current += line[i]
-# i += 1
-#
-# if current:
-# if in_inline_code:
-# parts.append(current)
-# else:
-# parts.append(escape_text_angle_brackets(current))
-#
-# result_lines.append("".join(parts))
-#
-# return "\n".join(result_lines)
-#
-#
-# def escape_text_angle_brackets(text):
-# """
-# Escape angle brackets in plain text (not in code).
-# Only escape if they look like placeholders, not HTML tags.
-# """
-# import re
-#
-# known_html_tags = {
-# "a",
-# "b",
-# "i",
-# "u",
-# "p",
-# "div",
-# "span",
-# "br",
-# "hr",
-# "img",
-# "picture",
-# "source",
-# "table",
-# "tr",
-# "td",
-# "th",
-# "ul",
-# "ol",
-# "li",
-# "h1",
-# "h2",
-# "h3",
-# "h4",
-# "h5",
-# "h6",
-# "code",
-# "pre",
-# "blockquote",
-# "em",
-# "strong",
-# "del",
-# "ins",
-# "sub",
-# "sup",
-# "html",
-# "head",
-# "body",
-# "title",
-# "link",
-# "meta",
-# "script",
-# "style",
-# "nav",
-# "header",
-# "footer",
-# "section",
-# "article",
-# "aside",
-# "main",
-# "figure",
-# "figcaption",
-# "details",
-# "summary",
-# "video",
-# "audio",
-# "iframe",
-# "canvas",
-# "svg",
-# "path",
-# "form",
-# "input",
-# "button",
-# "select",
-# "option",
-# "textarea",
-# "label",
-# }
-#
-# def replace_bracket(match):
-# content = match.group(1)
-#
-# tag_content = content.strip()
-# if tag_content.startswith("/"):
-# tag_content = tag_content[1:]
-#
-# tag_name = (
-# tag_content.split()[0].lower()
-# if " " in tag_content
-# else tag_content.lower()
-# )
-#
-# if tag_name in known_html_tags:
-# return f"<{content}>"
-#
-# return f"<{content}>"
-#
-# text = re.sub(r"<([^>]+)>", replace_bracket, text)
-#
-# return text
-
-
-def ensure_frontmatter(content):
- """
- Ensure frontmatter exists and fix YAML formatting.
- Only fixes existing frontmatter - does not inject new fields.
-
- Args:
- content: The markdown content
- """
- # Check if frontmatter already exists
- if content.startswith("---\n"):
- try:
- end_match = re.search(r"\n---\n", content[4:])
- if end_match:
- frontmatter_content = content[4 : 4 + end_match.start()]
- rest_content = content[4 + end_match.end() :]
-
- # Parse and fix the frontmatter
- fixed_frontmatter = fix_yaml_frontmatter(frontmatter_content)
-
- return f"---\n{fixed_frontmatter}\n---\n\n{rest_content}"
- except Exception:
- print(f"[Warning] Frontmatter: Couldn't parse existing frontmatter!")
- pass
-
- return content
-
-
-def quote_yaml_value(value):
- """Quote YAML value if needed, handling already-quoted values."""
- # If value is already properly quoted, return as-is
- if value.startswith('"') and value.endswith('"'):
- # Check if it's properly quoted (not escaped quotes)
- if not value.startswith('"\\"'):
- return value
-
- if value.startswith("'") and value.endswith("'"):
- return value
-
- special_chars = [
- ":",
- "#",
- "@",
- "`",
- "|",
- ">",
- "*",
- "&",
- "!",
- "%",
- "[",
- "]",
- "{",
- "}",
- ",",
- "?",
- ]
-
- needs_quoting = any(char in value for char in special_chars)
-
- if value and (value[0] in ['"', "'", " "] or value[-1] in [" "]):
- needs_quoting = True
-
- if needs_quoting:
- # Don't escape quotes that are already inside the value
- # Just wrap in quotes
- if '"' not in value:
- return f'"{value}"'
- elif "'" not in value:
- return f"'{value}'"
- else:
- # If both quote types exist, escape double quotes and use them
- escaped_value = value.replace('"', '\\"')
- return f'"{escaped_value}"'
-
- return value
-
-
-def fix_yaml_frontmatter(frontmatter_text):
- lines = frontmatter_text.split("\n")
- fixed_lines = []
-
- for line in lines:
- if not line.strip():
- fixed_lines.append(line)
- continue
-
- # Check if line contains a key-value pair
- if ":" in line:
- parts = line.split(":", 1)
- if len(parts) == 2:
- key = parts[0].strip()
- value = parts[1].strip()
-
- quoted_value = quote_yaml_value(value)
- fixed_lines.append(f"{key}: {quoted_value}")
- continue
-
- fixed_lines.append(line)
-
- return "\n".join(fixed_lines)
-
-
-def fix_broken_project_links(
- content,
- repo_name,
- target_dir,
- base_path="/projects",
- github_base="https://github.com/gardenlinux",
-):
- """
- In case a link in /projects/ points to a file that doesn't exist,
- replace it with a GitHub link.
- """
- target_path = Path(target_dir)
-
- def check_and_fix_link(match):
- text = match.group(1)
- link = match.group(2)
-
- # Only process /projects/{repo}/ links
- if not link.startswith(f"{base_path}/{repo_name}/"):
- return match.group(0)
-
- # Extract the path after /projects/{repo}/
- rel_path = link[len(f"{base_path}/{repo_name}/") :]
-
- potential_file = target_path / f"{rel_path}.md"
- potential_index = target_path / rel_path / "index.md"
- potential_dir = target_path / rel_path
-
- # If file exists, or directory exists with index.md, keep the link
- if (
- potential_file.exists()
- or potential_index.exists()
- or (potential_dir.exists() and potential_dir.is_dir() and (potential_dir / "index.md").exists())
- ):
- return match.group(0)
-
- github_link = f"{github_base}/{repo_name}/blob/main/{rel_path}"
- return f"[{text}]({github_link})"
-
- content = re.sub(r"\[([^\]]+)\]\(([^)]+)\)", check_and_fix_link, content)
-
- return content
-
-
-def process_markdown_file(file_path, repo_name, target_dir, base_path="/projects"):
- """
- Process a single markdown file:
- - Escape angle brackets
- - Rewrite links
- - Fix broken project links
- - Fix frontmatter YAML formatting
-
- Args:
- file_path: Path to the markdown file
- repo_name: Name of the repository
- target_dir: Target directory where files are being processed
- base_path: Base path for projects (default: "/projects")
- """
- try:
- with open(file_path, "r", encoding="utf-8") as f:
- content = f.read()
-
- # Calculate relative path from target_dir
- file_path_obj = Path(file_path)
- target_path_obj = Path(target_dir)
- try:
- file_rel_path = str(file_path_obj.relative_to(target_path_obj))
- except ValueError:
- file_rel_path = ""
-
- # content = escape_angle_brackets(content)
- content = rewrite_links(content, repo_name, file_rel_path, base_path)
- content = fix_broken_project_links(content, repo_name, target_dir, base_path)
- content = ensure_frontmatter(content)
-
- with open(file_path, "w", encoding="utf-8") as f:
- f.write(content)
-
- return True
- except Exception as e:
- print(f" [Warning] Error processing {file_path}: {e}")
- return False
-
-
-def process_all_markdown(target_dir, repo_name):
- """
- Process all markdown files in target directory
-
- Args:
- target_dir: Target directory containing markdown files
- repo_name: Name of the repository
- """
- target_path = Path(target_dir)
-
- # Rename all README.md to index.md for VitePress
- readme_files = list(target_path.rglob("README.md"))
- for readme in readme_files:
- index_file = readme.parent / "index.md"
- if not index_file.exists():
- readme.rename(index_file)
- print(f" Renamed {readme.relative_to(target_path)} to index.md")
-
- md_files = list(target_path.rglob("*.md"))
-
- print(f" Processing {len(md_files)} markdown files...")
-
- success_count = 0
- for md_file in md_files:
- if process_markdown_file(md_file, repo_name, target_dir):
- success_count += 1
-
- print(f" [Success] Processed {success_count}/{len(md_files)} files successfully")
-
-
-def parse_frontmatter(content):
- """
- Parse YAML frontmatter from markdown content.
- Returns (frontmatter_dict, content_without_frontmatter) or (None, original_content)
-
- Uses simple key: value parsing (no external YAML library required).
- Handles the subset of YAML used in frontmatter: simple string key-value pairs.
- """
- if not content.startswith("---\n"):
- return None, content
-
- try:
- end_match = re.search(r"\n---\n", content[4:])
- if not end_match:
- return None, content
-
- frontmatter_text = content[4 : 4 + end_match.start()]
- rest_content = content[4 + end_match.end() :]
-
- frontmatter_dict = {}
- for line in frontmatter_text.split("\n"):
- line = line.strip()
- if not line:
- continue
- if ":" in line:
- key, value = line.split(":", 1)
- key = key.strip()
- value = value.strip().strip("\"'")
- frontmatter_dict[key] = value
-
- return frontmatter_dict, rest_content
- except Exception as e:
- print(f" [Warning] Failed to parse frontmatter: {e}")
- return None, content
-
-
-def copy_targeted_docs(source_dir, docs_dir, repo_name):
- """
- Copy markdown files with 'github_target_path:' frontmatter to their specified locations.
-
- Args:
- source_dir: Source directory containing fetched docs (e.g., /tmp/xxx/gardenlinux)
- docs_dir: Target docs directory (e.g., /path/to/docs-ng/docs)
- repo_name: Name of the repository for logging
- """
- source_path = Path(source_dir)
- docs_path = Path(docs_dir)
-
- if not source_path.exists():
- print(f" [Warning] Source directory not found: {source_dir}")
- return
-
- # Find all markdown files
- md_files = list(source_path.rglob("*.md"))
- targeted_files = []
-
- print(f" Scanning {len(md_files)} files for 'github_target_path:' frontmatter...")
-
- for md_file in md_files:
- try:
- with open(md_file, "r", encoding="utf-8") as f:
- content = f.read()
-
- frontmatter, _ = parse_frontmatter(content)
-
- # Check for 'github_target_path' in frontmatter
- if frontmatter and ("github_target_path" in frontmatter):
- target_path = frontmatter.get("github_target_path") or frontmatter.get("target")
-
- # Strip leading 'docs/' if present
- if target_path.startswith("docs/"):
- target_path = target_path[5:]
-
- target_file = docs_path / target_path
-
- # Create parent directories if needed
- target_file.parent.mkdir(parents=True, exist_ok=True)
-
- # Copy the file
- shutil.copy2(md_file, target_file)
-
- # Apply markdown processing (but not project-specific link rewriting)
- # These files live in main docs tree, not under /projects/
- # content = escape_angle_brackets(content)
- content = ensure_frontmatter(content)
-
- with open(target_file, "w", encoding="utf-8") as f:
- f.write(content)
-
- targeted_files.append((md_file.relative_to(source_path), target_path))
- print(f" ✓ Copied: {md_file.name} → {target_path}")
-
- except Exception as e:
- print(f" [Warning] Error processing {md_file.name}: {e}")
-
- if targeted_files:
- print(f" [Success] Copied {len(targeted_files)} targeted file(s)")
- else:
- print(f" No files with 'github_target_path:' frontmatter found")
-
-
-def transform_repo_docs(repo_config, docs_dir, temp_dir):
- """
- Transform documentation for a single repository
- """
- repo_name = repo_config["name"]
- print(f"\nTransforming docs for: {repo_name}")
-
- source_dir = os.path.join(temp_dir, repo_name)
- target_dir = os.path.join(docs_dir, repo_config["target_path"])
-
- structure = repo_config.get("structure", "flat")
- special_files = repo_config.get("special_files", {})
- media_dirs = repo_config.get("media_directories", [])
-
- # First, copy files with 'target:' frontmatter to their specified locations
- print(f"\n Step 2a: Processing targeted files...")
- copy_targeted_docs(source_dir, docs_dir, repo_name)
-
- # Then, do the standard structure transformation to projects/ directory
- print(f"\n Step 2b: Transforming project structure...")
- transform_directory_structure(
- source_dir, target_dir, structure, special_files, media_dirs
- )
- process_all_markdown(target_dir, repo_name)
-
- print(f"[Complete] Transformation complete for {repo_name}")
-
-
-def main():
- parser = argparse.ArgumentParser(description="Transform documentation content")
- parser.add_argument("--config", required=True, help="Path to repos-config.json")
- parser.add_argument("--docs-dir", required=True, help="Path to docs directory")
- parser.add_argument(
- "--temp-dir",
- required=True,
- help="Path to temporary directory with fetched docs",
- )
- parser.add_argument("--repo", help="Only transform specific repo (optional)")
-
- args = parser.parse_args()
-
- config = load_config(args.config)
-
- for repo in config["repos"]:
- if args.repo and repo["name"] != args.repo:
- continue
-
- transform_repo_docs(repo, args.docs_dir, args.temp_dir)
-
- print("\n[Complete] All transformations complete!")
-
-
-if __name__ == "__main__":
- main()
diff --git a/scripts/update_config.py.backup.202603250900 b/scripts/update_config.py.backup.202603250900
deleted file mode 100755
index c148ef9..0000000
--- a/scripts/update_config.py.backup.202603250900
+++ /dev/null
@@ -1,694 +0,0 @@
-#!/usr/bin/env python3
-"""
-Update VitePress configuration with dynamically generated sidebars
-for aggregated documentation from multiple repositories defined in repos-config.json.
-"""
-
-import argparse
-import json
-import re
-from pathlib import Path
-
-
-def load_config(config_path):
- with open(config_path, "r") as f:
- return json.load(f)
-
-
-def get_section_priority(section, priority_map):
- section_name = section.get("text", "").lower()
- for key, priority in priority_map.items():
- if key in section_name:
- return priority
- return 999
-
-
-def get_directory_structure(path, docs_dir=None):
- """
- Scan directory and build sidebar structure
- Returns list of sidebar items
- """
- items = []
- path = Path(path)
-
- if not path.exists():
- return items
-
- # If docs_dir not provided, use path.parent for backward compatibility
- if docs_dir is None:
- docs_dir = path.parent
-
- # Get all markdown files and directories
- # Sort with index or README files first, then alphabetically
- def sort_key(entry):
- if entry.name.lower() in ["index.md", "readme.md"]:
- return (0, entry.name)
- else:
- return (1, entry.name)
-
- entries = sorted(path.iterdir(), key=sort_key)
-
- # Track added index files for project
- index_added = False
-
- for entry in entries:
- if entry.name.startswith(".") or entry.name.startswith("_"):
- continue
-
- if entry.is_file() and entry.suffix == ".md":
- title = get_title_from_file(entry)
- if entry.name == "README.md" or entry.name == "index.md":
- # Add index files (prefer index.md over README.md)
- if not index_added:
- link = "/" + str(entry.parent.relative_to(docs_dir))
- if not link.endswith("/"):
- link += "/"
- items.append(
- {
- "text": title
- or entry.parent.name.replace("-", " ")
- .replace("_", " ")
- .title(),
- "link": link,
- }
- )
- index_added = True
- else:
- link = "/" + str(entry.relative_to(docs_dir)).replace(".md", "")
- items.append(
- {
- "text": title
- or entry.stem.replace("-", " ").replace("_", " ").title(),
- "link": link,
- }
- )
-
- elif entry.is_dir():
- sub_items = get_directory_structure(entry, docs_dir)
-
- if sub_items:
- dir_item = {
- "text": entry.name.replace("-", " ").replace("_", " ").title(),
- "collapsed": True,
- "items": sub_items,
- }
- items.append(dir_item)
-
- return items
-
-
-def get_title_from_file(file_path):
- """
- Extract title from markdown file (frontmatter or first heading)
- """
- try:
- with open(file_path, "r", encoding="utf-8") as f:
- content = f.read()
-
- frontmatter_match = re.search(
- r"^---\s*\ntitle:\s*(.+?)\s*\n", content, re.MULTILINE
- )
- if frontmatter_match:
- return frontmatter_match.group(1).strip()
-
- heading_match = re.search(r"^#\s+(.+)$", content, re.MULTILINE)
- if heading_match:
- return heading_match.group(1).strip()
- except Exception:
- pass
-
- return None
-
-
-def find_important_guides(repo_docs_path, docs_dir):
- """
- Find important guides like installation, quickstart, getting started, etc.
- Returns a dict with guide type as important and link as value
- """
- important_guides = {}
-
- guide_keywords = {
- "getting_started": [
- "getting_started",
- "getting-started",
- "gettingstarted",
- "get_started",
- "get-started",
- ],
- "quickstart": ["quickstart", "quick_start", "quick-start"],
- "installation": ["installation", "installing", "setup"],
- }
-
- for md_file in repo_docs_path.rglob("*.md"):
- if md_file.name.startswith(".") or md_file.name.startswith("_"):
- continue
-
- filename = md_file.stem.lower()
-
- for guide_type, keywords in guide_keywords.items():
- if guide_type not in important_guides:
- for keyword in keywords:
- if keyword in filename:
- rel_path = md_file.relative_to(Path(docs_dir))
- link = "/" + str(rel_path).replace(".md", "")
-
- title = get_title_from_file(md_file)
- if not title:
- title = (
- md_file.stem.replace("-", " ").replace("_", " ").title()
- )
-
- important_guides[guide_type] = {"link": link, "title": title}
- break
-
- return important_guides
-
-
-def create_missing_index_files(docs_dir, repos):
- """
- Create index.md files for directories that don't have them.
- This prevents dead links when linking to directory paths.
- Also fixes links in existing markdown files to add trailing slashes.
- """
- created_files = []
- directories_with_new_indexes = set()
-
- for repo in repos:
- target_path = repo["target_path"]
- repo_docs_path = Path(docs_dir) / target_path
-
- if not repo_docs_path.exists():
- continue
-
- for dirpath in repo_docs_path.rglob("*"):
- if not dirpath.is_dir():
- continue
-
- if dirpath.name.startswith(".") or dirpath.name.startswith("_"):
- continue
-
- has_index = (dirpath / "index.md").exists() or (
- dirpath / "README.md"
- ).exists()
-
- if not has_index:
- md_files = sorted([f for f in dirpath.glob("*.md") if f.is_file()])
-
- if md_files:
- index_path = dirpath / "index.md"
-
- dir_name = dirpath.name.replace("-", " ").replace("_", " ").title()
-
- content = f"# {dir_name}\n\n"
- content += f"This section contains the following guides:\n\n"
-
- for md_file in md_files:
- title = get_title_from_file(md_file)
- if not title:
- title = (
- md_file.stem.replace("-", " ").replace("_", " ").title()
- )
-
- link = md_file.stem
- content += f"- [{title}](./{link})\n"
-
- with open(index_path, "w", encoding="utf-8") as f:
- f.write(content)
-
- created_files.append(str(index_path.relative_to(docs_dir)))
- dir_path_str = "/" + str(dirpath.relative_to(docs_dir))
- directories_with_new_indexes.add(dir_path_str)
- print(
- f" [Success] Created index for: {dirpath.relative_to(docs_dir)}"
- )
-
- if directories_with_new_indexes:
- print("\n Fixing links to newly indexed directories...")
- for repo in repos:
- target_path = repo["target_path"]
- repo_docs_path = Path(docs_dir) / target_path
-
- if not repo_docs_path.exists():
- continue
-
- for md_file in repo_docs_path.rglob("*.md"):
- if not md_file.is_file():
- continue
-
- try:
- with open(md_file, "r", encoding="utf-8") as f:
- content = f.read()
-
- modified = False
- for dir_path in directories_with_new_indexes:
- # Look for links to this directory without trailing slash
- # Pattern: ](/path/to/dir) or ](/path/to/dir "title")
- import re
-
- pattern = re.compile(f"\\]\\({re.escape(dir_path)}(\\)|\\s)")
- if pattern.search(content):
- content = pattern.sub(f"]({dir_path}/\\1", content)
- modified = True
-
- if modified:
- with open(md_file, "w", encoding="utf-8") as f:
- f.write(content)
- print(
- f" [Success] Fixed links in: {md_file.relative_to(docs_dir)}"
- )
-
- except Exception as e:
- print(f" [Warning] Could not process {md_file}: {e}")
-
- return created_files
-
-
-def generate_sidebar_config(repo_config, docs_dir, section_priorities):
- """
- Generate sidebar configuration for a repository
- """
- repo_name = repo_config["name"]
- target_path = repo_config["target_path"]
-
- repo_docs_path = Path(docs_dir) / target_path
-
- if not repo_docs_path.exists():
- print(f" [Warning] Docs path not found for {repo_name}: {repo_docs_path}")
- return None
-
- print(f" Generating sidebar for: {repo_name}")
-
- items = get_directory_structure(repo_docs_path, Path(docs_dir))
-
- if not items:
- print(f" [Warning] No items found for {repo_name}")
- return None
-
- items = sorted(items, key=lambda s: get_section_priority(s, section_priorities))
-
- sidebar_path = f"/projects/{repo_name}/"
-
- key_guides = find_important_guides(repo_docs_path, docs_dir)
- print(f" Found key guides: {list(key_guides.keys())}")
-
- has_overview = (repo_docs_path / "index.md").exists() or (
- repo_docs_path / "README.md"
- ).exists()
- print(f" Has overview page: {has_overview}")
-
- return {
- "path": sidebar_path,
- "items": items,
- "key_guides": key_guides,
- "has_overview": has_overview,
- }
-
-
-def generate_nav_items(repos, sidebars):
- """
- Generate navigation dropdown items for projects
- For nav, we use simple links (not nested) since VitePress nav only supports 2 levels
- """
- nav_items = []
-
- # Create a map of repo name to sidebar for quick lookup
- sidebar_map = {s["path"].strip("/").split("/")[-1]: s for s in sidebars if s}
-
- for repo in repos:
- repo_name = repo["name"]
- # Use display name if configured, otherwise use repo name
- display_name = repo.get("display_name", repo_name.replace("-", " ").title())
-
- sidebar = sidebar_map.get(repo_name)
-
- link = None
-
- if sidebar:
- # Try to use one of the important guides as first link ("getting started", "quickstart", etc.)
- important_guides = sidebar.get("important_guides", {})
- for guide_type in ["getting_started", "quickstart", "installation"]:
- if guide_type in important_guides:
- link = important_guides[guide_type]["link"]
- break
-
- # If there is no important guide, check if there's an overview
- if not link and sidebar.get("has_overview"):
- link = f"/projects/{repo_name}/"
-
- # If still no link, use first section's first item
- if not link and sidebar.get("items") and len(sidebar["items"]) > 0:
- first_item = sidebar["items"][0]
- if "items" in first_item and len(first_item["items"]) > 0:
- link = first_item["items"][0].get("link")
- elif "link" in first_item:
- link = first_item["link"]
-
- if not link:
- link = f"/projects/{repo_name}/"
-
- nav_items.append({"text": display_name, "link": link})
-
- return nav_items
-
-
-def generate_technical_docs_sidebar_items(repos, sidebars):
- """
- Generate expandable sidebar items for the Technical Documentation section.
- Uses the full sidebar structure with proper expandable sections.
- """
- sidebar_items = []
-
- sidebar_map = {s["path"].strip("/").split("/")[-1]: s for s in sidebars if s}
-
- for repo in repos:
- repo_name = repo["name"]
- display_name = repo.get("display_name", repo_name.replace("-", " ").title())
-
- sidebar = sidebar_map.get(repo_name)
-
- if not sidebar or not sidebar.get("items"):
- # Simple link if no sidebar found
- sidebar_items.append(
- {"text": display_name, "link": f"/projects/{repo_name}/"}
- )
- continue
-
- project_item = {"text": display_name, "collapsed": True, "items": []}
-
- important_guides = sidebar.get("important_guides", {})
- guide_order = ["quickstart", "getting_started", "installation"]
- important_guide_links = set()
-
- for guide_type in guide_order:
- if guide_type in important_guides:
- guide = important_guides[guide_type]
- project_item["items"].append(
- {"text": guide["title"], "link": guide["link"]}
- )
- important_guide_links.add(guide["link"])
-
- # Add the full sidebar items (sections like Introduction, Developers, Operators)
- # Sort sections to put Introduction first
- sections = sidebar.get("items", [])
-
- section_priority = {
- "introduction": 0,
- "overview": 0,
- "developers": 1,
- "operators": 2,
- }
-
- def get_section_priority(section):
- section_name = section.get("text", "").lower()
- for key, priority in section_priority.items():
- if key in section_name:
- return priority
- return 999
-
- sorted_sections = sorted(sections, key=get_section_priority)
-
- for section in sorted_sections:
- filtered_section = filter_section_items(section, important_guide_links)
- if filtered_section:
- project_item["items"].append(filtered_section)
-
- sidebar_items.append(project_item)
-
- return sidebar_items
-
-
-def filter_section_items(section, exclude_links):
- """
- Recursively filter out items that are in the exclude_links set.
- Returns None if the section becomes empty after filtering.
- """
- if "items" in section:
- # This is a section with subitems
- filtered_items = []
- for item in section["items"]:
- filtered_item = filter_section_items(item, exclude_links)
- if filtered_item:
- filtered_items.append(filtered_item)
-
- if filtered_items:
- return {
- "text": section["text"],
- "collapsed": section.get("collapsed", True),
- "items": filtered_items,
- }
- else:
- return None
- elif "link" in section:
- # This is a direct link item
- if section["link"] not in exclude_links:
- return {"text": section["text"], "link": section["link"]}
- else:
- return None
- else:
- # Unknown structure, pass
- return section
-
-
-def format_items_as_typescript(items, indent_level=3):
- """
- Format items array as TypeScript code
-
- Args:
- items: List of item dictionaries
- indent_level: Indentation level (3 = 12 spaces for alignment in nav, 4 = 14 spaces in sidebar)
- """
- indent = " " * indent_level
- lines = []
-
- for item in items:
- if "items" in item:
- # Expandable item with subitems
- lines.append(f"{indent}{{")
- lines.append(f"{indent} text: '{item['text']}',")
- if "collapsed" in item:
- collapsed = "true" if item["collapsed"] else "false"
- lines.append(f"{indent} collapsed: {collapsed},")
- lines.append(f"{indent} items: [")
-
- # Process subitems (can be links or nested sections)
- for subitem in item["items"]:
- if "items" in subitem:
- # Nested section
- nested_lines = format_items_as_typescript(
- [subitem], indent_level + 2
- )
- lines.append(nested_lines)
- else:
- # Simple link: clean the title and add it
- title = subitem["text"].strip('"').replace("'", "\\'")
- lines.append(
- f"{indent} {{ text: '{title}', link: '{subitem['link']}' }},"
- )
-
- lines.append(f"{indent} ]")
- lines.append(f"{indent}}},")
- else:
- title = item["text"].strip('"').replace("'", "\\'")
- lines.append(f"{indent}{{ text: '{title}', link: '{item['link']}' }},")
-
- return "\n".join(lines)
-
-
-def update_vitepress_config(config_path, sidebars, nav_items, technical_docs_items):
- """
- Update VitePress config.mts file with generated sidebars and nav items
- Automatically replaces Technical Documentation sections in both nav and sidebar
- """
- print(f"\nUpdating VitePress config: {config_path}")
-
- try:
- with open(config_path, "r", encoding="utf-8") as f:
- lines = f.readlines()
- except FileNotFoundError:
- print(f" [ERROR] Config file not found: {config_path}")
- return False
-
- # Find and replace Technical Documentation sections
- i = 0
- sections_updated = 0
-
- while i < len(lines):
- line = lines[i]
-
- if "text: 'Technical Documentation'" in line:
- j = i + 1
- while j < len(lines) and "items: [" not in lines[j]:
- j += 1
-
- if j >= len(lines):
- i += 1
- continue
-
- items_line_indent = len(lines[j]) - len(lines[j].lstrip())
-
- k = j + 1
- bracket_count = 1
- while k < len(lines) and bracket_count > 0:
- bracket_count += lines[k].count("[") - lines[k].count("]")
- if bracket_count == 0:
- break
- k += 1
-
- if k >= len(lines):
- i += 1
- continue
-
- # k now points to the line with the closing ]
- # Determine which section this is (nav or sidebar) by checking if we're before or after 'sidebar:'
- is_nav_section = True
- for check_line in lines[:i]:
- if "sidebar:" in check_line:
- is_nav_section = False
- break
-
- # Generate the replacement content
- if is_nav_section:
- replacement_items = format_items_as_typescript(
- nav_items, indent_level=7
- )
- else:
- replacement_items = format_items_as_typescript(
- technical_docs_items, indent_level=6
- )
-
- new_lines = lines[: j + 1] + [replacement_items + "\n"] + lines[k:]
- lines = new_lines
-
- sections_updated += 1
- print(
- f" [Success] Updated Technical Documentation ({'nav' if is_nav_section else 'sidebar'} section)"
- )
-
- i = j + 2
- else:
- i += 1
-
- if sections_updated == 0:
- print(" [Warning] Could not find any Technical Documentation sections")
- return False
-
- # Update project-specific sidebars (e.g., '/projects/gardenlinux/')
- print("\n Updating project-specific sidebars...")
- for sidebar in sidebars:
- if not sidebar:
- continue
-
- project_path = sidebar["path"]
- project_items = sidebar["items"]
-
- i = 0
- while i < len(lines):
- if f"'{project_path}': [" in lines[i] or f'"{project_path}": [' in lines[i]:
- j = i
- while j < len(lines) and "[" not in lines[j]:
- j += 1
-
- if j >= len(lines):
- i += 1
- continue
-
- k = j + 1
- bracket_count = 1
- while k < len(lines) and bracket_count > 0:
- bracket_count += lines[k].count("[") - lines[k].count("]")
- if bracket_count == 0:
- break
- k += 1
-
- if k >= len(lines):
- i += 1
- continue
-
- replacement_items = format_items_as_typescript(
- project_items, indent_level=4
- )
-
- new_lines = lines[: j + 1] + [replacement_items + "\n"] + lines[k:]
- lines = new_lines
-
- sections_updated += 1
- print(f" [Success] Updated {project_path} sidebar")
-
- i = j + 2
- break
- else:
- i += 1
-
- try:
- with open(config_path, "w", encoding="utf-8") as f:
- f.writelines(lines)
- print(f" [Success] Successfully updated {config_path}")
- except Exception as e:
- print(f" [ERROR] Error writing config file: {e}")
- return False
-
- output_file = config_path.replace(".mts", ".generated.json")
- with open(output_file, "w", encoding="utf-8") as f:
- json.dump(
- {
- "sidebars": sidebars,
- "nav_items": nav_items,
- "technical_docs_sidebar_items": technical_docs_items,
- },
- f,
- indent=2,
- )
-
- print(f"\n Generated config also saved to: {output_file}")
- print(f"\n Summary:")
- print(f" - Project sidebars: {len([s for s in sidebars if s])}")
- print(f" - Nav items: {len(nav_items)}")
- print(f" - Technical docs sidebar items: {len(technical_docs_items)}")
- print(f" - Sections updated: {sections_updated}")
-
- return True
-
-
-def main():
- parser = argparse.ArgumentParser(description="Update VitePress configuration")
- parser.add_argument("--config", required=True, help="Path to repos-config.json")
- parser.add_argument("--docs-dir", required=True, help="Path to docs directory")
- parser.add_argument(
- "--vitepress-config", required=True, help="Path to VitePress config.mts"
- )
-
- args = parser.parse_args()
-
- print("Generating VitePress configuration...")
-
- config = load_config(args.config)
- section_priorities = config.get("section_priorities", {})
-
- print("\nChecking for directories without index files...")
- created_files = create_missing_index_files(args.docs_dir, config["repos"])
- if created_files:
- print(f" Created {len(created_files)} index file(s)")
- else:
- print(" All directories have index files")
-
- sidebars = []
- for repo in config["repos"]:
- sidebar = generate_sidebar_config(repo, args.docs_dir, section_priorities)
- if sidebar:
- sidebars.append(sidebar)
-
- nav_items = generate_nav_items(config["repos"], sidebars)
-
- technical_docs_items = generate_technical_docs_sidebar_items(
- config["repos"], sidebars
- )
-
- update_vitepress_config(
- args.vitepress_config, sidebars, nav_items, technical_docs_items
- )
-
- print("\n[Completed] Configuration update complete!")
-
-
-if __name__ == "__main__":
- main()
diff --git a/src/README.md b/src/README.md
new file mode 100644
index 0000000..f1b1a02
--- /dev/null
+++ b/src/README.md
@@ -0,0 +1,73 @@
+# Source Code Documentation
+
+## Structure
+
+```
+src/
+├── aggregate.py # CLI entry point
+├── migration_tracker.py # Standalone utility
+└── aggregation/ # Core package
+ ├── __init__.py
+ ├── models.py # Data classes
+ ├── config.py # Config I/O
+ ├── fetcher.py # Git + local fetch
+ ├── transformer.py # Content transforms
+ └── structure.py # Directory transforms
+```
+
+## Modules
+
+### `aggregation/models.py`
+Data classes for type safety:
+- `RepoConfig` — repository configuration
+- `AggregateResult` — fetch result with commit hash
+
+### `aggregation/config.py`
+Configuration file handling:
+- `load_config()` — parse repos-config.json
+- `save_config()` — write updated config (commit locks)
+
+### `aggregation/fetcher.py`
+Repository fetching:
+- `DocsFetcher` — main class
+ - `__init__(project_root, update_locks=False)` — initialize with optional commit lock updating
+ - `fetch()` — fetch repository and return result with commit hash
+ - `_fetch_remote()` — git sparse checkout from remote repository
+ - `_fetch_local()` — filesystem copy from local repository
+ - `_copy_docs()` — static method to copy docs directory
+ - `_copy_root_files()` — static method to copy root-level files (e.g., CONTRIBUTING.md)
+
+### `aggregation/transformer.py`
+Content transformation:
+- `rewrite_links()` — fix markdown links
+- `quote_yaml_value()` — YAML safety
+- `ensure_frontmatter()` — add/fix frontmatter
+- `parse_frontmatter()` — extract metadata
+- `fix_broken_project_links()` — validate links
+
+### `aggregation/structure.py`
+Directory operations:
+- `transform_directory_structure()` — restructure docs based on config
+- `copy_targeted_docs(source_dir, docs_dir, repo_name, media_dirs=None)` — place files via `github_target_path` frontmatter and copy associated media directories
+ - Handles nested media dirs (e.g., `tutorials/assets/`) by copying to same relative path
+ - Handles root-level media dirs (e.g., `_static/`) by copying to common ancestor of targeted files
+- `process_markdown_file()` — transform single markdown file
+- `process_all_markdown()` — batch process all markdown files in directory
+
+### `aggregate.py`
+CLI orchestration — combines all modules into workflow.
+
+## Usage
+
+```python
+from aggregation import load_config, DocsFetcher, process_all_markdown
+
+# Load config
+repos = load_config("repos-config.json")
+
+# Fetch docs
+fetcher = DocsFetcher(project_root)
+result = fetcher.fetch(repo, output_dir)
+# Transform
+process_all_markdown(target_dir, repo_name)
+```
\ No newline at end of file
diff --git a/src/aggregate.py b/src/aggregate.py
new file mode 100755
index 0000000..2425f64
--- /dev/null
+++ b/src/aggregate.py
@@ -0,0 +1,233 @@
+#!/usr/bin/env python3
+"""
+Unified documentation aggregation script for docs-ng
+
+This script orchestrates documentation aggregation from multiple repositories.
+All heavy lifting is done by the aggregation package modules.
+"""
+
+import argparse
+import sys
+import tempfile
+from pathlib import Path
+
+from aggregation import (
+ load_config,
+ save_config,
+ DocsFetcher,
+ transform_directory_structure,
+ copy_targeted_docs,
+ process_all_markdown,
+)
+
+
+def transform_repo_docs(
+ repo,
+ docs_dir: Path,
+ temp_dir: Path,
+) -> bool:
+ """Transform documentation for a single repository."""
+ repo_name = repo.name
+ print(f"\n{'='*60}")
+ print(f"Transforming docs for: {repo_name}")
+ print(f"{'='*60}")
+
+ source_dir = temp_dir / repo_name
+ target_dir = docs_dir / repo.target_path
+
+ # Step 1: Copy files with 'github_target_path:' frontmatter
+ print(f"\nStep 1: Processing targeted files...")
+ copy_targeted_docs(str(source_dir), str(docs_dir), repo_name, repo.media_directories)
+
+ # Step 2: Transform project structure
+ print(f"\nStep 2: Transforming project structure...")
+ transform_directory_structure(
+ str(source_dir),
+ str(target_dir),
+ repo.structure,
+ repo.special_files,
+ repo.media_directories,
+ )
+
+ # Step 3: Process markdown files
+ print(f"\nStep 3: Processing markdown files...")
+ process_all_markdown(str(target_dir), repo_name)
+
+ print(f"\n✓ Transformation complete for {repo_name}")
+ return True
+
+
+def aggregate_repo(
+ repo,
+ docs_dir: Path,
+ temp_dir: Path,
+ fetcher: DocsFetcher,
+) -> tuple:
+ """
+ Aggregate documentation for a single repository.
+
+ Returns:
+ Tuple of (success, resolved_commit_hash)
+ """
+ print(f"\n{'='*60}")
+ print(f"Aggregating: {repo.name}")
+ print(f"{'='*60}")
+
+ # Create output directory for this repo
+ repo_output_dir = temp_dir / repo.name
+ repo_output_dir.mkdir(parents=True, exist_ok=True)
+
+ # Fetch the repository
+ result = fetcher.fetch(repo, repo_output_dir)
+
+ if not result.success:
+ print(f"✗ Failed to fetch {repo.name}")
+ return False, result.resolved_commit
+
+ # Transform the fetched docs
+ transform_success = transform_repo_docs(repo, docs_dir, temp_dir)
+
+ if not transform_success:
+ print(f"✗ Failed to transform {repo.name}")
+ return False, result.resolved_commit
+
+ return True, result.resolved_commit
+
+
+def main() -> int:
+    """
+    Main entry point.
+
+    Aggregates every repository selected on the command line and, with
+    --update-locks, writes the resolved commit hashes back to the config.
+
+    Returns:
+        0 if every selected repository was aggregated; 1 if any failed or
+        --repo names a repository that is not in the config.
+    """
+    parser = argparse.ArgumentParser(
+        description="Aggregate documentation from multiple repositories",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+ # Aggregate all repositories
+ %(prog)s
+
+ # Aggregate with local config (file:// URLs, no git)
+ %(prog)s --config repos-config.local.json
+
+ # Aggregate specific repository
+ %(prog)s --repo gardenlinux
+
+ # Update commit locks (fetch and update config with resolved commit hashes)
+ %(prog)s --update-locks
+ """,
+    )
+
+    parser.add_argument(
+        "--config",
+        default="repos-config.json",
+        help="Path to repos-config.json (default: repos-config.json)",
+    )
+    parser.add_argument(
+        "--docs-dir",
+        default="docs",
+        help="Path to docs directory (default: docs)",
+    )
+    parser.add_argument(
+        "--repo",
+        help="Only aggregate specific repository",
+    )
+    parser.add_argument(
+        "--update-locks",
+        action="store_true",
+        help="Update commit locks: fetch and update config with resolved commit hashes",
+    )
+
+    args = parser.parse_args()
+
+    # Determine script directory
+    script_dir = Path(__file__).parent.resolve()
+    project_root = script_dir.parent
+
+    # Resolve paths
+    # Config files are in project root, not in src/. Joining with "/" anchors a
+    # relative argument at project_root and leaves an absolute one unchanged.
+    config_path = project_root / args.config
+    docs_dir = project_root / args.docs_dir
+
+    # Load configuration
+    print(f"{'='*60}")
+    print("Garden Linux Documentation Aggregation")
+    print(f"{'='*60}\n")
+    print(f"Configuration: {config_path}")
+    print(f"Docs directory: {docs_dir}")
+    if args.repo:
+        print(f"Repository filter: {args.repo}")
+    if args.update_locks:
+        print("Update commit locks: ENABLED")
+    print()
+
+    repos = load_config(str(config_path))
+
+    # Filter up front so an unknown --repo name is an error, not a silent no-op
+    selected_repos = [repo for repo in repos if not args.repo or repo.name == args.repo]
+    if args.repo and not selected_repos:
+        print(f"Error: Repository '{args.repo}' not found in {config_path}", file=sys.stderr)
+        return 1
+
+    # Create temporary directory for fetched docs
+    with tempfile.TemporaryDirectory() as temp_dir_str:
+        temp_dir = Path(temp_dir_str)
+        print(f"Temporary directory: {temp_dir}\n")
+
+        # Initialize fetcher
+        fetcher = DocsFetcher(project_root, update_locks=args.update_locks)
+
+        # Track resolved commits for locking
+        resolved_commits = {}
+        success_count = 0
+        fail_count = 0
+
+        # Aggregate each repository
+        for repo in selected_repos:
+            success, resolved_commit = aggregate_repo(repo, docs_dir, temp_dir, fetcher)
+
+            if success:
+                success_count += 1
+                if resolved_commit:
+                    resolved_commits[repo.name] = resolved_commit
+            else:
+                fail_count += 1
+
+        # Update config with resolved commits if locking
+        if args.update_locks and resolved_commits:
+            print(f"\n{'='*60}")
+            print("Updating config with resolved commits...")
+            print(f"{'='*60}\n")
+
+            for repo in repos:
+                if repo.name in resolved_commits:
+                    repo.commit = resolved_commits[repo.name]
+                    print(f" {repo.name}: {resolved_commits[repo.name]}")
+
+            save_config(str(config_path), repos)
+            print(f"\n✓ Config updated: {config_path}")
+
+    # Summary
+    print(f"\n{'='*60}")
+    print("Documentation aggregation complete!")
+    print(f"{'='*60}\n")
+    print(f"Successful: {success_count}")
+    print(f"Failed: {fail_count}")
+
+    print("\nNext steps:")
+    print(" 1. Review the changes in docs/projects/")
+    print(" 2. Run 'make dev' or 'pnpm run docs:dev' to preview")
+    print(" 3. Commit the changes if satisfied")
+
+    return 0 if fail_count == 0 else 1
+
+
+if __name__ == "__main__":
+ sys.exit(main())
\ No newline at end of file
diff --git a/src/aggregation/__init__.py b/src/aggregation/__init__.py
new file mode 100644
index 0000000..98390e9
--- /dev/null
+++ b/src/aggregation/__init__.py
@@ -0,0 +1,38 @@
+"""Aggregation package for docs-ng documentation aggregation."""
+
+# Re-export commonly used functions for backward compatibility with tests
+from .transformer import (
+ rewrite_links,
+ ensure_frontmatter,
+ quote_yaml_value,
+ parse_frontmatter,
+)
+
+from .models import RepoConfig, AggregateResult
+from .config import load_config, save_config
+from .fetcher import DocsFetcher
+from .structure import (
+ transform_directory_structure,
+ copy_targeted_docs,
+ process_all_markdown,
+)
+
+__all__ = [
+ # Models
+ "RepoConfig",
+ "AggregateResult",
+ # Config
+ "load_config",
+ "save_config",
+ # Fetcher
+ "DocsFetcher",
+ # Transformer (for tests)
+ "rewrite_links",
+ "ensure_frontmatter",
+ "quote_yaml_value",
+ "parse_frontmatter",
+ # Structure
+ "transform_directory_structure",
+ "copy_targeted_docs",
+ "process_all_markdown",
+]
\ No newline at end of file
diff --git a/src/aggregation/config.py b/src/aggregation/config.py
new file mode 100644
index 0000000..16af74f
--- /dev/null
+++ b/src/aggregation/config.py
@@ -0,0 +1,71 @@
+"""Configuration loading and saving for documentation aggregation."""
+
+import json
+import sys
+from typing import Dict, List
+
+from .models import RepoConfig
+
+
+def load_config(config_path: str) -> List[RepoConfig]:
+ """
+ Load and validate repository configuration.
+
+ Args:
+ config_path: Path to JSON configuration file
+
+ Returns:
+ List of validated RepoConfig objects
+ """
+ try:
+ with open(config_path, "r", encoding="utf-8") as f:
+ config = json.load(f)
+
+ if "repos" not in config:
+ raise ValueError("Configuration must have 'repos' array")
+
+ repos = []
+ for repo_dict in config["repos"]:
+ repo = RepoConfig.from_dict(repo_dict)
+ repo.validate()
+ repos.append(repo)
+
+ return repos
+ except json.JSONDecodeError as e:
+ print(f"Error: Invalid JSON in config file: {e}", file=sys.stderr)
+ sys.exit(1)
+ except Exception as e:
+ print(f"Error loading config: {e}", file=sys.stderr)
+ sys.exit(1)
+
+
+def save_config(config_path: str, repos: List[RepoConfig]) -> None:
+ """
+ Save repository configuration to JSON file.
+
+ Args:
+ config_path: Path to JSON configuration file
+ repos: List of RepoConfig objects to save
+ """
+ # Build config dict
+ config = {
+ "repos": [
+ {
+ "name": repo.name,
+ "url": repo.url,
+ "docs_path": repo.docs_path,
+ "target_path": repo.target_path,
+ **({"ref": repo.ref} if repo.ref else {}),
+ **({"commit": repo.commit} if repo.commit else {}),
+ **({"root_files": repo.root_files} if repo.root_files else {}),
+ **({"structure": repo.structure} if repo.structure != "flat" else {}),
+ **({"special_files": repo.special_files} if repo.special_files else {}),
+ **({"media_directories": repo.media_directories} if repo.media_directories else {}),
+ }
+ for repo in repos
+ ]
+ }
+
+ with open(config_path, "w", encoding="utf-8") as f:
+ json.dump(config, f, indent=2)
+ f.write("\n")
\ No newline at end of file
diff --git a/src/aggregation/fetcher.py b/src/aggregation/fetcher.py
new file mode 100644
index 0000000..02dadcd
--- /dev/null
+++ b/src/aggregation/fetcher.py
@@ -0,0 +1,236 @@
+"""Repository fetching for documentation aggregation."""
+
+import shutil
+import subprocess
+import sys
+import tempfile
+from pathlib import Path
+from typing import Tuple, Optional
+
+from .models import RepoConfig, AggregateResult
+
+
+class DocsFetcher:
+ """Handles fetching documentation from remote or local repositories."""
+
+ def __init__(self, project_root: Path, update_locks: bool = False):
+ """
+ Initialize fetcher.
+
+ Args:
+ project_root: Root directory of docs-ng project
+ update_locks: Whether we're in update-locks mode (allows commit mismatches)
+ """
+ self.project_root = project_root
+ self.update_locks = update_locks
+
+ def fetch(self, repo: RepoConfig, output_dir: Path) -> AggregateResult:
+ """
+ Fetch documentation for a repository.
+
+ Args:
+ repo: Repository configuration
+ output_dir: Where to copy fetched files
+
+ Returns:
+ AggregateResult with success status and resolved commit
+ """
+ if repo.is_local:
+ success = self._fetch_local(repo, output_dir)
+ return AggregateResult(repo.name, success, None)
+ else:
+ success, commit = self._fetch_remote(repo, output_dir)
+ return AggregateResult(repo.name, success, commit)
+
+    def _fetch_remote(
+        self,
+        repo: RepoConfig,
+        output_dir: Path,
+    ) -> Tuple[bool, Optional[str]]:
+        """
+        Fetch from remote repository using git sparse checkout.
+
+        A throwaway repository is initialised in a temporary directory, limited
+        via sparse checkout to repo.docs_path and repo.root_files, and repo.ref
+        is fetched with depth 1. The temporary directory is always removed
+        before returning.
+
+        Git and copy errors are printed to stderr and signalled via the return value.
+
+        Args:
+            repo: Repository configuration
+            output_dir: Where to copy fetched files
+
+        Returns:
+            Tuple of (success, resolved_commit). resolved_commit is None when a
+            git command or the copy raised; on a commit lock mismatch outside
+            update-locks mode the result is (False, resolved_commit).
+        """
+        temp_dir = Path(tempfile.mkdtemp())
+
+        # Options shared by every git invocation below
+        git_kwargs = {"check": True, "capture_output": True, "cwd": temp_dir}
+
+        try:
+            print(f" Fetching from: {repo.url}")
+            print(f" Ref: {repo.ref}")
+            if repo.root_files:
+                print(f" Root files: {', '.join(repo.root_files)}")
+            print(f" Output: {output_dir}")
+
+            # Initialize sparse checkout
+            subprocess.run(["git", "init"], **git_kwargs)
+            subprocess.run(["git", "remote", "add", "origin", repo.url], **git_kwargs)
+            subprocess.run(["git", "config", "core.sparseCheckout", "true"], **git_kwargs)
+
+            # Configure sparse checkout patterns
+            sparse_checkout_file = temp_dir / ".git" / "info" / "sparse-checkout"
+            with open(sparse_checkout_file, "w") as f:
+                f.write(f"{repo.docs_path}/*\n")
+                for root_file in repo.root_files:
+                    f.write(f"{root_file}\n")
+
+            # Fetch and checkout
+            print(" Cloning (sparse checkout)...")
+            subprocess.run(["git", "fetch", "--depth=1", "origin", repo.ref], **git_kwargs)
+            # Check out FETCH_HEAD instead of repo.ref: fetching a single ref is not
+            # guaranteed to create a local ref of that name (e.g. for a tag), while
+            # FETCH_HEAD always names the commit that was just fetched.
+            subprocess.run(["git", "checkout", "FETCH_HEAD"], **git_kwargs)
+
+            # Get resolved commit hash
+            result = subprocess.run(["git", "rev-parse", "HEAD"], text=True, **git_kwargs)
+            resolved_commit = result.stdout.strip()
+            print(f" Resolved commit: {resolved_commit}")
+
+            # Verify commit lock if specified
+            if repo.commit:
+                if resolved_commit != repo.commit:
+                    if self.update_locks:
+                        # In update-locks mode, commit mismatch is expected
+                        print(f" Updating lock: {repo.commit[:8]} → {resolved_commit[:8]}")
+                    else:
+                        # In normal mode, commit mismatch is an error
+                        print(f" Warning: Commit mismatch!", file=sys.stderr)
+                        print(f" Expected: {repo.commit}", file=sys.stderr)
+                        print(f" Got: {resolved_commit}", file=sys.stderr)
+                        return False, resolved_commit
+                else:
+                    print(f" ✓ Commit lock verified")
+
+            # Copy docs to output directory
+            docs_source = temp_dir / repo.docs_path
+            if docs_source.exists():
+                print(f" Copying docs to {output_dir}")
+                self._copy_docs(docs_source, output_dir)
+            else:
+                print(f" Warning: docs_path '{repo.docs_path}' not found in repository")
+
+            # Copy root files if specified
+            self._copy_root_files(temp_dir, repo.root_files, output_dir)
+
+            print(" ✓ Fetch complete")
+            return True, resolved_commit
+
+        except subprocess.CalledProcessError as e:
+            print(f" Error: Git command failed: {e}", file=sys.stderr)
+            if e.stderr:
+                # stderr is str for calls made with text=True (rev-parse), bytes otherwise
+                detail = e.stderr if isinstance(e.stderr, str) else e.stderr.decode(errors="replace")
+                print(f" {detail}", file=sys.stderr)
+            return False, None
+        except Exception as e:
+            print(f" Error: {e}", file=sys.stderr)
+            return False, None
+        finally:
+            # Cleanup
+            shutil.rmtree(temp_dir, ignore_errors=True)
+
+ def _fetch_local(
+ self,
+ repo: RepoConfig,
+ output_dir: Path,
+ ) -> bool:
+ """Fetch from local repository via direct filesystem copy."""
+ try:
+ # Resolve repo path (handle relative paths)
+ repo_path = Path(repo.local_path)
+ if not repo_path.is_absolute():
+ repo_abs_path = (self.project_root / repo_path).resolve()
+ else:
+ repo_abs_path = repo_path.resolve()
+
+ print(f" Copying from: {repo_abs_path}")
+ if repo.root_files:
+ print(f" Root files: {', '.join(repo.root_files)}")
+ print(f" Output: {output_dir}")
+
+ if not repo_abs_path.exists():
+ print(f" Error: Local repository not found: {repo_abs_path}", file=sys.stderr)
+ return False
+
+ # Copy docs directory
+ docs_source = repo_abs_path / repo.docs_path
+ if docs_source.exists():
+ print(f" Copying docs from {repo.docs_path}/")
+ self._copy_docs(docs_source, output_dir)
+ else:
+ print(f" Warning: docs_path '{repo.docs_path}' not found in local repository")
+
+ # Copy root files if specified
+ self._copy_root_files(repo_abs_path, repo.root_files, output_dir)
+
+ print(" ✓ Copy complete")
+ return True
+
+ except Exception as e:
+ print(f" Error: {e}", file=sys.stderr)
+ return False
+
+    @staticmethod
+    def _copy_docs(source: Path, dest: Path) -> None:
+        """
+        Copy documentation directory contents.
+
+        Every entry directly under source is copied: files with shutil.copy2,
+        directories recursively with shutil.copytree (merging into existing
+        ones). Hidden entries such as .media are included, because
+        Path.iterdir() does not skip dot-prefixed names.
+
+        Args:
+            source: Source docs directory
+            dest: Destination directory (created if missing)
+        """
+        dest.mkdir(parents=True, exist_ok=True)
+
+        # iterdir() already yields dot-directories (like .media), so one pass
+        # covers them; a separate glob(".*") pass would only copy them twice.
+        for item in source.iterdir():
+            target = dest / item.name
+            if item.is_file():
+                shutil.copy2(item, target)
+            elif item.is_dir():
+                shutil.copytree(item, target, dirs_exist_ok=True)
+
+ @staticmethod
+ def _copy_root_files(repo_root: Path, root_files: list, dest: Path) -> None:
+ """
+ Copy specified root-level files from repository.
+
+ Args:
+ repo_root: Root directory of the repository
+ root_files: List of filenames to copy
+ dest: Destination directory
+ """
+ if not root_files:
+ return
+
+ print(" Copying root files")
+ for filename in root_files:
+ src = repo_root / filename
+ if src.exists():
+ target = dest / src.name
+ shutil.copy2(src, target)
+ print(f" ✓ {filename}")
+ else:
+ print(f" Warning: {filename} not found")
diff --git a/src/aggregation/models.py b/src/aggregation/models.py
new file mode 100644
index 0000000..de4133e
--- /dev/null
+++ b/src/aggregation/models.py
@@ -0,0 +1,68 @@
+"""Data models for documentation aggregation."""
+
+from dataclasses import dataclass, field
+from typing import Dict, List, Optional, Union
+
+
+@dataclass
+class RepoConfig:
+ """Configuration for a single repository."""
+
+ name: str
+ url: str
+ docs_path: str
+ target_path: str
+ ref: Optional[str] = None
+ commit: Optional[str] = None
+ root_files: List[str] = field(default_factory=list)
+ structure: Union[str, Dict[str, str]] = "flat"
+ special_files: Dict[str, str] = field(default_factory=dict)
+ media_directories: List[str] = field(default_factory=list)
+
+ @property
+ def is_local(self) -> bool:
+ """Check if this is a local file:// repository."""
+ return self.url.startswith("file://")
+
+ @property
+ def is_remote(self) -> bool:
+ """Check if this is a remote https:// repository."""
+ return self.url.startswith("https://")
+
+ @property
+ def local_path(self) -> str:
+ """Get local path by stripping file:// prefix."""
+ return self.url[7:] if self.is_local else ""
+
+ def validate(self) -> None:
+ """Validate repository configuration."""
+ if not (self.is_local or self.is_remote):
+ raise ValueError(f"Invalid URL scheme for {self.name}: {self.url}")
+
+ if self.is_remote and not self.ref:
+ raise ValueError(f"Remote repository {self.name} must have 'ref' field")
+
+ @classmethod
+ def from_dict(cls, data: Dict) -> "RepoConfig":
+ """Create RepoConfig from dictionary."""
+ return cls(
+ name=data["name"],
+ url=data["url"],
+ docs_path=data["docs_path"],
+ target_path=data["target_path"],
+ ref=data.get("ref"),
+ commit=data.get("commit"),
+ root_files=data.get("root_files", []),
+ structure=data.get("structure", "flat"),
+ special_files=data.get("special_files", {}),
+ media_directories=data.get("media_directories", []),
+ )
+
+
+@dataclass
+class AggregateResult:
+ """Result of aggregating a single repository."""
+
+ repo_name: str
+ success: bool
+ resolved_commit: Optional[str] = None
\ No newline at end of file
diff --git a/src/aggregation/structure.py b/src/aggregation/structure.py
new file mode 100644
index 0000000..8a39a66
--- /dev/null
+++ b/src/aggregation/structure.py
@@ -0,0 +1,259 @@
+"""Directory structure transformation and markdown processing."""
+
+import shutil
+from pathlib import Path
+from typing import Dict, List, Optional
+
+from .transformer import (
+ rewrite_links,
+ fix_broken_project_links,
+ ensure_frontmatter,
+ parse_frontmatter,
+)
+
+
+def transform_directory_structure(
+ source_dir: str,
+ target_dir: str,
+ structure_map,
+ special_files: Optional[Dict] = None,
+ media_dirs: Optional[List[str]] = None,
+) -> None:
+ """
+ Transform directory structure based on mapping.
+
+ Args:
+ source_dir: Source directory with fetched docs
+ target_dir: Target directory in docs/projects/
+ structure_map: Directory structure mapping or copy mode
+ special_files: Map of files to move to specific locations
+ media_dirs: List of media directories to preserve
+ """
+ source_path = Path(source_dir)
+ target_path = Path(target_dir)
+ target_path.mkdir(parents=True, exist_ok=True)
+
+ special_files = special_files or {}
+ media_dirs = media_dirs or []
+
+ if isinstance(structure_map, dict):
+ # Structured transformation with subdirectories specified
+ for old_name, new_name in structure_map.items():
+ old_path = source_path / old_name
+ new_path = target_path / new_name
+
+ if old_path.exists():
+ print(f" Transforming: {old_name} -> {new_name}")
+ shutil.copytree(old_path, new_path, dirs_exist_ok=True)
+
+ # Handle special files
+ for item in source_path.iterdir():
+ if item.name in structure_map:
+ continue
+
+ if item.name in special_files:
+ target_subdir = target_path / special_files[item.name]
+ target_subdir.mkdir(parents=True, exist_ok=True)
+ if item.is_file():
+ print(f" Moving {item.name} to {special_files[item.name]}")
+ shutil.copy2(item, target_subdir / item.name)
+ elif item.is_dir():
+ print(f" Moving {item.name} to {special_files[item.name]}")
+ shutil.copytree(item, target_subdir / item.name, dirs_exist_ok=True)
+ elif item.name in media_dirs:
+ print(f" Copying media directory: {item.name}")
+ shutil.copytree(item, target_path / item.name, dirs_exist_ok=True)
+ elif item.is_file() and not item.name.startswith("_"):
+ shutil.copy2(item, target_path / item.name)
+ elif (
+ item.is_dir()
+ and not item.name.startswith("_")
+ and not item.name.startswith(".")
+ ):
+ shutil.copytree(item, target_path / item.name, dirs_exist_ok=True)
+
+ else:
+ # Flat/sphinx structure - copy all files as-is (merged logic)
+ print(f" Copying {structure_map} structure")
+ for item in source_path.glob("*"):
+ target_item = target_path / item.name
+ if item.is_file():
+ shutil.copy2(item, target_item)
+ elif item.is_dir():
+ shutil.copytree(item, target_item, dirs_exist_ok=True)
+
+
+def copy_targeted_docs(source_dir: str, docs_dir: str, repo_name: str, media_dirs: Optional[List[str]] = None) -> None:
+    """
+    Copy markdown files with 'github_target_path:' frontmatter to their specified locations.
+    Also copies media directories to the common target path of targeted files.
+
+    Target paths come from the fetched files themselves, so a target resolving
+    outside docs_dir (absolute path, '..' segments) is skipped with a warning.
+
+    Args:
+        source_dir: Source directory with fetched docs
+        docs_dir: Docs root directory
+        repo_name: Repository name
+        media_dirs: List of media directories to copy alongside targeted files
+    """
+    source_path = Path(source_dir)
+    docs_path = Path(docs_dir)
+    docs_root = docs_path.resolve()
+
+    if not source_path.exists():
+        print(f" [Warning] Source directory not found: {source_dir}")
+        return
+
+    # Find all markdown files
+    md_files = list(source_path.rglob("*.md"))
+    targeted_files = []
+
+    print(f" Scanning {len(md_files)} files for 'github_target_path:' frontmatter...")
+
+    for md_file in md_files:
+        try:
+            with open(md_file, "r", encoding="utf-8") as f:
+                content = f.read()
+
+            frontmatter, _ = parse_frontmatter(content)
+
+            # Check for 'github_target_path' in frontmatter
+            if frontmatter and ("github_target_path" in frontmatter):
+                target_path = frontmatter.get("github_target_path") or frontmatter.get("target")
+
+                # Strip leading 'docs/' if present
+                if target_path.startswith("docs/"):
+                    target_path = target_path[5:]
+
+                target_file = docs_path / target_path
+
+                # Never write outside the docs root
+                if docs_root not in target_file.resolve().parents:
+                    print(f" [Warning] Skipping {md_file.name}: target '{target_path}' is outside {docs_dir}")
+                    continue
+
+                # Create parent directories if needed
+                target_file.parent.mkdir(parents=True, exist_ok=True)
+
+                # Copy the file
+                shutil.copy2(md_file, target_file)
+
+                # Apply markdown processing
+                content = ensure_frontmatter(content)
+
+                with open(target_file, "w", encoding="utf-8") as f:
+                    f.write(content)
+
+                targeted_files.append((md_file.relative_to(source_path), target_path))
+                print(f" ✓ Copied: {md_file.name} → {target_path}")
+
+        except Exception as e:
+            print(f" [Warning] Error processing {md_file.name}: {e}")
+
+    if targeted_files:
+        print(f" ✓ Copied {len(targeted_files)} targeted file(s)")
+
+        # Copy media directories to maintain relative paths with targeted files
+        if media_dirs:
+            print(f" Copying media directories recursively...")
+
+            # Deepest directory containing every targeted file (parents are listed deepest first)
+            target_paths = [Path(target_path) for _, target_path in targeted_files]
+            common_parent = next((p for p in target_paths[0].parents if all(p in t.parents for t in target_paths)), None)
+
+            for media_dir_name in media_dirs:
+                # Recursively find all instances of this media directory in the source
+                for media_dir in source_path.rglob(media_dir_name):
+                    if media_dir.is_dir():
+                        # Calculate relative path from source_path
+                        rel_path = media_dir.relative_to(source_path)
+
+                        # Determine if this is a root-level or nested media directory
+                        if len(rel_path.parts) == 1:
+                            # Root-level media directory: copy to common ancestor of targeted files
+                            if common_parent and common_parent != Path('.'):
+                                target_media = docs_path / common_parent / media_dir_name
+                                target_media.parent.mkdir(parents=True, exist_ok=True)
+                                shutil.copytree(media_dir, target_media, dirs_exist_ok=True)
+                                print(f" ✓ Copied media: {common_parent / media_dir_name}")
+                        else:
+                            # Nested media directory: copy to same relative path
+                            target_media = docs_path / rel_path
+                            target_media.parent.mkdir(parents=True, exist_ok=True)
+                            shutil.copytree(media_dir, target_media, dirs_exist_ok=True)
+                            print(f" ✓ Copied media: {rel_path}")
+    else:
+        print(" No files with 'github_target_path:' frontmatter found")
+
+
+def process_markdown_file(
+    file_path: Path,
+    repo_name: str,
+    target_dir: str,
+    base_path: str = "/projects",
+) -> bool:
+    """
+    Rewrite links and repair frontmatter of one markdown file, in place.
+
+    Args:
+        file_path: Markdown file to rewrite
+        repo_name: Name of the repository the file came from
+        target_dir: Directory the repository docs were copied into
+        base_path: URL prefix under which projects are published
+
+    Returns:
+        True when the file was rewritten, False when an error was reported
+    """
+    try:
+        source = Path(file_path)
+        text = source.read_text(encoding="utf-8")
+
+        # Links are resolved relative to the file's position below target_dir;
+        # a file outside target_dir is handled with an empty relative path.
+        try:
+            rel = str(source.relative_to(Path(target_dir)))
+        except ValueError:
+            rel = ""
+
+        # Rewrite first, so the dead-link check sees the final link targets
+        text = rewrite_links(text, repo_name, rel, base_path)
+        text = fix_broken_project_links(text, repo_name, target_dir, base_path)
+        text = ensure_frontmatter(text)
+
+        source.write_text(text, encoding="utf-8")
+    except Exception as e:
+        # Report the failure to the caller instead of raising
+        print(f" [Warning] Error processing {file_path}: {e}")
+        return False
+
+    return True
+
+
+def process_all_markdown(target_dir: str, repo_name: str) -> None:
+    """
+    Rename README files for VitePress, then process every markdown file.
+    Progress and the number of successfully processed files are printed.
+
+    Args:
+        target_dir: Directory tree to process
+        repo_name: Repository name passed on to the per-file processing
+    """
+    root = Path(target_dir)
+
+    # Rename README.md to index.md for VitePress; an existing index.md is kept
+    for readme in list(root.rglob("README.md")):
+        index_file = readme.with_name("index.md")
+        if index_file.exists():
+            continue
+        readme.rename(index_file)
+        print(f" Renamed {readme.relative_to(root)} to index.md")
+
+    # Collected after the rename so renamed files are listed as index.md
+    md_files = list(root.rglob("*.md"))
+    print(f" Processing {len(md_files)} markdown files...")
+
+    results = [process_markdown_file(md, repo_name, target_dir) for md in md_files]
+    success_count = sum(1 for ok in results if ok)
+
+    print(f" ✓ Processed {success_count}/{len(md_files)} files successfully")
diff --git a/src/aggregation/transformer.py b/src/aggregation/transformer.py
new file mode 100644
index 0000000..b9319d1
--- /dev/null
+++ b/src/aggregation/transformer.py
@@ -0,0 +1,327 @@
+"""Content transformation functions for documentation aggregation."""
+
+import re
+from pathlib import Path
+from typing import Optional, Dict, Tuple
+
+
+def rewrite_links(
+    content: str,
+    repo_name: str,
+    file_rel_path: str = "",
+    base_path: str = "/projects",
+    github_base: str = "https://github.com/gardenlinux",
+) -> str:
+    """
+    Rewrite internal markdown links to work with VitePress structure.
+
+    Anchors, links already below base_path and links with a URI scheme
+    (https:, mailto:, tel:, ...) are left untouched. "./", "../" and bare
+    file-name links are resolved against the directory of file_rel_path and
+    lose their ".md" extension. Absolute links, and "../" links that climb
+    above the docs root of a file whose location is known, point to the
+    repository on GitHub and keep their extension. Links into a ".media/"
+    directory are mapped below base_path/repo_name.
+
+    Args:
+        content: The markdown content
+        repo_name: Name of the repository
+        file_rel_path: Relative path of the file within the repo ("" if unknown)
+        base_path: Base path for projects
+        github_base: Base URL for GitHub organization
+
+    Returns:
+        Content with rewritten links
+    """
+    file_dir = str(Path(file_rel_path).parent) if file_rel_path else ""
+    if file_dir == ".":
+        file_dir = ""
+    # Directory components of the current file, outermost first
+    dir_parts = file_dir.split("/") if file_dir else []
+    site_root = f"{base_path}/{repo_name}"
+    # Matches a leading URI scheme such as "https:" or "mailto:"
+    has_scheme = re.compile(r"[A-Za-z][A-Za-z0-9+.-]*:").match
+
+    def strip_md(path):
+        # Drop ".md" only where it ends the file name: at the end of the link
+        # or before "#anchor", "?query" or a link title
+        return re.sub(r"\.md(?=$|[#?\s])", "", path, count=1)
+
+    def media_target(link):
+        # Media links are re-rooted at the repository's site root
+        media_part = link
+        while media_part.startswith("../"):
+            media_part = media_part[3:]
+        media_part = media_part.replace("./", "")
+        return f"{site_root}/{media_part}"
+
+    def replace_link(match):
+        text = match.group(1)
+        link = match.group(2)
+
+        # Skip anchors and links that are already /projects/ links
+        if link.startswith("#") or link.startswith(f"{base_path}/"):
+            return match.group(0)
+
+        # Skip external links (any URI scheme, e.g. https: or mailto:)
+        if has_scheme(link):
+            return match.group(0)
+
+        # Handle relative paths for .media directory
+        if ".media/" in link:
+            new_link = media_target(link)
+            return f"[{text}]({new_link})"
+
+        # Same-directory links: "./name.md" or a bare "name.md"
+        if link.startswith("./") or "/" not in link:
+            target = link
+            while target.startswith("./"):
+                target = target[2:]
+            new_link = "/".join([site_root, *dir_parts, strip_md(target)])
+            return f"[{text}]({new_link})"
+
+        # Parent-directory links: count how many levels the link climbs
+        if link.startswith("../"):
+            levels_up = 0
+            target = link
+            while target.startswith("../"):
+                target = target[3:]
+                levels_up += 1
+
+            # Check if we go outside docs/ (only decidable for a known file)
+            if file_rel_path and levels_up > len(dir_parts):
+                # Link to GitHub; the extension is part of the file name there
+                new_link = f"{github_base}/{repo_name}/blob/main/{target}"
+                return f"[{text}]({new_link})"
+
+            # Remove numbered prefixes ("01_developers" -> "developers")
+            target = re.sub(r"(^|/)\d+_(?=\w)", r"\1", strip_md(target))
+            # Keep the directories of the current file that were not climbed
+            kept = dir_parts[: max(0, len(dir_parts) - levels_up)]
+            new_link = "/".join([site_root, *kept, target])
+            return f"[{text}]({new_link})"
+
+        # Absolute paths point outside docs/ - link to the file on GitHub
+        if link.startswith("/"):
+            stripped_link = link.lstrip("/")
+            new_link = f"{github_base}/{repo_name}/blob/main/{stripped_link}"
+            return f"[{text}]({new_link})"
+
+        return match.group(0)
+
+    # Apply transform to markdown links
+    content = re.sub(r"\[([^\]]+)\]\(([^)]+)\)", replace_link, content)
+
+    # Handle HTML media links
+    def replace_html_media_link(match):
+        attr_name = match.group(1)
+        link = match.group(2)
+
+        # Leave already rewritten and external URLs alone
+        if link.startswith(f"{base_path}/") or has_scheme(link):
+            return match.group(0)
+        new_link = media_target(link)
+        return f'{attr_name}="{new_link}"'
+
+    content = re.sub(
+        r'(src|srcset)="([^"]*\.media/[^"]*)"',
+        replace_html_media_link,
+        content,
+    )
+
+    return content
+
+
+def quote_yaml_value(value: str) -> str:
+    """
+    Quote YAML value if needed, handling already-quoted values.
+
+    Flow collections ([...], {...}) and block scalar indicators (|, >) are
+    returned unchanged; quoting them would turn them into plain strings.
+
+    Args:
+        value: YAML value to potentially quote
+
+    Returns:
+        Quoted value if needed, otherwise original value
+    """
+    # If value is already properly quoted, return as-is
+    if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
+        if not value.startswith('"\\"'):
+            return value
+
+    # Structural YAML: one bracket pair spanning the value, or a block header
+    for opener, closer in (("[", "]"), ("{", "}")):
+        if value.startswith(opener) and value.find(closer) == len(value) - 1:
+            return value
+    if value in ("|", ">", "|-", ">-", "|+", ">+"):
+        return value
+
+    special_chars = ":#@`|>*&!%[]{},?"
+    needs_quoting = any(char in value for char in special_chars)
+    if value and (value[0] in ('"', "'", " ") or value[-1] == " "):
+        needs_quoting = True
+    if not needs_quoting:
+        return value
+
+    # Backslashes are escapes inside double quotes, so prefer single quotes then
+    if '"' not in value and "\\" not in value:
+        return f'"{value}"'
+    if "'" not in value:
+        return f"'{value}'"
+    escaped_value = value.replace("\\", "\\\\").replace('"', '\\"')
+    return f'"{escaped_value}"'
+
+
+def parse_frontmatter(content: str) -> Tuple[Optional[Dict[str, str]], str]:
+    """
+    Split markdown content into a flat frontmatter mapping and the body.
+
+    Only simple "key: value" lines are understood; surrounding quotes are
+    removed from values and lines without a colon are ignored.
+
+    Args:
+        content: Markdown content potentially with frontmatter
+
+    Returns:
+        Tuple of (frontmatter_dict, content_without_frontmatter)
+        or (None, original_content) if no frontmatter found.
+    """
+    opening, closing = "---\n", "\n---\n"
+    if not content.startswith(opening):
+        return None, content
+
+    try:
+        # The first closing marker after the opening one ends the block
+        remainder = content[len(opening) :]
+        head, found, body = remainder.partition(closing)
+        if not found:
+            return None, content
+
+        # A key that occurs twice keeps the value of its last occurrence
+        entries = {}
+        for raw in head.split("\n"):
+            stripped = raw.strip()
+            if stripped and ":" in stripped:
+                name, _, val = stripped.partition(":")
+                entries[name.strip()] = val.strip().strip("\"'")
+
+        return entries, body
+    except Exception as e:
+        print(f" [Warning] Failed to parse frontmatter: {e}")
+        return None, content
+
+
+def fix_yaml_frontmatter(frontmatter_text: str) -> str:
+    """
+    Fix YAML frontmatter formatting.
+
+    Values of "key: value" lines are passed through quote_yaml_value. The
+    indentation of each line is kept so nested mappings stay nested; blank
+    lines, comments, list items and keys without a value are left as they are.
+
+    Args:
+        frontmatter_text: Frontmatter content (without --- markers)
+
+    Returns:
+        Fixed frontmatter text
+    """
+    fixed_lines = []
+
+    for line in frontmatter_text.split("\n"):
+        body = line.lstrip()
+        key, colon, value = body.partition(":")
+        value = value.strip()
+
+        # Nothing to quote: no key/value pair, or a structural YAML line
+        if not colon or not value or body.startswith(("#", "-")):
+            fixed_lines.append(line)
+            continue
+
+        indent = line[: len(line) - len(body)]
+        fixed_lines.append(f"{indent}{key.strip()}: {quote_yaml_value(value)}")
+
+    return "\n".join(fixed_lines)
+
+
+def ensure_frontmatter(content: str) -> str:
+    """
+    Fix the YAML formatting of existing frontmatter.
+
+    Content without a complete frontmatter block is returned unchanged; no
+    frontmatter is added. The result always has exactly one blank line after
+    the closing marker, so processing a file twice gives the same output.
+
+    Args:
+        content: Markdown content
+
+    Returns:
+        Content with fixed frontmatter
+    """
+    if not content.startswith("---\n"):
+        return content
+
+    frontmatter_content, closing, rest_content = content[4:].partition("\n---\n")
+    if not closing:
+        return content
+
+    fixed_frontmatter = fix_yaml_frontmatter(frontmatter_content)
+    # Drop blank lines left by an earlier pass before re-adding exactly one
+    body = rest_content.lstrip("\n")
+    return f"---\n{fixed_frontmatter}\n---\n\n{body}"
+
+
+def fix_broken_project_links(
+    content: str,
+    repo_name: str,
+    target_dir: str,
+    base_path: str = "/projects",
+    github_base: str = "https://github.com/gardenlinux",
+) -> str:
+    """
+    Fix links in /projects/ that point to non-existent files.
+    Replace with GitHub links.
+
+    A link is kept when, ignoring any "#anchor", "?query" or link title, it
+    resolves below target_dir to "<path>.md", to "<path>/index.md" or to an
+    existing file such as an image.
+
+    Args:
+        content: Markdown content
+        repo_name: Repository name
+        target_dir: Target directory path
+        base_path: Base path for projects
+        github_base: GitHub base URL
+
+    Returns:
+        Content with fixed links
+    """
+    target_path = Path(target_dir)
+    prefix = f"{base_path}/{repo_name}/"
+
+    def check_and_fix_link(match):
+        text = match.group(1)
+        link = match.group(2)
+
+        # Only process /projects/{repo}/ links
+        if not link.startswith(prefix):
+            return match.group(0)
+
+        # Extract the path after /projects/{repo}/
+        rel_path = link[len(prefix) :]
+        # Anchors, queries and titles are not part of the file name on disk
+        file_part = re.split(r"[#?\s]", rel_path, maxsplit=1)[0]
+
+        candidates = (
+            target_path / f"{file_part}.md",
+            target_path / file_part / "index.md",
+            target_path / file_part,
+        )
+        if any(candidate.is_file() for candidate in candidates):
+            return match.group(0)
+
+        # Not part of the aggregated docs: point at the repository instead
+        github_link = f"{github_base}/{repo_name}/blob/main/{rel_path}"
+        return f"[{text}]({github_link})"
+
+    content = re.sub(r"\[([^\]]+)\]\(([^)]+)\)", check_and_fix_link, content)
+    return content
diff --git a/scripts/migration_tracker.py b/src/migration_tracker.py
similarity index 100%
rename from scripts/migration_tracker.py
rename to src/migration_tracker.py
diff --git a/tests/README.md b/tests/README.md
new file mode 100644
index 0000000..512338f
--- /dev/null
+++ b/tests/README.md
@@ -0,0 +1,59 @@
+# Test Suite
+
+## Structure
+
+```
+tests/
+├── conftest.py # pytest configuration
+├── fixtures/ # Test data
+├── unit/ # Unit tests (pure functions)
+│ ├── test_config.py
+│ ├── test_models.py
+│ └── test_transformer.py
+└── integration/ # Integration tests (filesystem)
+ └── test_aggregation.py
+```
+
+## Running Tests
+
+```bash
+# All tests
+make test
+
+# Unit tests
+make test-unit
+
+# Integration tests
+make test-integration
+
+# Direct pytest
+python3 -m pytest tests/unit/ -v
+python3 -m pytest tests/integration/ -v
+```
+
+## Test Types
+
+### Unit Tests
+
+Test individual functions in isolation (the config tests write temporary files via pytest's `tmp_path`):
+
+- Link rewriting (`rewrite_links`)
+- YAML quoting (`quote_yaml_value`)
+- Frontmatter handling (`ensure_frontmatter`)
+- Config loading/saving
+- Model validation
+
+### Integration Tests
+
+Test filesystem operations:
+
+- Local repository fetching
+- Markdown file processing
+- Directory transformation
+
+## Adding Tests
+
+1. Unit test: `tests/unit/test_*.py`
+2. Integration test: `tests/integration/test_*.py`
+3. Use pytest conventions: `test_*` functions, `Test*` classes
+4. Use `assert` statements, not custom test runners
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..9cf1019
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,8 @@
+"""Pytest configuration: makes the packages below src/ importable in tests."""
+
+import sys
+from pathlib import Path
+
+# Put <project root>/src first on sys.path so tests can `import aggregation`
+project_root = Path(__file__).parent.parent
+sys.path.insert(0, str(project_root / "src"))
diff --git a/scripts/tests/fixtures/colon_title.md b/tests/fixtures/colon_title.md
similarity index 100%
rename from scripts/tests/fixtures/colon_title.md
rename to tests/fixtures/colon_title.md
diff --git a/scripts/tests/fixtures/test_doc.md b/tests/fixtures/test_doc.md
similarity index 100%
rename from scripts/tests/fixtures/test_doc.md
rename to tests/fixtures/test_doc.md
diff --git a/scripts/tests/fixtures/with_frontmatter.md b/tests/fixtures/with_frontmatter.md
similarity index 100%
rename from scripts/tests/fixtures/with_frontmatter.md
rename to tests/fixtures/with_frontmatter.md
diff --git a/tests/integration/test_aggregation.py b/tests/integration/test_aggregation.py
new file mode 100644
index 0000000..41a8b0d
--- /dev/null
+++ b/tests/integration/test_aggregation.py
@@ -0,0 +1,106 @@
+"""Integration tests for documentation aggregation."""
+
+import tempfile
+from pathlib import Path
+
+import pytest
+from aggregation import DocsFetcher, RepoConfig, process_all_markdown
+
+
+class TestDocsFetcher:
+    """Integration tests for DocsFetcher."""
+
+    def test_fetch_local_with_temp_dir(self, tmp_path):
+        """Fetch a file:// repository and check the copied docs tree."""
+        # Files of the mock repository, relative to its docs/ directory
+        sources = {
+            "index.md": "# Test Documentation\n\nContent here.",
+            "guide.md": "# Guide\n\nSome guide content.",
+            "tutorials/tutorial1.md": "# Tutorial 1\n\nTutorial content.",
+        }
+
+        # Lay the files out on disk below <tmp>/mock-repo/docs
+        checkout = tmp_path / "mock-repo"
+        for rel_name, text in sources.items():
+            doc = checkout / "docs" / rel_name
+            doc.parent.mkdir(parents=True, exist_ok=True)
+            doc.write_text(text)
+
+        # Repository description pointing at the mock checkout
+        config = RepoConfig(
+            name="test-repo",
+            url=f"file://{checkout}",
+            docs_path="docs",
+            target_path="projects/test-repo",
+        )
+
+        # Fetch into a fresh output directory
+        destination = tmp_path / "output"
+        destination.mkdir()
+        fetcher = DocsFetcher(tmp_path)
+        outcome = fetcher.fetch(config, destination)
+
+        # Verify success; no commit is expected for a local repository
+        assert outcome.success is True
+        assert outcome.resolved_commit is None
+
+        # Verify files were copied to the same relative paths
+        for rel_name in sources:
+            assert (destination / rel_name).exists()
+
+        # Verify content
+        copied = (destination / "index.md").read_text()
+        assert "Test Documentation" in copied
+
+
+class TestMarkdownProcessing:
+    """Integration tests for markdown processing."""
+
+    def test_process_all_markdown(self, tmp_path):
+        """Test processing markdown files in a directory."""
+        # Create test directory structure
+        target_dir = tmp_path / "target"
+        target_dir.mkdir()
+
+        # Create test markdown files
+        (target_dir / "README.md").write_text(
+            "# README\n\n[Link](./guide.md)\n[External](https://example.com)"
+        )
+        (target_dir / "index.md").write_text("# Index\n\nContent")
+
+        subdir = target_dir / "docs"
+        subdir.mkdir()
+        (subdir / "guide.md").write_text("# Guide\n\n[Back](../README.md)")
+
+        # Process the markdown
+        process_all_markdown(str(target_dir), "test-repo")
+
+        # README.md is only renamed when no index.md exists, so both remain
+        assert (target_dir / "README.md").exists()
+        assert (target_dir / "index.md").read_text() == "# Index\n\nContent"
+
+        # External links are left alone
+        readme_content = (target_dir / "README.md").read_text()
+        assert "[External](https://example.com)" in readme_content
+
+        # The relative link now points into the project tree
+        guide_content = (subdir / "guide.md").read_text()
+        assert "[Back](/projects/test-repo/README)" in guide_content
+
+    def test_process_markdown_with_frontmatter(self, tmp_path):
+        """Test that frontmatter is properly handled."""
+        target_dir = tmp_path / "target"
+        target_dir.mkdir()
+
+        # Create markdown with problematic frontmatter
+        (target_dir / "test.md").write_text(
+            "---\ntitle: Test: Example\ntags: tag1, tag2\n---\n\n# Content"
+        )
+
+        # Process
+        process_all_markdown(str(target_dir), "test-repo")
+
+        # Verify frontmatter was fixed
+        content = (target_dir / "test.md").read_text()
+        assert '"Test: Example"' in content  # Colon should be quoted
+        assert '"tag1, tag2"' in content  # Comma should be quoted
\ No newline at end of file
diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py
new file mode 100644
index 0000000..d6b6d64
--- /dev/null
+++ b/tests/unit/test_config.py
@@ -0,0 +1,225 @@
+"""Unit tests for aggregation.config module."""
+
+import json
+import tempfile
+from pathlib import Path
+
+import pytest
+from aggregation import load_config, save_config, RepoConfig
+
+
+class TestLoadConfig:
+    """Tests for load_config function."""
+
+    def test_load_valid_config(self, tmp_path):
+        """A single well-formed entry is loaded as one repository."""
+        # Remote repository description with all required fields
+        entry = {
+            "name": "test-repo",
+            "url": "https://github.com/test/repo",
+            "docs_path": "docs",
+            "target_path": "projects/test",
+            "ref": "main",
+        }
+        cfg = tmp_path / "config.json"
+        cfg.write_text(json.dumps({"repos": [entry]}))
+
+        loaded = load_config(str(cfg))
+
+        # Exactly one repository carrying the values from the file
+        assert len(loaded) == 1
+        only = loaded[0]
+        assert only.name == "test-repo"
+        assert only.url == "https://github.com/test/repo"
+
+    def test_load_multiple_repos(self, tmp_path):
+        """Several entries are loaded in the order they appear in the file."""
+        # Remote repository pinned to a ref
+        remote = {
+            "name": "repo1",
+            "url": "https://github.com/test/repo1",
+            "docs_path": "docs",
+            "target_path": "projects/repo1",
+            "ref": "main",
+        }
+        # Local repository referenced through a file:// URL, without ref
+        local = {
+            "name": "repo2",
+            "url": "file://../repo2",
+            "docs_path": "docs",
+            "target_path": "projects/repo2",
+        }
+        cfg = tmp_path / "config.json"
+        cfg.write_text(json.dumps({"repos": [remote, local]}))
+
+        loaded = load_config(str(cfg))
+
+        # Both entries are present, first one first
+        assert len(loaded) == 2
+        names = [loaded[0].name, loaded[1].name]
+        assert names == ["repo1", "repo2"]
+
+    def test_load_config_with_optional_fields(self, tmp_path):
+        """Optional fields from the file end up on the loaded repository."""
+        # Required fields plus commit, root_files and structure
+        entry = {
+            "name": "test-repo",
+            "url": "https://github.com/test/repo",
+            "docs_path": "docs",
+            "target_path": "projects/test",
+            "ref": "main",
+            "commit": "abc123",
+            "root_files": ["README.md"],
+            "structure": {"old": "new"},
+        }
+        cfg = tmp_path / "config.json"
+        cfg.write_text(json.dumps({"repos": [entry]}))
+
+        loaded = load_config(str(cfg))
+
+        # The optional values are taken over unchanged
+        first = loaded[0]
+        assert first.commit == "abc123"
+        assert first.root_files == ["README.md"]
+        assert first.structure == {"old": "new"}
+
+    def test_load_invalid_json(self, tmp_path):
+        """A file that is not valid JSON makes load_config exit."""
+        broken = tmp_path / "config.json"
+        broken.write_text("{ invalid json")
+
+        with pytest.raises(SystemExit):
+            load_config(str(broken))
+
+    def test_load_missing_repos_key(self, tmp_path):
+        """Valid JSON without a 'repos' key makes load_config exit."""
+        cfg = tmp_path / "config.json"
+        cfg.write_text(json.dumps({"other": "data"}))
+
+        # The top-level "repos" key is mandatory
+        with pytest.raises(SystemExit):
+            load_config(str(cfg))
+
+
+class TestSaveConfig:
+    """Tests for save_config function."""
+
+    def test_save_single_repo(self, tmp_path):
+        """One repository is written as a single entry under 'repos'."""
+        # Remote repository without optional fields
+        repo = RepoConfig(
+            name="test-repo",
+            url="https://github.com/test/repo",
+            docs_path="docs",
+            target_path="projects/test",
+            ref="main",
+        )
+        out_file = tmp_path / "config.json"
+
+        save_config(str(out_file), [repo])
+
+        # Verify file was created and contains valid JSON
+        assert out_file.exists()
+        saved = json.loads(out_file.read_text())
+
+        # One entry under "repos" with the fields that were set
+        assert "repos" in saved
+        assert len(saved["repos"]) == 1
+        entry = saved["repos"][0]
+        assert entry["name"] == "test-repo"
+        assert entry["ref"] == "main"
+
+    def test_save_multiple_repos(self, tmp_path):
+        """Every repository passed in is written to the file."""
+        # Remote repository pinned to a ref
+        remote = RepoConfig(
+            name="repo1",
+            url="https://github.com/test/repo1",
+            docs_path="docs",
+            target_path="projects/repo1",
+            ref="main",
+        )
+        # Local repository referenced through a file:// URL
+        local = RepoConfig(
+            name="repo2",
+            url="file://../repo2",
+            docs_path="docs",
+            target_path="projects/repo2",
+        )
+        out_file = tmp_path / "config.json"
+
+        save_config(str(out_file), [remote, local])
+
+        saved = json.loads(out_file.read_text())
+
+        # Both repositories were written
+        assert len(saved["repos"]) == 2
+
+    def test_save_with_commit_lock(self, tmp_path):
+        """A commit set on the repository is written to the file."""
+        # Repository with a commit value
+        pinned = RepoConfig(
+            name="test-repo",
+            url="https://github.com/test/repo",
+            docs_path="docs",
+            target_path="projects/test",
+            ref="main",
+            commit="abc123",
+        )
+        out_file = tmp_path / "config.json"
+
+        save_config(str(out_file), [pinned])
+
+        saved = json.loads(out_file.read_text())
+
+        # The commit appears on the saved entry
+        entry = saved["repos"][0]
+        assert entry["commit"] == "abc123"
+
+    def test_save_omits_empty_optional_fields(self, tmp_path):
+        """Optional fields that were never set are left out of the file."""
+        # Only the required fields and ref are given
+        repo = RepoConfig(
+            name="test-repo",
+            url="https://github.com/test/repo",
+            docs_path="docs",
+            target_path="projects/test",
+            ref="main",
+        )
+        out_file = tmp_path / "config.json"
+
+        save_config(str(out_file), [repo])
+
+        saved = json.loads(out_file.read_text())
+        entry = saved["repos"][0]
+
+        # Should not have empty optional fields: neither the commit
+        # nor the root_files key may be present
+        assert "commit" not in entry
+        assert "root_files" not in entry
+
+    def test_round_trip(self, tmp_path):
+        """Saving and loading again gives back the same field values."""
+        # Repository with required and optional fields set
+        original = RepoConfig(
+            name="test-repo",
+            url="https://github.com/test/repo",
+            docs_path="docs",
+            target_path="projects/test",
+            ref="main",
+            commit="abc123",
+            root_files=["README.md"],
+        )
+        out_file = tmp_path / "config.json"
+
+        # Save and load
+        save_config(str(out_file), [original])
+        loaded_repos = load_config(str(out_file))
+
+        # Compare
+        assert len(loaded_repos) == 1
+        restored = loaded_repos[0]
+
+        # Each checked field survived the round trip
+        for field in ("name", "url", "commit", "root_files"):
+            assert getattr(restored, field) == getattr(original, field)
diff --git a/tests/unit/test_models.py b/tests/unit/test_models.py
new file mode 100644
index 0000000..437a738
--- /dev/null
+++ b/tests/unit/test_models.py
@@ -0,0 +1,131 @@
+"""Unit tests for aggregation.models module."""
+
+import pytest
+from aggregation import RepoConfig, AggregateResult
+
+
+class TestRepoConfig:
+    """Tests for RepoConfig dataclass."""
+
+    def test_from_dict_minimal(self):
+        """from_dict accepts the required fields and applies the defaults."""
+        raw = {
+            "name": "test-repo",
+            "url": "https://github.com/test/repo",
+            "docs_path": "docs",
+            "target_path": "projects/test-repo",
+            "ref": "main",
+        }
+        cfg = RepoConfig.from_dict(raw)
+        # Every supplied field is taken over unchanged
+        for key, expected in raw.items():
+            assert getattr(cfg, key) == expected
+
+        # Fields that were not supplied have their default values
+        assert cfg.commit is None
+        assert cfg.root_files == []
+        assert cfg.structure == "flat"
+
+    def test_from_dict_full(self):
+        """from_dict takes over every optional field that is supplied."""
+        raw = {
+            "name": "test-repo",
+            "url": "https://github.com/test/repo",
+            "docs_path": "docs",
+            "target_path": "projects/test-repo",
+            "ref": "main",
+            "commit": "abc123",
+            "root_files": ["README.md", "LICENSE"],
+            "structure": {"old": "new"},
+            "special_files": {"file.md": "special/"},
+            "media_directories": [".media"],
+        }
+        cfg = RepoConfig.from_dict(raw)
+        assert cfg.commit == "abc123"
+        assert cfg.root_files == ["README.md", "LICENSE"]
+        assert cfg.structure == {"old": "new"}
+        assert cfg.special_files == {"file.md": "special/"}
+        assert cfg.media_directories == [".media"]
+
+    def test_is_local_file_url(self):
+        """A file:// URL is reported as local and not as remote."""
+        cfg = RepoConfig(
+            name="local",
+            url="file://../gardenlinux",
+            docs_path="docs",
+            target_path="projects/gardenlinux",
+        )
+        assert cfg.is_local is True
+        assert cfg.is_remote is False
+
+    def test_is_remote_https_url(self):
+        """An https:// URL is reported as remote and not as local."""
+        cfg = RepoConfig(
+            name="remote",
+            url="https://github.com/test/repo",
+            docs_path="docs",
+            target_path="projects/test",
+            ref="main",
+        )
+        assert cfg.is_remote is True
+        assert cfg.is_local is False
+
+    def test_local_path_property(self):
+        """local_path is the URL without its file:// prefix."""
+        cfg = RepoConfig(
+            name="local",
+            url="file://../gardenlinux",
+            docs_path="docs",
+            target_path="projects/gardenlinux",
+        )
+        assert cfg.local_path == "../gardenlinux"
+
+    def test_validate_local_without_ref(self):
+        """validate() accepts a local repository that has no ref."""
+        cfg = RepoConfig(
+            name="local",
+            url="file://../gardenlinux",
+            docs_path="docs",
+            target_path="projects/gardenlinux",
+        )
+        cfg.validate()  # Should not raise
+
+    def test_validate_remote_requires_ref(self):
+        """validate() rejects a remote repository that has no ref."""
+        cfg = RepoConfig(
+            name="remote",
+            url="https://github.com/test/repo",
+            docs_path="docs",
+            target_path="projects/test",
+        )
+        with pytest.raises(ValueError, match="must have 'ref' field"):
+            cfg.validate()
+
+    def test_validate_invalid_url_scheme(self):
+        """validate() rejects a URL whose scheme is not supported."""
+        cfg = RepoConfig(
+            name="invalid",
+            url="ftp://example.com/repo",
+            docs_path="docs",
+            target_path="projects/test",
+        )
+        with pytest.raises(ValueError, match="Invalid URL scheme"):
+            cfg.validate()
+
+
+class TestAggregateResult:
+    """Tests for AggregateResult dataclass."""
+
+    def test_success_result(self):
+        """The positional arguments of a successful result are stored."""
+        ok = AggregateResult("test-repo", True, "abc123")
+        assert ok.repo_name == "test-repo"
+        assert ok.success is True
+        assert ok.resolved_commit == "abc123"
+
+    def test_failure_result(self):
+        """A failed result stores success=False and no resolved commit."""
+        failed = AggregateResult("test-repo", False, None)
+        assert failed.repo_name == "test-repo"
+        assert failed.success is False
+        assert failed.resolved_commit is None
\ No newline at end of file
diff --git a/tests/unit/test_transformer.py b/tests/unit/test_transformer.py
new file mode 100644
index 0000000..ace21e6
--- /dev/null
+++ b/tests/unit/test_transformer.py
@@ -0,0 +1,115 @@
+"""Unit tests for aggregation.transformer module."""
+
+import pytest
+from aggregation import (
+ rewrite_links,
+ quote_yaml_value,
+ ensure_frontmatter,
+)
+
+
+class TestRewriteLinks:
+    """Tests for rewrite_links function."""
+
+    def test_relative_links(self):
+        """A ./ link resolves below the directory of the current file."""
+        markdown = "[Link](./other.md)"
+        rewritten = rewrite_links(markdown, "gardenlinux", "introduction/index.md")
+        assert "/projects/gardenlinux/introduction/other" in rewritten
+
+    def test_numbered_directory_links(self):
+        """The numeric prefix of a linked directory is removed."""
+        markdown = "[Link](../01_developers/guide.md)"
+        rewritten = rewrite_links(markdown, "gardenlinux", "introduction/index.md")
+        assert "developers/guide" in rewritten
+
+    def test_preserve_external_links(self):
+        """An https:// link comes back unchanged."""
+        markdown = "[External](https://github.com/gardenlinux/gardenlinux)"
+        rewritten = rewrite_links(markdown, "gardenlinux", "")
+        assert rewritten == markdown
+
+    def test_preserve_anchor_links(self):
+        """A pure #anchor link comes back unchanged."""
+        markdown = "[Anchor](#section)"
+        rewritten = rewrite_links(markdown, "gardenlinux", "")
+        assert rewritten == markdown
+
+    def test_media_links(self):
+        """A link into .media/ points at the repository's media directory."""
+        markdown = "[Image](../.media/image.png)"
+        rewritten = rewrite_links(markdown, "gardenlinux", "introduction/index.md")
+        assert "/projects/gardenlinux/.media/image.png" in rewritten
+
+    def test_absolute_paths_to_github(self):
+        """An absolute path is turned into a GitHub blob link."""
+        markdown = "[File](/README.md)"
+        rewritten = rewrite_links(markdown, "gardenlinux", "")
+        assert "https://github.com/gardenlinux/gardenlinux/blob/main/README.md" in rewritten
+
+
+class TestQuoteYamlValue:
+    """Tests for quote_yaml_value function."""
+
+    def test_quote_value_with_colon(self):
+        """A value containing a colon comes back wrapped in quotes."""
+        raw = "Getting Started: Creating Images"
+        quoted = quote_yaml_value(raw)
+        assert '"' in quoted
+        assert "Getting Started: Creating Images" in quoted
+
+    def test_simple_value_not_quoted(self):
+        """A value without special characters comes back unchanged."""
+        raw = "Simple Title"
+        quoted = quote_yaml_value(raw)
+        assert quoted == "Simple Title"
+
+    def test_already_quoted_value(self):
+        """A value that is already double-quoted is not quoted again."""
+        raw = '"Already Quoted"'
+        quoted = quote_yaml_value(raw)
+        assert quoted == '"Already Quoted"'
+
+    def test_value_with_hash(self):
+        """A value containing # comes back wrapped in quotes."""
+        raw = "Title #1"
+        quoted = quote_yaml_value(raw)
+        assert '"' in quoted
+
+    def test_value_with_special_chars(self):
+        """Each listed special character triggers some form of quoting."""
+        triggers = [":", "#", "@", "*", "&", "!"]
+        for symbol in triggers:
+            raw = f"Text {symbol} more"
+            quoted = quote_yaml_value(raw)
+            assert '"' in quoted or "'" in quoted
+
+
+class TestEnsureFrontmatter:
+    """Tests for ensure_frontmatter function."""
+
+    def test_no_change_when_missing(self):
+        """Markdown without a frontmatter block comes back unchanged."""
+        markdown = "# Test Title\n\nContent here."
+        fixed = ensure_frontmatter(markdown)
+        assert fixed == markdown
+
+    def test_preserve_existing(self):
+        """A frontmatter value that needs no quoting is kept as written."""
+        markdown = "---\ntitle: Existing\n---\n\nContent"
+        fixed = ensure_frontmatter(markdown)
+        assert "title: Existing" in fixed
+
+    def test_fix_colons(self):
+        """A frontmatter value containing a colon gets quoted."""
+        markdown = "---\ntitle: Test: Example\n---\n\nContent"
+        fixed = ensure_frontmatter(markdown)
+        assert '"Test: Example"' in fixed
+
+    def test_fix_multiple_fields(self):
+        """Only the fields that need quoting are changed."""
+        markdown = "---\ntitle: Test: Example\nauthor: John Doe\ntags: tag1, tag2\n---\n\nContent"
+        fixed = ensure_frontmatter(markdown)
+        assert '"Test: Example"' in fixed
+        assert "John Doe" in fixed
+        assert '"tag1, tag2"' in fixed
\ No newline at end of file
From 120058164b04a2f15014541823f8e90db3998a5d Mon Sep 17 00:00:00 2001
From: Eike Waldt
Date: Wed, 1 Apr 2026 15:10:51 +0200
Subject: [PATCH 2/5] docs: Restructure docs-ng docs based on Diataxis
Signed-off-by: Eike Waldt
On-behalf-of: SAP
---
.gitignore | 4 +
README.md | 186 +----------
.../docs-ng/explanation/architecture.md | 288 +++++++++++++++++
.../docs-ng/how-to/adding-repos.md | 256 +++++++++++++++
.../docs-ng/overview/index.md | 88 +++++
.../docs-ng/reference/configuration.md | 303 ++++++++++++++++++
.../docs-ng/reference/technical.md | 181 +++++++++++
.../docs-ng/reference/testing.md | 232 ++++++++++++++
.../docs-ng/tutorials/getting_started.md | 152 +++++++++
docs/reference/supporting_tools/index.md | 11 +
repos-config.json | 19 +-
src/README.md | 74 +----
src/aggregate.py | 2 +-
src/aggregation/structure.py | 32 +-
tests/README.md | 60 +---
15 files changed, 1553 insertions(+), 335 deletions(-)
mode change 100644 => 120000 README.md
create mode 100644 docs/reference/supporting_tools/docs-ng/explanation/architecture.md
create mode 100644 docs/reference/supporting_tools/docs-ng/how-to/adding-repos.md
create mode 100644 docs/reference/supporting_tools/docs-ng/overview/index.md
create mode 100644 docs/reference/supporting_tools/docs-ng/reference/configuration.md
create mode 100644 docs/reference/supporting_tools/docs-ng/reference/technical.md
create mode 100644 docs/reference/supporting_tools/docs-ng/reference/testing.md
create mode 100644 docs/reference/supporting_tools/docs-ng/tutorials/getting_started.md
create mode 100644 docs/reference/supporting_tools/index.md
mode change 100644 => 120000 src/README.md
mode change 100644 => 120000 tests/README.md
diff --git a/.gitignore b/.gitignore
index a45231e..a8304ed 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,5 +22,9 @@ docs/how-to/**
docs/reference/**
!docs/reference/glossary.md
!docs/reference/index.md
+!docs/reference/supporting_tools/
+docs/reference/supporting_tools/**
+!docs/reference/supporting_tools/index.md
+!docs/reference/supporting_tools/docs-ng/
docs/tutorials/**
!docs/tutorials/index.md
diff --git a/README.md b/README.md
deleted file mode 100644
index a183681..0000000
--- a/README.md
+++ /dev/null
@@ -1,185 +0,0 @@
-# Garden Linux Documentation Hub
-
-Build unified documentation from multiple Garden Linux repositories.
-
-## Quick Start
-
-```bash
-# Aggregate documentation from repos
-make aggregate
-
-# Run development server
-make dev
-```
-
-## Overview
-
-This project aggregates content from multiple source repositories (gardenlinux, builder, python-gardenlinux-lib) into a cohesive VitePress site.
-
-### Documentation Paths
-
-1. **Targeted Documentation** — Files with `github_target_path` frontmatter → `docs/tutorials/`, `docs/how-to/`, etc.
-2. **Project Mirror** — All repo docs mirrored under `docs/projects//`
-
-## Architecture
-
-```
-Source Repos → Fetch (git/local) → Transform → docs/ → VitePress
-```
-
-**Aggregation Pipeline:**
-
-1. **Fetch** — `src/aggregation/fetcher.py` pulls docs via git sparse checkout or local copy
-2. **Transform** — `src/aggregation/transformer.py` rewrites links, fixes frontmatter
-3. **Structure** — `src/aggregation/structure.py` reorganizes directories and copies media
-4. **Output** — VitePress builds the site
-
-**Key Mechanisms:**
-
-- **Targeted Documentation**: Files with `github_target_path` frontmatter are copied directly to specified paths (e.g., `docs/tutorials/cloud/first-boot-aws.md`). This is the primary mechanism for aggregating content from source repos into the unified documentation structure.
-
-- **Media Directories**: Directories listed in `media_directories` (e.g., `_static`, `assets`) are automatically discovered and copied. For nested media dirs (like `tutorials/assets`), they're copied to the same relative path. For root-level media dirs (like `_static`), they're copied to the common ancestor of all targeted files.
-
-- **Commit Locking**: The `commit` field in `repos-config.json` locks to a specific commit for reproducibility. Use `make aggregate-update` to fetch the latest commits and update the locks automatically.
-
-- **Project Mirror**: In addition to targeted docs, the entire `docs/` directory from each repo is mirrored under `docs/projects//` for reference.
-
-## Configuration
-
-### repos-config.json
-
-Located at project root. Defines repositories to aggregate:
-
-```json
-{
- "repos": [
- {
- "name": "gardenlinux",
- "url": "https://github.com/gardenlinux/gardenlinux",
- "docs_path": "docs",
- "target_path": "projects/gardenlinux",
- "ref": "docs-ng",
- "commit": "abc123...",
- "root_files": ["CONTRIBUTING.md", "SECURITY.md"],
- "structure": {
- "tutorials": "tutorials",
- "how-to": "how-to",
- "explanation": "explanation",
- "reference": "reference"
- },
- "media_directories": [".media", "assets", "_static"]
- },
- {
- "name": "python-gardenlinux-lib",
- "url": "https://github.com/gardenlinux/python-gardenlinux-lib",
- "docs_path": "docs",
- "target_path": "projects/python-gardenlinux-lib",
- "ref": "docs-ng",
- "commit": "def456...",
- "structure": "sphinx",
- "media_directories": ["_static"]
- }
- ]
-}
-```
-
-**Key fields:**
-
-- `ref` — branch/tag to fetch
-- `commit` — (optional) commit lock for reproducibility; use `--update-locks` to update
-- `root_files` — (optional) root-level files to copy (e.g., CONTRIBUTING.md)
-- `structure` — directory mapping, `"flat"` for as-is copy, or `"sphinx"` for Sphinx docs
-- `media_directories` — (optional) list of media directories to copy (relative paths searched recursively)
-
-For local testing, use `repos-config.local.json` with `file://` URLs.
-
-## Commands
-
-### Development
-
-```bash
-make dev # Start dev server
-make build # Production build
-make preview # Preview production build
-```
-
-### Testing
-
-```bash
-make test # Run all tests
-make test-unit # Unit tests
-make test-integration # Integration tests
-```
-
-### Aggregation
-
-```bash
-make aggregate-local # From local repos (file://)
-make aggregate # From remote repos (locked commits)
-make aggregate-repo REPO=... # Single repository
-make aggregate-update # Fetch latest + update commit locks
-make aggregate-update-repo REPO=... # Single repository
-```
-
-### Utilities
-
-```bash
-make clean # Clean build artifacts and aggregated docs
-```
-
-## Project Structure
-
-```
-docs-ng/
-├── repos-config.json # Repository configuration
-├── repos-config.local.json # Local development config
-├── src/ # Source code
-│ ├── aggregate.py # CLI entry point
-│ ├── aggregation/ # Core package
-│ │ ├── models.py # Data classes
-│ │ ├── config.py # Config I/O
-│ │ ├── fetcher.py # Repository fetching
-│ │ ├── transformer.py # Content transformation
-│ │ └── structure.py # Directory operations
-│ └── README.md # Technical documentation
-├── tests/ # Test suite
-│ ├── unit/ # Unit tests (pure functions)
-│ ├── integration/ # Integration tests (filesystem)
-│ └── README.md # Test documentation
-└── docs/ # Generated documentation
- ├── projects/ # Mirrored repository docs
- ├── tutorials/ # Aggregated tutorials (via github_target_path)
- ├── how-to/ # Aggregated guides (via github_target_path)
- ├── explanation/ # Aggregated explanations (via github_target_path)
- ├── reference/ # Aggregated reference docs (via github_target_path)
- │ └── supporting_tools/ # Builder, python-gardenlinux-lib docs
- └── contributing/ # Aggregated contributing docs (via github_target_path)
-```
-
-## Adding Repositories
-
-1. Add to `repos-config.json`:
-
-```json
-{
- "name": "new-repo",
- "url": "https://github.com/gardenlinux/new-repo",
- "docs_path": "docs",
- "target_path": "projects/new-repo",
- "ref": "main",
- "structure": "flat"
-}
-```
-
-2. Test: `make aggregate-repo REPO=new-repo`
-3. Preview: `make dev`
-
-## Documentation
-
-- **User Guide**: This README
-- **Technical Docs**: `src/README.md`
-- **Test Docs**: `tests/README.md`
-
-## Contributing
-
-See `CONTRIBUTING.md` for development guidelines.
diff --git a/README.md b/README.md
new file mode 120000
index 0000000..d0c1cc7
--- /dev/null
+++ b/README.md
@@ -0,0 +1 @@
+docs/reference/supporting_tools/docs-ng/overview/index.md
\ No newline at end of file
diff --git a/docs/reference/supporting_tools/docs-ng/explanation/architecture.md b/docs/reference/supporting_tools/docs-ng/explanation/architecture.md
new file mode 100644
index 0000000..5a8617b
--- /dev/null
+++ b/docs/reference/supporting_tools/docs-ng/explanation/architecture.md
@@ -0,0 +1,288 @@
+---
+title: "docs-ng Architecture"
+description: "Deep dive into how the docs-ng documentation aggregation system works"
+github_org: gardenlinux
+github_repo: docs-ng
+github_source_path: docs/reference/supporting_tools/docs-ng/explanation/architecture.md
+---
+
+# docs-ng Architecture
+
+Deep dive into the design and implementation of the docs-ng documentation aggregation system.
+
+> **Source Repository:** [gardenlinux/docs-ng](https://github.com/gardenlinux/docs-ng)
+
+## System Overview
+
+docs-ng is a documentation aggregation pipeline that combines content from multiple source repositories into a unified VitePress documentation site.
+
+```
+┌─────────────────┐
+│ Source Repos │
+│ - gardenlinux │
+│ - builder │
+│ - python-gl-lib │
+└────────┬────────┘
+ │
+ ▼
+┌─────────────────┐
+│ Fetch Stage │
+│ Git sparse │
+│ checkout or │
+│ local copy │
+└────────┬────────┘
+ │
+ ▼
+┌─────────────────┐
+│ Transform Stage │
+│ Rewrite links │
+│ Fix frontmatter │
+└────────┬────────┘
+ │
+ ▼
+┌─────────────────┐
+│ Structure Stage │
+│ Reorganize dirs │
+│ Copy media │
+└────────┬────────┘
+ │
+ ▼
+┌─────────────────┐
+│ docs/ output │
+│ VitePress build │
+└─────────────────┘
+```
+
+## Core Components
+
+### 1. Fetch Stage (`fetcher.py`)
+
+**Purpose:** Retrieve documentation from source repositories
+
+**Mechanisms:**
+
+- **Git Sparse Checkout:** For remote repositories, uses sparse checkout to fetch only the `docs/` directory, minimizing clone size
+- **Local Copy:** For `file://` URLs, performs direct filesystem copy without git operations
+- **Commit Resolution:** Records the resolved commit hash for locking
+
+**Key Features:**
+
+- Supports both remote (git) and local (file) sources
+- Handles root files separately from docs directory
+- Provides commit hash for reproducible builds
+
+### 2. Transform Stage (`transformer.py`)
+
+**Purpose:** Modify content to work in the aggregated site
+
+**Transformations:**
+
+1. **Link Rewriting:** Transform relative links to work across repository boundaries
+
+ - Intra-repo links: Maintained relative to project mirror
+ - Cross-repo links: Rewritten to absolute paths
+ - External links: Preserved as-is
+
+2. **Frontmatter Handling:** Ensure all documents have proper frontmatter
+
+ - Add missing frontmatter blocks
+ - Quote YAML values safely
+ - Preserve existing metadata
+
+3. **Project Link Validation:** Fix broken links to project mirrors
+
+### 3. Structure Stage (`structure.py`)
+
+**Purpose:** Organize documentation into the final directory structure
+
+**Operations:**
+
+1. **Targeted Documentation:** Copy files with `github_target_path` to specified locations
+2. **Directory Mapping:** Transform source directories according to `structure` config
+3. **Media Copying:** Discover and copy media directories
+4. **Markdown Processing:** Apply transformations to all markdown files
+
+**Structure Types:**
+
+- **Flat:** Copy all files as-is
+- **Sphinx:** Handle Sphinx documentation structure
+- **Custom Mapping:** Map source directories to Diataxis categories
+
+## Key Mechanisms
+
+### Targeted Documentation
+
+Files with `github_target_path` frontmatter are copied directly to their specified location:
+
+```yaml
+---
+github_target_path: "docs/how-to/example.md"
+---
+```
+
+**Flow:**
+
+1. Scan all markdown files for `github_target_path`
+2. Create target directory structure
+3. Copy file to exact specified location
+4. Apply markdown transformations
+
+This allows fine-grained control over where content appears in the final site.
+
+### Project Mirrors
+
+In addition to targeted docs, the entire `docs/` directory from each repo is mirrored under `docs/projects/<repo-name>/`:
+
+**Purpose:**
+
+- Preserve complete repository documentation
+- Provide fallback for untargeted content
+- Enable browsing of raw source structure
+
+### Media Directory Handling
+
+Media directories are automatically discovered and copied:
+
+**Nested Media:**
+
+- Location: `tutorials/assets/`
+- Copied to: `docs/tutorials/assets/`
+- Rationale: Preserve relative paths for tutorial-specific media
+
+**Root-Level Media:**
+
+- Location: `_static/`, `.media/`
+- Copied to: Common ancestor of all targeted files
+- Rationale: Shared media available to all documents
+
+### Commit Locking
+
+For reproducible builds, commits can be locked:
+
+```json
+{
+ "name": "repo",
+ "ref": "main",
+ "commit": "abc123..."
+}
+```
+
+**Benefits:**
+
+- Reproducible documentation builds
+- Stable CI/CD pipelines
+- Version control for aggregated docs
+
+**Update Process:**
+
+```bash
+make aggregate-update
+```
+
+This fetches the latest from `ref` and updates commit locks.
+
+## Design Decisions
+
+### Why Git Sparse Checkout?
+
+- **Efficiency:** Only fetches docs directory, not entire repository
+- **Speed:** Faster than full clone, especially for large repos
+- **Minimal Disk Usage:** Reduces storage requirements
+
+### Why Frontmatter-Based Targeting?
+
+- **Flexibility:** Authors control where their docs appear
+- **Decentralization:** No central mapping file to maintain
+- **Explicit:** Clear indication in source files of their destination
+
+### Why Separate Fetch/Transform/Structure?
+
+- **Modularity:** Each stage has single responsibility
+- **Testability:** Easy to test individual stages
+- **Extensibility:** New transformations added without affecting fetch/structure
+
+### Why Project Mirrors?
+
+- **Completeness:** No documentation is lost
+- **Development:** Easier to debug and understand source structure
+- **Backwards Compatibility:** Existing links to source repos still work
+
+## Data Flow
+
+### Repository → Temporary Directory
+
+```
+Source Repo Temp Directory
+├── docs/ → /tmp/xyz/repo-name/
+│ ├── tutorials/ ├── tutorials/
+│ ├── how-to/ ├── how-to/
+│ └── reference/ └── reference/
+├── README.md → README.md (if in root_files)
+└── src/ (not copied)
+```
+
+### Temporary Directory → Docs Output
+
+```
+Temp Directory Docs Output
+/tmp/xyz/repo-name/ →
+├── tutorials/ docs/
+│ └── guide.md ├── tutorials/
+│ (github_target_path) │ └── guide.md (targeted)
+├── how-to/ ├── how-to/
+└── reference/ └── projects/repo-name/
+ ├── tutorials/ (mirror)
+ ├── how-to/ (mirror)
+ └── reference/ (mirror)
+```
+
+## Performance Characteristics
+
+### Fetch Stage
+
+- **Git sparse:** O(docs_size) + network latency
+- **Local copy:** O(docs_size) filesystem I/O
+
+### Transform Stage
+
+- **Link rewriting:** O(n \* m) where n = files, m = avg file size
+- **Frontmatter:** O(n) single pass through files
+
+### Structure Stage
+
+- **Targeted copy:** O(n) where n = files with github_target_path
+- **Directory mapping:** O(n) where n = total files
+- **Media copy:** O(m) where m = media files
+
+### Overall
+
+- Dominated by git network operations for remote repos
+- Filesystem I/O bound for local repos
+- Usually completes in seconds for typical documentation repos
+
+## Error Handling
+
+### Fetch Failures
+
+- Invalid git URL → Clear error message with URL
+- Network issues → Retry with exponential backoff
+- Missing docs_path → Warning, skip repository
+
+### Transform Failures
+
+- Invalid frontmatter → Add default frontmatter, log warning
+- Broken links → Log warning, preserve original link
+- Invalid markdown → Process as best-effort, log error
+
+### Structure Failures
+
+- Missing target directory → Create automatically
+- Conflicting file paths → Error with clear message
+- Media directory not found → Log warning, continue
+
+## See Also
+
+- [Technical Reference](/reference/supporting_tools/docs-ng/reference/technical) — Module and API documentation
+- [Configuration Reference](/reference/supporting_tools/docs-ng/reference/configuration) — Complete configuration field reference
+- [Getting Started](/reference/supporting_tools/docs-ng/tutorials/getting_started) — Setup guide
+- [Adding Repositories](/reference/supporting_tools/docs-ng/how-to/adding-repos) — How to add new repos
diff --git a/docs/reference/supporting_tools/docs-ng/how-to/adding-repos.md b/docs/reference/supporting_tools/docs-ng/how-to/adding-repos.md
new file mode 100644
index 0000000..ac7357a
--- /dev/null
+++ b/docs/reference/supporting_tools/docs-ng/how-to/adding-repos.md
@@ -0,0 +1,256 @@
+---
+title: "How to Add Repositories to docs-ng"
+description: "Guide for adding new repositories to the documentation aggregation system"
+github_org: gardenlinux
+github_repo: docs-ng
+github_source_path: docs/reference/supporting_tools/docs-ng/how-to/adding-repos.md
+---
+
+# How to Add Repositories to docs-ng
+
+This guide explains how to add a new repository to the docs-ng aggregation system.
+
+> **Source Repository:** [gardenlinux/docs-ng](https://github.com/gardenlinux/docs-ng)
+
+## Prerequisites
+
+- Access to the repository you want to add
+- Understanding of the repository's documentation structure
+- Edit access to `repos-config.json`
+
+## Step 1: Add Repository Configuration
+
+Edit `repos-config.json` and add a new entry to the `repos` array:
+
+```json
+{
+ "name": "new-repo",
+ "url": "https://github.com/gardenlinux/new-repo",
+ "docs_path": "docs",
+ "target_path": "projects/new-repo",
+ "ref": "main",
+ "structure": "flat"
+}
+```
+
+### Required Fields
+
+- **`name`**: Unique identifier for the repository
+- **`url`**: Git URL or `file://` path for local development
+- **`docs_path`**: Path to docs directory within the repository
+- **`target_path`**: Where to place mirrored docs (usually `projects/<repo-name>`)
+- **`ref`**: Git branch or tag to fetch from
+
+### Optional Fields
+
+- **`commit`**: Lock to a specific commit hash for reproducibility
+- **`root_files`**: List of root-level files to copy (e.g., `["README.md", "CONTRIBUTING.md"]`)
+- **`structure`**: Directory mapping strategy (see below)
+- **`media_directories`**: List of media directories to copy (e.g., `[".media", "_static", "assets"]`)
+
+## Step 2: Choose a Structure Strategy
+
+### Flat Structure
+
+Copy all files as-is without reorganization:
+
+```json
+"structure": "flat"
+```
+
+### Sphinx Structure
+
+For Sphinx-generated documentation:
+
+```json
+"structure": "sphinx"
+```
+
+### Custom Mapping
+
+Map source directories to Diataxis categories:
+
+```json
+"structure": {
+ "tutorials": "tutorials",
+ "guides": "how-to",
+ "concepts": "explanation",
+ "api": "reference"
+}
+```
+
+## Step 3: Configure Targeted Documentation
+
+To have files automatically placed into the main Diataxis structure, add `github_target_path` frontmatter to markdown files in the source repository:
+
+```markdown
+---
+title: "Example Guide"
+github_target_path: "docs/how-to/example-guide.md"
+---
+
+# Example Guide
+
+Content here...
+```
+
+Files with `github_target_path` will be copied to that exact location, not to `projects/<repo-name>/`.
+
+## Step 4: Test with Local Configuration
+
+Create or edit `repos-config.local.json` for local testing:
+
+```json
+{
+ "repos": [
+ {
+ "name": "new-repo",
+ "url": "file://../new-repo",
+ "docs_path": "docs",
+ "target_path": "projects/new-repo",
+ "structure": "flat"
+ }
+ ]
+}
+```
+
+Then test aggregation:
+
+```bash
+make aggregate-local
+```
+
+## Step 5: Verify the Output
+
+Check that files are in the correct locations:
+
+```bash
+ls -la docs/projects/new-repo/
+```
+
+If using `github_target_path`, verify targeted files:
+
+```bash
+ls -la docs/tutorials/
+ls -la docs/how-to/
+```
+
+## Step 6: Lock the Commit (Production)
+
+For production, lock to a specific commit:
+
+```bash
+# This fetches the latest and updates repos-config.json
+make aggregate-update-repo REPO=new-repo
+```
+
+Or manually add the commit hash:
+
+```json
+{
+ "name": "new-repo",
+ "url": "https://github.com/gardenlinux/new-repo",
+ "docs_path": "docs",
+ "target_path": "projects/new-repo",
+ "ref": "main",
+ "commit": "abc123def456...",
+ "structure": "flat"
+}
+```
+
+## Advanced Configuration
+
+### Media Directories
+
+Automatically copy media directories alongside documentation:
+
+```json
+{
+ "name": "new-repo",
+ "media_directories": [".media", "assets", "_static"]
+}
+```
+
+The system will:
+
+- Find all instances of these directories recursively
+- Copy nested media dirs (e.g., `tutorials/assets/`) to the same relative path
+- Copy root-level media dirs (e.g., `_static/`) to the common ancestor of targeted files
+
+### Root Files
+
+Copy root-level files (like README.md or CONTRIBUTING.md):
+
+```json
+{
+ "name": "new-repo",
+ "root_files": ["README.md", "CONTRIBUTING.md", "LICENSE"]
+}
+```
+
+These files can also have `github_target_path` frontmatter for targeted placement.
+
+### Special Files
+
+Handle non-standard files:
+
+```json
+{
+ "name": "new-repo",
+ "special_files": {
+ "GUIDE.md": "how-to",
+ "CONCEPTS.md": "explanation"
+ }
+}
+```
+
+## Complete Example
+
+Here's a complete configuration:
+
+```json
+{
+ "name": "example-tool",
+ "url": "https://github.com/gardenlinux/example-tool",
+ "docs_path": "documentation",
+ "target_path": "projects/example-tool",
+ "ref": "docs-ng",
+ "commit": "1234567890abcdef",
+ "root_files": ["README.md"],
+ "structure": {
+ "getting-started": "tutorials",
+ "guides": "how-to",
+ "concepts": "explanation",
+ "api-reference": "reference"
+ },
+ "media_directories": [".media", "images"],
+ "special_files": {
+ "CHANGELOG.md": "reference"
+ }
+}
+```
+
+## Troubleshooting
+
+### Files Not Appearing
+
+- Verify `docs_path` points to the correct directory
+- Check that the branch or tag named in `ref` (e.g., `docs-ng`) exists in the source repository, or adjust `ref`
+- Ensure `github_target_path` frontmatter is correct
+
+### Media Not Copied
+
+- Add media directory names to `media_directories`
+- Check that media dirs exist in the source repository
+
+### Links Broken
+
+- The transformer attempts to rewrite links automatically
+- Check that relative links in source docs are correct
+- Review `src/aggregation/transformer.py` for link rewriting logic
+
+## See Also
+
+- [Configuration Reference](/reference/supporting_tools/docs-ng/reference/configuration) — Complete field documentation
+- [Architecture Explanation](/reference/supporting_tools/docs-ng/explanation/architecture) — How aggregation works
+- [Technical Reference](/reference/supporting_tools/docs-ng/reference/technical) — Source code documentation
diff --git a/docs/reference/supporting_tools/docs-ng/overview/index.md b/docs/reference/supporting_tools/docs-ng/overview/index.md
new file mode 100644
index 0000000..1748327
--- /dev/null
+++ b/docs/reference/supporting_tools/docs-ng/overview/index.md
@@ -0,0 +1,88 @@
+---
+title: "docs-ng Documentation Hub"
+description: "Documentation aggregation system for Garden Linux - combines docs from multiple repositories into a unified VitePress site"
+github_org: gardenlinux
+github_repo: docs-ng
+github_source_path: docs/reference/supporting_tools/docs-ng/overview/index.md
+---
+
+# docs-ng: Garden Linux Documentation Hub
+
+Build unified documentation from multiple Garden Linux repositories.
+
+> **Source Repository:** [gardenlinux/docs-ng](https://github.com/gardenlinux/docs-ng)
+
+## Overview
+
+docs-ng is the documentation aggregation system that powers the unified Garden Linux documentation site. It aggregates content from multiple source repositories (gardenlinux, builder, python-gardenlinux-lib) into a cohesive VitePress site.
+
+### Key Features
+
+- **Targeted Documentation**: Files with `github_target_path` frontmatter are automatically placed into the correct Diataxis categories
+- **Project Mirroring**: Complete repository documentation mirrored under `docs/projects/<repo-name>/`
+- **Commit Locking**: Reproducible builds with locked commit hashes
+- **Media Handling**: Automatic discovery and copying of media directories
+- **Link Rewriting**: Automatic link transformation for cross-repository references
+
+### Documentation Paths
+
+The system supports two complementary documentation paths:
+
+1. **Targeted Documentation** — Files with `github_target_path` frontmatter → `docs/tutorials/`, `docs/how-to/`, etc.
+2. **Project Mirror** — All repo docs mirrored under `docs/projects/<repo-name>/`
+
+## Quick Start
+
+```bash
+# Aggregate documentation from repos
+make aggregate
+
+# Run development server
+make dev
+
+# Build production site
+make build
+```
+
+## Architecture Overview
+
+```
+Source Repos → Fetch (git/local) → Transform → Structure → docs/ → VitePress
+```
+
+The aggregation pipeline consists of four main stages:
+
+1. **Fetch** — `src/aggregation/fetcher.py` pulls docs via git sparse checkout or local copy
+2. **Transform** — `src/aggregation/transformer.py` rewrites links, fixes frontmatter
+3. **Structure** — `src/aggregation/structure.py` reorganizes directories and copies media
+4. **Output** — VitePress builds the site
+
+## Project Structure
+
+```
+docs-ng/
+├── repos-config.json # Repository configuration
+├── repos-config.local.json # Local development config
+├── src/ # Source code
+│ ├── aggregate.py # CLI entry point
+│ └── aggregation/ # Core package
+├── tests/ # Test suite
+└── docs/ # Generated documentation
+ ├── projects/ # Mirrored repository docs
+ ├── tutorials/ # Aggregated tutorials
+ ├── how-to/ # Aggregated guides
+ ├── explanation/ # Aggregated explanations
+ └── reference/ # Aggregated reference docs
+```
+
+## Further Reading
+
+- [Getting Started Tutorial](/reference/supporting_tools/docs-ng/tutorials/getting_started) — Step-by-step guide to using docs-ng
+- [Adding Repositories](/reference/supporting_tools/docs-ng/how-to/adding-repos) — How to add new repositories to the aggregation
+- [Technical Reference](/reference/supporting_tools/docs-ng/reference/technical) — Source code and API documentation
+- [Configuration Reference](/reference/supporting_tools/docs-ng/reference/configuration) — Complete configuration options
+- [Architecture Explanation](/reference/supporting_tools/docs-ng/explanation/architecture) — Deep dive into how docs-ng works
+
+## Contributing
+
+See the [docs-ng repository](https://github.com/gardenlinux/docs-ng) for contribution guidelines.
diff --git a/docs/reference/supporting_tools/docs-ng/reference/configuration.md b/docs/reference/supporting_tools/docs-ng/reference/configuration.md
new file mode 100644
index 0000000..f3b4db2
--- /dev/null
+++ b/docs/reference/supporting_tools/docs-ng/reference/configuration.md
@@ -0,0 +1,303 @@
+---
+title: "docs-ng Configuration Reference"
+description: "Complete reference for repos-config.json and repos-config.local.json configuration options"
+github_org: gardenlinux
+github_repo: docs-ng
+github_source_path: docs/reference/supporting_tools/docs-ng/reference/configuration.md
+---
+
+# docs-ng Configuration Reference
+
+Complete reference for configuring the docs-ng aggregation system.
+
+> **Source Repository:** [gardenlinux/docs-ng](https://github.com/gardenlinux/docs-ng)
+
+## Configuration Files
+
+### `repos-config.json`
+
+Main configuration file for production aggregation. Uses git URLs and commit locks.
+
+**Location:** Project root
+
+### `repos-config.local.json`
+
+Development configuration file for local testing. Uses `file://` URLs to avoid git operations.
+
+**Location:** Project root
+
+## Configuration Structure
+
+```json
+{
+ "repos": [
+ {
+ "name": "repository-name",
+ "url": "https://github.com/org/repo",
+ "docs_path": "docs",
+ "target_path": "projects/repository-name",
+ "ref": "main",
+ "commit": "abc123...",
+ "root_files": ["README.md"],
+ "structure": "flat",
+ "media_directories": [".media", "assets"],
+ "special_files": {
+ "GUIDE.md": "how-to"
+ }
+ }
+ ]
+}
+```
+
+## Field Reference
+
+### Required Fields
+
+#### `name`
+- **Type:** String
+- **Description:** Unique identifier for the repository
+- **Example:** `"gardenlinux"`, `"builder"`, `"python-gardenlinux-lib"`
+- **Notes:** Used in generated paths and logging
+
+#### `url`
+- **Type:** String (URL or file path)
+- **Description:** Repository location
+- **Examples:**
+ - Git: `"https://github.com/gardenlinux/gardenlinux"`
+ - Local: `"file://../gardenlinux"`
+- **Notes:** For local development, use `file://` URLs in `repos-config.local.json`
+
+#### `docs_path`
+- **Type:** String
+- **Description:** Path to documentation directory within the repository
+- **Examples:** `"docs"`, `"documentation"`, `"."` (for root)
+- **Notes:** Relative to repository root; content of this directory is copied
+
+#### `target_path`
+- **Type:** String
+- **Description:** Destination path in the docs directory
+- **Example:** `"projects/gardenlinux"`
+- **Notes:** Usually `projects/<repo-name>` for project mirrors
+
+#### `ref`
+- **Type:** String
+- **Description:** Git reference to fetch (branch, tag, or commit)
+- **Examples:** `"main"`, `"docs-ng"`, `"v1.0.0"`
+- **Notes:** Required for git URLs; ignored for `file://` URLs
+
+### Optional Fields
+
+#### `commit`
+- **Type:** String (commit hash)
+- **Description:** Lock to a specific commit for reproducible builds
+- **Example:** `"abc123def456..."`
+- **Default:** Not used (fetches from `ref`)
+- **Notes:** Generated automatically with `make aggregate-update`
+
+#### `root_files`
+- **Type:** Array of strings
+- **Description:** Root-level files to copy (e.g., README.md, CONTRIBUTING.md)
+- **Example:** `["README.md", "CONTRIBUTING.md", "LICENSE"]`
+- **Default:** `[]` (no root files copied)
+- **Notes:** Files can have `github_target_path` frontmatter for targeted placement
+
+#### `structure`
+- **Type:** String or Object
+- **Description:** How to reorganize directory structure
+- **Options:**
+ - `"flat"` — Copy all files as-is
+ - `"sphinx"` — Sphinx documentation structure
+ - Object — Custom directory mapping (see below)
+- **Default:** `"flat"`
+
+**Custom Structure Example:**
+```json
+"structure": {
+ "tutorials": "tutorials",
+ "guides": "how-to",
+ "concepts": "explanation",
+ "api-reference": "reference"
+}
+```
+
+This maps source directories to Diataxis categories.
+
+#### `media_directories`
+- **Type:** Array of strings
+- **Description:** Directory names to treat as media/assets
+- **Example:** `[".media", "assets", "_static", "images"]`
+- **Default:** `[]`
+- **Notes:**
+ - Searched recursively in source repository
+ - Nested media dirs (e.g., `tutorials/assets/`) copied to same relative path
+ - Root-level media dirs (e.g., `_static/`) copied to common ancestor of targeted files
+
+#### `special_files`
+- **Type:** Object (filename → category mapping)
+- **Description:** Map non-standard files to Diataxis categories
+- **Example:**
+ ```json
+ {
+ "GUIDE.md": "how-to",
+ "CONCEPTS.md": "explanation",
+ "CHANGELOG.md": "reference"
+ }
+ ```
+- **Default:** `{}`
+- **Notes:** Used when files don't follow standard naming conventions
+
+## Complete Example
+
+```json
+{
+ "repos": [
+ {
+ "name": "gardenlinux",
+ "url": "https://github.com/gardenlinux/gardenlinux",
+ "docs_path": "docs",
+ "target_path": "projects/gardenlinux",
+ "ref": "docs-ng",
+ "commit": "c4b1d8d7f878fcb3e779315d28e35fcb19ae4dfb",
+ "root_files": [
+ "CONTRIBUTING.md",
+ "SECURITY.md"
+ ],
+ "structure": {
+ "tutorials": "tutorials",
+ "how-to": "how-to",
+ "explanation": "explanation",
+ "reference": "reference",
+ "contributing": "contributing"
+ },
+ "media_directories": [
+ ".media",
+ "assets",
+ "_static"
+ ]
+ },
+ {
+ "name": "builder",
+ "url": "https://github.com/gardenlinux/builder",
+ "docs_path": "docs",
+ "target_path": "projects/builder",
+ "ref": "docs-ng",
+ "commit": "b10476ad8c46130f310e36daa42c6e2c14fb51a9",
+ "structure": "flat",
+ "media_directories": [
+ ".media",
+ "assets",
+ "_static"
+ ]
+ },
+ {
+ "name": "python-gardenlinux-lib",
+ "url": "https://github.com/gardenlinux/python-gardenlinux-lib",
+ "docs_path": "docs",
+ "target_path": "projects/python-gardenlinux-lib",
+ "ref": "docs-ng",
+ "commit": "9142fccc3d83ab51759db7d328fa19166bc1df63",
+ "structure": "sphinx",
+ "media_directories": [
+ ".media",
+ "assets",
+ "_static"
+ ]
+ }
+ ]
+}
+```
+
+## Environment-Specific Configuration
+
+### Production (`repos-config.json`)
+
+- Use HTTPS git URLs
+- Include `commit` locks for reproducibility
+- Use `docs-ng` or stable branches for `ref`
+
+### Development (`repos-config.local.json`)
+
+- Use `file://` URLs for local repos
+- Omit `commit` field (not used for local)
+- Omit `ref` field (not needed for file://)
+- Keep structure and other settings consistent with production
+
+**Example local config:**
+```json
+{
+ "repos": [
+ {
+ "name": "gardenlinux",
+ "url": "file://../gardenlinux",
+ "docs_path": "docs",
+ "target_path": "projects/gardenlinux",
+ "root_files": ["CONTRIBUTING.md", "SECURITY.md"],
+ "structure": {
+ "tutorials": "tutorials",
+ "how-to": "how-to",
+ "explanation": "explanation",
+ "reference": "reference"
+ },
+ "media_directories": [".media", "assets"]
+ }
+ ]
+}
+```
+
+## Common Configuration Patterns
+
+### Minimal Configuration
+
+Simplest configuration for a flat repository:
+
+```json
+{
+ "name": "my-repo",
+ "url": "https://github.com/org/my-repo",
+ "docs_path": "docs",
+ "target_path": "projects/my-repo",
+ "ref": "main",
+ "structure": "flat"
+}
+```
+
+### With Targeted Documentation
+
+Repository using `github_target_path` frontmatter:
+
+```json
+{
+ "name": "my-repo",
+ "url": "https://github.com/org/my-repo",
+ "docs_path": "docs",
+ "target_path": "projects/my-repo",
+ "ref": "main",
+ "structure": "flat",
+ "media_directories": ["assets", "_static"]
+}
+```
+
+Then in your markdown files:
+```yaml
+---
+title: "My Tutorial"
+github_target_path: "docs/tutorials/my-tutorial.md"
+---
+```
+
+## Frontmatter Fields
+
+When using `github_target_path`, you can include additional metadata:
+
+- **`github_org`**: Organization name (e.g., `"gardenlinux"`)
+- **`github_repo`**: Repository name (e.g., `"docs-ng"`)
+- **`github_source_path`**: Original file path in source repo (e.g., `"docs/tutorial.md"`)
+
+These help create source links in the documentation.
+
+## See Also
+
+- [Getting Started](/reference/supporting_tools/docs-ng/tutorials/getting_started) — Initial setup guide
+- [Adding Repositories](/reference/supporting_tools/docs-ng/how-to/adding-repos) — How to add new repos
+- [Technical Reference](/reference/supporting_tools/docs-ng/reference/technical) — Source code documentation
+- [Architecture](/reference/supporting_tools/docs-ng/explanation/architecture) — System design
diff --git a/docs/reference/supporting_tools/docs-ng/reference/technical.md b/docs/reference/supporting_tools/docs-ng/reference/technical.md
new file mode 100644
index 0000000..ab2caf7
--- /dev/null
+++ b/docs/reference/supporting_tools/docs-ng/reference/technical.md
@@ -0,0 +1,181 @@
+---
+title: "docs-ng Technical Reference"
+description: "Source code documentation for the docs-ng aggregation system - modules, APIs, and implementation details"
+github_org: gardenlinux
+github_repo: docs-ng
+github_source_path: docs/reference/supporting_tools/docs-ng/reference/technical.md
+---
+
+# docs-ng Technical Reference
+
+Source code documentation for the docs-ng aggregation system.
+
+> **Source Repository:** [gardenlinux/docs-ng](https://github.com/gardenlinux/docs-ng) · **Source File:** [src/README.md](https://github.com/gardenlinux/docs-ng/blob/main/src/README.md)
+
+## Source Code Structure
+
+```
+src/
+├── aggregate.py # CLI entry point
+├── migration_tracker.py # Standalone utility
+└── aggregation/ # Core package
+ ├── __init__.py
+ ├── models.py # Data classes
+ ├── config.py # Config I/O
+ ├── fetcher.py # Git + local fetch
+ ├── transformer.py # Content transforms
+ └── structure.py # Directory transforms
+```
+
+## Module Reference
+
+### `aggregation/models.py`
+
+Data classes for type safety:
+
+- **`RepoConfig`** — Repository configuration data class
+- **`AggregateResult`** — Fetch result with commit hash
+
+### `aggregation/config.py`
+
+Configuration file handling:
+
+- **`load_config()`** — Parse repos-config.json
+- **`save_config()`** — Write updated config (commit locks)
+
+### `aggregation/fetcher.py`
+
+Repository fetching:
+
+**`DocsFetcher`** — Main fetcher class
+
+Methods:
+
+- **`__init__(project_root, update_locks=False)`** — Initialize with optional commit lock updating
+- **`fetch()`** — Fetch repository and return result with commit hash
+- **`_fetch_remote()`** — Git sparse checkout from remote repository
+- **`_fetch_local()`** — Filesystem copy from local repository
+- **`_copy_docs()`** — Static method to copy docs directory
+- **`_copy_root_files()`** — Static method to copy root-level files (e.g., CONTRIBUTING.md)
+
+### `aggregation/transformer.py`
+
+Content transformation:
+
+- **`rewrite_links()`** — Fix markdown links for cross-repository references
+- **`quote_yaml_value()`** — YAML safety for frontmatter values
+- **`ensure_frontmatter()`** — Add or fix frontmatter in markdown files
+- **`parse_frontmatter()`** — Extract metadata from markdown frontmatter
+- **`fix_broken_project_links()`** — Validate and fix links to project mirrors
+
+### `aggregation/structure.py`
+
+Directory operations:
+
+- **`transform_directory_structure()`** — Restructure docs based on config mapping
+- **`copy_targeted_docs(source_dir, docs_dir, repo_name, media_dirs=None, root_files=None)`** — Copy files with `github_target_path` frontmatter to specified locations
+ - Handles nested media dirs (e.g., `tutorials/assets/`) by copying to same relative path
+ - Handles root-level media dirs (e.g., `_static/`) by copying to common ancestor of targeted files
+ - Supports scanning root_files for targeted placement
+- **`process_markdown_file()`** — Transform single markdown file (links, frontmatter)
+- **`process_all_markdown()`** — Batch process all markdown files in directory
+
+### `aggregate.py`
+
+CLI orchestration — Combines all modules into the complete aggregation workflow.
+
+## Usage Example
+
+Basic programmatic usage:
+
+```python
+from aggregation import load_config, DocsFetcher, process_all_markdown
+
+# Load configuration
+repos = load_config("repos-config.json")
+
+# Initialize fetcher
+fetcher = DocsFetcher(project_root)
+
+# Fetch documentation
+result = fetcher.fetch(repo, output_dir)
+
+# Transform markdown files
+process_all_markdown(target_dir, repo_name)
+```
+
+## Key Concepts
+
+### Targeted Documentation
+
+Files with `github_target_path` in their frontmatter are automatically placed at that exact path:
+
+```yaml
+---
+github_target_path: "docs/tutorials/example.md"
+---
+```
+
+The `copy_targeted_docs()` function scans all markdown files and copies those with this frontmatter to their specified locations.
+
+### Link Rewriting
+
+The `rewrite_links()` function transforms markdown links to work in the aggregated site:
+
+- Relative links within the same repo are maintained
+- Cross-repository links are rewritten to point to the correct locations
+- Links to project mirrors are validated
+
+### Media Handling
+
+Media directories specified in `media_directories` configuration are:
+
+1. Discovered recursively in the source repository
+2. Copied alongside their associated documentation
+3. Placed according to whether they're nested (same relative path) or root-level (common ancestor)
+
+### Commit Locking
+
+When `update_locks=True` is passed to `DocsFetcher.__init__()`, the system:
+
+1. Fetches from the `ref` (branch/tag)
+2. Records the resolved commit hash
+3. Updates `repos-config.json` with the lock
+
+This ensures reproducible builds.
+
+## Development
+
+### Running Tests
+
+See [Testing Reference](/reference/supporting_tools/docs-ng/reference/testing) for details on the test suite.
+
+### Adding New Transformation
+
+To add a new transformation:
+
+1. Add function to `transformer.py`
+2. Call it from `process_markdown_file()` or `process_all_markdown()`
+3. Add tests in `tests/unit/test_transformer.py`
+
+### Adding New Structure Type
+
+To add a new structure mapping type:
+
+1. Update `transform_directory_structure()` in `structure.py`
+2. Add corresponding structure key handling
+3. Update configuration documentation
+
+## Architecture Decisions
+
+Key architectural decisions are documented in the source repository:
+
+- Sparse git checkout for efficiency
+- Frontmatter-based targeting for flexibility
+- Separate fetch/transform/structure stages for modularity
+
+## See Also
+
+- [Testing Reference](/reference/supporting_tools/docs-ng/reference/testing) — Test suite documentation
+- [Configuration Reference](/reference/supporting_tools/docs-ng/reference/configuration) — Complete configuration field reference
+- [Architecture Explanation](/reference/supporting_tools/docs-ng/explanation/architecture) — How the system works
diff --git a/docs/reference/supporting_tools/docs-ng/reference/testing.md b/docs/reference/supporting_tools/docs-ng/reference/testing.md
new file mode 100644
index 0000000..b89bd43
--- /dev/null
+++ b/docs/reference/supporting_tools/docs-ng/reference/testing.md
@@ -0,0 +1,232 @@
+---
+title: "docs-ng Testing Guide"
+description: "Test suite documentation for docs-ng - unit tests, integration tests, and testing best practices"
+github_org: gardenlinux
+github_repo: docs-ng
+github_source_path: docs/reference/supporting_tools/docs-ng/reference/testing.md
+---
+
+# docs-ng Testing Guide
+
+Test suite documentation for the docs-ng aggregation system.
+
+> **Source Repository:** [gardenlinux/docs-ng](https://github.com/gardenlinux/docs-ng) · **Source File:** [tests/README.md](https://github.com/gardenlinux/docs-ng/blob/main/tests/README.md)
+
+## Test Structure
+
+```
+tests/
+├── conftest.py # pytest configuration
+├── fixtures/ # Test data
+├── unit/ # Unit tests (pure functions)
+│ ├── test_config.py
+│ ├── test_models.py
+│ └── test_transformer.py
+└── integration/ # Integration tests (filesystem)
+ └── test_aggregation.py
+```
+
+## Running Tests
+
+### All Tests
+
+```bash
+make test
+```
+
+### Unit Tests Only
+
+```bash
+make test-unit
+```
+
+### Integration Tests Only
+
+```bash
+make test-integration
+```
+
+### Direct pytest
+
+For more control, use pytest directly:
+
+```bash
+# Run specific test file
+python3 -m pytest tests/unit/test_transformer.py -v
+
+# Run specific test function
+python3 -m pytest tests/unit/test_transformer.py::test_rewrite_links -v
+
+# Run with coverage
+python3 -m pytest tests/ --cov=src/aggregation --cov-report=html
+```
+
+## Test Types
+
+### Unit Tests
+
+Test pure functions with no I/O side effects:
+
+- **Link rewriting** (`rewrite_links`) — Transform markdown links
+- **YAML quoting** (`quote_yaml_value`) — Safely quote YAML values
+- **Frontmatter handling** (`ensure_frontmatter`, `parse_frontmatter`) — Parse and manipulate frontmatter
+- **Config loading/saving** — Parse and write configuration files
+- **Model validation** — Data class validation and serialization
+
+Unit tests are fast, isolated, and don't touch the filesystem.
+
+### Integration Tests
+
+Test filesystem operations and the full aggregation workflow:
+
+- **Local repository fetching** — Copy docs from local repos
+- **Markdown file processing** — Transform files in place
+- **Directory transformation** — Restructure directory trees
+- **End-to-end aggregation** — Complete workflow testing
+
+Integration tests are slower and require temporary directories.
+
+## Test Fixtures
+
+Located in `tests/fixtures/`, these provide:
+
+- Sample markdown files
+- Example frontmatter configurations
+- Mock repository structures
+- Configuration file examples
+
+## Adding Tests
+
+### Adding a Unit Test
+
+1. Create or update a test file in `tests/unit/`
+2. Use pytest conventions (`test_*` functions, `Test*` classes)
+3. Use `assert` statements for validation
+
+Example:
+
+```python
+def test_rewrite_links():
+ """Test that links are properly rewritten."""
+ content = "[link](../other/file.md)"
+ result = rewrite_links(content, "repo-name", "path/to/file.md")
+ assert "[link](/projects/repo-name/other/file.md)" in result
+```
+
+### Adding an Integration Test
+
+1. Create or update a test file in `tests/integration/`
+2. Use pytest fixtures for temporary directories
+3. Clean up resources in teardown
+
+Example:
+
+```python
+def test_fetch_local(tmp_path):
+ """Test fetching from local repository."""
+ source = tmp_path / "source"
+ source.mkdir()
+ (source / "docs").mkdir()
+ (source / "docs" / "test.md").write_text("# Test")
+
+ fetcher = DocsFetcher(tmp_path)
+ result = fetcher.fetch(config, tmp_path / "output")
+
+ assert result.success
+ assert (tmp_path / "output" / "test.md").exists()
+```
+
+## Test Coverage
+
+Check test coverage with:
+
+```bash
+python3 -m pytest tests/ --cov=src/aggregation --cov-report=term-missing
+```
+
+Target coverage levels:
+
+- **Unit tests**: >90% coverage of pure functions
+- **Integration tests**: Key workflows covered
+- **Overall**: >80% code coverage
+
+## Best Practices
+
+### Do
+
+- Test one thing per test function
+- Use descriptive test names that explain what is being tested
+- Use fixtures for common setup
+- Keep tests fast and isolated
+- Use parametrize for testing multiple inputs
+- Assert specific outcomes, not just absence of errors
+
+### Don't
+
+- Test implementation details
+- Make tests dependent on each other
+- Use time-based assertions (use mocks instead)
+- Leave temporary files after tests
+- Test third-party library behavior
+
+## Continuous Integration
+
+Tests run automatically on:
+
+- Pull requests
+- Pushes to main/docs-ng branches
+- Scheduled nightly builds
+
+## Debugging Tests
+
+### Run with verbose output
+
+```bash
+python3 -m pytest tests/ -vv
+```
+
+### Stop on first failure
+
+```bash
+python3 -m pytest tests/ -x
+```
+
+### Run failed tests only
+
+```bash
+python3 -m pytest tests/ --lf
+```
+
+### Use pdb debugger
+
+```bash
+python3 -m pytest tests/ --pdb
+```
+
+## Common Issues
+
+### ImportError
+
+Ensure you're in the project root and Python can find the `src` directory:
+
+```bash
+export PYTHONPATH="${PYTHONPATH}:$(pwd)/src"
+```
+
+### Fixture Not Found
+
+Check that `conftest.py` is in the correct location and properly defines fixtures.
+
+### Integration Tests Failing
+
+Integration tests may fail if:
+
+- Insufficient disk space
+- Permission issues with temp directories
+- Git not available in PATH
+
+## See Also
+
+- [Technical Reference](/reference/supporting_tools/docs-ng/reference/technical) — Source code documentation
+- [Configuration Reference](/reference/supporting_tools/docs-ng/reference/configuration) — Configuration options
+- [Architecture Explanation](/reference/supporting_tools/docs-ng/explanation/architecture) — System design
diff --git a/docs/reference/supporting_tools/docs-ng/tutorials/getting_started.md b/docs/reference/supporting_tools/docs-ng/tutorials/getting_started.md
new file mode 100644
index 0000000..6f79f4e
--- /dev/null
+++ b/docs/reference/supporting_tools/docs-ng/tutorials/getting_started.md
@@ -0,0 +1,152 @@
+---
+title: "Getting Started with docs-ng"
+description: "Step-by-step tutorial for setting up and using the docs-ng documentation aggregation system"
+github_org: gardenlinux
+github_repo: docs-ng
+github_source_path: docs/reference/supporting_tools/docs-ng/tutorials/getting_started.md
+---
+
+# Getting Started with docs-ng
+
+This tutorial will walk you through setting up and using docs-ng to aggregate documentation from multiple repositories.
+
+> **Source Repository:** [gardenlinux/docs-ng](https://github.com/gardenlinux/docs-ng)
+
+## Prerequisites
+
+- Python 3.x
+- pnpm (for VitePress)
+- Git
+
+## Step 1: Clone the Repository
+
+```bash
+git clone https://github.com/gardenlinux/docs-ng.git
+cd docs-ng
+```
+
+## Step 2: Install Dependencies
+
+```bash
+pnpm install
+```
+
+This installs VitePress and other Node.js dependencies needed to build the documentation site.
+
+## Step 3: Aggregate Documentation
+
+### From Remote Repositories (Production)
+
+Aggregate from locked commits in `repos-config.json`:
+
+```bash
+make aggregate
+```
+
+This fetches documentation from the configured repositories at their locked commit hashes.
+
+### From Local Repositories (Development)
+
+For local development, use `repos-config.local.json` with `file://` URLs:
+
+```bash
+make aggregate-local
+```
+
+This copies documentation from local repositories without using git.
+
+## Step 4: Start the Development Server
+
+```bash
+make dev
+```
+
+The documentation site will be available at `http://localhost:5173`.
+
+## Step 5: Make Changes
+
+As you work on documentation in source repositories:
+
+1. Make changes to markdown files in source repos
+2. Run `make aggregate-local` to update the aggregated docs
+3. The dev server will hot-reload automatically
+
+## Step 6: Build for Production
+
+When ready to deploy:
+
+```bash
+make build
+```
+
+This creates a production build in `docs/.vitepress/dist/`.
+
+## Common Workflows
+
+### Updating a Single Repository
+
+```bash
+make aggregate-repo REPO=gardenlinux
+```
+
+### Updating Lock Files
+
+To fetch the latest commits and update `repos-config.json`:
+
+```bash
+make aggregate-update
+```
+
+### Running Tests
+
+```bash
+make test
+```
+
+## Project Structure
+
+After aggregation, your docs directory will look like:
+
+```
+docs/
+├── projects/ # Mirrored repository docs
+│ ├── gardenlinux/
+│ ├── builder/
+│ └── python-gardenlinux-lib/
+├── tutorials/ # Aggregated tutorials
+├── how-to/ # Aggregated guides
+├── explanation/ # Aggregated explanations
+├── reference/ # Aggregated reference
+└── contributing/ # Aggregated contributing docs
+```
+
+## Next Steps
+
+- Learn how to [add new repositories](/reference/supporting_tools/docs-ng/how-to/adding-repos)
+- Understand the [architecture](/reference/supporting_tools/docs-ng/explanation/architecture)
+- Review the [configuration reference](/reference/supporting_tools/docs-ng/reference/configuration)
+
+## Troubleshooting
+
+### Clean Build
+
+If you encounter issues, try a clean build:
+
+```bash
+make clean
+make aggregate
+make dev
+```
+
+### Check Dependencies
+
+Ensure all dependencies are installed:
+
+```bash
+pnpm install
+python3 --version # Should be 3.x
+```
+
+### Verify Configuration
+
+Check that `repos-config.json` or `repos-config.local.json` is properly configured. See the [configuration reference](/reference/supporting_tools/docs-ng/reference/configuration) for details.
diff --git a/docs/reference/supporting_tools/index.md b/docs/reference/supporting_tools/index.md
new file mode 100644
index 0000000..385db0f
--- /dev/null
+++ b/docs/reference/supporting_tools/index.md
@@ -0,0 +1,11 @@
+---
+title: Supporting Tools
+description: Documentation for Garden Linux-related tools
+order: 1
+---
+
+# Supporting Tools
+
+
+
+---
diff --git a/repos-config.json b/repos-config.json
index 617041c..901e94f 100644
--- a/repos-config.json
+++ b/repos-config.json
@@ -11,10 +11,7 @@
"target_path": "projects/gardenlinux",
"ref": "docs-ng",
"commit": "c2cb572a8773779031dd5aaac75442caf9ee9f32",
- "root_files": [
- "CONTRIBUTING.md",
- "SECURITY.md"
- ],
+ "root_files": ["CONTRIBUTING.md", "SECURITY.md"],
"structure": {
"tutorials": "tutorials",
"how-to": "how-to",
@@ -24,7 +21,7 @@
},
"special_files": {},
"media_directories": [
- ".media"
+ ".media",
"assets",
"_static"
]
@@ -36,11 +33,7 @@
"target_path": "projects/builder",
"ref": "docs-ng",
"commit": "b10476ad8c46130f310e36daa42c6e2c14fb51a9",
- "media_directories": [
- ".media",
- "assets",
- "_static"
- ]
+ "media_directories": [".media", "assets", "_static"]
},
{
"name": "python-gardenlinux-lib",
@@ -50,11 +43,7 @@
"ref": "docs-ng",
"commit": "9142fccc3d83ab51759db7d328fa19166bc1df63",
"structure": "sphinx",
- "media_directories": [
- ".media",
- "assets",
- "_static"
- ]
+ "media_directories": [".media", "assets", "_static"]
}
]
}
diff --git a/src/README.md b/src/README.md
deleted file mode 100644
index f1b1a02..0000000
--- a/src/README.md
+++ /dev/null
@@ -1,73 +0,0 @@
-# Source Code Documentation
-
-## Structure
-
-```
-src/
-├── aggregate.py # CLI entry point
-├── migration_tracker.py # Standalone utility
-└── aggregation/ # Core package
- ├── __init__.py
- ├── models.py # Data classes
- ├── config.py # Config I/O
- ├── fetcher.py # Git + local fetch
- ├── transformer.py # Content transforms
- └── structure.py # Directory transforms
-```
-
-## Modules
-
-### `aggregation/models.py`
-Data classes for type safety:
-- `RepoConfig` — repository configuration
-- `AggregateResult` — fetch result with commit hash
-
-### `aggregation/config.py`
-Configuration file handling:
-- `load_config()` — parse repos-config.json
-- `save_config()` — write updated config (commit locks)
-
-### `aggregation/fetcher.py`
-Repository fetching:
-- `DocsFetcher` — main class
- - `__init__(project_root, update_locks=False)` — initialize with optional commit lock updating
- - `fetch()` — fetch repository and return result with commit hash
- - `_fetch_remote()` — git sparse checkout from remote repository
- - `_fetch_local()` — filesystem copy from local repository
- - `_copy_docs()` — static method to copy docs directory
- - `_copy_root_files()` — static method to copy root-level files (e.g., CONTRIBUTING.md)
-
-### `aggregation/transformer.py`
-Content transformation:
-- `rewrite_links()` — fix markdown links
-- `quote_yaml_value()` — YAML safety
-- `ensure_frontmatter()` — add/fix frontmatter
-- `parse_frontmatter()` — extract metadata
-- `fix_broken_project_links()` — validate links
-
-### `aggregation/structure.py`
-Directory operations:
-- `transform_directory_structure()` — restructure docs based on config
-- `copy_targeted_docs(source_dir, docs_dir, repo_name, media_dirs=None)` — place files via `github_target_path` frontmatter and copy associated media directories
- - Handles nested media dirs (e.g., `tutorials/assets/`) by copying to same relative path
- - Handles root-level media dirs (e.g., `_static/`) by copying to common ancestor of targeted files
-- `process_markdown_file()` — transform single markdown file
-- `process_all_markdown()` — batch process all markdown files in directory
-
-### `aggregate.py`
-CLI orchestration — combines all modules into workflow.
-
-## Usage
-
-```python
-from aggregation import load_config, DocsFetcher, process_all_markdown
-
-# Load config
-repos = load_config("repos-config.json")
-
-# Fetch docs
-fetcher = DocsFetcher(project_root)
-result = fetcher.fetch(repo, output_dir)
-
-# Transform
-process_all_markdown(target_dir, repo_name)
\ No newline at end of file
diff --git a/src/README.md b/src/README.md
new file mode 120000
index 0000000..9841fb0
--- /dev/null
+++ b/src/README.md
@@ -0,0 +1 @@
+../docs/reference/supporting_tools/docs-ng/reference/technical.md
\ No newline at end of file
diff --git a/src/aggregate.py b/src/aggregate.py
index 2425f64..5d439db 100755
--- a/src/aggregate.py
+++ b/src/aggregate.py
@@ -37,7 +37,7 @@ def transform_repo_docs(
# Step 1: Copy files with 'github_target_path:' frontmatter
print(f"\nStep 1: Processing targeted files...")
- copy_targeted_docs(str(source_dir), str(docs_dir), repo_name, repo.media_directories)
+ copy_targeted_docs(str(source_dir), str(docs_dir), repo_name, repo.media_directories, repo.root_files)
# Step 2: Transform project structure
print(f"\nStep 2: Transforming project structure...")
diff --git a/src/aggregation/structure.py b/src/aggregation/structure.py
index 8a39a66..500120a 100644
--- a/src/aggregation/structure.py
+++ b/src/aggregation/structure.py
@@ -83,7 +83,7 @@ def transform_directory_structure(
shutil.copytree(item, target_item, dirs_exist_ok=True)
-def copy_targeted_docs(source_dir: str, docs_dir: str, repo_name: str, media_dirs: Optional[List[str]] = None) -> None:
+def copy_targeted_docs(source_dir: str, docs_dir: str, repo_name: str, media_dirs: Optional[List[str]] = None, root_files: Optional[List[str]] = None) -> None:
"""
Copy markdown files with 'github_target_path:' frontmatter to their specified locations.
Also copies media directories to the common target path of targeted files.
@@ -93,6 +93,7 @@ def copy_targeted_docs(source_dir: str, docs_dir: str, repo_name: str, media_dir
docs_dir: Docs root directory
repo_name: Repository name
media_dirs: List of media directories to copy alongside targeted files
+ root_files: List of root-level files to scan for github_target_path (e.g., README.md)
"""
source_path = Path(source_dir)
docs_path = Path(docs_dir)
@@ -101,8 +102,35 @@ def copy_targeted_docs(source_dir: str, docs_dir: str, repo_name: str, media_dir
print(f" [Warning] Source directory not found: {source_dir}")
return
- # Find all markdown files
+ # Find all markdown files (recursively in source_dir)
md_files = list(source_path.rglob("*.md"))
+
+ # Also check root_files if provided
+ # Note: root_files may have been flattened by the fetcher (e.g., src/README.md -> README.md)
+ # So we need to check both the original path and just the basename
+ if root_files:
+ print(f" Checking {len(root_files)} root_files for github_target_path...")
+ for root_file in root_files:
+ # Try the full path first
+ root_file_path = source_path / root_file
+
+ # If that doesn't exist, try just the basename (in case fetcher flattened it)
+ if not root_file_path.exists():
+ root_file_path = source_path / Path(root_file).name
+
+ print(f" Checking: {root_file} -> {root_file_path}")
+ print(f" Exists: {root_file_path.exists()}, Is file: {root_file_path.is_file() if root_file_path.exists() else 'N/A'}, Ends with .md: {root_file.endswith('.md')}")
+
+ if root_file_path.exists() and root_file_path.is_file() and root_file.endswith('.md'):
+ # Add to list if not already there
+ if root_file_path not in md_files:
+ md_files.append(root_file_path)
+ print(f" ✓ Added to scan list")
+ else:
+ print(f" Already in list")
+ else:
+ print(f" ✗ Skipped")
+
targeted_files = []
print(f" Scanning {len(md_files)} files for 'github_target_path:' frontmatter...")
diff --git a/tests/README.md b/tests/README.md
deleted file mode 100644
index 512338f..0000000
--- a/tests/README.md
+++ /dev/null
@@ -1,59 +0,0 @@
-# Test Suite
-
-## Structure
-
-```
-tests/
-├── conftest.py # pytest configuration
-├── fixtures/ # Test data
-├── unit/ # Unit tests (pure functions)
-│ ├── test_config.py
-│ ├── test_models.py
-│ └── test_transformer.py
-└── integration/ # Integration tests (filesystem)
- └── test_aggregation.py
-```
-
-## Running Tests
-
-```bash
-# All tests
-make test
-
-# Unit tests
-make test-unit
-
-# Integration tests
-make test-integration
-
-# Direct pytest
-python3 -m pytest tests/unit/ -v
-python3 -m pytest tests/integration/ -v
-```
-
-## Test Types
-
-### Unit Tests
-
-Test pure functions with no I/O:
-
-- Link rewriting (`rewrite_links`)
-- YAML quoting (`quote_yaml_value`)
-- Frontmatter handling (`ensure_frontmatter`)
-- Config loading/saving
-- Model validation
-
-### Integration Tests
-
-Test filesystem operations:
-
-- Local repository fetching
-- Markdown file processing
-- Directory transformation
-
-## Adding Tests
-
-1. Unit test: `tests/unit/test_*.py`
-2. Integration test: `tests/integration/test_*.py`
-3. Use pytest conventions: `test_*` functions, `Test*` classes
-4. Use `assert` statements, not custom test runners
diff --git a/tests/README.md b/tests/README.md
new file mode 120000
index 0000000..ece7573
--- /dev/null
+++ b/tests/README.md
@@ -0,0 +1 @@
+../docs/reference/supporting_tools/docs-ng/reference/testing.md
\ No newline at end of file
From 16b6689829305f7d4d712d7e690467b716444a8a Mon Sep 17 00:00:00 2001
From: Eike Waldt
Date: Wed, 1 Apr 2026 15:43:50 +0200
Subject: [PATCH 3/5] docs: exclude pnpm-lock.yaml in spellcheck
Signed-off-by: Eike Waldt
On-behalf-of: SAP
---
.exclude_spelling | 1 +
package.json | 2 +-
2 files changed, 2 insertions(+), 1 deletion(-)
create mode 100644 .exclude_spelling
diff --git a/.exclude_spelling b/.exclude_spelling
new file mode 100644
index 0000000..bd5535a
--- /dev/null
+++ b/.exclude_spelling
@@ -0,0 +1 @@
+pnpm-lock.yaml
diff --git a/package.json b/package.json
index 1e4e5f4..f04e915 100644
--- a/package.json
+++ b/package.json
@@ -5,7 +5,7 @@
"docs:build": "vitepress build docs",
"docs:preview": "vitepress preview docs",
"docs:linkcheck": "lychee \"docs/**/*.md\" --config lychee.toml",
- "docs:spelling": "codespell --ignore-words .custom_wordlist.txt docs",
+ "docs:spelling": "codespell --ignore-words .custom_wordlist.txt docs --exclude-file $(cat .exclude_spelling)",
"docs:woke": "woke docs/"
},
"dependencies": {
From 8ccf174ff030113a40be53c8983cc9cc78060ec9 Mon Sep 17 00:00:00 2001
From: Eike Waldt
Date: Wed, 1 Apr 2026 16:39:30 +0200
Subject: [PATCH 4/5] fix: spelling is now green
Signed-off-by: Eike Waldt
On-behalf-of: SAP
---
.custom_wordlist.txt | 2 ++
repos-config.json | 22 ++++++++++++++--------
2 files changed, 16 insertions(+), 8 deletions(-)
diff --git a/.custom_wordlist.txt b/.custom_wordlist.txt
index e69de29..e464644 100644
--- a/.custom_wordlist.txt
+++ b/.custom_wordlist.txt
@@ -0,0 +1,2 @@
+cna
+hda
diff --git a/repos-config.json b/repos-config.json
index 901e94f..ac63c66 100644
--- a/repos-config.json
+++ b/repos-config.json
@@ -4,14 +4,13 @@
"name": "gardenlinux",
"url": "https://github.com/gardenlinux/gardenlinux",
"docs_path": "docs",
+ "target_path": "projects/gardenlinux",
+ "ref": "docs-ng",
+ "commit": "a915a5a6f0d3f6a65bc4ce24db16d5119448287a",
"root_files": [
"CONTRIBUTING.md",
"SECURITY.md"
],
- "target_path": "projects/gardenlinux",
- "ref": "docs-ng",
- "commit": "c2cb572a8773779031dd5aaac75442caf9ee9f32",
- "root_files": ["CONTRIBUTING.md", "SECURITY.md"],
"structure": {
"tutorials": "tutorials",
"how-to": "how-to",
@@ -19,7 +18,6 @@
"reference": "reference",
"contributing": "contributing"
},
- "special_files": {},
"media_directories": [
".media",
"assets",
@@ -32,8 +30,12 @@
"docs_path": "docs",
"target_path": "projects/builder",
"ref": "docs-ng",
- "commit": "b10476ad8c46130f310e36daa42c6e2c14fb51a9",
- "media_directories": [".media", "assets", "_static"]
+ "commit": "086e74ff032c5f2a05989aef4f20ba69f94bdbf9",
+ "media_directories": [
+ ".media",
+ "assets",
+ "_static"
+ ]
},
{
"name": "python-gardenlinux-lib",
@@ -43,7 +45,11 @@
"ref": "docs-ng",
"commit": "9142fccc3d83ab51759db7d328fa19166bc1df63",
"structure": "sphinx",
- "media_directories": [".media", "assets", "_static"]
+ "media_directories": [
+ ".media",
+ "assets",
+ "_static"
+ ]
}
]
}
From bd1796118d3849b1c8b77a9c2c9e3641d738053c Mon Sep 17 00:00:00 2001
From: Eike Waldt
Date: Wed, 1 Apr 2026 16:58:32 +0200
Subject: [PATCH 5/5] fix: tune lychee linkcheck
Signed-off-by: Eike Waldt
On-behalf-of: SAP
---
.github/workflows/linkcheck.yml | 2 ++
lychee.toml | 28 +++++++++++++++++++++++-----
package.json | 2 +-
repos-config.json | 2 +-
src/aggregation/transformer.py | 4 ++++
5 files changed, 31 insertions(+), 7 deletions(-)
diff --git a/.github/workflows/linkcheck.yml b/.github/workflows/linkcheck.yml
index a577dbf..a44f35d 100644
--- a/.github/workflows/linkcheck.yml
+++ b/.github/workflows/linkcheck.yml
@@ -30,3 +30,5 @@ jobs:
- name: Run link check
uses: lycheeverse/lychee-action@8646ba30535128ac92d33dfc9133794bfdd9b411 # v2.8.0
+ with:
+ args: docs/**/*.md --config lychee.toml --root-dir "$(pwd)/docs/"
diff --git a/lychee.toml b/lychee.toml
index 84ea75d..f0e5da1 100644
--- a/lychee.toml
+++ b/lychee.toml
@@ -1,13 +1,31 @@
# lychee link checker configuration
# https://lychee.cli.rs/
+# Verbosity level: 'error', 'warn', 'info', 'debug', 'trace'
+# Setting to 'error' will hide [WARN] messages
+verbose = "error"
+
# Exclude patterns
exclude = [
- "^https://github\\.com/.*/edit/",
- "^https://github\\.com/.*/new/",
- "^mailto:",
- "^tel:",
- "^javascript:",
+ "^/",
+ "^file://",
+ # Legacy docs that reference non-existent GitHub paths
+ "^https://github\\.com/gardenlinux/gardenlinux/blob/main/legacy/",
+ "^https://github\\.com/gardenlinux/gardenlinux/blob/main/tests/README($|#)",
+ "^https://github\\.com/gardenlinux/gardenlinux/blob/main/features/.*/README\\.md$",
+ "^https://github\\.com/gardenlinux/gardenlinux/blob/main/features/.*/file\\.include/",
+ "^https://github\\.com/gardenlinux/gardenlinux/blob/main/assets/",
+ "^https://github\\.com/gardenlinux/gardenlinux/blob/main/reference/",
+ "^https://github\\.com/gardenlinux/gardenlinux/blob/main/how-to/",
+ "^https://github\\.com/gardenlinux/gardenlinux/blob/main/tutorials/",
+ "^https://github\\.com/gardenlinux/gardenlinux/blob/main/contributing/",
+ "^https://github\\.com/gardenlinux/gardenlinux/blob/main/explanation/",
+ "^https://github\\.com/gardenlinux/gardenlinux/blob/main/mailto:",
+ "^https://github\\.com/gardenlinux/builder/blob/main/reference/features$",
+ "^https://github\\.com/gardenlinux/gardenlinux/tree/main/tests/platformSetup/",
+ "^https://github\\.com/gardenlinux/python-gardenlinux-lib/blob/main/_static/",
+ "^https://github\\.com/gardenlinux/docs-ng/blob/main/src/README\\.md$",
+ "^https://github\\.com/gardenlinux/docs-ng/blob/main/tests/README\\.md$",
]
# Accept these status codes as valid
diff --git a/package.json b/package.json
index f04e915..904ef5b 100644
--- a/package.json
+++ b/package.json
@@ -4,7 +4,7 @@
"docs:dev": "vitepress dev docs",
"docs:build": "vitepress build docs",
"docs:preview": "vitepress preview docs",
- "docs:linkcheck": "lychee \"docs/**/*.md\" --config lychee.toml",
+ "docs:linkcheck": "lychee \"docs/**/*.md\" --config lychee.toml --root-dir \"$(pwd)/docs/\"",
"docs:spelling": "codespell --ignore-words .custom_wordlist.txt docs --exclude-file $(cat .exclude_spelling)",
"docs:woke": "woke docs/"
},
diff --git a/repos-config.json b/repos-config.json
index ac63c66..da5a508 100644
--- a/repos-config.json
+++ b/repos-config.json
@@ -6,7 +6,7 @@
"docs_path": "docs",
"target_path": "projects/gardenlinux",
"ref": "docs-ng",
- "commit": "a915a5a6f0d3f6a65bc4ce24db16d5119448287a",
+ "commit": "421e067d71fd118f58ec5126faedd028e623ad8d",
"root_files": [
"CONTRIBUTING.md",
"SECURITY.md"
diff --git a/src/aggregation/transformer.py b/src/aggregation/transformer.py
index b9319d1..dad2c47 100644
--- a/src/aggregation/transformer.py
+++ b/src/aggregation/transformer.py
@@ -37,6 +37,10 @@ def replace_link(match):
if link.startswith("http://") or link.startswith("https://"):
return match.group(0)
+ # Skip special protocols (mailto, tel, javascript, etc.)
+ if ":" in link and not link.startswith("/") and not link.startswith("./") and not link.startswith("../"):
+ return match.group(0)
+
# Skip anchors
if link.startswith("#"):
return match.group(0)