Merged
24 changes: 24 additions & 0 deletions .github/workflows/claude-code-test.yml
@@ -436,13 +436,37 @@ jobs:
echo "Workflow tested: /deepwork fruits full - Executed full fruits workflow (identify + classify)"
echo ""

- name: Display status files
if: steps.check-key.outputs.has_key == 'true' && always()
working-directory: test_project
run: |
echo "=== Status Files ==="
STATUS_DIR=".deepwork/tmp/status/v1"
if [ -d "$STATUS_DIR" ]; then
echo "--- job_manifest.yml ---"
cat "$STATUS_DIR/job_manifest.yml" 2>/dev/null || echo "(not found)"
echo ""
if [ -d "$STATUS_DIR/sessions" ]; then
for f in "$STATUS_DIR/sessions"/*.yml; do
echo "--- $(basename "$f") ---"
cat "$f"
echo ""
done
else
echo "(no session status files)"
fi
else
echo "(status directory not found)"
fi

- name: Upload test artifacts
if: steps.check-key.outputs.has_key == 'true' && always()
uses: actions/upload-artifact@v4
with:
name: claude-code-e2e-outputs
path: |
test_project/.deepwork/jobs/fruits/
test_project/.deepwork/tmp/status/
test_project/.claude/skills/deepwork/
test_project/fruits/identified_fruits.md
test_project/fruits/classified_fruits.md
31 changes: 29 additions & 2 deletions doc/architecture.md
@@ -57,7 +57,8 @@ deepwork/ # DeepWork tool repository
│ │ ├── state.py # Workflow session state management
│ │ ├── schemas.py # Pydantic models for I/O
│ │ ├── quality_gate.py # Quality gate with review agent
│ │ └── claude_cli.py # Claude CLI subprocess wrapper
│ │ ├── claude_cli.py # Claude CLI subprocess wrapper
│ │ └── status.py # Status file writer for external consumers
│ ├── hooks/ # Hook system and cross-platform wrappers
│ │ ├── wrapper.py # Cross-platform input/output normalization
│ │ ├── claude_hook.sh # Shell wrapper for Claude Code
@@ -1080,6 +1081,7 @@ class StateManager:
def get_all_outputs(session_id, agent_id=None) -> dict
def get_stack(session_id, agent_id=None) -> list[StackEntry]
def get_stack_depth(session_id, agent_id=None) -> int
def get_all_session_data(session_id) -> dict[agent_id, (active_stack, completed_workflows)]
```

Session state includes:
@@ -1115,14 +1117,39 @@ The quality gate supports two modes:
- **External runner** (`evaluate_reviews`): Invokes Claude Code via subprocess to evaluate each review, returns list of failed `ReviewResult` objects
- **Self-review** (`build_review_instructions_file`): Generates a review instructions file for the agent to spawn a subagent for self-review

### Status Writer (`jobs/mcp/status.py`)

Writes file-based status projections for external consumers (UIs, dashboards, monitoring). Status files are written to `.deepwork/tmp/status/v1/` and are a **stable external interface** — the file format must not change without versioning.

```python
class StatusWriter:
def __init__(self, project_root: Path)

def write_manifest(self, jobs: list[JobDefinition]) -> None
"""Write job_manifest.yml with all available jobs, workflows, and steps."""

def write_session_status(self, session_id: str, state_manager: StateManager, job_loader: Callable) -> None
"""Write sessions/<session_id>.yml from current state."""
```

**Output files:**
- `job_manifest.yml` — catalog of all jobs/workflows/steps, sorted alphabetically
- `sessions/<session_id>.yml` — per-session workflow execution status including active workflow, step history, and completed/aborted workflows

**Write triggers:**
- Manifest: MCP server startup, `get_workflows`
- Session status: `start_workflow`, `finished_step`, `go_to_step`, `abort_workflow`

Status writes are fire-and-forget: failures are logged as warnings and never fail the MCP tool call.
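The guard around each write might look like the following minimal sketch. The helper name `write_status_safely` is an assumption for illustration; the architecture above only specifies the behavior (log a warning, never raise into the tool call):

```python
import logging

logger = logging.getLogger(__name__)


def write_status_safely(write_fn, *args, **kwargs):
    """Run a status write, logging failures instead of raising.

    Hypothetical helper illustrating fire-and-forget semantics; the
    actual guard placement in the codebase may differ.
    """
    try:
        write_fn(*args, **kwargs)
    except Exception:
        # Swallow the error so the MCP tool call still succeeds.
        logger.warning("Status write failed; continuing", exc_info=True)
```

A failing writer is reduced to a log line, so tool responses are unaffected by status I/O errors.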

### Schemas (`jobs/mcp/schemas.py`)

Pydantic models for all tool inputs and outputs:
- `StartWorkflowInput`, `FinishedStepInput`, `AbortWorkflowInput`, `GoToStepInput`
- `GetWorkflowsResponse`, `StartWorkflowResponse`, `FinishedStepResponse`, `AbortWorkflowResponse`, `GoToStepResponse`
- `ActiveStepInfo`, `ExpectedOutput`, `ReviewInfo`, `ReviewResult`, `StackEntry`
- `JobInfo`, `WorkflowInfo`, `JobLoadErrorInfo`
- `WorkflowSession`, `StepProgress`
- `WorkflowSession`, `StepProgress`, `StepHistoryEntry`
- `QualityGateResult`, `QualityCriteriaResult`

## MCP Server Registration
116 changes: 116 additions & 0 deletions specs/deepwork/jobs/JOBS-REQ-010-status-reporting.md
@@ -0,0 +1,116 @@
# JOBS-REQ-010: Status Reporting

## Overview

DeepWork provides a file-based external interface for reporting the current status of jobs and workflow sessions. This allows external tools (UIs, dashboards, monitoring) to read the current state without going through the MCP protocol. Status files are a **stable external interface** — the file format MUST NOT change without versioning.

## Requirements

### JOBS-REQ-010.1: Status Directory Structure

1. Status files MUST be written to `.deepwork/tmp/status/v1/` under the project root.
2. The job manifest MUST be written to `.deepwork/tmp/status/v1/job_manifest.yml`.
3. Per-session status files MUST be written to `.deepwork/tmp/status/v1/sessions/<session_id>.yml`.
4. The status directory structure MUST be versioned (currently `v1`) to allow future format changes.

### JOBS-REQ-010.2: Job Manifest

1. `job_manifest.yml` MUST contain a `jobs` array of all available job definitions.
2. Each job entry MUST include `name`, `display_name`, `summary`, and `workflows`.
3. Each workflow entry MUST include `name`, `display_name`, `summary`, and `steps`.
4. Each step entry MUST include `name` and `display_name`.
5. Jobs MUST be sorted alphabetically by `name`.
6. Workflows within each job MUST be sorted alphabetically by `name`.
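The sorting and field requirements above can be sketched as follows. This is illustrative only: it assumes jobs arrive as plain dicts, whereas the real writer consumes `JobDefinition` objects:

```python
def build_manifest(jobs: list[dict]) -> dict:
    """Assemble the job_manifest.yml payload per JOBS-REQ-010.2.

    Assumes each job is a dict with name, display_name, summary, and
    workflows keys (a simplification of the real JobDefinition model).
    """
    return {
        "jobs": sorted(
            (
                {
                    "name": job["name"],
                    "display_name": job["display_name"],
                    "summary": job["summary"],
                    # Workflows sorted alphabetically within each job.
                    "workflows": sorted(job["workflows"], key=lambda w: w["name"]),
                }
                for job in jobs
            ),
            # Jobs sorted alphabetically by name.
            key=lambda job: job["name"],
        )
    }
```

The resulting dict is what gets serialized to YAML at `job_manifest.yml`.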

### JOBS-REQ-010.3: Job Manifest Write Triggers

1. The manifest MUST be written at MCP server startup.
2. The manifest MUST be written when `get_workflows` is called.

### JOBS-REQ-010.4: Display Name Derivation

1. `display_name` MUST be derived from the API name by replacing underscores and hyphens with spaces, then title-casing the result.
2. An empty API name MUST produce an empty display name.
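One way to satisfy both rules, using Python's built-in `str.title` (a sketch, not necessarily the shipped implementation):

```python
def derive_display_name(api_name: str) -> str:
    """Replace underscores and hyphens with spaces, then title-case.

    An empty input naturally yields an empty string (JOBS-REQ-010.4.2).
    """
    return api_name.replace("_", " ").replace("-", " ").title()
```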

### JOBS-REQ-010.5: Session Status Format

1. Each session status file MUST include `session_id`, `last_updated_at`, `active_workflow`, and `workflows`.
2. `active_workflow` MUST be the `workflow_instance_id` of the top-of-stack workflow on the main stack, or `null` if no active workflow.
3. `last_updated_at` MUST be an ISO 8601 timestamp in UTC.
4. `workflows` MUST be an array of all workflow instances (active, completed, and aborted) for the session.
5. Each workflow entry MUST include `workflow_instance_id`, `job_name`, `status`, `workflow` (definition snapshot), `agent_id`, and `steps` (ordered history).

### JOBS-REQ-010.6: Session Status Write Triggers

1. Session status MUST be written when `start_workflow` is called.
2. Session status MUST be written when `finished_step` is called (for all result statuses: needs_work, next_step, workflow_complete).
3. Session status MUST be written when `go_to_step` is called.
4. Session status MUST be written when `abort_workflow` is called.

### JOBS-REQ-010.7: Workflow Instance ID

1. Each WorkflowSession MUST have a `workflow_instance_id` field.
2. `workflow_instance_id` MUST be generated as `uuid4().hex` (32 hex characters).
3. `workflow_instance_id` MUST be generated via a default factory so existing state files without the field are backward-compatible.
4. `workflow_instance_id` MUST be unique across all workflow instances.

### JOBS-REQ-010.8: Step History

1. Each WorkflowSession MUST maintain a `step_history` list of `StepHistoryEntry` objects.
2. `start_step()` MUST append a new `StepHistoryEntry` with `step_id` and `started_at`.
3. `complete_step()` MUST update the last matching `StepHistoryEntry`'s `finished_at`.
4. `go_to_step()` followed by `start_step()` MUST create a new history entry, resulting in the same step appearing multiple times in history.
5. Step history entries MUST NOT be cleared by `go_to_step()` (only `step_progress` is cleared).

### JOBS-REQ-010.9: Sub-Workflow Instance Tracking

1. `StepProgress` MUST have a `sub_workflow_instance_ids` list field.
2. `StepHistoryEntry` MUST have a `sub_workflow_instance_ids` list field.
3. When a nested workflow is started (parent exists on same stack), the child's `workflow_instance_id` MUST be appended to the parent's current step's `sub_workflow_instance_ids` in both `step_progress` and `step_history`.
4. When a cross-agent sub-workflow is started (agent_id set, parent on main stack), the child's `workflow_instance_id` MUST also be recorded on the main stack parent's current step.

### JOBS-REQ-010.10: Completed/Aborted Workflow Preservation

1. State files MUST support a `completed_workflows` array alongside `workflow_stack`.
2. `complete_workflow()` MUST move the completed session from `workflow_stack` to `completed_workflows`.
3. `abort_workflow()` MUST move the aborted session from `workflow_stack` to `completed_workflows`.
4. `_write_stack()` MUST preserve existing `completed_workflows` when the parameter is not explicitly provided.
5. Multiple completed/aborted workflows MUST accumulate in the `completed_workflows` array.

### JOBS-REQ-010.11: Session Data Retrieval

1. `get_all_session_data()` MUST scan the session directory for `state.json` and `agent_*.json` files.
2. `get_all_session_data()` MUST return a dict mapping agent_id (None for main) to (active_stack, completed_workflows) tuples.
3. `get_all_session_data()` MUST return an empty dict for non-existent sessions.

### JOBS-REQ-010.12: Fire-and-Forget Semantics

1. Status writing failures MUST be logged as warnings.
2. Status writing failures MUST NOT cause the MCP tool call to fail.
3. Status writing MUST NOT block or delay the tool response.

### JOBS-REQ-010.13: External Interface Stability

1. The file format of `job_manifest.yml` and `sessions/<session_id>.yml` MUST be treated as a stable external contract.
2. Field additions MAY be made (backward-compatible).
3. Field removals, renames, or semantic changes MUST NOT be made without incrementing the version path (e.g., `v2/`).

## Test Coverage

| Requirement | Test File | Test Name |
|-------------|-----------|-----------|
| JOBS-REQ-010.1 | test_status.py | TestWriteManifest::test_creates_manifest_file |
| JOBS-REQ-010.2 | test_status.py | TestWriteManifest::test_manifest_structure |
| JOBS-REQ-010.3.1 | test_tools.py | TestStatusWriterIntegration::test_startup_writes_manifest |
| JOBS-REQ-010.3.2 | test_tools.py | TestStatusWriterIntegration::test_get_workflows_writes_manifest |
| JOBS-REQ-010.4 | test_status.py | TestDeriveDisplayName::* |
| JOBS-REQ-010.5 | test_status.py | TestWriteSessionStatus::test_session_status_structure, test_last_updated_at_is_iso8601_utc |
| JOBS-REQ-010.6 | test_tools.py | TestStatusWriterIntegration::test_start_workflow_writes_session_status, test_finished_step_writes_session_status, test_abort_workflow_writes_session_status |
| JOBS-REQ-010.7 | test_state.py | TestWorkflowInstanceId::* |
| JOBS-REQ-010.8 | test_state.py | TestStepHistory::* |
| JOBS-REQ-010.9 | test_state.py | TestSubWorkflowInstanceIds::* (incl. test_cross_agent_sub_workflow_records_on_main_stack) |
| JOBS-REQ-010.10 | test_state.py | TestCompletedWorkflows::* (incl. test_write_stack_preserves_completed_workflows) |
| JOBS-REQ-010.11 | test_state.py | TestGetAllSessionData::* |
| JOBS-REQ-010.12.1, .12.2 | test_tools.py | TestStatusWriterIntegration::test_status_writer_failure_does_not_break_tool |
| JOBS-REQ-010.12.3 | (Code review) | Synchronous write is acceptable given fire-and-forget semantics |
| JOBS-REQ-010.13 | (Manual review) | Structural contract |
24 changes: 24 additions & 0 deletions src/deepwork/jobs/mcp/schemas.py
@@ -8,6 +8,7 @@

from enum import StrEnum
from typing import Any
from uuid import uuid4

from pydantic import BaseModel, Field

@@ -375,6 +376,22 @@ class StepProgress(BaseModel):
)
notes: str | None = Field(default=None, description="Notes from agent")
quality_attempts: int = Field(default=0, description="Number of quality gate attempts")
sub_workflow_instance_ids: list[str] = Field(
default_factory=list,
description="Instance IDs of sub-workflows started from this step",
)


class StepHistoryEntry(BaseModel):
"""An entry in the step execution history."""

step_id: str = Field(description="Step identifier")
started_at: str | None = Field(default=None, description="ISO timestamp when started")
finished_at: str | None = Field(default=None, description="ISO timestamp when finished")
sub_workflow_instance_ids: list[str] = Field(
default_factory=list,
description="Instance IDs of sub-workflows started during this step execution",
)


class WorkflowSession(BaseModel):
@@ -386,6 +403,10 @@ class WorkflowSession(BaseModel):
"This is the same session ID the agent received at startup."
)
)
workflow_instance_id: str = Field(
default_factory=lambda: uuid4().hex,
description="Unique identifier for this workflow instance",
)
job_name: str = Field(description="Name of the job")
workflow_name: str = Field(description="Name of the workflow")
goal: str = Field(description="User's goal for this workflow")
Expand All @@ -396,6 +417,9 @@ class WorkflowSession(BaseModel):
step_progress: dict[str, StepProgress] = Field(
default_factory=dict, description="Progress for each step"
)
step_history: list[StepHistoryEntry] = Field(
default_factory=list, description="Ordered history of step executions"
)
started_at: str = Field(description="ISO timestamp when session started")
completed_at: str | None = Field(default=None, description="ISO timestamp when completed")
status: str = Field(default="active", description="Session status: active, completed, aborted")
10 changes: 10 additions & 0 deletions src/deepwork/jobs/mcp/server.py
@@ -29,6 +29,7 @@
StartWorkflowInput,
)
from deepwork.jobs.mcp.state import StateManager
from deepwork.jobs.mcp.status import StatusWriter
from deepwork.jobs.mcp.tools import WorkflowTools

# Configure logging
@@ -97,14 +98,23 @@ def create_server(
# Self-review mode: no CLI, always reference files by path (0 inline)
quality_gate = QualityGate(cli=None, max_inline_files=0)

status_writer = StatusWriter(project_path)

tools = WorkflowTools(
project_root=project_path,
state_manager=state_manager,
quality_gate=quality_gate,
max_quality_attempts=quality_gate_max_attempts,
external_runner=external_runner,
status_writer=status_writer,
)

# Write initial manifest at startup
try:
tools._write_manifest()
except Exception:
logger.warning("Failed to write initial job manifest", exc_info=True)

# Create MCP server
mcp = FastMCP(
name="deepwork",