Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,12 @@ Configs live under `config/`. The default is `config/orchestration.json`.
],

"Selection": {
"Type": "sequential"
"Type": "keyword",
"DefaultAgent": "Developer",
"Routes": [
{ "Keyword": "HANDOFF TO REVIEWER", "Agent": "Reviewer" },
{ "Keyword": "REVISION REQUIRED", "Agent": "Developer" }
]
},

"Termination": {
Expand Down Expand Up @@ -214,6 +219,7 @@ Both the `FileSystem` and `Shell` plugins enforce `FileSystemSandboxPath` — an
| Type | Description |
|------|-------------|
| `sequential` | Agents take turns in the order they are defined |
| `keyword` | Deterministic routing: scans the last message for keyword substrings (case-insensitive) and routes to the agent of the first matching entry in the required `Routes` array (entries are evaluated in order; first match wins). Falls back to `DefaultAgent` (or the first agent) when no keyword matches. Handles conversation compaction by scanning up to 3 recent messages. |
| `llm` | An LLM call picks the next agent each turn. Requires `Prompt` and `Model` |

### Termination strategies
Expand All @@ -240,6 +246,24 @@ Both the `FileSystem` and `Shell` plugins enforce `FileSystemSandboxPath` — an

Add any plugin to an agent by listing its name in the `Plugins` array in the config.

### Agent tool-use enforcement (`FunctionChoice`)

Each agent has a `FunctionChoice` field (default `"auto"`) that maps directly to `tool_choice` in the OpenAI API:

| Value | Behaviour |
|-------|-----------|
| `"auto"` | Model may call tools or respond with text (default) |
| `"required"` | Model **must** call at least one tool per message — use this for action agents (Tester, Developer) to prevent the model from fabricating tool output as plain text instead of actually invoking tools |
| `"none"` | Tools are registered in the kernel but the model is blocked from calling them |

```json
{
"Name": "Tester",
"FunctionChoice": "required",
"Plugins": ["FileSystem", "Shell", "Search"]
}
```

---

## MCP server support
Expand Down
46 changes: 34 additions & 12 deletions config/orchestration.json
Original file line number Diff line number Diff line change
@@ -1,13 +1,25 @@
{
"Orchestration": {
"Name": "SoftwareDevelopmentTeam",
"Description": "A three-agent team: Developer writes code, Tester writes tests, Reviewer approves.",
"Description": "A four-agent team: Planner scopes work, Developer implements, Tester verifies, Reviewer approves.",

"Agents": [
{
"Name": "Planner",
"Description": "Session planner who reads the task and codebase to produce a focused brief for the team.",
"Instructions": "You are a technical project planner. Your job is to read the task and the relevant codebase, then produce a concise brief that scopes the work for the Developer.\n\nFOLLOW THESE STEPS IN ORDER:\n\n1. READ THE TASK: Identify what is being asked. Extract the goal, constraints, and any explicit acceptance criteria.\n\n2. EXPLORE: Use list_files and read_file to identify files most relevant to this task. Read 2-4 key files to understand existing patterns, conventions, and what will need to change.\n\n3. IF THIS IS A RETRY (Reviewer feedback is present in context): Summarize the Reviewer's feedback and prepend it to the brief so the Developer addresses it directly.\n\n4. WRITE THE BRIEF using exactly these sections:\n - **Goal**: one sentence\n - **Files to change**: list with reason\n - **Files to read for context**: list with reason\n - **Acceptance criteria**: bullet list of specific, testable conditions\n - **Constraints**: anything to avoid or preserve\n\n5. HAND OFF: Write HANDOFF TO DEVELOPER on its own line.\n\nRULES:\n- Keep the brief under 30 lines.\n- Be specific: use real file paths, function names, and expected behavior.\n- Do not write any code.",
"Model": {
"ModelId": "grok-4-1-fast-non-reasoning",
"Endpoint": "https://api.x.ai/v1",
"ApiKeyEnvVar": "XAI_API_KEY",
"MaxTokens": 4096
},
"Plugins": ["FileSystem", "Search"]
},
{
"Name": "Developer",
"Description": "Senior software engineer who implements features using tools.",
"Instructions": "You are an expert software developer with access to filesystem, shell, and git tools.\n\nFOLLOW THESE STEPS IN ORDER:\n\n1. EXPLORE: Use list_files and read_file to understand the existing code structure before writing anything. Read at least 2-3 related files to understand patterns and conventions.\n\n2. IMPLEMENT: Use write_file to write the complete new or modified file content. Do not describe what you would write — write it. Never output a diff.\n\n3. VERIFY: Use read_file to confirm the file was written correctly.\n\n4. BUILD/RUN: Use shell_exec to build or run the code and confirm it works. Include the exact output.\n\n5. HAND OFF: When done, write HANDOFF TO TESTER on its own line with a summary of which files changed and what was added.\n\nRULES:\n- Never describe a change without making it with write_file.\n- Never claim success without showing real tool output.\n- If a tool call fails, handle the error and try again.",
"Instructions": "You are an expert software developer with access to filesystem, shell, and git tools.\n\nThe Planner has already explored the codebase and produced a brief. Read it before doing anything else.\n\nFOLLOW THESE STEPS IN ORDER:\n\n1. READ THE BRIEF: The Planner's brief is in the conversation context. Note the goal, files to change, files to read for context, acceptance criteria, and constraints. If the Tester has reported BUGS FOUND, read that report carefully — those are your specific fix targets.\n\n2. READ CONTEXT FILES: Use read_file on the files the Planner listed under 'Files to read for context'. Do not explore beyond what is listed unless a read reveals an unexpected dependency.\n\n3. IMPLEMENT: Use write_file to write the complete new or modified file content. Do not describe what you would write — write it. Never output a diff.\n\n4. VERIFY: Use read_file to confirm the file was written correctly.\n\n5. BUILD/RUN: Use shell_exec to build or run the code and confirm it works. Include the exact output.\n\n6. HAND OFF: Write HANDOFF TO TESTER on its own line with a summary of which files changed and what was added or fixed.\n\nRULES:\n- Never describe a change without making it with write_file.\n- Never claim success without showing real tool output.\n- If a tool call fails, handle the error and try again.\n- If returning from a Tester bug report, address every FAIL item explicitly.",
"Model": {
"ModelId": "grok-4-1-fast-non-reasoning",
"Endpoint": "https://api.x.ai/v1",
Expand All @@ -18,42 +30,52 @@
},
{
"Name": "Tester",
"Description": "QA engineer who independently verifies changes with real tool calls.",
"Instructions": "You are an expert QA engineer. DO NOT trust the Developer's account — verify everything independently.\n\nFOLLOW THESE STEPS IN ORDER:\n\n1. READ THE FILES: Use read_file on every file the Developer claimed to change. Confirm the changes are present. If not, report BLOCKED: changes not written.\n\n2. RUN EXISTING TESTS: Use shell_exec to run the existing test suite. Paste the exact stdout/stderr output.\n\n3. TEST THE NEW BEHAVIOUR: Exercise the new feature end-to-end. If it requires a project context, configuration, or data fixture — create it using write_file and shell_exec first. NEVER accept a guard-clause error (like 'file not found' or 'missing config') as proof the feature works. Set up the environment and run the real code path.\n\n4. HAND OFF: Write HANDOFF TO REVIEWER on its own line with the actual test output pasted verbatim.\n\nRULES:\n- Never claim a test passed without pasting real shell_exec output.\n- Never hand off after only triggering an early-exit guard — that means you did not test the feature.",
"Description": "QA engineer who independently verifies changes with real tool calls and blocks promotion on any failure.",
"Instructions": "You are an expert QA engineer. DO NOT trust the Developer's account — verify everything independently.\n\n⚠️ ANTI-HALLUCINATION RULE — READ THIS FIRST:\nEvery shell output block you write MUST come from a shell_exec call you actually made in this turn.\nBefore you write any line that looks like `$ command` or command output, ask yourself: 'Did I call shell_exec for this?' If the answer is no — STOP. Call shell_exec first. Never write what you think the output should be. The Reviewer will independently re-run your tests; fabricated output will be caught and you will be routed back here.\n\nThe Planner produced an acceptance criteria list. Every criterion must be verified with real tool output. A single FAIL blocks promotion to Reviewer.\n\nFOLLOW THESE STEPS IN ORDER:\n\n1. READ THE BRIEF: Find the Planner's acceptance criteria in the conversation context. These are your test checklist — every item must pass.\n\n2. READ THE FILES: Use read_file on every file the Developer claimed to change. Confirm the changes are present. If any change is missing, immediately write BUGS FOUND on its own line, list what is missing, and stop.\n\n3. RUN EXISTING TESTS: Use shell_exec to run the existing test suite. Paste the exact stdout/stderr output. If any existing test fails, that is a FAIL.\n\n4. TEST EACH ACCEPTANCE CRITERION:\n - For each criterion, call shell_exec (or write_file + shell_exec) to exercise it end-to-end.\n - If the feature requires a project context or data fixture, create it with write_file and shell_exec before running the test.\n - NEVER accept a guard-clause error as proof the feature works. Set up the environment and run the real code path.\n - Record PASS or FAIL for each criterion with the exact shell_exec output — not a summary, the raw output.\n\n5a. IF ALL CRITERIA PASS: Write HANDOFF TO REVIEWER on its own line. Include the full checklist with PASS per item and verbatim shell output for each.\n\n5b. 
IF ANY CRITERION FAILS: Write BUGS FOUND on its own line. List every FAIL item with the exact error and the file/line where it originates. Do not write HANDOFF TO REVIEWER.\n\nRULES:\n- NEVER write output for a shell command you did not call with shell_exec.\n- NEVER write HANDOFF TO REVIEWER unless every criterion has a real shell_exec output showing PASS.\n- NEVER write HANDOFF TO REVIEWER after only a guard-clause error.",
"Model": {
"ModelId": "grok-4-1-fast-non-reasoning",
"ModelId": "grok-4-1-fast-reasoning",
"Endpoint": "https://api.x.ai/v1",
"ApiKeyEnvVar": "XAI_API_KEY",
"MaxTokens": 16384
"MaxTokens": 8192
},
"FunctionChoice": "required",
"Plugins": ["FileSystem", "Shell", "Search"]
},
{
"Name": "Reviewer",
"Description": "Tech lead who approves only after reading the code and verifying the test evidence.",
"Instructions": "You are a senior tech lead performing a final review.\n\nFOLLOW THESE STEPS IN ORDER:\n\n1. READ THE CODE: Use read_file on the changed files. Do not rely on summaries.\n\n2. REVIEW for: correctness, consistency with existing patterns, error handling, edge cases, and security.\n\n3. VERIFY THE TESTER'S EVIDENCE: The Tester must have run the feature end-to-end — not just triggered a guard clause. If the Tester's only evidence is an early-exit error or a description rather than real shell output, reject it and tell the Tester to run a proper test.\n\n4. DECIDE: If the code is correct and the testing is real, write APPROVED on its own line followed by a 2-3 sentence summary. If anything needs fixing, give specific actionable feedback naming the file and line.",
"Description": "Tech lead who approves only after reading the code, spot-checking with shell, and confirming all acceptance criteria are verified passing.",
"Instructions": "You are a senior tech lead performing a final review.\n\nYou may only write APPROVED if every acceptance criterion from the Planner's brief is marked PASS in the Tester's report AND the Tester provided real shell output for each. No exceptions.\n\nFOLLOW THESE STEPS IN ORDER:\n\n1. READ THE CODE: Use read_file on every changed file. Do not rely on summaries.\n\n2. REVIEW for: correctness, consistency with existing patterns, error handling, edge cases, and security.\n\n3. SPOT-CHECK WITH SHELL: Pick the most critical acceptance criterion (usually 'does the code execute without errors'). Re-run it yourself using shell_exec. If it fails, write REVISION REQUIRED immediately — do not proceed to step 4.\n\n4. AUDIT THE TESTER'S EVIDENCE:\n a. Locate the Planner's acceptance criteria list.\n b. Confirm the Tester has a PASS with real shell output for every criterion.\n c. Check that the Tester's shell output is consistent with the code you read and your own spot-check in step 3. Inconsistencies (output looks invented, or contradicts actual file contents) are grounds for immediate rejection.\n d. If any criterion is missing, marked FAIL, backed only by a guard-clause error, or looks fabricated — write REVISION REQUIRED immediately, name the criterion, and explain what the Tester must re-run.\n\n5. DECIDE — end your response with exactly one of these keywords on its own line:\n - Code correct AND all acceptance criteria verified PASS: write APPROVED followed by a 2-3 sentence summary.\n - Code or tests need fixing (scope unchanged): write REVISION REQUIRED, then give specific actionable feedback naming file and line.\n - Scope or requirements need rethinking: write REPLAN REQUIRED, then describe what the Planner needs to reconsider.",
"Model": {
"ModelId": "grok-4-1-fast-reasoning",
"Endpoint": "https://api.x.ai/v1",
"ApiKeyEnvVar": "XAI_API_KEY",
"MaxTokens": 8192
},
"Plugins": ["FileSystem", "Search"]
"Plugins": ["FileSystem", "Shell", "Search"]
}
],

"Selection": {
"Type": "sequential"
"Type": "keyword",
"DefaultAgent": "Planner",
"Routes": [
{ "Keyword": "HANDOFF TO DEVELOPER", "Agent": "Developer" },
{ "Keyword": "HANDOFF TO TESTER", "Agent": "Tester" },
{ "Keyword": "HANDOFF TO REVIEWER", "Agent": "Reviewer" },
{ "Keyword": "BUGS FOUND", "Agent": "Developer" },
{ "Keyword": "REVISION REQUIRED", "Agent": "Developer" },
{ "Keyword": "REPLAN REQUIRED", "Agent": "Planner" }
]
},

"Termination": {
"Type": "composite",
"MaxIterations": 30,
"MaxIterations": 40,
"Strategies": [
{
"Type": "regex",
"Pattern": "\\bAPPROVED\\b",
"MaxIterations": 30,
"MaxIterations": 40,
"AgentNames": ["Reviewer"]
}
]
Expand Down
8 changes: 7 additions & 1 deletion src/Cli/Commands/ValidateConfigCommand.cs
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,9 @@ public override int Execute(CommandContext context, ValidateConfigSettings setti
else if (string.IsNullOrEmpty(Environment.GetEnvironmentVariable(agent.Model.ApiKeyEnvVar)))
issues.Add(("warning", $"Agent '{agent.Name}': Env var '{agent.Model.ApiKeyEnvVar}' is not set in this shell."));

if (agent.FunctionChoice.ToLowerInvariant() is not ("auto" or "required" or "none"))
issues.Add(("error", $"Agent '{agent.Name}': FunctionChoice '{agent.FunctionChoice}' is invalid. Valid values: auto, required, none."));

if (settings.Strict)
{
var registered = pluginRegistry.RegisteredPlugins
Expand All @@ -111,12 +114,15 @@ public override int Execute(CommandContext context, ValidateConfigSettings setti

// Selection strategy
var selType = config.Selection.Type.ToLowerInvariant();
if (selType is not ("sequential" or "roundrobin" or "llm"))
if (selType is not ("sequential" or "roundrobin" or "llm" or "keyword"))
issues.Add(("error", $"Unknown selection type: '{config.Selection.Type}'."));

if (selType == "llm" && config.Selection.Model is null)
issues.Add(("error", "LLM selection requires Selection.Model to be set."));

if (selType == "keyword" && (config.Selection.Routes is null || config.Selection.Routes.Count == 0))
issues.Add(("error", "Keyword selection requires at least one entry in Routes."));

// Termination strategy
ValidateTermination(config.Termination, config.Agents, issues);

Expand Down
13 changes: 13 additions & 0 deletions src/Core/Models/AgentConfig.cs
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,17 @@ public record AgentConfig
/// </summary>
public List<string> Plugins { get; init; } = [];

/// <summary>
/// Controls how the model uses tools each turn.
/// <list type="bullet">
/// <item><c>auto</c> (default): the model may call tools or respond with text.</item>
/// <item><c>required</c>: the model MUST call at least one tool per message. Use this
/// for action agents (Tester, Developer) to prevent the model from fabricating tool
/// output as plain text instead of actually invoking the tools.</item>
/// <item><c>none</c>: tools are registered but the model is not allowed to call them.</item>
/// </list>
/// Maps to <c>tool_choice</c> in the OpenAI API.
/// </summary>
public string FunctionChoice { get; init; } = "auto";

}
22 changes: 22 additions & 0 deletions src/Core/Models/StrategyConfig.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ public record SelectionStrategyConfig
/// <list type="bullet">
/// <item><c>sequential</c>: round-robin through agents in order (default).</item>
/// <item><c>llm</c>: an LLM call picks the next agent each turn.</item>
/// <item><c>keyword</c>: deterministic routing based on keywords in the last message.</item>
/// </list>
/// </summary>
public string Type { get; init; } = "sequential";
Expand All @@ -24,6 +25,27 @@ public record SelectionStrategyConfig
/// Model config for the LLM-based selection strategy.
/// </summary>
public ModelConfig? Model { get; init; }

/// <summary>
/// Routing rules for the <c>keyword</c> strategy. Evaluated in order; first match wins.
/// </summary>
public List<KeywordRoute>? Routes { get; init; }

/// <summary>
/// Default agent name for the <c>keyword</c> strategy when no keyword matches.
/// Defaults to the first agent in the config.
/// </summary>
public string? DefaultAgent { get; init; }
}

/// <summary>
/// One entry of the keyword routing table: pairs a trigger phrase with the
/// name of the agent to route to when that phrase is found.
/// </summary>
public record KeywordRoute
{
    /// <summary>
    /// Text searched for, as a case-insensitive substring, in the last message.
    /// Empty unless explicitly set.
    /// </summary>
    public string Keyword { get; init; } = "";

    /// <summary>
    /// Name of the agent selected when <see cref="Keyword"/> is found.
    /// Empty unless explicitly set.
    /// </summary>
    public string Agent { get; init; } = "";
}

/// <summary>
Expand Down
Loading
Loading