From 605869db4d914d3f2bfadc4166b767d74929bdfd Mon Sep 17 00:00:00 2001 From: "Strix (Claude Opus 4.6)" Date: Wed, 25 Mar 2026 01:07:33 +0000 Subject: [PATCH] Add dissonance tracking builtin skill Cross-references journal entries (user_wanted/agent_did) against event logs to detect intent-vs-outcome gaps: action mismatches (claimed silence but sent message), invisible failures (claimed success with errors), scope drift (work volume vs description), and phantom work. Includes analysis script (dissonance_review.py), skill docs, and 22 tests. Co-Authored-By: Claude Opus 4.6 --- .../dissonance-tracking/SKILL.md | 118 +++++++ .../scripts/dissonance_review.py | 334 ++++++++++++++++++ tests/test_dissonance_review.py | 222 ++++++++++++ 3 files changed, 674 insertions(+) create mode 100644 open_strix/builtin_skills/dissonance-tracking/SKILL.md create mode 100644 open_strix/builtin_skills/scripts/dissonance_review.py create mode 100644 tests/test_dissonance_review.py diff --git a/open_strix/builtin_skills/dissonance-tracking/SKILL.md b/open_strix/builtin_skills/dissonance-tracking/SKILL.md new file mode 100644 index 0000000..0c4c6b9 --- /dev/null +++ b/open_strix/builtin_skills/dissonance-tracking/SKILL.md @@ -0,0 +1,118 @@ +--- +name: dissonance-tracking +description: Detect and analyze gaps between what you intended to do and what you actually did, using journal entries cross-referenced with event logs. Use during periodic self-review (ticks, maintenance windows) or when you suspect behavioral drift. Do not use for one-off messaging or real-time decision-making. +allowed-tools: bash powershell read_file +--- + +# Dissonance Tracking + +Systematic detection of intent-vs-outcome gaps. You already capture intent (`user_wanted`, `agent_did`) in every journal entry. This skill teaches you to cross-reference those claims against ground truth (events.jsonl, Discord history) to find where your self-report diverges from reality. 
+ +## Why This Matters + +Self-reports are narratives, not measurements. Common failure modes: + +- **Action mismatch:** Journal says "Silence" but events.jsonl shows a `send_message` in the same session +- **Scope drift:** `user_wanted` asks for one thing, `agent_did` describes three things +- **Invisible failures:** `agent_did` claims success but events show `tool_call_error` in that session +- **Phantom work:** `agent_did` describes actions with no corresponding events +- **Understated action:** Events show significant work not mentioned in `agent_did` + +These aren't moral failures. They're calibration data. A pilot who doesn't cross-check instruments against reality eventually drifts off course. + +## Running a Dissonance Review + +### Quick check (most recent sessions) + +Use the analysis script to review the most recent sessions: + +```bash +uv run python .open_strix_builtin_skills/scripts/dissonance_review.py --last 5 +``` + +This compares the last 5 journal entries against their corresponding event logs and reports any gaps. + +### Full review (time window) + +```bash +uv run python .open_strix_builtin_skills/scripts/dissonance_review.py --hours 72 +``` + +Reviews all journal entries from the last 72 hours. 
+ +### Output + +The script appends structured records to `state/dissonance_reviews.jsonl` (one JSON object per line): + +```json +{ + "timestamp": "2026-03-25T12:00:00+00:00", + "journal_timestamp": "2026-03-25T11:55:00+00:00", + "session_id": "abc123", + "dissonance_type": "action_mismatch", + "journal_claim": "Silence — no response needed", + "event_evidence": "1 send_message call(s) in session", + "severity": "high", + "notes": "" +} +``` + +### Severity levels + +- **high:** Direct contradiction between journal claim and events (said silence, sent message; said success, got error) +- **medium:** Scope mismatch where the description outruns the evidence (elaborate `agent_did`, almost no tool calls) +- **low:** Understated action, minor omissions, or imprecise language (described 3 of 4 actions taken) + +## Dissonance Types + +### action_mismatch +Journal claims one action, events show a different one. The sharpest signal. + +**Detection:** Compare `agent_did` keywords against session events. If journal says "silence"/"no response"/"no message" but session has `send_message` events, that's a mismatch. If journal says "sent message" but no `send_message` event exists, also a mismatch. + +### scope_drift +Agent did significantly more or less than what was requested or described. + +**Detection:** Count tool calls in session vs complexity described in `agent_did`. Large discrepancy (many tools, brief description OR few tools, elaborate description) suggests drift. The script reports the two directions as `understated_action` and `phantom_work` records rather than as a separate `scope_drift` type. + +### invisible_failure +Journal claims success but events show errors in the same session. + +**Detection:** Check for `tool_call_error` events in sessions where `agent_did` doesn't mention any failure. + +### phantom_work +Journal describes actions with no corresponding events. + +**Detection:** Journal references specific tools or file operations but events.jsonl has no matching tool calls in that session. + +### understated_action +Significant event activity not reflected in journal. + +**Detection:** Session has many tool calls, file operations, or messages but journal is minimal. 
This is the least concerning type — better to understate than overstate — but persistent understatement means your self-model is incomplete. + +## Integration with Other Skills + +**Prediction Review:** Dissonance tracking asks "did I do what I said I did?" Prediction review asks "did reality match what I predicted?" They're complementary — predictions test your world model, dissonance tests your self-model. + +**Introspection:** When dissonance review finds a pattern (e.g., repeatedly understating action in certain channels), use introspection's event queries to dig deeper into the specific sessions. + +**Memory:** If a dissonance pattern is persistent (same type appearing across multiple reviews), update a memory block with the behavioral correction. The pattern itself is the learning. + +## Review Cadence + +Run dissonance review: +- During maintenance ticks (every 12-24 hours) +- After sessions where you feel uncertain about what you did +- When someone corrects your self-report (that's a confirmed dissonance — log it) + +Do NOT run dissonance review: +- Every single session (too noisy, diminishing returns) +- As a real-time decision tool (it's retrospective by design) + +## Interpreting Results + +**Zero dissonance is suspicious.** Either the review window is too short, the detection thresholds are too lenient to flag anything, or you're not doing enough to have gaps. Some dissonance is healthy — it means you're operating in uncertain territory. + +**Persistent patterns matter more than individual events.** One action mismatch is a data point. Five action mismatches in the same channel or context is a behavioral pattern that needs correction. + +**High severity + low frequency = probably fine.** Everyone has off moments. High severity + high frequency = something structural needs to change. 
# open_strix/builtin_skills/scripts/dissonance_review.py (new file in this patch)
"""Cross-reference journal entries against event logs to detect intent-vs-outcome gaps.

Reads logs/journal.jsonl and logs/events.jsonl, compares claims in each journal
entry against the actual events recorded for that session, and outputs structured
dissonance records.
"""
from __future__ import annotations

import argparse
import json
import re
import sys
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Any

UTC = timezone.utc

# Keywords that indicate "no action taken" in agent_did
SILENCE_PATTERNS = re.compile(
    r"\b(silence|no\s+(?:message|response|text|reply)\s+(?:sent|needed|warranted))\b",
    re.IGNORECASE,
)

# Keywords that indicate message sending in agent_did
SEND_PATTERNS = re.compile(
    r"\b(sent|posted|replied|responded|messaged|relayed|shared)\b",
    re.IGNORECASE,
)

# Keywords that indicate success in agent_did
SUCCESS_PATTERNS = re.compile(
    r"\b(completed|done|succeeded|delivered|shipped|fixed|resolved)\b",
    re.IGNORECASE,
)

# Words in agent_did that acknowledge a problem, including inflected forms
# ("failed", "failure", "errors", "issues"). A mention directly preceded by
# "no" or "without" is a denial, not an acknowledgement, so it is excluded.
# Only single-word negation is recognised ("without any errors" still counts
# as a mention).
_FAILURE_ACK_PATTERN = re.compile(
    r"(?<!\bno\s)(?<!\bwithout\s)"
    r"\b(?:error(?:s|ed)?|fail(?:s|ed|ing|ures?)?|issues?)\b",
    re.IGNORECASE,
)

# Used only to decide whether a "sent"-style claim is really about a reaction.
_REACT_MENTION_PATTERN = re.compile(r"\breact", re.IGNORECASE)


def load_jsonl(path: Path) -> list[dict[str, Any]]:
    """Load a JSONL file, skipping blank, malformed, or non-object lines.

    Args:
        path: Location of the JSONL file.

    Returns:
        The JSON objects found in the file, in file order. A missing file
        yields an empty list.
    """
    entries: list[dict[str, Any]] = []
    try:
        handle = path.open(encoding="utf-8")
    except FileNotFoundError:
        return entries

    with handle:
        # Iterate the file object rather than str.splitlines(): splitlines()
        # also breaks on characters such as U+2028 that may legally appear
        # unescaped inside a JSON string, which would corrupt valid records.
        for raw_line in handle:
            line = raw_line.strip()
            if not line:
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError:
                continue
            # Every consumer calls .get() on the records, so scalars and
            # arrays are treated like malformed lines.
            if isinstance(record, dict):
                entries.append(record)
    return entries


def parse_timestamp(raw: str) -> datetime:
    """Parse an ISO timestamp, normalizing to UTC.

    Args:
        raw: ISO-8601 string. A "Z" suffix is accepted, and a value without
            an offset is interpreted as UTC.

    Returns:
        A timezone-aware datetime in UTC.

    Raises:
        ValueError: If *raw* is not a valid ISO-8601 timestamp.
    """
    dt = datetime.fromisoformat(raw.replace("Z", "+00:00"))
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=UTC)
    return dt.astimezone(UTC)


def events_for_session(
    events: list[dict[str, Any]], session_id: str
) -> list[dict[str, Any]]:
    """Return the events whose ``session_id`` equals *session_id*, in order."""
    return [event for event in events if event.get("session_id") == session_id]


def _agent_did_text(journal_entry: dict[str, Any]) -> str:
    """Return the entry's ``agent_did`` as text ("" when missing or null)."""
    value = journal_entry.get("agent_did")
    return "" if value is None else str(value)


def detect_action_mismatch(
    journal_entry: dict[str, Any],
    session_events: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """Detect contradictions between journal claims and actual events.

    Two contradictions are reported, both with severity "high":

    * the journal claims silence but the session has ``send_message`` events;
    * the journal claims a message was sent but the session has neither
      ``send_message`` nor ``react`` events (and the text does not talk
      about reacting).

    Args:
        journal_entry: Journal record; only ``agent_did`` is read.
        session_events: Events belonging to the entry's session.

    Returns:
        Zero, one, or two partial dissonance records.
    """
    findings: list[dict[str, Any]] = []
    agent_did = _agent_did_text(journal_entry)

    send_events = [e for e in session_events if e.get("tool") == "send_message"]
    react_events = [e for e in session_events if e.get("tool") == "react"]

    # Claimed silence but sent messages
    if SILENCE_PATTERNS.search(agent_did) and send_events:
        findings.append({
            "dissonance_type": "action_mismatch",
            "journal_claim": _truncate(agent_did, 200),
            "event_evidence": (
                f"{len(send_events)} send_message call(s) in session"
            ),
            "severity": "high",
        })

    # Silence phrases such as "No message sent" contain send keywords
    # ("sent"). Remove them before looking for a send claim, otherwise a
    # truthful silence entry is reported as a missing message.
    text_without_silence = SILENCE_PATTERNS.sub(" ", agent_did)
    claims_send = SEND_PATTERNS.search(text_without_silence) is not None
    # "Reacted/responded with an emoji" is about reacting, not messaging.
    mentions_react = _REACT_MENTION_PATTERN.search(agent_did) is not None

    # Claimed sending but no send events
    if claims_send and not send_events and not react_events and not mentions_react:
        findings.append({
            "dissonance_type": "action_mismatch",
            "journal_claim": _truncate(agent_did, 200),
            "event_evidence": "no send_message or react events in session",
            "severity": "high",
        })

    return findings


def detect_invisible_failure(
    journal_entry: dict[str, Any],
    session_events: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """Detect sessions where journal claims success but events show errors.

    A finding is produced when ``agent_did`` contains a success keyword, the
    session has at least one event whose ``type`` contains "error", and
    ``agent_did`` does not acknowledge any error, failure, or issue.

    Args:
        journal_entry: Journal record; only ``agent_did`` is read.
        session_events: Events belonging to the entry's session.

    Returns:
        A list with at most one partial dissonance record (severity "high").
    """
    findings: list[dict[str, Any]] = []
    agent_did = _agent_did_text(journal_entry)

    error_events = [
        e for e in session_events if "error" in str(e.get("type", "")).lower()
    ]
    if not error_events or not SUCCESS_PATTERNS.search(agent_did):
        return findings
    if _FAILURE_ACK_PATTERN.search(agent_did):
        # The journal already admits something went wrong.
        return findings

    # str() keeps the join safe if an event carries a non-string type.
    error_types = [str(e.get("type", "unknown")) for e in error_events]
    findings.append({
        "dissonance_type": "invisible_failure",
        "journal_claim": _truncate(agent_did, 200),
        "event_evidence": (
            f"{len(error_events)} error event(s): "
            f"{', '.join(error_types[:3])}"
        ),
        "severity": "high",
    })
    return findings


def detect_scope_drift(
    journal_entry: dict[str, Any],
    session_events: list[dict[str, Any]],
    *,
    busy_tool_calls: int = 10,
    brief_chars: int = 50,
    idle_tool_calls: int = 1,
    elaborate_chars: int = 500,
) -> list[dict[str, Any]]:
    """Detect significant mismatch between event volume and journal description.

    Only events with ``type == "tool_call"`` are counted, and the description
    is measured by the character length of ``agent_did``.

    Args:
        journal_entry: Journal record; only ``agent_did`` is read.
        session_events: Events belonging to the entry's session.
        busy_tool_calls: Minimum tool calls for a session to count as busy.
        brief_chars: A description shorter than this is "brief".
        idle_tool_calls: Maximum tool calls for a session to count as idle.
        elaborate_chars: A description longer than this is "elaborate".

    Returns:
        Partial dissonance records: "understated_action" (severity "low")
        for a busy session with a brief description, "phantom_work"
        (severity "medium") for an idle session with an elaborate one.
    """
    findings: list[dict[str, Any]] = []
    agent_did = _agent_did_text(journal_entry)

    tool_call_count = sum(1 for e in session_events if e.get("type") == "tool_call")
    description_length = len(agent_did)

    # Many tool calls, very brief description
    if tool_call_count >= busy_tool_calls and description_length < brief_chars:
        findings.append({
            "dissonance_type": "understated_action",
            "journal_claim": _truncate(agent_did, 200),
            "event_evidence": (
                f"{tool_call_count} tool calls but only "
                f"{description_length} chars in agent_did"
            ),
            "severity": "low",
        })

    # Very few tool calls, elaborate description
    if tool_call_count <= idle_tool_calls and description_length > elaborate_chars:
        findings.append({
            "dissonance_type": "phantom_work",
            "journal_claim": _truncate(agent_did, 200),
            "event_evidence": (
                f"only {tool_call_count} tool call(s) but "
                f"{description_length} chars describing work done"
            ),
            "severity": "medium",
        })

    return findings


def _truncate(text: str, max_len: int) -> str:
    """Shorten *text* to at most *max_len* characters, ending in "..." if cut."""
    if len(text) <= max_len:
        return text
    if max_len <= 3:
        # No room for an ellipsis; a plain cut keeps the length promise.
        return text[: max(max_len, 0)]
    return text[: max_len - 3] + "..."
def review_entry(
    journal_entry: dict[str, Any],
    all_events: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """Run all detectors on a single journal entry.

    Args:
        journal_entry: Journal record; ``session_id``, ``timestamp`` and
            ``agent_did`` are read.
        all_events: Every loaded event; filtered to the entry's session here.

    Returns:
        Complete dissonance records (detector output plus ``timestamp``,
        ``journal_timestamp``, ``session_id`` and ``notes``). Empty when the
        entry has no ``session_id`` or the session has no events, because
        there is then no ground truth to compare against.
    """
    session_id = journal_entry.get("session_id", "")
    if not session_id:
        return []

    session_events = events_for_session(all_events, session_id)
    if not session_events:
        return []

    findings: list[dict[str, Any]] = [
        *detect_action_mismatch(journal_entry, session_events),
        *detect_invisible_failure(journal_entry, session_events),
        *detect_scope_drift(journal_entry, session_events),
    ]

    # One review timestamp is shared by every finding of this entry.
    reviewed_at = datetime.now(tz=UTC).isoformat()
    journal_timestamp = journal_entry.get("timestamp", "")
    for finding in findings:
        finding["timestamp"] = reviewed_at
        finding["journal_timestamp"] = journal_timestamp
        finding["session_id"] = session_id
        finding.setdefault("notes", "")

    return findings


def _positive_int(raw: str) -> int:
    """argparse ``type=`` callable accepting only integers >= 1."""
    try:
        value = int(raw)
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"expected a positive integer, got {raw!r}"
        ) from None
    if value < 1:
        raise argparse.ArgumentTypeError(
            f"expected a positive integer, got {raw!r}"
        )
    return value


def build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the dissonance review script."""
    parser = argparse.ArgumentParser(
        description="Cross-reference journal entries against event logs to detect dissonance.",
    )
    group = parser.add_mutually_exclusive_group()
    group.add_argument(
        "--last",
        # A plain int would let "--last 0" through, and entries[-0:] is the
        # whole list, i.e. the opposite of what was asked for.
        type=_positive_int,
        default=None,
        help="Review the N most recent journal entries.",
    )
    group.add_argument(
        "--hours",
        type=float,
        default=None,
        help="Review journal entries from the last N hours.",
    )
    parser.add_argument(
        "--journal",
        default="logs/journal.jsonl",
        help="Path to journal JSONL file.",
    )
    parser.add_argument(
        "--events",
        default="logs/events.jsonl",
        help="Path to events JSONL file.",
    )
    parser.add_argument(
        "--output",
        default="state/dissonance_reviews.jsonl",
        help="Path to write dissonance records.",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Print findings to stdout without writing to output file.",
    )
    return parser


def _resolve_path(raw_path: str) -> Path:
    """Expand ``~`` and anchor a relative path at the current directory."""
    path = Path(raw_path).expanduser()
    return path if path.is_absolute() else Path.cwd() / path


def _select_entries(
    journal_entries: list[dict[str, Any]],
    last: int | None,
    hours: float | None,
) -> list[dict[str, Any]]:
    """Pick the journal entries to review (default: the 10 most recent)."""
    if last is not None:
        # Guard against non-positive values from callers that bypass the CLI.
        return journal_entries[-last:] if last > 0 else []
    if hours is None:
        return journal_entries[-10:]

    cutoff = datetime.now(tz=UTC) - timedelta(hours=hours)
    selected: list[dict[str, Any]] = []
    for entry in journal_entries:
        try:
            stamped_at = parse_timestamp(entry["timestamp"])
        except (KeyError, TypeError, ValueError, AttributeError):
            # Missing, non-string, or malformed timestamp: the entry cannot
            # be placed in the window, so skip it instead of aborting.
            continue
        if stamped_at >= cutoff:
            selected.append(entry)
    return selected


def _print_report(findings: list[dict[str, Any]], entry_count: int) -> None:
    """Print each finding followed by per-type and per-severity totals."""
    from collections import Counter

    print(
        f"Found {len(findings)} dissonance(s) across "
        f"{entry_count} journal entries:"
    )
    for finding in findings:
        severity = finding["severity"].upper()
        dtype = finding["dissonance_type"]
        print(f"  [{severity}] {dtype}: {finding['event_evidence']}")

    # Summary by type, most frequent first
    print("\nBy type:")
    for dtype, count in Counter(f["dissonance_type"] for f in findings).most_common():
        print(f"  {dtype}: {count}")

    severity_counts = Counter(f["severity"] for f in findings)
    print("\nBy severity:")
    for level in ("high", "medium", "low"):
        if level in severity_counts:
            print(f"  {level}: {severity_counts[level]}")


def _append_records(output_path: Path, findings: list[dict[str, Any]]) -> None:
    """Append *findings* to *output_path* as JSONL, creating parent directories."""
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with output_path.open("a", encoding="utf-8") as fh:
        fh.writelines(json.dumps(f, ensure_ascii=True) + "\n" for f in findings)


def main() -> None:
    """CLI entry point: load logs, review selected entries, report, and save."""
    args = build_parser().parse_args()

    journal_path = _resolve_path(args.journal)
    events_path = _resolve_path(args.events)
    output_path = _resolve_path(args.output)

    journal_entries = load_jsonl(journal_path)
    all_events = load_jsonl(events_path)

    if not journal_entries:
        print("No journal entries found.", file=sys.stderr)
        return

    journal_entries = _select_entries(journal_entries, args.last, args.hours)

    all_findings: list[dict[str, Any]] = []
    for entry in journal_entries:
        all_findings.extend(review_entry(entry, all_events))

    if not all_findings:
        print(f"No dissonance detected across {len(journal_entries)} journal entries.")
        return

    _print_report(all_findings, len(journal_entries))

    if args.dry_run:
        print("\n(dry run — not writing to output file)")
        return

    _append_records(output_path, all_findings)
    print(f"\nAppended {len(all_findings)} record(s) to {output_path}")


if __name__ == "__main__":
    main()


# ---------------------------------------------------------------------------
# tests/test_dissonance_review.py (new file in this patch)
# As a standalone file this module opens with the docstring below followed by
# `from __future__ import annotations`.
# ---------------------------------------------------------------------------
"""Tests for the dissonance review script."""

import json
from datetime import datetime, timezone
from pathlib import Path

import pytest

from open_strix.builtin_skills.scripts.dissonance_review import (
    build_parser,
    detect_action_mismatch,
    detect_invisible_failure,
    detect_scope_drift,
    load_jsonl,
    review_entry,
)

UTC = timezone.utc
NOW = datetime.now(tz=UTC).isoformat()


def _journal(agent_did: str, session_id: str = "sess-1") -> dict:
    """Build a minimal journal entry with the given ``agent_did`` text."""
    return {
        "timestamp": NOW,
        "session_id": session_id,
        "channel_id": "ch-1",
        "user_wanted": "test",
        "agent_did": agent_did,
        "predictions": "",
    }


def _event(
    event_type: str = "tool_call",
    session_id: str = "sess-1",
    tool: str | None = None,
    **kwargs: object,
) -> dict:
    """Build a minimal event; ``tool`` is only set when provided."""
    d: dict = {"timestamp": NOW, "type": event_type, "session_id": session_id}
    if tool is not None:
        d["tool"] = tool
    d.update(kwargs)
    return d


class TestActionMismatch:
    """Detect contradictions between journal silence claims and actual sends."""

    def test_silence_claimed_but_message_sent(self) -> None:
        entry = _journal("Silence — no response needed")
        events = [_event(tool="send_message")]
        findings = detect_action_mismatch(entry, events)
        assert len(findings) == 1
        assert findings[0]["dissonance_type"] == "action_mismatch"
        assert findings[0]["severity"] == "high"

    def test_no_message_sent_variation(self) -> None:
        entry = _journal("No message sent — Tim heads-down")
        events = [_event(tool="send_message")]
        findings = detect_action_mismatch(entry, events)
        assert len(findings) == 1

    def test_actual_silence_no_finding(self) -> None:
        entry = _journal("Silence — no response needed")
        events = [_event(tool="read_file")]
        findings = detect_action_mismatch(entry, events)
        assert len(findings) == 0

    def test_claimed_send_but_no_events(self) -> None:
        entry = _journal("Sent substantive analysis to research channel")
        events = [_event(tool="read_file")]
        findings = detect_action_mismatch(entry, events)
        assert len(findings) == 1
        assert findings[0]["dissonance_type"] == "action_mismatch"

    def test_claimed_send_with_send_event(self) -> None:
        entry = _journal("Sent substantive analysis to research channel")
        events = [_event(tool="send_message")]
        findings = detect_action_mismatch(entry, events)
        assert len(findings) == 0

    def test_claimed_react_with_react_event(self) -> None:
        entry = _journal("Reacted with owl emoji")
        events = [_event(tool="react")]
        findings = detect_action_mismatch(entry, events)
        assert len(findings) == 0

    def test_no_silence_keywords_no_finding(self) -> None:
        entry = _journal("Updated state files and committed")
        events = [_event(tool="send_message")]
        findings = detect_action_mismatch(entry, events)
        assert len(findings) == 0


class TestInvisibleFailure:
    """Detect sessions where journal claims success but events show errors."""

    def test_success_claimed_with_errors(self) -> None:
        entry = _journal("Completed the update successfully")
        events = [
            _event(tool="edit_file"),
            _event(event_type="tool_call_error", error_type="permission_denied"),
        ]
        findings = detect_invisible_failure(entry, events)
        assert len(findings) == 1
        assert findings[0]["dissonance_type"] == "invisible_failure"
        assert findings[0]["severity"] == "high"

    def test_success_with_acknowledged_error(self) -> None:
        entry = _journal("Fixed the error in the config file after initial failure")
        events = [
            _event(event_type="tool_call_error", error_type="parse_error"),
            _event(tool="edit_file"),
        ]
        findings = detect_invisible_failure(entry, events)
        # Should not flag because agent_did mentions the error
        assert len(findings) == 0

    def test_no_success_keywords_no_finding(self) -> None:
        entry = _journal("Tried to update but ran into issues")
        events = [
            _event(event_type="tool_call_error", error_type="timeout"),
        ]
        findings = detect_invisible_failure(entry, events)
        assert len(findings) == 0

    def test_success_no_errors(self) -> None:
        entry = _journal("Completed the full migration")
        events = [_event(tool="edit_file"), _event(tool="write_file")]
        findings = detect_invisible_failure(entry, events)
        assert len(findings) == 0


class TestScopeDrift:
    """Detect mismatch between event volume and journal description length."""

    def test_many_tools_brief_description(self) -> None:
        entry = _journal("Done.")
        events = [_event(tool=f"tool_{i}") for i in range(15)]
        findings = detect_scope_drift(entry, events)
        assert len(findings) == 1
        assert findings[0]["dissonance_type"] == "understated_action"
        assert findings[0]["severity"] == "low"

    def test_few_tools_elaborate_description(self) -> None:
        entry = _journal("A" * 600)
        events = [_event(tool="read_file")]
        findings = detect_scope_drift(entry, events)
        assert len(findings) == 1
        assert findings[0]["dissonance_type"] == "phantom_work"
        assert findings[0]["severity"] == "medium"

    def test_proportional_no_finding(self) -> None:
        entry = _journal("Updated the config and committed changes to git")
        events = [_event(tool="edit_file"), _event(tool="bash")]
        findings = detect_scope_drift(entry, events)
        assert len(findings) == 0


class TestReviewEntry:
    """Integration test for the full review pipeline."""

    def test_multiple_findings_single_entry(self) -> None:
        # Silence claimed but message sent AND errors present
        entry = _journal("Silence — completed without issues")
        events = [
            _event(tool="send_message"),
            _event(event_type="tool_call_error", error_type="timeout"),
        ]
        findings = review_entry(entry, events)
        # Should find action_mismatch (silence + send) and invisible_failure (completed + error)
        types = {f["dissonance_type"] for f in findings}
        assert "action_mismatch" in types
        assert "invisible_failure" in types

    def test_no_session_id_skipped(self) -> None:
        entry = {"agent_did": "test", "timestamp": NOW}
        events = [_event()]
        findings = review_entry(entry, events)
        assert len(findings) == 0

    def test_no_matching_events_skipped(self) -> None:
        entry = _journal("Silence — no response", session_id="sess-1")
        events = [_event(session_id="sess-other", tool="send_message")]
        findings = review_entry(entry, events)
        assert len(findings) == 0

    def test_findings_have_metadata(self) -> None:
        entry = _journal("Silence — no response needed")
        events = [_event(tool="send_message")]
        findings = review_entry(entry, events)
        assert len(findings) == 1
        f = findings[0]
        assert "timestamp" in f
        assert f["session_id"] == "sess-1"
        assert f["journal_timestamp"] == NOW


class TestBuildParser:
    """The --last option must reject values that would select the wrong slice."""

    def test_last_rejects_zero(self) -> None:
        with pytest.raises(SystemExit):
            build_parser().parse_args(["--last", "0"])

    def test_last_rejects_negative(self) -> None:
        with pytest.raises(SystemExit):
            build_parser().parse_args(["--last", "-3"])

    def test_last_accepts_positive(self) -> None:
        assert build_parser().parse_args(["--last", "5"]).last == 5


class TestLoadJsonl:
    """Test JSONL loading with edge cases."""

    def test_loads_valid_file(self, tmp_path: Path) -> None:
        p = tmp_path / "test.jsonl"
        p.write_text('{"a": 1}\n{"b": 2}\n', encoding="utf-8")
        result = load_jsonl(p)
        assert len(result) == 2

    def test_skips_blank_lines(self, tmp_path: Path) -> None:
        p = tmp_path / "test.jsonl"
        p.write_text('{"a": 1}\n\n{"b": 2}\n\n', encoding="utf-8")
        result = load_jsonl(p)
        assert len(result) == 2

    def test_skips_malformed_lines(self, tmp_path: Path) -> None:
        p = tmp_path / "test.jsonl"
        p.write_text('{"a": 1}\nnot json\n{"b": 2}\n', encoding="utf-8")
        result = load_jsonl(p)
        assert len(result) == 2

    def test_missing_file_returns_empty(self, tmp_path: Path) -> None:
        p = tmp_path / "nonexistent.jsonl"
        result = load_jsonl(p)
        assert result == []