Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.3.8
2.4.0
15 changes: 10 additions & 5 deletions internal/adapter/claude/handler_stream_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@ func TestHandleClaudeStreamRealtimeToolSafetyAcrossStructuredFormats(t *testing.
}
}

func TestHandleClaudeStreamRealtimePromotesUnclosedFencedToolExample(t *testing.T) {
func TestHandleClaudeStreamRealtimeIgnoresUnclosedFencedToolExample(t *testing.T) {
h := &Handler{}
resp := makeClaudeSSEHTTPResponse(
"data: {\"p\":\"response/content\",\"v\":\"Here is an example:\\n```json\\n{\\\"tool_calls\\\":[{\\\"name\\\":\\\"Bash\\\",\\\"input\\\":{\\\"command\\\":\\\"pwd\\\"}}]}\"}",
Expand All @@ -379,8 +379,8 @@ func TestHandleClaudeStreamRealtimePromotesUnclosedFencedToolExample(t *testing.
break
}
}
if !foundToolUse {
t.Fatalf("expected tool_use for fenced example, body=%s", rec.Body.String())
if foundToolUse {
t.Fatalf("expected no tool_use for fenced example, body=%s", rec.Body.String())
}

foundToolStop := false
Expand All @@ -391,7 +391,12 @@ func TestHandleClaudeStreamRealtimePromotesUnclosedFencedToolExample(t *testing.
break
}
}
if !foundToolStop {
t.Fatalf("expected stop_reason=tool_use, body=%s", rec.Body.String())
if foundToolStop {
t.Fatalf("expected stop_reason to remain content-only, body=%s", rec.Body.String())
}
}

// Backward-compatible alias for historical test name used in CI logs.
func TestHandleClaudeStreamRealtimePromotesUnclosedFencedToolExample(t *testing.T) {
TestHandleClaudeStreamRealtimeIgnoresUnclosedFencedToolExample(t)
}
19 changes: 12 additions & 7 deletions internal/adapter/openai/handler_toolcall_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ func TestHandleNonStreamEmbeddedToolCallExamplePromotesToolCall(t *testing.T) {
}
}

func TestHandleNonStreamFencedToolCallExamplePromotesToolCall(t *testing.T) {
func TestHandleNonStreamFencedToolCallExampleDoesNotPromoteToolCall(t *testing.T) {
h := &Handler{}
resp := makeSSEHTTPResponse(
"data: {\"p\":\"response/content\",\"v\":\"```json\\n{\\\"tool_calls\\\":[{\\\"name\\\":\\\"search\\\",\\\"input\\\":{\\\"q\\\":\\\"go\\\"}}]}\\n```\"}",
Expand All @@ -259,20 +259,25 @@ func TestHandleNonStreamFencedToolCallExamplePromotesToolCall(t *testing.T) {
out := decodeJSONBody(t, rec.Body.String())
choices, _ := out["choices"].([]any)
choice, _ := choices[0].(map[string]any)
if choice["finish_reason"] != "tool_calls" {
t.Fatalf("expected finish_reason=tool_calls, got %#v", choice["finish_reason"])
if choice["finish_reason"] == "tool_calls" {
t.Fatalf("expected fenced example to remain content-only, got finish_reason=%#v", choice["finish_reason"])
}
msg, _ := choice["message"].(map[string]any)
toolCalls, _ := msg["tool_calls"].([]any)
if len(toolCalls) != 1 {
t.Fatalf("expected one tool_call field for fenced example: %#v", msg["tool_calls"])
if len(toolCalls) != 0 {
t.Fatalf("expected no tool_call field for fenced example: %#v", msg["tool_calls"])
}
content, _ := msg["content"].(string)
if strings.Contains(content, `"tool_calls"`) {
t.Fatalf("expected raw tool_calls json stripped from content, got %q", content)
if !strings.Contains(content, `"tool_calls"`) {
t.Fatalf("expected fenced example content preserved, got %q", content)
}
}

// Backward-compatible alias for historical test name used in CI logs.
func TestHandleNonStreamFencedToolCallExamplePromotesToolCall(t *testing.T) {
TestHandleNonStreamFencedToolCallExampleDoesNotPromoteToolCall(t)
}

func TestHandleStreamToolCallInterceptsWithoutRawContentLeak(t *testing.T) {
h := &Handler{}
resp := makeSSEHTTPResponse(
Expand Down
18 changes: 12 additions & 6 deletions internal/format/openai/render_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package openai

import (
"encoding/json"
"strings"
"testing"
)

Expand Down Expand Up @@ -69,7 +70,7 @@ func TestBuildResponseObjectPromotesMixedProseToolPayloadToFunctionCall(t *testi
}
}

func TestBuildResponseObjectPromotesFencedToolPayloadToFunctionCall(t *testing.T) {
func TestBuildResponseObjectKeepsFencedToolPayloadAsText(t *testing.T) {
obj := BuildResponseObject(
"resp_test",
"gpt-4o",
Expand All @@ -80,19 +81,24 @@ func TestBuildResponseObjectPromotesFencedToolPayloadToFunctionCall(t *testing.T
)

outputText, _ := obj["output_text"].(string)
if outputText != "" {
t.Fatalf("expected output_text hidden for fenced tool payload, got %q", outputText)
if !strings.Contains(outputText, "\"tool_calls\"") {
t.Fatalf("expected output_text to preserve fenced tool payload, got %q", outputText)
}
output, _ := obj["output"].([]any)
if len(output) != 1 {
t.Fatalf("expected one function_call output item, got %#v", obj["output"])
t.Fatalf("expected one message output item, got %#v", obj["output"])
}
first, _ := output[0].(map[string]any)
if first["type"] != "function_call" {
t.Fatalf("expected function_call output type, got %#v", first["type"])
if first["type"] != "message" {
t.Fatalf("expected message output type, got %#v", first["type"])
}
}

// Backward-compatible alias for historical test name used in CI logs.
func TestBuildResponseObjectPromotesFencedToolPayloadToFunctionCall(t *testing.T) {
TestBuildResponseObjectKeepsFencedToolPayloadAsText(t)
}

func TestBuildResponseObjectReasoningOnlyFallsBackToOutputText(t *testing.T) {
obj := BuildResponseObject(
"resp_test",
Expand Down
24 changes: 19 additions & 5 deletions internal/js/helpers/stream-tool-sieve/parse.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,12 @@ const {
parseToolCallsPayload,
parseMarkupToolCalls,
parseTextKVToolCalls,
stripFencedCodeBlocks,
} = require('./parse_payload');
const { TOOL_SEGMENT_KEYWORDS } = require('./tool-keywords');

const TOOL_NAME_LOOSE_PATTERN = /[^a-z0-9]+/g;
const TOOL_MARKUP_PREFIXES = ['<tool_call', '<function_call', '<invoke'];

function extractToolNames(tools) {
if (!Array.isArray(tools) || tools.length === 0) {
Expand Down Expand Up @@ -44,6 +47,9 @@ function parseToolCallsDetailed(text, toolNames) {
return result;
}
result.sawToolCallSyntax = looksLikeToolCallSyntax(normalized);
if (shouldSkipToolCallParsingForCodeFenceExample(normalized)) {
return result;
}

const candidates = buildToolCallCandidates(normalized);
let parsed = [];
Expand Down Expand Up @@ -89,6 +95,9 @@ function parseStandaloneToolCallsDetailed(text, toolNames) {
return result;
}
result.sawToolCallSyntax = looksLikeToolCallSyntax(trimmed);
if (shouldSkipToolCallParsingForCodeFenceExample(trimmed)) {
return result;
}
const candidates = buildToolCallCandidates(trimmed);
let parsed = [];
for (const c of candidates) {
Expand Down Expand Up @@ -223,11 +232,16 @@ function resolveAllowedToolName(name, allowed, allowedCanonical) {

function looksLikeToolCallSyntax(text) {
const lower = toStringSafe(text).toLowerCase();
return lower.includes('tool_calls')
|| lower.includes('<tool_call')
|| lower.includes('<function_call')
|| lower.includes('<invoke')
|| lower.includes('function.name:');
return TOOL_SEGMENT_KEYWORDS.some((kw) => lower.includes(kw))
|| TOOL_MARKUP_PREFIXES.some((prefix) => lower.includes(prefix));
}

function shouldSkipToolCallParsingForCodeFenceExample(text) {
if (!looksLikeToolCallSyntax(text)) {
return false;
}
const stripped = stripFencedCodeBlocks(text);
return !looksLikeToolCallSyntax(stripped);
}

module.exports = {
Expand Down
18 changes: 18 additions & 0 deletions internal/js/helpers/stream-tool-sieve/parse_payload.js
Original file line number Diff line number Diff line change
Expand Up @@ -114,13 +114,31 @@ function parseToolCallsPayload(payload) {
return [];
}
if (decoded.tool_calls) {
if (isLikelyChatMessageEnvelope(decoded)) {
return [];
}
return parseToolCallList(decoded.tool_calls);
}

const one = parseToolCallItem(decoded);
return one ? [one] : [];
}

function isLikelyChatMessageEnvelope(value) {
if (!value || typeof value !== 'object' || Array.isArray(value)) {
return false;
}
if (!Object.prototype.hasOwnProperty.call(value, 'tool_calls')) {
return false;
}
const role = toStringSafe(value.role).trim().toLowerCase();
if (role === 'assistant' || role === 'tool' || role === 'user' || role === 'system') {
return true;
}
return Object.prototype.hasOwnProperty.call(value, 'tool_call_id')
|| Object.prototype.hasOwnProperty.call(value, 'content');
}

function parseMarkupToolCalls(text) {
const raw = toStringSafe(text).trim();
if (!raw) {
Expand Down
40 changes: 12 additions & 28 deletions internal/js/helpers/stream-tool-sieve/sieve.js
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
'use strict';
const { resetIncrementalToolState, noteText, insideCodeFence } = require('./state');
const {
resetIncrementalToolState,
noteText,
insideCodeFenceWithState,
} = require('./state');
const { parseStandaloneToolCallsDetailed } = require('./parse');
const { extractJSONObjectFrom } = require('./jsonscan');

const { TOOL_SEGMENT_KEYWORDS, earliestKeywordIndex } = require('./tool-keywords');
function processToolSieveChunk(state, chunk, toolNames) {
if (!state) {
return [];
Expand Down Expand Up @@ -53,7 +57,7 @@ function processToolSieveChunk(state, chunk, toolNames) {
if (!pending) {
break;
}
const start = findToolSegmentStart(pending);
const start = findToolSegmentStart(state, pending);
if (start >= 0) {
const prefix = pending.slice(0, start);
if (prefix) {
Expand Down Expand Up @@ -143,32 +147,21 @@ function findSuspiciousPrefixStart(s) {
return start;
}

function findToolSegmentStart(s) {
function findToolSegmentStart(state, s) {
if (!s) {
return -1;
}
const lower = s.toLowerCase();
const keywords = ['tool_calls', 'function.name:', '[tool_call_history]', '[tool_result_history]'];
let offset = 0;
while (true) {
let bestKeyIdx = -1;
let matchedKeyword = '';
for (const kw of keywords) {
const idx = lower.indexOf(kw, offset);
if (idx >= 0) {
if (bestKeyIdx < 0 || idx < bestKeyIdx) {
bestKeyIdx = idx;
matchedKeyword = kw;
}
}
}
const { index: bestKeyIdx, keyword: matchedKeyword } = earliestKeywordIndex(lower, TOOL_SEGMENT_KEYWORDS, offset);
if (bestKeyIdx < 0) {
return -1;
}
const keyIdx = bestKeyIdx;
const start = s.slice(0, keyIdx).lastIndexOf('{');
const candidateStart = start >= 0 ? start : keyIdx;
if (!insideCodeFence(s.slice(0, candidateStart))) {
if (!insideCodeFenceWithState(state, s.slice(0, candidateStart))) {
return candidateStart;
}
offset = keyIdx + matchedKeyword.length;
Expand All @@ -181,14 +174,7 @@ function consumeToolCapture(state, toolNames) {
return { ready: false, prefix: '', calls: [], suffix: '' };
}
const lower = captured.toLowerCase();
let keyIdx = -1;
const keywords = ['tool_calls', 'function.name:', '[tool_call_history]', '[tool_result_history]'];
for (const kw of keywords) {
const idx = lower.indexOf(kw);
if (idx >= 0 && (keyIdx < 0 || idx < keyIdx)) {
keyIdx = idx;
}
}
const { index: keyIdx } = earliestKeywordIndex(lower, TOOL_SEGMENT_KEYWORDS);
if (keyIdx < 0) {
return { ready: false, prefix: '', calls: [], suffix: '' };
}
Expand All @@ -211,7 +197,7 @@ function consumeToolCapture(state, toolNames) {
}
const prefixPart = captured.slice(0, actualStart);
const suffixPart = captured.slice(obj.end);
if (insideCodeFence((state.recentTextTail || '') + prefixPart)) {
if (insideCodeFenceWithState(state, prefixPart)) {
return {
ready: true,
prefix: captured,
Expand Down Expand Up @@ -281,7 +267,6 @@ function trimWrappingJSONFence(prefix, suffix) {
if (header && header !== 'json') {
return { prefix, suffix };
}

const leftTrimmedSuffix = (suffix || '').replace(/^[ \t\r\n]+/g, '');
if (!leftTrimmedSuffix.startsWith('```')) {
return { prefix, suffix };
Expand All @@ -292,7 +277,6 @@ function trimWrappingJSONFence(prefix, suffix) {
suffix: (suffix || '').slice(consumed + 3),
};
}

module.exports = {
processToolSieveChunk,
flushToolSieve,
Expand Down
Loading
Loading