From 542ca6ee2fe835d5dfda0e81329724b42ac37d2e Mon Sep 17 00:00:00 2001 From: SurviveM <254925152+SurviveM@users.noreply.github.com> Date: Fri, 3 Apr 2026 00:42:33 +0800 Subject: [PATCH] fix: scan full session files for keyword hits --- cli.js | 170 ++++++++++++++++++++++--------- tests/e2e/test-session-search.js | 26 ++++- tests/e2e/test-setup.js | 31 ++++++ 3 files changed, 179 insertions(+), 48 deletions(-) diff --git a/cli.js b/cli.js index 3bd11a2..8a35576 100644 --- a/cli.js +++ b/cli.js @@ -4770,25 +4770,7 @@ function extractMessageFromRecord(record, source) { return { role, text }; } -function scanSessionContentForQuery(session, tokens, options = {}) { - if (!session || !Array.isArray(tokens) || tokens.length === 0) { - return { hit: false, count: 0, snippets: [] }; - } - - const filePath = resolveSessionFilePath(session.source, session.filePath, session.sessionId); - if (!filePath) { - return { hit: false, count: 0, snippets: [] }; - } - - const maxBytes = Number.isFinite(Number(options.maxBytes)) - ? Math.max(1024, Number(options.maxBytes)) - : SESSION_CONTENT_READ_BYTES; - const headText = getFileHeadText(filePath, maxBytes); - if (!headText) { - return { hit: false, count: 0, snippets: [] }; - } - - const records = parseJsonlContent(headText); +function createSessionQueryScanState(tokens, options = {}) { const mode = normalizeQueryMode(options.mode); const roleFilter = normalizeRoleFilter(options.roleFilter); const maxMatches = Number.isFinite(Number(options.maxMatches)) @@ -4798,43 +4780,137 @@ function scanSessionContentForQuery(session, tokens, options = {}) { ? Math.max(0, Number(options.snippetLimit)) : 0; - const messages = []; + return { + tokens, + mode, + roleFilter, + maxMatches, + snippetLimit, + count: 0, + snippets: [], + leadingSystem: roleFilter !== 'system' + }; +} + +function consumeSessionQueryMessage(state, message) { + if (!state || typeof state !== 'object' || !message) { + return false; + } + + const role = normalizeRole(message.role); + const text = typeof message.text === 'string' ? message.text : ''; + if (!role || !text) { + return false; + } + + if (state.leadingSystem && (role === 'system' || isBootstrapLikeText(text))) { + return false; + } + state.leadingSystem = false; + + if (state.roleFilter !== 'all' && role !== state.roleFilter) { + return false; + } + if (!matchTokensInText(text, state.tokens, state.mode)) { + return false; + } + + state.count += 1; + if (state.snippetLimit > 0 && state.snippets.length < state.snippetLimit) { + state.snippets.push(truncateText(text)); + } + return state.count >= state.maxMatches; +} + +function buildSessionQueryScanResult(state) { + return { + hit: !!(state && state.count > 0), + count: state && Number.isFinite(state.count) ? state.count : 0, + snippets: state && Array.isArray(state.snippets) ? state.snippets : [] + }; +} + +function scanSessionContentForQueryInRecords(records, source, state) { + if (!Array.isArray(records) || !state) { + return buildSessionQueryScanResult(state); + } + for (const record of records) { - const message = extractMessageFromRecord(record, session.source); - if (!message || !message.text) { + const message = extractMessageFromRecord(record, source); + if (!message) { continue; } - messages.push(message); + if (consumeSessionQueryMessage(state, message)) { + break; + } } - const filteredMessages = roleFilter === 'system' - ? messages - : removeLeadingSystemMessage(messages); + return buildSessionQueryScanResult(state); +} - let count = 0; - const snippets = []; +async function scanSessionContentForQuery(session, tokens, options = {}) { + if (!session || !Array.isArray(tokens) || tokens.length === 0) { + return { hit: false, count: 0, snippets: [] }; + } - for (const message of filteredMessages) { - if (roleFilter !== 'all' && message.role !== roleFilter) { - continue; - } - if (!matchTokensInText(message.text, tokens, mode)) { - continue; + const filePath = resolveSessionFilePath(session.source, session.filePath, session.sessionId); + if (!filePath) { + return { hit: false, count: 0, snippets: [] }; + } + + const rawMaxBytes = Number(options.maxBytes); + const maxBytes = Number.isFinite(rawMaxBytes) && rawMaxBytes > 0 + ? Math.max(1024, rawMaxBytes) + : 0; + const state = createSessionQueryScanState(tokens, options); + let stream; + let rl; + try { + stream = fs.createReadStream(filePath, { encoding: 'utf-8' }); + rl = readline.createInterface({ input: stream, crlfDelay: Infinity }); + + let bytesRead = 0; + for await (const line of rl) { + if (maxBytes > 0 && bytesRead >= maxBytes) { + break; + } + + bytesRead += Buffer.byteLength(line, 'utf-8') + 1; + const trimmed = line.trim(); + if (!trimmed) { + continue; + } + + let record; + try { + record = JSON.parse(trimmed); + } catch (e) { + continue; + } + + const message = extractMessageFromRecord(record, session.source); + if (!message) { + continue; + } + if (consumeSessionQueryMessage(state, message)) { + break; + } } - count += 1; - if (snippetLimit > 0 && snippets.length < snippetLimit) { - snippets.push(truncateText(message.text)); + return buildSessionQueryScanResult(state); + } catch (e) { + return scanSessionContentForQueryInRecords(readJsonlRecords(filePath), session.source, state); + } finally { + if (rl) { + try { rl.close(); } catch (e) {} } - if (count >= maxMatches) { - break; + if (stream && !stream.destroyed && stream.destroy) { + try { stream.destroy(); } catch (e) {} } } - - return { hit: count > 0, count, snippets }; } -function applySessionQueryFilter(sessions, options = {}) { +async function applySessionQueryFilter(sessions, options = {}) { const tokens = Array.isArray(options.tokens) ? options.tokens : []; if (tokens.length === 0) { return sessions; @@ -4848,7 +4924,7 @@ function applySessionQueryFilter(sessions, options = {}) { : DEFAULT_CONTENT_SCAN_LIMIT; const contentScanBytes = Number.isFinite(Number(options.contentScanBytes)) ? Math.max(1024, Number(options.contentScanBytes)) - : SESSION_CONTENT_READ_BYTES; + : 0; let scanned = 0; const results = []; @@ -4866,7 +4942,7 @@ function applySessionQueryFilter(sessions, options = {}) { const shouldScanContent = scope === 'content' || scope === 'all' || !summaryHit; if (shouldScanContent && scanned < contentScanLimit) { scanned += 1; - contentInfo = scanSessionContentForQuery(session, tokens, { + contentInfo = await scanSessionContentForQuery(session, tokens, { mode, roleFilter, maxBytes: contentScanBytes, @@ -5341,7 +5417,7 @@ function listClaudeSessions(limit, options = {}) { return mergeAndLimitSessions(sessions, limit); } -function listAllSessions(params = {}) { +async function listAllSessions(params = {}) { const source = params.source === 'codex' || params.source === 'claude' ? params.source : 'all'; @@ -5383,7 +5459,7 @@ function listAllSessions(params = {}) { let result = sessions; if (hasQuery) { - result = applySessionQueryFilter(result, { + result = await applySessionQueryFilter(result, { tokens: queryTokens, queryMode: params.queryMode, queryScope: params.queryScope, @@ -5419,7 +5495,7 @@ async function listAllSessionsData(params = {}) { } } - const sessions = listAllSessions(params); + const sessions = await listAllSessions(params); const hydratedSessions = await hydrateSessionItemsExactMessageCount(sessions); const result = hydratedSessions.map((item) => { if (!item || typeof item !== 'object' || Array.isArray(item)) { diff --git a/tests/e2e/test-session-search.js b/tests/e2e/test-session-search.js index 685c9ac..c2c49c4 100644 --- a/tests/e2e/test-session-search.js +++ b/tests/e2e/test-session-search.js @@ -22,7 +22,7 @@ async function fetchHtml(port) { } module.exports = async function testSessionSearch(ctx) { - const { api, sessionId, claudeSessionId, daudeSessionId } = ctx; + const { api, sessionId, claudeSessionId, daudeSessionId, lateKeywordSessionId, lateKeywordMessage } = ctx; // ========== Basic Query Tests ========== const claudeSearch = await api('list-sessions', { source: 'claude', query: 'claudecode', limit: 20, forceRefresh: true }); @@ -102,6 +102,30 @@ module.exports = async function testSessionSearch(ctx) { snippet => typeof snippet === 'string' && snippet.includes('222') ), '222 snippets missing numeric token'); + // ========== Late Content Query Tests ========== + const lateKeywordDetail = await api('session-detail', { + source: 'codex', + sessionId: lateKeywordSessionId + }); + assert(Array.isArray(lateKeywordDetail.messages), 'late keyword session detail missing messages'); + assert(lateKeywordDetail.messages.some( + message => message && typeof message.text === 'string' && message.text.includes(lateKeywordMessage) + ), 'late keyword session detail should expose the tail message'); + + const lateKeywordSearch = await api('list-sessions', { + source: 'codex', + query: '提示 通过', + queryScope: 'content', + limit: 20, + forceRefresh: true + }); + const lateKeywordHit = lateKeywordSearch.sessions.find(item => item.sessionId === lateKeywordSessionId); + assert(lateKeywordHit, 'late keyword query should find the tail-only session'); + assert(lateKeywordHit.match && lateKeywordHit.match.hit === true, 'late keyword query missing match metadata'); + assert(Array.isArray(lateKeywordHit.match.snippets) && lateKeywordHit.match.snippets.some( + snippet => typeof snippet === 'string' && snippet.includes('提示') && snippet.includes('通过') + ), 'late keyword query snippets should include the tail message'); + // ========== Pagination Tests ========== const paged = await api('list-sessions', { source: 'claude', diff --git a/tests/e2e/test-setup.js b/tests/e2e/test-setup.js index 0c46a9b..1c46e36 100644 --- a/tests/e2e/test-setup.js +++ b/tests/e2e/test-setup.js @@ -112,6 +112,34 @@ module.exports = async function testSetup(ctx) { ]; fs.writeFileSync(daudeSessionPath, daudeRecords.map(record => JSON.stringify(record)).join('\n') + '\n', 'utf-8'); + const lateKeywordSessionId = 'late-keyword-e2e-session'; + const lateKeywordSessionPath = path.join(sessionsDir, `${lateKeywordSessionId}.jsonl`); + const lateKeywordMessage = '这是后段命中的提示,测试已经通过。'; + const lateKeywordRecords = [ + { + type: 'session_meta', + payload: { id: lateKeywordSessionId, cwd: '/tmp/late-keyword' }, + timestamp: '2025-02-10T00:00:00.000Z' + } + ]; + for (let i = 0; i < 32; i++) { + lateKeywordRecords.push({ + type: 'response_item', + payload: { + type: 'message', + role: i % 2 === 0 ? 'user' : 'assistant', + content: `padding-${String(i).padStart(2, '0')}-${'x'.repeat(12 * 1024)}` + }, + timestamp: `2025-02-10T00:00:${String(i + 1).padStart(2, '0')}.000Z` + }); + } + lateKeywordRecords.push({ + type: 'response_item', + payload: { type: 'message', role: 'assistant', content: lateKeywordMessage }, + timestamp: '2025-02-10T00:01:00.000Z' + }); + fs.writeFileSync(lateKeywordSessionPath, lateKeywordRecords.map(record => JSON.stringify(record)).join('\n') + '\n', 'utf-8'); + const claudeProjectsDir = path.join(tmpHome, '.claude', 'projects'); const claudeProjectDir = path.join(claudeProjectsDir, 'e2e-project'); fs.mkdirSync(claudeProjectDir, { recursive: true }); @@ -155,6 +183,9 @@ module.exports = async function testSetup(ctx) { sessionPath, daudeSessionId, daudeSessionPath, + lateKeywordSessionId, + lateKeywordSessionPath, + lateKeywordMessage, claudeSessionId, claudeSessionPath, noModelsUrl,