From a0a1f0e178790142c4acb43e0b938d0d407efbbf Mon Sep 17 00:00:00 2001 From: Dan Lynch Date: Wed, 8 Apr 2026 00:58:07 +0000 Subject: [PATCH 1/2] fix: escape control characters in build_scan_json() for multi-line tokens The JSON escape loop in build_scan_json() only handled '"' and '\', but not '\n', '\r', '\t'. When token text contains literal newlines (e.g., dollar-quoted function bodies like $$\nBEGIN\n...$$), the raw JSON string had unescaped control characters, causing JSON.parse to throw 'Bad control character in string literal'. This adds the standard JSON escape sequences for \n, \r, and \t in the token text escaping loop. Other control characters below U+0020 (e.g., \b, \f) are not handled by this change and are still emitted unescaped. New tests added for multi-line dollar-quoted strings, tabs, and multi-line C-style comments. --- full/src/wasm_wrapper.c | 13 +++++++++++- full/test/scan.test.js | 45 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 56 insertions(+), 2 deletions(-) diff --git a/full/src/wasm_wrapper.c b/full/src/wasm_wrapper.c index ee3d5f8..282c8f6 100644 --- a/full/src/wasm_wrapper.c +++ b/full/src/wasm_wrapper.c @@ -379,8 +379,19 @@ static char* build_scan_json(PgQuery__ScanResult *scan_result, const char* origi char c = token_text[j]; if (c == '"' || c == '\\') { escaped_text[escaped_pos++] = '\\'; + escaped_text[escaped_pos++] = c; + } else if (c == '\n') { + escaped_text[escaped_pos++] = '\\'; + escaped_text[escaped_pos++] = 'n'; + } else if (c == '\r') { + escaped_text[escaped_pos++] = '\\'; + escaped_text[escaped_pos++] = 'r'; + } else if (c == '\t') { + escaped_text[escaped_pos++] = '\\'; + escaped_text[escaped_pos++] = 't'; + } else { + escaped_text[escaped_pos++] = c; } - escaped_text[escaped_pos++] = c; } escaped_text[escaped_pos] = '\0'; diff --git a/full/test/scan.test.js b/full/test/scan.test.js index 7f86b95..ad15f45 100644 --- a/full/test/scan.test.js +++ b/full/test/scan.test.js @@ -225,5 +225,48 @@ describe("Query Scanning", () => { assert.equal(typeof result1.version, "number"); 
assert.ok(result1.version > 0); }); + + it("should handle multi-line dollar-quoted strings without JSON errors", () => { + // This tests that the JSON serialization properly escapes control + // characters (newlines, tabs) inside token text fields. + const sql = `CREATE FUNCTION test() RETURNS void AS $$ +BEGIN + RAISE NOTICE 'hello'; +END; +$$ LANGUAGE plpgsql`; + + const result = query.scanSync(sql); + assert.equal(typeof result, "object"); + assert.ok(Array.isArray(result.tokens)); + assert.ok(result.tokens.length > 0); + + // Find the dollar-quoted string token + const dollarToken = result.tokens.find(t => t.text.includes('BEGIN')); + assert.ok(dollarToken, "should have a token containing the function body"); + assert.ok(dollarToken.text.includes('\n'), "token text should contain newlines"); + }); + + it("should handle multi-line tokens with tabs", () => { + const sql = "SELECT $$line1\n\tindented\nline3$$"; + + const result = query.scanSync(sql); + assert.equal(typeof result, "object"); + assert.ok(Array.isArray(result.tokens)); + + const dollarToken = result.tokens.find(t => t.text.includes('indented')); + assert.ok(dollarToken, "should have a token containing the tabbed content"); + }); + + it("should handle multi-line SQL comments", () => { + const sql = "SELECT 1; /* multi\nline\ncomment */ SELECT 2"; + + const result = query.scanSync(sql); + assert.equal(typeof result, "object"); + assert.ok(Array.isArray(result.tokens)); + + const commentToken = result.tokens.find(t => t.tokenName === "C_COMMENT"); + assert.ok(commentToken, "should have a C_COMMENT token"); + assert.ok(commentToken.text.includes('\n'), "comment text should contain newlines"); + }); }); -}); \ No newline at end of file +}); From c09d3b8cb4433d32f826a5e5c8ec977d8c37104a Mon Sep 17 00:00:00 2001 From: Dan Lynch Date: Wed, 8 Apr 2026 01:34:10 +0000 Subject: [PATCH 2/2] test: use template literals with actual newlines for readability --- full/test/scan.test.js | 37 
++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/full/test/scan.test.js b/full/test/scan.test.js index ad15f45..24d7b4c 100644 --- a/full/test/scan.test.js +++ b/full/test/scan.test.js @@ -227,46 +227,53 @@ describe("Query Scanning", () => { }); it("should handle multi-line dollar-quoted strings without JSON errors", () => { - // This tests that the JSON serialization properly escapes control - // characters (newlines, tabs) inside token text fields. + // Without the fix, scanSync throws: + // "Bad control character in string literal" + // because build_scan_json() doesn't escape \n in the token text. const sql = `CREATE FUNCTION test() RETURNS void AS $$ BEGIN RAISE NOTICE 'hello'; END; $$ LANGUAGE plpgsql`; - + const result = query.scanSync(sql); assert.equal(typeof result, "object"); assert.ok(Array.isArray(result.tokens)); assert.ok(result.tokens.length > 0); - - // Find the dollar-quoted string token + + // The dollar-quoted body spans multiple lines const dollarToken = result.tokens.find(t => t.text.includes('BEGIN')); assert.ok(dollarToken, "should have a token containing the function body"); - assert.ok(dollarToken.text.includes('\n'), "token text should contain newlines"); + assert.ok(dollarToken.text.includes('\n'), "token text should preserve newlines"); }); - it("should handle multi-line tokens with tabs", () => { - const sql = "SELECT $$line1\n\tindented\nline3$$"; - + it("should handle dollar-quoted tokens with tabs", () => { + // Tab characters also break JSON.parse when unescaped. 
+ const sql = `SELECT $$line1 + indented +line3$$`; + const result = query.scanSync(sql); assert.equal(typeof result, "object"); assert.ok(Array.isArray(result.tokens)); - + const dollarToken = result.tokens.find(t => t.text.includes('indented')); assert.ok(dollarToken, "should have a token containing the tabbed content"); }); - it("should handle multi-line SQL comments", () => { - const sql = "SELECT 1; /* multi\nline\ncomment */ SELECT 2"; - + it("should handle multi-line block comments", () => { + // C-style block comments spanning multiple lines hit the same bug. + const sql = `SELECT 1; /* multi +line +comment */ SELECT 2`; + const result = query.scanSync(sql); assert.equal(typeof result, "object"); assert.ok(Array.isArray(result.tokens)); - + const commentToken = result.tokens.find(t => t.tokenName === "C_COMMENT"); assert.ok(commentToken, "should have a C_COMMENT token"); - assert.ok(commentToken.text.includes('\n'), "comment text should contain newlines"); + assert.ok(commentToken.text.includes('\n'), "comment text should preserve newlines"); }); }); });