From df58640a9b32ade16cbbec44416b6d11c572a16a Mon Sep 17 00:00:00 2001 From: Dan Lynch Date: Wed, 8 Apr 2026 01:06:25 +0000 Subject: [PATCH 1/2] test: add failing tests for multi-line token JSON serialization bug These tests demonstrate that scanSync throws 'Bad control character in string literal' when scanning SQL with multi-line tokens (dollar-quoted function bodies, tabs, multi-line C-style comments). The root cause is that build_scan_json() in wasm_wrapper.c only escapes '"' and '\\' in token text, but not '\n', '\r', '\t'. When token text contains literal newlines, the JSON output has unescaped control chars that break JSON.parse. These tests are expected to FAIL on this branch (no fix applied). See PR #147 for the fix. --- full/test/scan.test.js | 46 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/full/test/scan.test.js b/full/test/scan.test.js index 7f86b95..ac965e4 100644 --- a/full/test/scan.test.js +++ b/full/test/scan.test.js @@ -225,5 +225,49 @@ describe("Query Scanning", () => { assert.equal(typeof result1.version, "number"); assert.ok(result1.version > 0); }); + + it("should handle multi-line dollar-quoted strings without JSON errors", () => { + // This tests that the JSON serialization properly escapes control + // characters (newlines, tabs) inside token text fields. + // Without a fix, scanSync throws: "Bad control character in string literal" + const sql = `CREATE FUNCTION test() RETURNS void AS $$ +BEGIN + RAISE NOTICE 'hello'; +END; +$$ LANGUAGE plpgsql`; + + const result = query.scanSync(sql); + assert.equal(typeof result, "object"); + assert.ok(Array.isArray(result.tokens)); + assert.ok(result.tokens.length > 0); + + // Find the dollar-quoted string token + const dollarToken = result.tokens.find(t => t.text.includes('BEGIN')); + assert.ok(dollarToken, "should have a token containing the function body"); + assert.ok(dollarToken.text.includes('\n'), "token text should contain newlines"); + }); + + it("should handle multi-line tokens with tabs", () => { + const sql = "SELECT $$line1\n\tindented\nline3$$"; + + const result = query.scanSync(sql); + assert.equal(typeof result, "object"); + assert.ok(Array.isArray(result.tokens)); + + const dollarToken = result.tokens.find(t => t.text.includes('indented')); + assert.ok(dollarToken, "should have a token containing the tabbed content"); + }); + + it("should handle multi-line SQL comments", () => { + const sql = "SELECT 1; /* multi\nline\ncomment */ SELECT 2"; + + const result = query.scanSync(sql); + assert.equal(typeof result, "object"); + assert.ok(Array.isArray(result.tokens)); + + const commentToken = result.tokens.find(t => t.tokenName === "C_COMMENT"); + assert.ok(commentToken, "should have a C_COMMENT token"); + assert.ok(commentToken.text.includes('\n'), "comment text should contain newlines"); + }); }); -}); \ No newline at end of file +}); From 8948e2bc01e4a7415257d5400f70c22e411f1cc5 Mon Sep 17 00:00:00 2001 From: Dan Lynch Date: Wed, 8 Apr 2026 01:33:15 +0000 Subject: [PATCH 2/2] test: use template literals with actual newlines for readability --- full/test/scan.test.js | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/full/test/scan.test.js b/full/test/scan.test.js index ac965e4..24d7b4c 100644 --- a/full/test/scan.test.js +++ b/full/test/scan.test.js @@ -227,47 +227,53 @@ describe("Query Scanning", () => { }); it("should handle multi-line dollar-quoted strings without JSON errors", () => { - // This tests that the JSON serialization properly escapes control - // characters (newlines, tabs) inside token text fields. - // Without a fix, scanSync throws: "Bad control character in string literal" + // Without the fix, scanSync throws: + // "Bad control character in string literal" + // because build_scan_json() doesn't escape \n in the token text. const sql = `CREATE FUNCTION test() RETURNS void AS $$ BEGIN RAISE NOTICE 'hello'; END; $$ LANGUAGE plpgsql`; - + const result = query.scanSync(sql); assert.equal(typeof result, "object"); assert.ok(Array.isArray(result.tokens)); assert.ok(result.tokens.length > 0); - - // Find the dollar-quoted string token + + // The dollar-quoted body spans multiple lines const dollarToken = result.tokens.find(t => t.text.includes('BEGIN')); assert.ok(dollarToken, "should have a token containing the function body"); - assert.ok(dollarToken.text.includes('\n'), "token text should contain newlines"); + assert.ok(dollarToken.text.includes('\n'), "token text should preserve newlines"); }); - it("should handle multi-line tokens with tabs", () => { - const sql = "SELECT $$line1\n\tindented\nline3$$"; - + it("should handle dollar-quoted tokens with tabs", () => { + // Tab characters also break JSON.parse when unescaped. + const sql = `SELECT $$line1 + indented +line3$$`; + const result = query.scanSync(sql); assert.equal(typeof result, "object"); assert.ok(Array.isArray(result.tokens)); - + const dollarToken = result.tokens.find(t => t.text.includes('indented')); assert.ok(dollarToken, "should have a token containing the tabbed content"); }); - it("should handle multi-line SQL comments", () => { - const sql = "SELECT 1; /* multi\nline\ncomment */ SELECT 2"; - + it("should handle multi-line block comments", () => { + // C-style block comments spanning multiple lines hit the same bug. + const sql = `SELECT 1; /* multi +line +comment */ SELECT 2`; + const result = query.scanSync(sql); assert.equal(typeof result, "object"); assert.ok(Array.isArray(result.tokens)); - + const commentToken = result.tokens.find(t => t.tokenName === "C_COMMENT"); assert.ok(commentToken, "should have a C_COMMENT token"); - assert.ok(commentToken.text.includes('\n'), "comment text should contain newlines"); + assert.ok(commentToken.text.includes('\n'), "comment text should preserve newlines"); }); }); });