From a660d22f85e1f290515067746bf862a39a076744 Mon Sep 17 00:00:00 2001 From: Norman Koch Date: Fri, 8 Sep 2023 09:35:57 +0200 Subject: [PATCH 01/11] fixed typo in error msg, return parseInt for number, allow newlines after number --- bibtexParse.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bibtexParse.js b/bibtexParse.js index 4bac1ac..6dd6497 100644 --- a/bibtexParse.js +++ b/bibtexParse.js @@ -173,12 +173,12 @@ return this.value_quotes(); } else { var k = this.key(); - if (k.match("^[0-9]+$")) - return k; + if (k.match("^[0-9]+\n*$")) + return parseInt(k); else if (this.months.indexOf(k.toLowerCase()) >= 0) return k.toLowerCase(); else - throw "Value expected: single_value" + this.input.substring(start) + ' for key: ' + k; + throw "Value expected: single_value " + this.input.substring(start) + ' for key: ' + k; }; }; From 290e8a9b72f9bc46ce6ac0f97f6aaba0faeeba2a Mon Sep 17 00:00:00 2001 From: Norman Koch Date: Fri, 8 Sep 2023 09:50:14 +0200 Subject: [PATCH 02/11] keep value types --- bibtexParse.js | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/bibtexParse.js b/bibtexParse.js index 6dd6497..3342189 100644 --- a/bibtexParse.js +++ b/bibtexParse.js @@ -190,7 +190,11 @@ this.match("#"); values.push(this.single_value()); }; - return values.join(""); + if(values.length == 1) { + return values[0]; + } else { + return values.join(""); + } }; this.key = function(optional) { From 604424093b8feda0457b04ab34ba976cc51eb2a7 Mon Sep 17 00:00:00 2001 From: Norman Koch Date: Fri, 8 Sep 2023 10:56:55 +0200 Subject: [PATCH 03/11] made parsing more resilient --- bibtexParse.js | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/bibtexParse.js b/bibtexParse.js index 3342189..04eaf7a 100644 --- a/bibtexParse.js +++ b/bibtexParse.js @@ -315,6 +315,70 @@ exports.toJSON = function(bibtex) { var b = new BibtexParser(); + function cleanBibtex(bibtex_code) { + // searches for lines that should begin with @, but dont, and add @ for resiliency + var at_regex = /(\s+|^)(\w+\{)/ig; + bibtex_code = bibtex_code.replace(at_regex, (match) => `${match[1].replace(/[\n\r\R]*/, "")}\n@${match.replace(/[\n\r\R]*/, "")}`); + + // remove spaces before @ at beginning of line + var space_regex = /^\s*@/g; + bibtex_code = bibtex_code.replace(space_regex, "@"); + + var search_missing_commas_regex_stage_one = /(.*=.*)/g; + bibtex_code = bibtex_code.replace(search_missing_commas_regex_stage_one, "$1,"); + + var search_missing_commas_regex_stage_two = /(,\s*)+/g; + bibtex_code = bibtex_code.replace(search_missing_commas_regex_stage_two, ",\n"); + + var find_unfinished_double_quotes = /(=\s*"[^"]*?),+/g; + bibtex_code = bibtex_code.replace(find_unfinished_double_quotes, `$1",`); + + var find_unfinished_bracket_quotes = /(=\s*\{[^\}]*?),+/g; + bibtex_code = bibtex_code.replace(find_unfinished_bracket_quotes, `$1},`); + + var find_missing_quotation = /(.*=)([^\{"'].*?[^\}"']),/g; + bibtex_code = bibtex_code.replace(find_missing_quotation, `$1{$2},`); + + var find_missing_bracket_quote_start = /(.*=)([^\{].*?\}),/g; + bibtex_code = bibtex_code.replace(find_missing_bracket_quote_start, `$1{$2,`); + + var find_missing_article_type = /^\s*(?<=@)([a-zA-Z0-9_]*)/g; + if(bibtex_code.match(find_missing_article_type)) { + bibtex_code = bibtex_code.replace(find_missing_article_type, `@article{$1`); + console.log("Found no article type, added @article"); + } + + var lines = bibtex_code.split(/[\n\r]/); + + function line_could_be_valid_syntax (line) { + if(line.match("^\s*@")) { + return true; + } else if(line.match("=")) { + return true; + } else if(line.match("}")) { + return true; + } else if(line.match(/^\s*$/)) { + return true; + } else { + return false; + } + } + + var new_lines = []; + + for (var i = 0; i < lines.length; i++) { + if(line_could_be_valid_syntax(lines[i])) { + new_lines.push(lines[i]); + } else { + console.warn("The line " + lines[i] + " does not seem to contain a valid bibtex line"); + } + } + + return new_lines.join("\n"); + } + + bibtex = cleanBibtex(bibtex); + b.setInput(bibtex); b.bibtex(); return b.entries; From ac044b94d17ef5379bb31e91ac10f4c51d4e70b8 Mon Sep 17 00:00:00 2001 From: Norman Koch Date: Fri, 8 Sep 2023 11:02:12 +0200 Subject: [PATCH 04/11] example_site --- example_site/index.html | 157 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 157 insertions(+) create mode 100644 example_site/index.html diff --git a/example_site/index.html b/example_site/index.html new file mode 100644 index 0000000..18cd43f --- /dev/null +++ b/example_site/index.html @@ -0,0 +1,157 @@ + + + + BibTeX Parser + + + + +

BibTeX to JSON Parser

+ + + + + +

+    
+ + + + + From 9bd904bb8dbb61aff0cbada227c9e6f54a3d95f4 Mon Sep 17 00:00:00 2001 From: Norman Koch Date: Fri, 8 Sep 2023 11:09:17 +0200 Subject: [PATCH 05/11] fixed return value --- example_site/index.html | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/example_site/index.html b/example_site/index.html index 18cd43f..fd0f882 100644 --- a/example_site/index.html +++ b/example_site/index.html @@ -132,25 +132,28 @@

BibTeX to JSON Parser

From 0bc8310169b5bc747a72a495697ea4c366bcd164 Mon Sep 17 00:00:00 2001 From: Norman Koch Date: Fri, 8 Sep 2023 11:20:55 +0200 Subject: [PATCH 06/11] toBibtex --- example_site/index.html | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/example_site/index.html b/example_site/index.html index fd0f882..e5853cc 100644 --- a/example_site/index.html +++ b/example_site/index.html @@ -128,7 +128,10 @@

BibTeX to JSON Parser

+

JSON


+    

BibTeX

+