From cfcc11a99271d1c872973aedc61d1b1955496424 Mon Sep 17 00:00:00 2001 From: Fernando Arreola Date: Tue, 20 Jun 2023 18:32:37 +0000 Subject: [PATCH 1/2] Fix build_titles. --- build_titles.malloysql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/build_titles.malloysql b/build_titles.malloysql index aa62b29..bd4b4b9 100644 --- a/build_titles.malloysql +++ b/build_titles.malloysql @@ -37,7 +37,7 @@ copy ( regexp_replace(characters,'[\[\]\"]','','g'), ',') as characters FROM read_csv_auto('data/title.principals.tsv.gz', delim='\t', quote='',header=True) as p - INNER JOIN 'titles.parquet' as t ON t.tconst = p.tconst + INNER JOIN 'data/titles.parquet' as t ON t.tconst = p.tconst ) to 'data/principals.parquet' (FORMAT 'parquet', CODEC 'ZSTD') >>>sql connection:duckdb @@ -49,6 +49,6 @@ copy ( case WHEN regexp_matches(birthYear,'[0-9]+') THEN CAST(birthYear as integer) END as birthYear, case WHEN regexp_matches(deathYear,'[0-9]+') THEN CAST(deathYear as integer) END as deathYear, FROM read_csv_auto('data/name.basics.tsv.gz', delim='\t', quote='',header=True, all_varchar=true) as n - INNER JOIN 'principals.parquet' as p ON n.nconst = p.nconst + INNER JOIN 'data/principals.parquet' as p ON n.nconst = p.nconst GROUP BY 1,2,3,4,5 -) to 'data/names.parquet' (FORMAT 'parquet', CODEC 'ZSTD') +) to 'data/names.parquet' (FORMAT 'parquet', CODEC 'ZSTD') \ No newline at end of file From 64d85800b904879f68ee6637e3943b916f3b6f17 Mon Sep 17 00:00:00 2001 From: Fernando Arreola Date: Tue, 20 Jun 2023 18:42:21 +0000 Subject: [PATCH 2/2] New line. --- build_titles.malloysql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build_titles.malloysql b/build_titles.malloysql index bd4b4b9..57bb12c 100644 --- a/build_titles.malloysql +++ b/build_titles.malloysql @@ -51,4 +51,4 @@ copy ( FROM read_csv_auto('data/name.basics.tsv.gz', delim='\t', quote='',header=True, all_varchar=true) as n INNER JOIN 'data/principals.parquet' as p ON n.nconst = p.nconst GROUP BY 1,2,3,4,5 -) to 'data/names.parquet' (FORMAT 'parquet', CODEC 'ZSTD') \ No newline at end of file +) to 'data/names.parquet' (FORMAT 'parquet', CODEC 'ZSTD')