diff --git a/build_titles.malloysql b/build_titles.malloysql index aa62b29..57bb12c 100644 --- a/build_titles.malloysql +++ b/build_titles.malloysql @@ -37,7 +37,7 @@ copy ( regexp_replace(characters,'[\[\]\"]','','g'), ',') as characters FROM read_csv_auto('data/title.principals.tsv.gz', delim='\t', quote='',header=True) as p - INNER JOIN 'titles.parquet' as t ON t.tconst = p.tconst + INNER JOIN 'data/titles.parquet' as t ON t.tconst = p.tconst ) to 'data/principals.parquet' (FORMAT 'parquet', CODEC 'ZSTD') >>>sql connection:duckdb @@ -49,6 +49,6 @@ copy ( case WHEN regexp_matches(birthYear,'[0-9]+') THEN CAST(birthYear as integer) END as birthYear, case WHEN regexp_matches(deathYear,'[0-9]+') THEN CAST(deathYear as integer) END as deathYear, FROM read_csv_auto('data/name.basics.tsv.gz', delim='\t', quote='',header=True, all_varchar=true) as n - INNER JOIN 'principals.parquet' as p ON n.nconst = p.nconst + INNER JOIN 'data/principals.parquet' as p ON n.nconst = p.nconst GROUP BY 1,2,3,4,5 ) to 'data/names.parquet' (FORMAT 'parquet', CODEC 'ZSTD')