From f55032bae0063662637f46b8c0d27fc4177b36fb Mon Sep 17 00:00:00 2001 From: Guillem Borrell Nogueras Date: Tue, 17 Jan 2023 18:59:28 +0100 Subject: [PATCH] Update 'Copy data to postgresql' --- Copy-data-to-postgresql.md | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/Copy-data-to-postgresql.md b/Copy-data-to-postgresql.md index 25f084f..0d66fe7 100644 --- a/Copy-data-to-postgresql.md +++ b/Copy-data-to-postgresql.md @@ -1,5 +1,5 @@ ``` -$ head -n 100 /data/raw/data.csv | iconv -c -f ASCII -t UTF-8 | target/debug/dr csv -i | target/debug/dr schema -p -n customer_base_data +$ head -n 100 /data/raw/data.csv | iconv -c -f ASCII -t UTF-8 | dr csv -i | dr schema -p -n customer_base_data CREATE TABLE IF NOT EXISTS "customer_base_data" ( ); ALTER TABLE "customer_base_data" ADD COLUMN "SWO_Region" varchar(128); @@ -25,13 +25,13 @@ ALTER TABLE "customer_base_data" ADD COLUMN "2022" real; We can pipe that with ``` -head -n 100 /data/raw/BCG\ Report\ 01\ Customer\ Base\ Data\ -\ 2023-01-16.csv | iconv -c -f ASCII -t UTF-8 | target/debug/dr csv -i | target/debug/dr schema -p -n customer_base_data | psql -U postgres -h localhost +head -n 100 data.csv | iconv -c -f ASCII -t UTF-8 | dr csv -i | dr schema -p -n customer_base_data | psql -U postgres -h localhost ``` Then we want to create a new file striping the header ``` -tail -n +2 /data/raw/data.csv | iconv -c -f ASCII -t UTF-8 | psql -U postgres -h localhost -c "\copy customer_base_data from stdin with (FORMAT 'csv', DELIMITER ',', QUOTE '\"')" +tail -n +2 data.csv | iconv -c -f ASCII -t UTF-8 | psql -U postgres -h localhost -c "\copy customer_base_data from stdin with (FORMAT 'csv', DELIMITER ',', QUOTE '\"')" ``` To change the semicolon separator with comma: @@ -50,3 +50,9 @@ psql -U postgres -h localhost -c 'ALTER TABLE "opportunities" ALTER COLUMN "Prod ``` psql -U postgres -h localhost -c 'copy (select * from opportunities limit 10) to stdout (FORMAT 'csv', HEADER)' | dr csv -i -a ``` + +And 
even convert that output to Parquet: + +``` +psql -U postgres -h localhost -c 'copy (select * from opportunities limit 10) to stdout (FORMAT 'csv', HEADER)' | dr csv -i -P opportunities.pq +```