Update 'Copy data to postgresql'

Guillem Borrell Nogueras 2023-01-17 18:59:28 +01:00
parent c9ddf392a7
commit f55032bae0

@ -1,5 +1,5 @@
``` ```
$ head -n 100 /data/raw/data.csv | iconv -c -f ASCII -t UTF-8 | target/debug/dr csv -i | target/debug/dr schema -p -n customer_base_data $ head -n 100 /data/raw/data.csv | iconv -c -f ASCII -t UTF-8 | dr csv -i | dr schema -p -n customer_base_data
CREATE TABLE IF NOT EXISTS "customer_base_data" ( ); CREATE TABLE IF NOT EXISTS "customer_base_data" ( );
ALTER TABLE "customer_base_data" ADD COLUMN "SWO_Region" varchar(128); ALTER TABLE "customer_base_data" ADD COLUMN "SWO_Region" varchar(128);
@ -25,13 +25,13 @@ ALTER TABLE "customer_base_data" ADD COLUMN "2022" real;
We can pipe that into psql with We can pipe that into psql with
``` ```
head -n 100 /data/raw/BCG\ Report\ 01\ Customer\ Base\ Data\ -\ 2023-01-16.csv | iconv -c -f ASCII -t UTF-8 | target/debug/dr csv -i | target/debug/dr schema -p -n customer_base_data | psql -U postgres -h localhost head -n 100 data.csv | iconv -c -f ASCII -t UTF-8 | dr csv -i | dr schema -p -n customer_base_data | psql -U postgres -h localhost
``` ```
Then we want to create a new file stripping the header Then we want to create a new file stripping the header
``` ```
tail -n +2 /data/raw/data.csv | iconv -c -f ASCII -t UTF-8 | psql -U postgres -h localhost -c "\copy customer_base_data from stdin with (FORMAT 'csv', DELIMITER ',', QUOTE '\"')" tail -n +2 data.csv | iconv -c -f ASCII -t UTF-8 | psql -U postgres -h localhost -c "\copy customer_base_data from stdin with (FORMAT 'csv', DELIMITER ',', QUOTE '\"')"
``` ```
To replace the semicolon separator with a comma: To replace the semicolon separator with a comma:
@ -50,3 +50,9 @@ psql -U postgres -h localhost -c 'ALTER TABLE "opportunities" ALTER COLUMN "Prod
``` ```
psql -U postgres -h localhost -c 'copy (select * from opportunities limit 10) to stdout (FORMAT 'csv', HEADER)' | dr csv -i -a psql -U postgres -h localhost -c 'copy (select * from opportunities limit 10) to stdout (FORMAT 'csv', HEADER)' | dr csv -i -a
``` ```
And even convert that output to Parquet:
```
psql -U postgres -h localhost -c 'copy (select * from opportunities limit 10) to stdout (FORMAT 'csv', HEADER)' | dr csv -i -P opportunities.pq
```