6 Copy data to postgresql
Guillem Borrell Nogueras edited this page 1 year ago
$ head -n 100 /data/raw/data.csv | dr schema -i -p -n customer_base_data

CREATE TABLE IF NOT EXISTS "customer_base_data" (  );
ALTER TABLE "customer_base_data" ADD COLUMN "SWO_Region" varchar(128);
ALTER TABLE "customer_base_data" ADD COLUMN "CompanyID" integer;
ALTER TABLE "customer_base_data" ADD COLUMN "CDG_Code" varchar(128);
ALTER TABLE "customer_base_data" ADD COLUMN "CDG_Name" varchar(128);
ALTER TABLE "customer_base_data" ADD COLUMN "SelltoCustomerNo" varchar(128);
ALTER TABLE "customer_base_data" ADD COLUMN "Column1" varchar(128);
ALTER TABLE "customer_base_data" ADD COLUMN "ContactNo" varchar(128);
ALTER TABLE "customer_base_data" ADD COLUMN "CustomerName" varchar(128);
ALTER TABLE "customer_base_data" ADD COLUMN "Customer Region" varchar(128);
ALTER TABLE "customer_base_data" ADD COLUMN "Customer Country" varchar(128);
ALTER TABLE "customer_base_data" ADD COLUMN "Customer ProfitCenter" varchar(128);
ALTER TABLE "customer_base_data" ADD COLUMN "AccountManager" varchar(128);
ALTER TABLE "customer_base_data" ADD COLUMN "First_Inv_Date" varchar(128);
ALTER TABLE "customer_base_data" ADD COLUMN "Last_Inv_Date" varchar(128);
ALTER TABLE "customer_base_data" ADD COLUMN "Count_Inv" integer;
ALTER TABLE "customer_base_data" ADD COLUMN "2020" real;
ALTER TABLE "customer_base_data" ADD COLUMN "2021" real;
ALTER TABLE "customer_base_data" ADD COLUMN "2022" real;

We can pipe that with

head -n 100 data.csv | iconv -c -f ASCII -t UTF-8 | dr csv -i | dr schema -p -n customer_base_data | psql -U postgres -h localhost

Then we want to create a new file striping the header

tail -n +2 data.csv | iconv -c -f ASCII -t UTF-8 | psql -U postgres -h localhost -c "\copy customer_base_data from stdin with (FORMAT 'csv', DELIMITER ',', QUOTE '\"')"

To change the semicolon separator with comma:

sed --expression='s/;/,/g'

You may also need to alter some columns

psql -U postgres -h localhost -c 'ALTER TABLE "opportunities" ALTER COLUMN "ProductName" type varchar(1024);' 

dr can also be used to get and process data from postgres

psql -U postgres -h localhost -c 'copy (select * from opportunities limit 10) to stdout (FORMAT 'csv', HEADER)' | dr csv -i -a

And even convert that output to parquet

psql -U postgres -h localhost -c 'copy (select * from opportunities limit 10) to stdout (FORMAT 'csv', HEADER)' | dr csv -i -P opportunities.pq