6
Copy data to postgresql
Guillem Borrell Nogueras edited this page 2023-01-17 19:13:26 +01:00
$ head -n 100 /data/raw/data.csv | dr schema -i -p -n customer_base_data
CREATE TABLE IF NOT EXISTS "customer_base_data" ( );
ALTER TABLE "customer_base_data" ADD COLUMN "SWO_Region" varchar(128);
ALTER TABLE "customer_base_data" ADD COLUMN "CompanyID" integer;
ALTER TABLE "customer_base_data" ADD COLUMN "CDG_Code" varchar(128);
ALTER TABLE "customer_base_data" ADD COLUMN "CDG_Name" varchar(128);
ALTER TABLE "customer_base_data" ADD COLUMN "SelltoCustomerNo" varchar(128);
ALTER TABLE "customer_base_data" ADD COLUMN "Column1" varchar(128);
ALTER TABLE "customer_base_data" ADD COLUMN "ContactNo" varchar(128);
ALTER TABLE "customer_base_data" ADD COLUMN "CustomerName" varchar(128);
ALTER TABLE "customer_base_data" ADD COLUMN "Customer Region" varchar(128);
ALTER TABLE "customer_base_data" ADD COLUMN "Customer Country" varchar(128);
ALTER TABLE "customer_base_data" ADD COLUMN "Customer ProfitCenter" varchar(128);
ALTER TABLE "customer_base_data" ADD COLUMN "AccountManager" varchar(128);
ALTER TABLE "customer_base_data" ADD COLUMN "First_Inv_Date" varchar(128);
ALTER TABLE "customer_base_data" ADD COLUMN "Last_Inv_Date" varchar(128);
ALTER TABLE "customer_base_data" ADD COLUMN "Count_Inv" integer;
ALTER TABLE "customer_base_data" ADD COLUMN "2020" real;
ALTER TABLE "customer_base_data" ADD COLUMN "2021" real;
ALTER TABLE "customer_base_data" ADD COLUMN "2022" real;
We can pipe that with
head -n 100 data.csv | iconv -c -f ASCII -t UTF-8 | dr csv -i | dr schema -p -n customer_base_data | psql -U postgres -h localhost
Then we want to create a new file striping the header
tail -n +2 data.csv | iconv -c -f ASCII -t UTF-8 | psql -U postgres -h localhost -c "\copy customer_base_data from stdin with (FORMAT 'csv', DELIMITER ',', QUOTE '\"')"
To change the semicolon separator with comma:
sed --expression='s/;/,/g'
You may also need to alter some columns
psql -U postgres -h localhost -c 'ALTER TABLE "opportunities" ALTER COLUMN "ProductName" type varchar(1024);'
dr
can also be used to get and process data from postgres
psql -U postgres -h localhost -c 'copy (select * from opportunities limit 10) to stdout (FORMAT 'csv', HEADER)' | dr csv -i -a
And even convert that output to parquet
psql -U postgres -h localhost -c 'copy (select * from opportunities limit 10) to stdout (FORMAT 'csv', HEADER)' | dr csv -i -P opportunities.pq