Skip to content

Instantly share code, notes, and snippets.

@zhangxu
Last active October 29, 2015 18:13
Show Gist options
  • Save zhangxu/493eef030d7f06fa17a5 to your computer and use it in GitHub Desktop.
Save zhangxu/493eef030d7f06fa17a5 to your computer and use it in GitHub Desktop.
Copy from CSV File to Table
#!/bin/bash
zipfile=$1
output_folder=/tmp/`echo $RANDOM`
sql="$output_folder"/copy.sql
test ! -d $output_folder || rm -rf $output_folder
unzip -q $zipfile "*DailyData.csv" -d $output_folder && \
echo "truncate table raw_ag_data;" > $sql && \
find $output_folder -name "*.csv" -type f -print0 | xargs -0 -I {} echo \
"\\copy raw_ag_data(farm_name,ranch_name,field_name,zone_name,date,temp_high,temp_low,temp_soil,wind_speed,avg_humidity,precipitation,cimis_et0,cimis_etc,observed_etc,degree_days,cum_degree_days,water_efficiency,quality_spread,wind_run,chillhours_45f,chillhours_32f) from '{}' csv header" >> $sql && \
cat - << EOF >> $sql
with zs as (select f.name as f_name, r.name as r_name, p.name as p_name, z.name as z_name, z.id as z_id, z.parcel_id as p_id from
farms f left outer join ranches r on r.farm_id = f.id
left outer join parcels p on p.ranch_id = r.id
left outer join zones z on z.parcel_id = p.id)
update raw_ag_data a set zone_id = zs.z_id, parcel_id = zs.p_id, date = date + interval '8 hours'
from zs
where a.farm_name = zs.f_name and a.ranch_name = zs.r_name and a.field_name = zs.p_name and a.zone_name = zs.z_name;
EOF
PGPASSWORD="ERYCd-hf3479-DpiEQ" psql -h agls-r1.cl3ztejxmrbi.us-west-1.rds.amazonaws.com -p 5432 -U agls -d demo < $sql
rm -rf $output_folder
create table if not exists raw_ag_data (
farm_name varchar(255),
ranch_name varchar(255),
field_name varchar(255),
zone_name varchar(255),
date timestamp without time zone,
temp_high double precision ,
temp_low double precision ,
temp_soil double precision ,
wind_speed double precision ,
avg_humidity double precision ,
precipitation double precision ,
cimis_et0 double precision ,
cimis_etc double precision ,
observed_etc double precision ,
degree_days double precision ,
cum_degree_days double precision ,
water_efficiency double precision ,
quality_spread double precision ,
wind_run double precision ,
chillhours_45f double precision ,
chillhours_32f double precision ,
zone_id integer,
parcel_id integer
);
create table if not exists raw_zones(
farm_name varchar(255),
ranch_name varchar(255),
field_name varchar(255),
zone_name varchar(255),
acreage double precision,
soil_type varchar(255),
aws_100 varchar(255),
zone_id integer
);
\copy raw_ag_data(farm_name,ranch_name,field_name,zone_name,date,temp_high,temp_low,temp_soil,wind_speed,avg_humidity,precipitation,cimis_et0,cimis_etc,observed_etc,degree_days,cum_degree_days,water_efficiency,quality_spread,wind_run,chillhours_45f,chillhours_32f) from '/workspace/agls/data/output.demo/Agralogics Farm/As/Fields/Field 6/Zones/WholeField/Agralogics Farm_As_Field 6_WholeField_DailyData.csv' csv header
\copy raw_ag_data(farm_name,ranch_name,field_name,zone_name,date,temp_high,temp_low,temp_soil,wind_speed,avg_humidity,precipitation,cimis_et0,cimis_etc,observed_etc,degree_days,cum_degree_days,water_efficiency,quality_spread,wind_run,chillhours_45f,chillhours_32f) from '/workspace/agls/data/output.demo/Agralogics Farm/Reds/Fields/R1/Zones/WholeField/Agralogics Farm_Reds_R1_WholeField_DailyData.csv' csv header
with zs as (select f.name as f_name, r.name as r_name, p.name as p_name, z.name as z_name, z.id as z_id from
farms f left outer join ranches r on r.farm_id = f.id
left outer join parcels p on p.ranch_id = r.id
left outer join zones z on z.parcel_id = p.id)
-- select * from agronomic_data a join zs on a.zone_id = zs.z_id
update raw_ag_data a set zone_id = zs.z_id, for_date = for_date + interval '8 hours'
from zs
where a.farm_name = zs.f_name and a.ranch_name = zs.r_name and a.field_name = zs.p_name and a.zone_name = zs.z_name;
with zids as (select zones.id from zones join parcels on zones.parcel_id = parcels.id join ranches on parcels.ranch_id = ranches.id join farms on ranches.farm_id = farms.id and farms.name = 'Agralogics Farm')
select wind_run from agronomic_data where zone_id in (select id from zids) order by id desc limit 100;
#!/bin/bash
zipfile=$1
output_folder=/tmp/`echo $RANDOM`
sql="$output_folder"/copy.sql
csv="$output_folder"/zones.csv
test ! -d $output_folder || rm -rf $output_folder
unzip -q $zipfile "*_zones.csv" -d $output_folder && \
#find $output_folder -name "*.csv" -type f -print0 | xargs -I {} -0 tail -n+2 "{}" >> $csv && \
find $output_folder -name "*.csv" -type f -exec tail -n+2 "{}" \; >> $csv && \
echo "truncate table raw_zones;" > $sql && \
echo "\\copy raw_zones(farm_name, ranch_name, field_name, zone_name, acreage, soil_type, aws_100) from '$csv' csv" >> $sql && \
cat - << EOF >> $sql
with zs as (select f.name as f_name, r.name as r_name, p.name as p_name, z.name as z_name, z.id as z_id, z.parcel_id as p_id from
farms f left outer join ranches r on r.farm_id = f.id
left outer join parcels p on p.ranch_id = r.id
left outer join zones z on z.parcel_id = p.id)
update raw_zones a set zone_id = zs.z_id
from zs
where a.farm_name = zs.f_name and a.ranch_name = zs.r_name and a.field_name = zs.p_name and a.zone_name = zs.z_name;
EOF
PGPASSWORD="ERYCd-hf3479-DpiEQ" psql -h agls-r1.cl3ztejxmrbi.us-west-1.rds.amazonaws.com -p 5432 -U agls -d demo < $sql
rm -rf $output_folder
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment