Created
July 3, 2018 13:51
-
-
Save cswinter/e8b69f2762e2f06417926effe2aeb41f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
COPY ( | |
SELECT trips.id, | |
trips.vendor_id, | |
trips.pickup_datetime, | |
trips.dropoff_datetime, | |
trips.store_and_fwd_flag, | |
trips.rate_code_id, | |
trips.pickup_longitude, | |
trips.pickup_latitude, | |
trips.dropoff_longitude, | |
trips.dropoff_latitude, | |
trips.passenger_count, | |
trips.trip_distance, | |
trips.fare_amount, | |
trips.extra, | |
trips.mta_tax, | |
trips.tip_amount, | |
trips.tolls_amount, | |
trips.ehail_fee, | |
trips.improvement_surcharge, | |
trips.total_amount, | |
trips.payment_type, | |
trips.trip_type, | |
trips.pickup, | |
trips.dropoff, | |
cab_types.type cab_type, | |
weather.precipitation rain, | |
weather.snow_depth, | |
weather.snowfall, | |
weather.max_temperature max_temp, | |
weather.min_temperature min_temp, | |
weather.average_wind_speed wind, | |
pick_up.gid pickup_nyct2010_gid, | |
pick_up.ctlabel pickup_ctlabel, | |
pick_up.borocode pickup_borocode, | |
pick_up.boroname pickup_boroname, | |
pick_up.ct2010 pickup_ct2010, | |
pick_up.boroct2010 pickup_boroct2010, | |
pick_up.cdeligibil pickup_cdeligibil, | |
pick_up.ntacode pickup_ntacode, | |
pick_up.ntaname pickup_ntaname, | |
pick_up.puma pickup_puma, | |
drop_off.gid dropoff_nyct2010_gid, | |
drop_off.ctlabel dropoff_ctlabel, | |
drop_off.borocode dropoff_borocode, | |
drop_off.boroname dropoff_boroname, | |
drop_off.ct2010 dropoff_ct2010, | |
drop_off.boroct2010 dropoff_boroct2010, | |
drop_off.cdeligibil dropoff_cdeligibil, | |
drop_off.ntacode dropoff_ntacode, | |
drop_off.ntaname dropoff_ntaname, | |
drop_off.puma dropoff_puma | |
FROM trips | |
LEFT JOIN cab_types | |
ON trips.cab_type_id = cab_types.id | |
LEFT JOIN central_park_weather_observations weather | |
ON weather.date = trips.pickup_datetime::date | |
LEFT JOIN nyct2010 pick_up | |
ON pick_up.gid = trips.pickup_nyct2010_gid | |
LEFT JOIN nyct2010 drop_off | |
ON drop_off.gid = trips.dropoff_nyct2010_gid | |
) TO PROGRAM | |
'split -l 20000000 --filter="gzip > /home/clemens/nyc-taxi-data-denormalized/trips_\$FILE.csv.gz"' | |
WITH CSV; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment