Skip to content

Instantly share code, notes, and snippets.

@pramsey
Created October 8, 2015 19:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pramsey/51eae0e55143c742f324 to your computer and use it in GitHub Desktop.
Save pramsey/51eae0e55143c742f324 to your computer and use it in GitHub Desktop.
-- place data in s3 for consumption by redshift
-- unzip -p taxi.zip | perl strip.pl | gzip -9 > taxi.gz
-- aws s3 cp taxi.gz s3://taxitest/taxi.gz
CREATE TABLE taxis (
pickup_datetime varchar,
passenger_count integer,
trip_time_in_secs real,
trip_distance real,
pickup_longitude real,
pickup_latitude real,
easting real,
northing real
);
COPY taxis FROM 's3://taxitest/taxi.gz'
CREDENTIALS 'aws_access_key_id=xxx;aws_secret_access_key=xxx'
DELIMITER '\t' GZIP;
-- how big is test table?
-- 14595416
select count(*) from taxis;
-- summarize whole table into points at zoom level 8 (resolution 611 m/pixel)
-- 15308
-- 1.2s cold, 0.3s hot
select count(*) from
(
select
floor(easting/611) as x,
floor(northing/611) as y,
avg(trip_time_in_secs) as trip_time_in_secs,
avg(trip_distance) as trip_distance
from taxis
group by x, y
) as foo;
-- summarize and filter whole table into points at zoom level 8 (resolution 611 m/pixel)
-- 156
-- 1.0s cold, 0.2s hot
select count(*) from
(
select
floor(easting/611) as x,
floor(northing/611) as y,
avg(trip_time_in_secs) as trip_time_in_secs,
avg(trip_distance) as trip_distance
from taxis
where trip_time_in_secs > 100 and trip_time_in_secs < 320
group by x, y
) as foo;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment