Skip to content

Instantly share code, notes, and snippets.

@bertt
Last active October 11, 2018 11:54
Show Gist options
  • Save bertt/2faf910e5417c48e9a3a57266fe53a8a to your computer and use it in GitHub Desktop.
Save bertt/2faf910e5417c48e9a3a57266fe53a8a to your computer and use it in GitHub Desktop.
# Sample datat set: New York City taxicab data (10 milion records)
## 1] met timescaledb
d:\> cd D:\gisdata\nyc_cabs
d:\gisdata\nyc_cabs> bash
// start database docker
$ docker run -d --name timescaledb -p 5432:5432 -e POSTGRES_PASSWORD=postgres timescale/timescaledb-postgis
// create schema for taxi rides
$ psql -U postgres -d postgres -h localhost < nyc_data.sql
// import data
$ psql -U postgres -d postgres -h localhost -c "\COPY rides FROM nyc_data_rides.csv CSV"
$ psql postgres postgres
postgres=# \d rides
1) postgres=# SELECT date_trunc('day', pickup_datetime) as day, avg(fare_amount) FROM rides WHERE passenger_count > 1 AND pickup_datetime < '2016-01-08' GROUP BY day ORDER BY day;
2) postgres=# SELECT date_trunc('day', pickup_datetime) as day, COUNT(*) FROM rides GROUP BY day ORDER BY day LIMIT 5;
// (using the TimescaleDB "time_bucket" function
3) postgres=# SELECT time_bucket('5 minute', pickup_datetime) AS five_min, count(*) FROM rides WHERE pickup_datetime < '2016-01-01 02:00' GROUP BY five_min ORDER BY five_min;
postgres=# SELECT rate_code, COUNT(vendor_id) as num_trips FROM rides WHERE pickup_datetime < '2016-01-08' GROUP BY rate_code ORDER BY rate_code;
postgres=# SELECT * FROM rates;
4) postgres=# SELECT rates.description, COUNT(vendor_id) as num_trips, AVG(dropoff_datetime - pickup_datetime) as avg_trip_duration, AVG(total_amount) as avg_total, AVG(tip_amount) as avg_tip, MIN(trip_distance) as min_distance, AVG(trip_distance) as avg_distance, MAX(trip_distance) as max_distance, AVG(passenger_count) as avg_passengers FROM rides JOIN rates on rides.rate_code = rates.rate_code WHERE rides.rate_code in (2,3) AND pickup_datetime < '2016-02-01' GROUP BY rates.description ORDER BY rates.description;
postgre = # EXPLAIN SELECT * FROM rides;
postgres=# SELECT COUNT(*) FROM _timescaledb_internal._hyper_1_2_chunk;
postgres=# CREATE EXTENSION postgis;
//. US National Atlas Equal Area? why
postgres=# ALTER TABLE rides ADD COLUMN pickup_geom geometry(POINT,2163);
postgres=# ALTER TABLE rides ADD COLUMN dropoff_geom geometry(POINT,2163);
postgres=# UPDATE rides SET pickup_geom = ST_Transform(ST_SetSRID(ST_MakePoint(pickup_longitude,pickup_latitude),4326),2163);
5) postgres=# SELECT time_bucket('30 minutes', pickup_datetime) AS thirty_min, COUNT(*) AS near_times_sq FROM rides WHERE ST_Distance(pickup_geom, ST_Transform(ST_SetSRID(ST_MakePoint(-73.9851,40.7589),4326),2163)) < 400 AND pickup_datetime < '2016-01-01 14:00' GROUP BY thirty_min ORDER BY thirty_min;
query 1) 3 seconden
query 2) 5 seconden
query 3) 0.4 seconden
query 4) 2 seconden (eerste keer 46 seconden)
query 5) 0.5 seconden
## 2] zonder timescaledb
$ docker run -d --name postgis -p 5433:5432 -e POSTGRES_PASSWORD=postgres mdillon/postgis
$ psql -U postgres -d postgres -h localhost -p 5433 < nyc_data-no_timescale.sql
$ psql -U postgres -d postgres -h localhost -p 5433 -c "\COPY rides FROM nyc_data_rides.csv CSV"
$ psql postgres postgres -p 5433
query 1) 7 seconds
query 2) 15 seconds
query 3) time_bucket bestaat niet
query 4) 2 seconden
query 5) time bucket bestaat niet
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment