Last active
October 11, 2018 11:54
-
-
Save bertt/2faf910e5417c48e9a3a57266fe53a8a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sample datat set: New York City taxicab data (10 milion records) | |
## 1] met timescaledb | |
d:\> cd D:\gisdata\nyc_cabs | |
d:\gisdata\nyc_cabs> bash | |
// start database docker | |
$ docker run -d --name timescaledb -p 5432:5432 -e POSTGRES_PASSWORD=postgres timescale/timescaledb-postgis | |
// create schema for taxi rides | |
$ psql -U postgres -d postgres -h localhost < nyc_data.sql | |
// import data | |
$ psql -U postgres -d postgres -h localhost -c "\COPY rides FROM nyc_data_rides.csv CSV" | |
$ psql postgres postgres | |
postgres=# \d rides | |
1) postgres=# SELECT date_trunc('day', pickup_datetime) as day, avg(fare_amount) FROM rides WHERE passenger_count > 1 AND pickup_datetime < '2016-01-08' GROUP BY day ORDER BY day; | |
2) postgres=# SELECT date_trunc('day', pickup_datetime) as day, COUNT(*) FROM rides GROUP BY day ORDER BY day LIMIT 5; | |
// (using the TimescaleDB "time_bucket" function | |
3) postgres=# SELECT time_bucket('5 minute', pickup_datetime) AS five_min, count(*) FROM rides WHERE pickup_datetime < '2016-01-01 02:00' GROUP BY five_min ORDER BY five_min; | |
postgres=# SELECT rate_code, COUNT(vendor_id) as num_trips FROM rides WHERE pickup_datetime < '2016-01-08' GROUP BY rate_code ORDER BY rate_code; | |
postgres=# SELECT * FROM rates; | |
4) postgres=# SELECT rates.description, COUNT(vendor_id) as num_trips, AVG(dropoff_datetime - pickup_datetime) as avg_trip_duration, AVG(total_amount) as avg_total, AVG(tip_amount) as avg_tip, MIN(trip_distance) as min_distance, AVG(trip_distance) as avg_distance, MAX(trip_distance) as max_distance, AVG(passenger_count) as avg_passengers FROM rides JOIN rates on rides.rate_code = rates.rate_code WHERE rides.rate_code in (2,3) AND pickup_datetime < '2016-02-01' GROUP BY rates.description ORDER BY rates.description; | |
postgre = # EXPLAIN SELECT * FROM rides; | |
postgres=# SELECT COUNT(*) FROM _timescaledb_internal._hyper_1_2_chunk; | |
postgres=# CREATE EXTENSION postgis; | |
//. US National Atlas Equal Area? why | |
postgres=# ALTER TABLE rides ADD COLUMN pickup_geom geometry(POINT,2163); | |
postgres=# ALTER TABLE rides ADD COLUMN dropoff_geom geometry(POINT,2163); | |
postgres=# UPDATE rides SET pickup_geom = ST_Transform(ST_SetSRID(ST_MakePoint(pickup_longitude,pickup_latitude),4326),2163); | |
5) postgres=# SELECT time_bucket('30 minutes', pickup_datetime) AS thirty_min, COUNT(*) AS near_times_sq FROM rides WHERE ST_Distance(pickup_geom, ST_Transform(ST_SetSRID(ST_MakePoint(-73.9851,40.7589),4326),2163)) < 400 AND pickup_datetime < '2016-01-01 14:00' GROUP BY thirty_min ORDER BY thirty_min; | |
query 1) 3 seconden | |
query 2) 5 seconden | |
query 3) 0.4 seconden | |
query 4) 2 seconden (eerste keer 46 seconden) | |
query 5) 0.5 seconden | |
## 2] zonder timescaledb | |
$ docker run -d --name postgis -p 5433:5432 -e POSTGRES_PASSWORD=postgres mdillon/postgis | |
$ psql -U postgres -d postgres -h localhost -p 5433 < nyc_data-no_timescale.sql | |
$ psql -U postgres -d postgres -h localhost -p 5433 -c "\COPY rides FROM nyc_data_rides.csv CSV" | |
$ psql postgres postgres -p 5433 | |
query 1) 7 seconds | |
query 2) 15 seconds | |
query 3) time_bucket bestaat niet | |
query 4) 2 seconden | |
query 5) time bucket bestaat niet |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment