bertt/Timescale DB sample queries

## Timescale DB sample queries
# Sample datat set: New York City taxicab data (10 milion records)

## 1] met timescaledb

d:\> cd D:\gisdata\nyc_cabs
d:\gisdata\nyc_cabs> bash
// start database docker
$ docker run -d --name timescaledb -p 5432:5432 -e POSTGRES_PASSWORD=postgres timescale/timescaledb-postgis
// create schema for taxi rides
$ psql -U postgres -d postgres -h localhost < nyc_data.sql
// import data
$ psql -U postgres -d postgres -h localhost -c "\COPY rides FROM nyc_data_rides.csv CSV"
$ psql postgres postgres
postgres=# \d rides
1) postgres=#  SELECT date_trunc('day', pickup_datetime) as day, avg(fare_amount) FROM rides WHERE passenger_count > 1 AND pickup_datetime < '2016-01-08' GROUP BY day ORDER BY day;
2) postgres=# SELECT date_trunc('day', pickup_datetime) as day, COUNT(*) FROM rides GROUP BY day ORDER BY day LIMIT 5;
// (using the TimescaleDB "time_bucket" function
3) postgres=# SELECT time_bucket('5 minute', pickup_datetime) AS five_min, count(*) FROM rides WHERE pickup_datetime < '2016-01-01 02:00' GROUP BY five_min ORDER BY five_min;
postgres=# SELECT rate_code, COUNT(vendor_id) as num_trips FROM rides WHERE pickup_datetime < '2016-01-08' GROUP BY rate_code ORDER BY rate_code;
postgres=# SELECT * FROM rates;
4) postgres=# SELECT rates.description, COUNT(vendor_id) as num_trips, AVG(dropoff_datetime - pickup_datetime) as avg_trip_duration, AVG(total_amount) as avg_total, AVG(tip_amount) as avg_tip, MIN(trip_distance) as min_distance, AVG(trip_distance) as avg_distance, MAX(trip_distance) as max_distance, AVG(passenger_count) as avg_passengers FROM rides JOIN rates on rides.rate_code = rates.rate_code WHERE rides.rate_code in (2,3) AND pickup_datetime < '2016-02-01' GROUP BY rates.description ORDER BY rates.description;
postgre = # EXPLAIN SELECT * FROM rides;
postgres=# SELECT COUNT(*) FROM _timescaledb_internal._hyper_1_2_chunk;
postgres=# CREATE EXTENSION postgis;
//. US National Atlas Equal Area? why
postgres=# ALTER TABLE rides ADD COLUMN pickup_geom geometry(POINT,2163);
postgres=# ALTER TABLE rides ADD COLUMN dropoff_geom geometry(POINT,2163);
postgres=# UPDATE rides SET pickup_geom = ST_Transform(ST_SetSRID(ST_MakePoint(pickup_longitude,pickup_latitude),4326),2163);
5) postgres=# SELECT time_bucket('30 minutes', pickup_datetime) AS thirty_min, COUNT(*) AS near_times_sq FROM rides WHERE ST_Distance(pickup_geom, ST_Transform(ST_SetSRID(ST_MakePoint(-73.9851,40.7589),4326),2163)) < 400 AND pickup_datetime < '2016-01-01 14:00' GROUP BY thirty_min ORDER BY thirty_min;

query 1) 3 seconden
query 2) 5 seconden
query 3) 0.4 seconden
query 4) 2 seconden (eerste keer 46 seconden)
query 5)  0.5 seconden

## 2] zonder timescaledb
$ docker run -d --name postgis -p 5433:5432 -e POSTGRES_PASSWORD=postgres mdillon/postgis
$ psql -U postgres -d postgres -h localhost -p 5433 < nyc_data-no_timescale.sql
$ psql -U postgres -d postgres -h localhost -p 5433 -c "\COPY rides FROM nyc_data_rides.csv CSV"
$ psql postgres postgres -p 5433

query 1)  7 seconds
query 2) 15 seconds
query 3) time_bucket bestaat niet
query 4) 2 seconden
query 5) time bucket bestaat niet
	# Sample datat set: New York City taxicab data (10 milion records)

	## 1] met timescaledb

	d:\> cd D:\gisdata\nyc_cabs
	d:\gisdata\nyc_cabs> bash
	// start database docker
	$ docker run -d --name timescaledb -p 5432:5432 -e POSTGRES_PASSWORD=postgres timescale/timescaledb-postgis
	// create schema for taxi rides
	$ psql -U postgres -d postgres -h localhost < nyc_data.sql
	// import data
	$ psql -U postgres -d postgres -h localhost -c "\COPY rides FROM nyc_data_rides.csv CSV"
	$ psql postgres postgres
	postgres=# \d rides
	1) postgres=# SELECT date_trunc('day', pickup_datetime) as day, avg(fare_amount) FROM rides WHERE passenger_count > 1 AND pickup_datetime < '2016-01-08' GROUP BY day ORDER BY day;
	2) postgres=# SELECT date_trunc('day', pickup_datetime) as day, COUNT(*) FROM rides GROUP BY day ORDER BY day LIMIT 5;
	// (using the TimescaleDB "time_bucket" function
	3) postgres=# SELECT time_bucket('5 minute', pickup_datetime) AS five_min, count(*) FROM rides WHERE pickup_datetime < '2016-01-01 02:00' GROUP BY five_min ORDER BY five_min;
	postgres=# SELECT rate_code, COUNT(vendor_id) as num_trips FROM rides WHERE pickup_datetime < '2016-01-08' GROUP BY rate_code ORDER BY rate_code;
	postgres=# SELECT * FROM rates;
	4) postgres=# SELECT rates.description, COUNT(vendor_id) as num_trips, AVG(dropoff_datetime - pickup_datetime) as avg_trip_duration, AVG(total_amount) as avg_total, AVG(tip_amount) as avg_tip, MIN(trip_distance) as min_distance, AVG(trip_distance) as avg_distance, MAX(trip_distance) as max_distance, AVG(passenger_count) as avg_passengers FROM rides JOIN rates on rides.rate_code = rates.rate_code WHERE rides.rate_code in (2,3) AND pickup_datetime < '2016-02-01' GROUP BY rates.description ORDER BY rates.description;
	postgre = # EXPLAIN SELECT * FROM rides;
	postgres=# SELECT COUNT(*) FROM _timescaledb_internal._hyper_1_2_chunk;
	postgres=# CREATE EXTENSION postgis;
	//. US National Atlas Equal Area? why
	postgres=# ALTER TABLE rides ADD COLUMN pickup_geom geometry(POINT,2163);
	postgres=# ALTER TABLE rides ADD COLUMN dropoff_geom geometry(POINT,2163);
	postgres=# UPDATE rides SET pickup_geom = ST_Transform(ST_SetSRID(ST_MakePoint(pickup_longitude,pickup_latitude),4326),2163);
	5) postgres=# SELECT time_bucket('30 minutes', pickup_datetime) AS thirty_min, COUNT(*) AS near_times_sq FROM rides WHERE ST_Distance(pickup_geom, ST_Transform(ST_SetSRID(ST_MakePoint(-73.9851,40.7589),4326),2163)) < 400 AND pickup_datetime < '2016-01-01 14:00' GROUP BY thirty_min ORDER BY thirty_min;

	query 1) 3 seconden
	query 2) 5 seconden
	query 3) 0.4 seconden
	query 4) 2 seconden (eerste keer 46 seconden)
	query 5) 0.5 seconden

	## 2] zonder timescaledb
	$ docker run -d --name postgis -p 5433:5432 -e POSTGRES_PASSWORD=postgres mdillon/postgis
	$ psql -U postgres -d postgres -h localhost -p 5433 < nyc_data-no_timescale.sql
	$ psql -U postgres -d postgres -h localhost -p 5433 -c "\COPY rides FROM nyc_data_rides.csv CSV"
	$ psql postgres postgres -p 5433

	query 1) 7 seconds
	query 2) 15 seconds
	query 3) time_bucket bestaat niet
	query 4) 2 seconden
	query 5) time bucket bestaat niet