Skip to content

Instantly share code, notes, and snippets.

@ImreSamu
Last active June 17, 2020 17:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ImreSamu/af34e1b4807fc4c3afad29d1b238252d to your computer and use it in GitHub Desktop.
Save ImreSamu/af34e1b4807fc4c3afad29d1b238252d to your computer and use it in GitHub Desktop.
sqlite3 test - reference file ...
#!/usr/bin/env bash
set -o errexit
set -o pipefail
set -o nounset
dbase=taxi_test10.db
rm -f ${dbase}
# wget https://nyc-tlc.s3.amazonaws.com/trip+data/green_tripdata_2019-12.csv
echo """
PRAGMA foreign_keys;
CREATE TABLE vendor(
VendorID INT NOT NULL PRIMARY KEY,
VendorLabel TEXT
);
INSERT INTO vendor (VendorID,VendorLabel)
VALUES
(1, 'Creative Mobile Technologies, LLC'),
(2, 'Verifone Inc.')
;
-- ratecode
CREATE TABLE ratecode(
RateCodeID INT NOT NULL PRIMARY KEY,
RateCodeLabel TEXT
);
INSERT INTO ratecode (RateCodeID,RateCodeLabel)
VALUES
(1, 'Standard Rate'),
(2, 'JFK'),
(3, 'Newark'),
(4, 'Nassau or Westchester'),
(5, 'Negotiated Fare'),
(6, 'Group Ride')
;
CREATE TABLE payment_type(
Payment_type INT NOT NULL PRIMARY KEY,
Payment_type_label TEXT
);
INSERT INTO payment_type (Payment_type,Payment_type_label)
VALUES
(1, 'Credit card'),
(2, 'Cash'),
(3, 'No charge'),
(4, 'Dispute'),
(5, 'Unknown'),
(6, 'Voided Trip')
;
CREATE TABLE trip_type(
Trip_type INT NOT NULL PRIMARY KEY,
Trip_type_label TEXT
);
INSERT INTO trip_type (Trip_type,Trip_type_label)
VALUES
(1,'Street-hail'),
(2,'Dispatch')
;
CREATE TABLE taxi201912(
VendorID INT NOT NULL,
lpep_pickup_datetime TEXT,
lpep_dropoff_datetime TEXT,
store_and_fwd_flag TEXT,
RatecodeID INT NOT NULL,
PULocationID INT NOT NULL,
DOLocationID INT NOT NULL,
passenger_count INT NOT NULL,
trip_distance REAL NOT NULL,
fare_amount REAL NOT NULL,
extra REAL NOT NULL,
mta_tax REAL NOT NULL,
tip_amount REAL NOT NULL,
tolls_amount REAL NOT NULL,
ehail_fee REAL NOT NULL,
improvement_surcharge REAL NOT NULL,
total_amount REAL NOT NULL,
payment_type INT NOT NULL,
trip_type INT NOT NULL,
congestion_surcharge REAL NOT NULL,
FOREIGN KEY (VendorID)
REFERENCES vendor (VendorID),
FOREIGN KEY (RateCodeID)
REFERENCES ratecode (RateCodeID),
FOREIGN KEY (Payment_type)
REFERENCES payment_type (Payment_type),
FOREIGN KEY (Trip_type)
REFERENCES trip_type (Trip_type)
);
---------------------------------------------------------
-- example view - with Labels - for reading to EXCEL
---------------------------------------------------------
CREATE VIEW taxi_view1 AS
SELECT
vendor.VendorLabel,
ratecode.RateCodeLabel,
payment_type.Payment_Type_label,
trip_type.Trip_type_label,
strftime('%W',lpep_pickup_datetime) AS pickup_weekofyear,
taxi.*
FROM
taxi201912 as taxi
LEFT JOIN vendor ON vendor.VendorID = taxi.VendorID
LEFT JOIN ratecode ON ratecode.RateCodeID = taxi.RateCodeID
LEFT JOIN payment_type ON payment_type.Payment_type = taxi.Payment_type
LEFT JOIN trip_type ON trip_type.Trip_type = taxi.Trip_type
;
---------------------------------------------------------
-- example preprocessed CEO report - for reading to EXCEL
---------------------------------------------------------
CREATE VIEW ceo_report_001_weeks AS
SELECT
strftime('%Y-%W week',lpep_pickup_datetime) AS pickup_weekofyear,
vendor.VendorLabel,
ratecode.RateCodeLabel,
payment_type.Payment_Type_label,
trip_type.Trip_type_label,
count(*) taxi_trips,
sum(total_amount) sum_total_amount,
avg(total_amount) avg_total_amount,
max(total_amount) max_total_amount
FROM
taxi201912 as taxi
LEFT JOIN vendor ON vendor.VendorID = taxi.VendorID
LEFT JOIN ratecode ON ratecode.RateCodeID = taxi.RateCodeID
LEFT JOIN payment_type ON payment_type.Payment_type = taxi.Payment_type
LEFT JOIN trip_type ON trip_type.Trip_type = taxi.Trip_type
GROUP BY 1,2,3,4,5
;
CREATE VIEW ceo_report_002_by_hour AS
SELECT
strftime('%H hour',lpep_pickup_datetime) AS pickup_hour,
trip_type.Trip_type_label,
count(*) taxi_trips,
sum(total_amount) sum_total_amount,
avg(total_amount) avg_total_amount,
max(total_amount) max_total_amount
FROM
taxi201912 as taxi
LEFT JOIN vendor ON vendor.VendorID = taxi.VendorID
LEFT JOIN ratecode ON ratecode.RateCodeID = taxi.RateCodeID
LEFT JOIN payment_type ON payment_type.Payment_type = taxi.Payment_type
LEFT JOIN trip_type ON trip_type.Trip_type = taxi.Trip_type
GROUP BY 1,2
;
""" | sqlite3 ${dbase}
sqlite3 -csv -header ${dbase} ".import '| tail -n +2 green_tripdata_2019-12.csv' taxi201912"
echo """
vacuum;
analyze;
""" | sqlite3 ${dbase}
ls -Gg --block-size=M ${dbase}
echo """
-- verify
select count(*) as _cnt from taxi201912;
pragma table_info(taxi201912);
select * from ceo_report_002_by_hour;
""" | sqlite3 ${dbase}
0
-rw-r--r-- 1 49M Jun 17 19:32 taxi_test10.db
450627
0|VendorID|INT|1||0
1|lpep_pickup_datetime|TEXT|0||0
2|lpep_dropoff_datetime|TEXT|0||0
3|store_and_fwd_flag|TEXT|0||0
4|RatecodeID|INT|1||0
5|PULocationID|INT|1||0
6|DOLocationID|INT|1||0
7|passenger_count|INT|1||0
8|trip_distance|REAL|1||0
9|fare_amount|REAL|1||0
10|extra|REAL|1||0
11|mta_tax|REAL|1||0
12|tip_amount|REAL|1||0
13|tolls_amount|REAL|1||0
14|ehail_fee|REAL|1||0
15|improvement_surcharge|REAL|1||0
16|total_amount|REAL|1||0
17|payment_type|INT|1||0
18|trip_type|INT|1||0
19|congestion_surcharge|REAL|1||0
00 hour||448|16853.75|37.6199776785714|120.5
00 hour|Dispatch|284|7152.22000000001|25.1838732394367|216.2
00 hour|Street-hail|9643|133003.080000012|13.7927076635914|488.8
01 hour||223|10046.89|45.0533183856502|120.5
01 hour|Dispatch|255|6814.50000000001|26.7235294117648|320.0
01 hour|Street-hail|6842|94421.0300000078|13.8002090032166|213.8
02 hour||107|4797.17|44.8333644859813|120.5
02 hour|Dispatch|296|9594.05|32.4123310810811|290.0
02 hour|Street-hail|4575|64416.7200000038|14.08015737705|130.24
03 hour||259|10076.39|38.9049806949806|109.5
03 hour|Dispatch|303|10727.63|35.4047194719471|250.0
03 hour|Street-hail|3505|53768.1900000019|15.3404251069906|376.3
04 hour||988|32987.4|33.3880566801619|93.5
04 hour|Dispatch|291|9557.27|32.842852233677|180.0
04 hour|Street-hail|3043|47609.3000000012|15.6455142951039|293.02
05 hour||1607|57856.5999999999|36.0028624766645|96.0
05 hour|Dispatch|195|4881.73|25.0345128205128|250.0
05 hour|Street-hail|2132|39392.6800000002|18.4768667917449|154.8
06 hour||3156|119161.3|37.7570659062103|109.5
06 hour|Dispatch|189|3670.98|19.4231746031746|235.0
06 hour|Street-hail|3830|74768.2100000032|19.5217258485648|200.8
07 hour||4665|157199.16|33.6975691318328|93.5
07 hour|Dispatch|326|5086.94000000001|15.6041104294479|150.3
07 hour|Street-hail|8482|137547.420000007|16.2163900023588|183.3
08 hour||5854|185667.660000001|31.7163751281176|96.0
08 hour|Dispatch|371|6591.48000000002|17.7667924528302|132.36
08 hour|Street-hail|13964|219199.449999968|15.6974684904016|270.55
09 hour||7190|218084.020000002|30.3315744089015|96.0
09 hour|Dispatch|459|7479.43000000002|16.295054466231|159.9
09 hour|Street-hail|16143|247994.519999954|15.3623564393207|275.8
10 hour||7597|233303.420000003|30.7099407660922|88.5
10 hour|Dispatch|425|7520.27000000002|17.6947529411765|298.04
10 hour|Street-hail|16354|245360.479999953|15.0030867066132|235.3
11 hour||7147|225081.740000001|31.4931775570171|93.5
11 hour|Dispatch|385|7705.72000000003|20.0148571428572|402.8
11 hour|Street-hail|16898|254404.209999948|15.0552852408538|214.86
12 hour||7177|228174.230000002|31.7924244113142|123.25
12 hour|Dispatch|394|8670.32000000001|22.0058883248731|400.3
12 hour|Street-hail|17024|256909.059999945|15.0909927161622|310.3
13 hour||6993|218809.920000002|31.2898498498501|98.5
13 hour|Dispatch|379|7436.64000000002|19.6217414248022|175.3
13 hour|Street-hail|18334|273319.199999937|14.9077778989821|298.8
14 hour||6678|214460.450000002|32.1144728960769|101.25
14 hour|Dispatch|379|7524.33000000002|19.8531134564644|250.3
14 hour|Street-hail|19697|302472.329999921|15.3562638980516|350.3
15 hour||6677|224570.44|33.6334341770257|104.0
15 hour|Dispatch|367|8247.98000000002|22.4740599455041|208.49
15 hour|Street-hail|22363|339999.609999903|15.2036672181685|258.8
16 hour||5512|192059.870000002|34.8439531930337|104.0
16 hour|Dispatch|382|8556.31000000001|22.3987172774869|216.36
16 hour|Street-hail|23305|369133.739999891|15.8392508045437|192.3
17 hour||4854|167321.15|34.4707766790277|120.5
17 hour|Dispatch|389|8603.76000000001|22.1176349614396|200.3
17 hour|Street-hail|25583|394566.169999878|15.4229828401625|148.07
18 hour||3747|126752.62|33.8277608753669|104.0
18 hour|Dispatch|387|8875.24000000001|22.9334366925065|500.3
18 hour|Street-hail|26290|388295.859999879|14.769717002658|176.05
19 hour||2824|99922.6899999999|35.3833888101983|101.25
19 hour|Dispatch|441|8751.46000000002|19.8445804988663|200.3
19 hour|Street-hail|24577|351877.809999897|14.3173621678763|177.87
20 hour||2268|79183.4500000001|34.9133377425044|98.5
20 hour|Dispatch|388|8150.56000000002|21.0065979381444|150.3
20 hour|Street-hail|21306|302236.989999921|14.1855341218399|161.8
21 hour||2062|72012.6199999999|34.923676042677|106.75
21 hour|Dispatch|390|7254.90000000002|18.6023076923077|200.3
21 hour|Street-hail|18466|259904.869999944|14.0747790533924|452.8
22 hour||1624|56979.4199999999|35.0858497536945|123.25
22 hour|Dispatch|342|7509.20000000002|21.9567251461989|200.3
22 hour|Street-hail|16013|229931.669999958|14.3590626366051|239.8
23 hour||1070|36994.56|34.5743551401869|88.5
23 hour|Dispatch|342|8812.93000000001|25.7688011695907|285.0
23 hour|Street-hail|13172|189184.619999979|14.3626343759474|408.8
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment