Skip to content

Instantly share code, notes, and snippets.

@cswinter
Created August 12, 2018 18:12
Show Gist options
  • Save cswinter/0920d8f2847ce6ce838b44ca15f1bfc0 to your computer and use it in GitHub Desktop.
Save cswinter/0920d8f2847ce6ce838b44ca15f1bfc0 to your computer and use it in GitHub Desktop.
➜ nyc-clickhouse clickhouse-client --max_threads=12 --max_memory_usage=60000000000
ClickHouse client version 1.1.54388.
Connecting to localhost:9000 as user default.
Connected to ClickHouse server version 1.1.54388.
Archon :) show tables
SHOW TABLES
┌─name──┐
│ trips │
└───────┘
1 rows in set. Elapsed: 0.005 sec.
Archon :) CREATE TABLE trips_mergetree
:-] ENGINE = MergeTree(pickup_date, pickup_datetime, 8192)
:-] AS SELECT
:-] trip_id,
:-] CAST(vendor_id AS Enum8('1' = 1, '2' = 2, '3' = 3, 'CMT' = 4, 'VTS' = 5, 'DDS' = 6, 'B02512' = 10, 'B02598' = 11, 'B02617' = 12, 'B02682' = 13, 'B02764' = 14)) AS vendor_id,
:-] toDate(pickup_datetime) AS pickup_date,
:-] ifNull(pickup_datetime, toDateTime(0)) AS pickup_datetime,
:-] toDate(dropoff_datetime) AS dropoff_date,
:-] ifNull(dropoff_datetime, toDateTime(0)) AS dropoff_datetime,
:-] assumeNotNull(store_and_fwd_flag) IN ('Y', '1', '2') AS store_and_fwd_flag,
:-] assumeNotNull(rate_code_id) AS rate_code_id,
:-] assumeNotNull(pickup_longitude) AS pickup_longitude,
:-] assumeNotNull(pickup_latitude) AS pickup_latitude,
:-] assumeNotNull(dropoff_longitude) AS dropoff_longitude,
:-] assumeNotNull(dropoff_latitude) AS dropoff_latitude,
:-] assumeNotNull(passenger_count) AS passenger_count,
:-] assumeNotNull(trip_distance) AS trip_distance,
:-] assumeNotNull(fare_amount) AS fare_amount,
:-] assumeNotNull(extra) AS extra,
:-] assumeNotNull(mta_tax) AS mta_tax,
:-] assumeNotNull(tip_amount) AS tip_amount,
:-] assumeNotNull(tolls_amount) AS tolls_amount,
:-] assumeNotNull(ehail_fee) AS ehail_fee,
:-] assumeNotNull(improvement_surcharge) AS improvement_surcharge,
:-] assumeNotNull(total_amount) AS total_amount,
:-] CAST((assumeNotNull(payment_type) AS pt) IN ('CSH', 'CASH', 'Cash', 'CAS', 'Cas', '1') ? 'CSH' : (pt IN ('CRD', 'Credit', 'Cre', 'CRE', 'CREDIT', '2') ? 'CRE' : (pt IN ('NOC', 'No Charge', 'No', '3') ? 'NOC' : (pt IN ('DIS', 'Dispute', 'Dis', '4') ? 'DIS' : 'UNK'))) AS Enum8('CSH' = 1, 'CRE' = 2, 'UNK' = 0, 'NOC' = 3, 'DIS' = 4)) AS payment_type_,
:-] assumeNotNull(trip_type) AS trip_type,
:-] ifNull(toFixedString(unhex(pickup), 25), toFixedString('', 25)) AS pickup,
:-] ifNull(toFixedString(unhex(dropoff), 25), toFixedString('', 25)) AS dropoff,
:-] CAST(assumeNotNull(cab_type) AS Enum8('yellow' = 1, 'green' = 2, 'uber' = 3)) AS cab_type,
:-] assumeNotNull(pickup_nyct2010_gid) AS pickup_nyct2010_gid,
:-] toFloat32(ifNull(pickup_ctlabel, '0')) AS pickup_ctlabel,
:-] assumeNotNull(pickup_borocode) AS pickup_borocode,
:-] assumeNotNull(pickup_boroname) AS pickup_ct2010,
:-] toFixedString(ifNull(pickup_boroct2010, '0000000'), 7) AS pickup_boroct2010,
:-] assumeNotNull(ifNull(pickup_cdeligibil, ' ')) AS pickup_cdeligibil,
:-] toFixedString(ifNull(pickup_ntacode, '0000'), 4) AS pickup_ntacode,
:-] assumeNotNull(pickup_ntaname) AS pickup_ntaname,
:-] toUInt16(ifNull(pickup_puma, '0')) AS pickup_puma,
:-] assumeNotNull(dropoff_nyct2010_gid) AS dropoff_nyct2010_gid,
:-] toFloat32(ifNull(dropoff_ctlabel, '0')) AS dropoff_ctlabel,
:-] assumeNotNull(dropoff_borocode) AS dropoff_borocode,
:-] assumeNotNull(dropoff_boroname) AS dropoff_ct2010,
:-] toFixedString(ifNull(dropoff_boroct2010, '0000000'), 7) AS dropoff_boroct2010,
:-] assumeNotNull(ifNull(dropoff_cdeligibil, ' ')) AS dropoff_cdeligibil,
:-] toFixedString(ifNull(dropoff_ntacode, '0000'), 4) AS dropoff_ntacode,
:-] assumeNotNull(dropoff_ntaname) AS dropoff_ntaname,
:-] toUInt16(ifNull(dropoff_puma, '0')) AS dropoff_puma
:-] FROM trips
CREATE TABLE trips_mergetree
ENGINE = MergeTree(pickup_date, pickup_datetime, 8192) AS
SELECT
trip_id,
CAST(vendor_id, 'Enum8(\'1\' = 1, \'2\' = 2, \'3\' = 3, \'CMT\' = 4, \'VTS\' = 5, \'DDS\' = 6, \'B02512\' = 10, \'B02598\' = 11, \'B02617\' = 12, \'B02682\' = 13, \'B02764\' = 14)') AS vendor_id,
toDate(pickup_datetime) AS pickup_date,
ifNull(pickup_datetime, toDateTime(0)) AS pickup_datetime,
toDate(dropoff_datetime) AS dropoff_date,
ifNull(dropoff_datetime, toDateTime(0)) AS dropoff_datetime,
assumeNotNull(store_and_fwd_flag) IN ('Y', '1', '2') AS store_and_fwd_flag,
assumeNotNull(rate_code_id) AS rate_code_id,
assumeNotNull(pickup_longitude) AS pickup_longitude,
assumeNotNull(pickup_latitude) AS pickup_latitude,
assumeNotNull(dropoff_longitude) AS dropoff_longitude,
assumeNotNull(dropoff_latitude) AS dropoff_latitude,
assumeNotNull(passenger_count) AS passenger_count,
assumeNotNull(trip_distance) AS trip_distance,
assumeNotNull(fare_amount) AS fare_amount,
assumeNotNull(extra) AS extra,
assumeNotNull(mta_tax) AS mta_tax,
assumeNotNull(tip_amount) AS tip_amount,
assumeNotNull(tolls_amount) AS tolls_amount,
assumeNotNull(ehail_fee) AS ehail_fee,
assumeNotNull(improvement_surcharge) AS improvement_surcharge,
assumeNotNull(total_amount) AS total_amount,
CAST(if((assumeNotNull(payment_type) AS pt) IN ('CSH', 'CASH', 'Cash', 'CAS', 'Cas', '1'), 'CSH', if(pt IN ('CRD', 'Credit', 'Cre', 'CRE', 'CREDIT', '2'), 'CRE', if(pt IN ('NOC', 'No Charge', 'No', '3'), 'NOC', if(pt IN ('DIS', 'Dispute', 'Dis', '4'), 'DIS', 'UNK')))), 'Enum8(\'CSH\' = 1, \'CRE\' = 2, \'UNK\' = 0, \'NOC\' = 3, \'DIS\' = 4)') AS payment_type_,
assumeNotNull(trip_type) AS trip_type,
ifNull(toFixedString(unhex(pickup), 25), toFixedString('', 25)) AS pickup,
ifNull(toFixedString(unhex(dropoff), 25), toFixedString('', 25)) AS dropoff,
CAST(assumeNotNull(cab_type), 'Enum8(\'yellow\' = 1, \'green\' = 2, \'uber\' = 3)') AS cab_type,
assumeNotNull(pickup_nyct2010_gid) AS pickup_nyct2010_gid,
toFloat32(ifNull(pickup_ctlabel, '0')) AS pickup_ctlabel,
assumeNotNull(pickup_borocode) AS pickup_borocode,
assumeNotNull(pickup_boroname) AS pickup_ct2010,
toFixedString(ifNull(pickup_boroct2010, '0000000'), 7) AS pickup_boroct2010,
assumeNotNull(ifNull(pickup_cdeligibil, ' ')) AS pickup_cdeligibil,
toFixedString(ifNull(pickup_ntacode, '0000'), 4) AS pickup_ntacode,
assumeNotNull(pickup_ntaname) AS pickup_ntaname,
toUInt16(ifNull(pickup_puma, '0')) AS pickup_puma,
assumeNotNull(dropoff_nyct2010_gid) AS dropoff_nyct2010_gid,
toFloat32(ifNull(dropoff_ctlabel, '0')) AS dropoff_ctlabel,
assumeNotNull(dropoff_borocode) AS dropoff_borocode,
assumeNotNull(dropoff_boroname) AS dropoff_ct2010,
toFixedString(ifNull(dropoff_boroct2010, '0000000'), 7) AS dropoff_boroct2010,
assumeNotNull(ifNull(dropoff_cdeligibil, ' ')) AS dropoff_cdeligibil,
toFixedString(ifNull(dropoff_ntacode, '0000'), 4) AS dropoff_ntacode,
assumeNotNull(dropoff_ntaname) AS dropoff_ntaname,
toUInt16(ifNull(dropoff_puma, '0')) AS dropoff_puma
FROM trips
Ok.
0 rows in set. Elapsed: 3358.663 sec. Processed 1.46 billion rows, 693.93 GB (436.12 thousand rows/s., 206.61 MB/s.)
Archon :)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment