Created
June 6, 2019 12:43
-
-
Save gidutz/86c701c621e48860c8ab135df56a8076 to your computer and use it in GitHub Desktop.
chicago taxi feature extraction
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
WITH dataset AS( SELECT | |
EXTRACT(HOUR FROM trip_start_timestamp) trip_start_hour | |
, EXTRACT(DAYOFWEEK FROM trip_start_timestamp) trip_start_weekday | |
, EXTRACT(WEEK FROM trip_start_timestamp) trip_start_week | |
, EXTRACT(DAYOFYEAR FROM trip_start_timestamp) trip_start_yearday | |
, EXTRACT(MONTH FROM trip_start_timestamp) trip_start_month | |
, (trip_miles * 1.60934 ) / ((trip_seconds + .01) / (60 * 60)) trip_speed_kmph | |
, trip_miles | |
, pickup_latitude | |
, pickup_longitude | |
, dropoff_latitude | |
, dropoff_longitude | |
, pickup_community_area | |
, dropoff_community_area | |
, ST_DISTANCE( | |
(ST_GEOGPOINT(pickup_longitude,pickup_latitude)), | |
(ST_GEOGPOINT(dropoff_longitude,dropoff_latitude))) air_distance | |
, CAST (trip_seconds AS FLOAT64) trip_seconds | |
FROM `bigquery-public-data.chicago_taxi_trips.taxi_trips` | |
WHERE RAND() < (3000000/112860054) --sample maximum ~3M records | |
AND trip_start_timestamp < '2016-01-01' | |
AND pickup_location IS NOT NULL | |
AND dropoff_location IS NOT NULL) | |
SELECT | |
trip_seconds | |
, air_distance | |
, pickup_latitude | |
, pickup_longitude | |
, dropoff_latitude | |
, dropoff_longitude | |
, pickup_community_area | |
, dropoff_community_area | |
, trip_start_hour | |
, trip_start_weekday | |
, trip_start_week | |
, trip_start_yearday | |
, trip_start_month | |
FROM dataset | |
WHERE trip_speed_kmph BETWEEN 5 AND 90 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment