Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
## Script to collect personal Citibike trip data and get route estimates for each trip
## Structure of the Kimono API response (one page of trips)
# {
# "name": "citibike_bay",
# "count": 8,
# "frequency": "realtime",
# "version": 1,
# "newdata": true,
# "lastrunstatus": "success",
# "lastsuccess": "Tue Jul 22 2014 21:50:19 GMT+0000 (UTC)",
# "results": {
# "collection1": [
# {
# "Start_Station": "E 25 St & 2 Ave",
# "Start_Time": "8/31/13 4:30:54 PM",
# "End_Station": "Broadway & W 24 St",
# "End_Time": "8/31/13 4:39:06 PM"
# },
# ....
# ]
# }
# }
## Required gems
require 'rest_client'
require 'pp'
require 'json'
require 'csv'
require 'time'
# require 'date'
## Keys
## Each credential is read from the environment variable of the same name when
## it is set, so real keys do not have to be written into this file. When the
## variable is unset the value falls back to the empty-string placeholder.
## Kimono API id, interpolated into the path of the Kimono request URL
KIMONO_API = ENV.fetch('KIMONO_API', '')
## Sent as the `apikey` query parameter of the Kimono request
KIMONO_API_KEY = ENV.fetch('KIMONO_API_KEY', '')
## Sent as the Bearer token in the Kimono request's authorization header
KIMONO_API_BEARER = ENV.fetch('KIMONO_API_BEARER', '')
## Sent as the `key` query parameter of the Google Distance Matrix request
GOOGLE_API_KEY = ENV.fetch('GOOGLE_API_KEY', '')
## CSV file the trip rows are appended to
CSV_NAME = 'trips.csv'
## Stations document handed to create_stations_hash
STATIONS_DOC = 'stations.json'
## Trip pages are requested for page numbers 1 through this value
NUMBER_PAGES_CITIBIKE_DATA = 13
## Convert a Citibike timestamp string into a Time object
## @param time [String] timestamp written like "8/31/13 4:30:54 PM"
## @return [Time] the parsed timestamp
def create_time_stamp(time)
  ## Zero-pad every run of digits to two places before parsing,
  ## e.g. "8/31/13 4:30:54 PM" becomes "08/31/13 04:30:54 PM"
  padded = time.gsub(/\d+/) { |digits| format('%02d', digits.to_i) }
  Time.strptime(padded, '%m/%d/%y %I:%M:%S %p')
end
## Build a lookup from station name to coordinates
## @param doc_name [String] path of a JSON document whose "stationBeanList"
##   key holds an array of station objects
## @return [Hash] maps each "stationName" to a [latitude, longitude] pair
def create_stations_hash(doc_name)
  ## Read in and parse the list of stations from the document that was passed
  ## in (previously the argument was ignored and 'stations.json' was always read)
  stations_list = JSON.parse(File.read(doc_name))['stationBeanList']
  ## Take the stations list and map station names to lat / lon
  stations_list.each_with_object({}) do |station, stations_hash|
    stations_hash[station['stationName']] = [station['latitude'], station['longitude']]
  end
end
## Manually correct station names that are known to be wrong in the trip data
## @param station [String] station name as it appears in a trip row
## @return [String] the replacement name when one is listed, otherwise the
##   name that was passed in
def catch_errors(station)
  ## Wrong name => name to use instead
  replacements = {
    "E 47 St & 2 Ave" => "Greenwich Ave & Charles St",
    "Lawrence St & Willoughby St" => "Lafayette St & Jersey St"
  }
  replacements.fetch(station, station)
end
## Build the station name => [lat, lon] lookup used for every trip row
stations_hash = create_stations_hash(STATIONS_DOC)
## Set up the csv with column titles.
## The file is opened in append mode, so the titles are only written when the
## file is missing or empty; otherwise a re-run would insert a second header
## row in the middle of the data already collected.
unless File.size?(CSV_NAME)
  CSV.open(CSV_NAME, 'a') do |csv|
    csv << %w[
      start_station start_station_lat start_station_lon start_time
      end_station end_station_lat end_station_lon end_time
      actual_duration estimated_duration estimated_distance
    ]
  end
end
## Iterate over all the pages of trip data on citibike's website.
## For every trip on a page: correct known station-name errors, compute the
## actual duration, look up both stations' coordinates, ask Google for a
## cycling estimate, and append one row to the CSV.
(1..NUMBER_PAGES_CITIBIKE_DATA).each do |page|
  pp "Collecting Page: #{page}"
  ## Get the data from the API and subset by the results
  kimono_response = RestClient.get(
    "https://www.kimonolabs.com/api/#{KIMONO_API}?apikey=#{KIMONO_API_KEY}&kimpath3=#{page}",
    { 'authorization' => "Bearer #{KIMONO_API_BEARER}" }
  )
  trips = JSON.parse(kimono_response)["results"]["collection1"]
  trips.each do |trip|
    begin
      ## Get the data wanted
      start_time = trip["Start_Time"]
      end_time = trip["End_Time"]
      ## Catch naming errors in Citibike data
      end_station = catch_errors(trip["End_Station"])
      start_station = catch_errors(trip["Start_Station"])
      ## Calculate the actual trip time (subtracting two Times gives seconds)
      start_time_stamp = create_time_stamp(start_time)
      end_time_stamp = create_time_stamp(end_time)
      actual_duration = end_time_stamp - start_time_stamp
      ## Turn station names into geocodes. fetch raises for a station that is
      ## not in the lookup, so such a row is skipped by the rescue below
      ## instead of being written with blank coordinates.
      start_station_lat, start_station_lon = stations_hash.fetch(start_station)
      end_station_lat, end_station_lon = stations_hash.fetch(end_station)
      ## Get Google Estimates For Cycling Time and Distance.
      ## Both stay 'NA' when the request or the response parsing fails, so the
      ## trip is still recorded without an estimate.
      estimated_distance = 'NA'
      estimated_duration = 'NA'
      begin
        google_response = RestClient.get("https://maps.googleapis.com/maps/api/distancematrix/json?origins=#{start_station_lat},#{start_station_lon}&destinations=#{end_station_lat},#{end_station_lon}&key=#{GOOGLE_API_KEY}&mode=bicycling")
        element = JSON.parse(google_response)['rows'][0]['elements'][0]
        estimated_distance = element['distance']['value']
        estimated_duration = element['duration']['value']
      rescue => e
        pp "Google Distance Matrix Error: #{e.message}"
      end
      ## Open the CSV and save the data. The file is reopened for each row so
      ## every completed trip is on disk before the next request is made.
      CSV.open(CSV_NAME, 'a') do |csv|
        csv << [start_station, start_station_lat, start_station_lon, start_time, end_station, end_station_lat, end_station_lon, end_time, actual_duration, estimated_duration, estimated_distance]
      end
    ## Catch an error if the row is missing a station etc.
    rescue => e
      pp "NOT COMPLETE TRIP DATA: #{e.message}"
    end
  end
  ## Sometimes there are throttling issues with Kimono
  ## This pause between pages is a safety measure against that
  sleep 20
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.