Skip to content

Instantly share code, notes, and snippets.

@ckholmes5
Created September 23, 2016 00:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ckholmes5/3293d14de608eb37326816cc4128bf1e to your computer and use it in GitHub Desktop.
Save ckholmes5/3293d14de608eb37326816cc4128bf1e to your computer and use it in GitHub Desktop.
# Uber pickups, April '14 to September '14: one raw CSV per month, fetched
# from the FiveThirtyEight repository and saved under 'Uber Data/'.
base_url = 'https://raw.githubusercontent.com/fivethirtyeight/uber-tlc-foil-response/master/uber-trip-data/'
months = [
    'uber-raw-data-apr14.csv',
    'uber-raw-data-may14.csv',
    'uber-raw-data-jun14.csv',
    'uber-raw-data-jul14.csv',
    'uber-raw-data-aug14.csv',
    'uber-raw-data-sep14.csv',
]
for month in months:
    source = base_url + month
    # Column 0 is parsed as dates and becomes the frame's index.
    uber = pd.read_csv(source, index_col=0, parse_dates=[0])
    # home_dir comes from elsewhere in the file; the plain concatenation
    # presumably relies on it ending with a path separator - TODO confirm.
    target = home_dir + 'Uber Data/' + month
    uber.to_csv(target)
# For January '15 to June '15
# These months are published as one zipped CSV, so the archive is downloaded
# into memory and unpacked into the same 'Uber Data/' directory.
# io and shutil are imported here because this section introduces them.
import io
import shutil

r = requests.get('https://github.com/fivethirtyeight/uber-tlc-foil-response/raw/master/uber-trip-data/uber-raw-data-janjune-15.csv.zip')
# Stop on an HTTP error rather than passing an error page to ZipFile.
r.raise_for_status()
# r.content is raw bytes, so it needs a bytes buffer. io.BytesIO works on
# both Python 2.7 and Python 3, whereas StringIO.StringIO is Python 2 only.
with zipfile.ZipFile(io.BytesIO(r.content)) as z:
    # The with-block closes the archive even if extraction fails.
    z.extractall(home_dir + 'Uber Data/')
# Removing extra directory that zip puts in.
# os.rmdir only removes empty directories; rmtree also copes with a
# __MACOSX folder that still holds files (or that was never created).
shutil.rmtree(home_dir + 'Uber Data/__MACOSX', ignore_errors=True)
## Collecting Taxi Data - Warning: Total dataset is > 20 GB
# Two-digit month numbers covering the same periods as the Uber data.
months_2014 = ['04', '05', '06', '07', '08', '09']
months_2015 = ['01', '02', '03', '04', '05', '06']

# 2014 Data: yellow and green cab trip files for every month in months_2014.
for month in months_2014:
    yellow_url = 'https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2014-' + month + '.csv'
    green_url = 'https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2014-' + month + '.csv'

    # Each monthly file is loaded fully into a pandas frame ...
    yellow = pd.read_csv(yellow_url)
    green = pd.read_csv(green_url)

    # ... and then written under 'Cab Data/' as '<month>_<colour>_2014'
    # (the output names carry no '.csv' extension).
    yellow.to_csv(home_dir + 'Cab Data/' + month + '_yellow_2014')
    green.to_csv(home_dir + 'Cab Data/' + month + '_green_2014')
# 2015 Data: yellow, green and for-hire-vehicle (fhv) trip files for every
# month in months_2015.
for month in months_2015:
    yellow_url = 'https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2015-' + month + '.csv'
    green_url = 'https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2015-' + month + '.csv'
    fhv_url = 'https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2015-' + month + '.csv'

    # All three files for the month are read before anything is written.
    yellow = pd.read_csv(yellow_url)
    green = pd.read_csv(green_url)
    fhv = pd.read_csv(fhv_url)

    # Saved under 'Cab Data/' as '<month>_<kind>_2015' (no '.csv' extension).
    yellow.to_csv(home_dir + 'Cab Data/' + month + '_yellow_2015')
    green.to_csv(home_dir + 'Cab Data/' + month + '_green_2015')
    fhv.to_csv(home_dir + 'Cab Data/' + month + '_fhv_2015')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment