Skip to content

Instantly share code, notes, and snippets.

@jaganadhg
Last active August 29, 2015 14:26
Show Gist options
  • Save jaganadhg/805f9eec68217f2c7dd7 to your computer and use it in GitHub Desktop.
Save jaganadhg/805f9eec68217f2c7dd7 to your computer and use it in GitHub Desktop.
Downloading the Citi Bike Data with Python
import contextlib
import glob
import os
import shutil
import zipfile

try:
    import urllib2
except ImportError:  # Python 3 moved urlopen into urllib.request
    import urllib.request as urllib2
def cbd_downloader(url, path):
    """
    Download the Citi Bike Station Data.

    The file is saved inside ``path`` under the last component of ``url``.

    :param url: string url of the data file
    :param path: string - directory to save the file in (must already exist)
    """
    file_name = url.split("/")[-1]
    # os.path.join supplies the separator that plain string concatenation
    # dropped whenever ``path`` had no trailing slash.
    target = os.path.join(path, file_name)
    # closing() because Python 2 urllib2 responses are not context managers.
    with contextlib.closing(urllib2.urlopen(url)) as response:
        with open(target, "wb") as outfile:
            # Copy in chunks instead of holding the whole archive in memory.
            shutil.copyfileobj(response, outfile)
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print("DONE " + file_name)
def cbd_unzip(zip_file, extract_path):
    """
    Extract the Citi Bike Data zip files.

    :param zip_file: string - path of the zip archive to extract
    :param extract_path: string - directory the archive members are written to
    """
    with zipfile.ZipFile(zip_file, "r") as zipped_data:
        zipped_data.extractall(extract_path)
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print("Done " + zip_file)
if __name__ == "__main__":
    # Monthly trip archives, July 2013 through June 2015.
    url_list = [
        "https://s3.amazonaws.com/tripdata/201307-citibike-tripdata.zip",
        "https://s3.amazonaws.com/tripdata/201308-citibike-tripdata.zip",
        "https://s3.amazonaws.com/tripdata/201309-citibike-tripdata.zip",
        "https://s3.amazonaws.com/tripdata/201310-citibike-tripdata.zip",
        "https://s3.amazonaws.com/tripdata/201311-citibike-tripdata.zip",
        "https://s3.amazonaws.com/tripdata/201312-citibike-tripdata.zip",
        "https://s3.amazonaws.com/tripdata/201401-citibike-tripdata.zip",
        "https://s3.amazonaws.com/tripdata/201402-citibike-tripdata.zip",
        "https://s3.amazonaws.com/tripdata/201403-citibike-tripdata.zip",
        "https://s3.amazonaws.com/tripdata/201404-citibike-tripdata.zip",
        "https://s3.amazonaws.com/tripdata/201405-citibike-tripdata.zip",
        "https://s3.amazonaws.com/tripdata/201406-citibike-tripdata.zip",
        "https://s3.amazonaws.com/tripdata/201407-citibike-tripdata.zip",
        "https://s3.amazonaws.com/tripdata/201408-citibike-tripdata.zip",
        "https://s3.amazonaws.com/tripdata/201409-citibike-tripdata.zip",
        "https://s3.amazonaws.com/tripdata/201410-citibike-tripdata.zip",
        "https://s3.amazonaws.com/tripdata/201411-citibike-tripdata.zip",
        "https://s3.amazonaws.com/tripdata/201412-citibike-tripdata.zip",
        "https://s3.amazonaws.com/tripdata/201501-citibike-tripdata.zip",
        "https://s3.amazonaws.com/tripdata/201502-citibike-tripdata.zip",
        "https://s3.amazonaws.com/tripdata/201503-citibike-tripdata.zip",
        "https://s3.amazonaws.com/tripdata/201504-citibike-tripdata.zip",
        "https://s3.amazonaws.com/tripdata/201505-citibike-tripdata.zip",
        "https://s3.amazonaws.com/tripdata/201506-citibike-tripdata.zip",
    ]

    data_dir = "citi_bike_data"
    # open() does not create missing directories, so make sure the download
    # target exists first (explicit check keeps this usable on Python 2,
    # where os.makedirs has no exist_ok argument).
    if not os.path.isdir(data_dir):
        os.makedirs(data_dir)

    # Trailing separator keeps directory and file name apart even when the
    # downloader builds the target by plain string concatenation.
    download_path = os.path.join(data_dir, "")
    for url in url_list:
        cbd_downloader(url, download_path)

    # Match the archives inside the directory; globbing the bare directory
    # name only returns the directory itself, which is not a zip file.
    for zip_path in sorted(glob.glob(os.path.join(data_dir, "*.zip"))):
        cbd_unzip(zip_path, "citi_bike_data/csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment