-
-
Save fmoliveira/1371600ef6d6d270f09ee86d0b1b1923 to your computer and use it in GitHub Desktop.
webscraping_nyc_mta
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Download the weekly turnstile data files linked from the MTA developer page.
#
# The script fetches the listing page, collects its links, and saves every
# linked turnstile file into the current working directory, pausing one
# second between downloads.

# Import libraries
import requests
import urllib.request
import time
from bs4 import BeautifulSoup

# Set the URL you want to webscrape from
url = 'http://web.mta.info/developers/turnstile.html'

# Connect to the URL. The timeout keeps the script from hanging forever on a
# stalled connection, and raise_for_status() stops right away on an HTTP
# error instead of parsing an error page as if it were the listing.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Parse HTML and save to BeautifulSoup object
soup = BeautifulSoup(response.text, "html.parser")

# Collect the 'a' tags (links) once, rather than re-scanning the whole
# document on every loop iteration.
a_tags = soup.findAll('a')

# To download the whole data set, loop through the 'a' tags.
# Anchors before index 36 are skipped, exactly as the original loop did.
# NOTE(review): presumably those are navigation links that precede the data
# files -- confirm against the live page, since the offset is layout-dependent.
# Slicing (instead of indexing up to len + 1) also avoids running one past the
# end of the list, which previously ended the script with an IndexError.
for one_a_tag in a_tags[36:]:
    link = one_a_tag.get('href')  # None when the anchor has no href

    # Only links containing '/turnstile_' can be turned into a local file
    # name below; skip anchors without an href and unrelated links.
    marker = link.find('/turnstile_') if link else -1
    if marker == -1:
        continue

    download_url = 'http://web.mta.info/developers/' + link
    # Local file name is everything after the slash that precedes 'turnstile_'.
    urllib.request.urlretrieve(download_url, './' + link[marker + 1:])
    time.sleep(1)  # pause the code for a sec
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment