Skip to content

Instantly share code, notes, and snippets.

@shreyansb
Forked from ashaegupta/terminals_webscraper.py
Created September 28, 2011 17:51
Show Gist options
  • Save shreyansb/1248653 to your computer and use it in GitHub Desktop.
Save shreyansb/1248653 to your computer and use it in GitHub Desktop.
Scrapes the panynj.gov airport pages for each airline's arrival terminal at JFK, EWR, and LGA
import urllib2
from BeautifulSoup import BeautifulSoup
# Airport codes to scrape; each one is spliced into the page URL built by
# _getTerminals.
airport_list = [
    "jfk",
    "ewr",
    "lga",
]
def getAllTerminals(airports=None):
    """Collect the airline -> terminal mapping for each airport.

    Args:
        airports: Optional iterable of airport codes to look up. When
            omitted (or None), the module-level ``airport_list`` is used.

    Returns:
        A dict keyed by airport code; each value is whatever
        ``_getTerminals`` returned for that code.
    """
    if airports is None:
        airports = airport_list
    terminals = {}
    for airport in airports:
        # The helper is a plain module-level function; the previous
        # ``klass._getTerminals`` reference raised NameError because no
        # ``klass`` is defined in this module.
        terminals[airport] = _getTerminals(airport)
    return terminals
def _getTerminals(airport):
    """Scrape the airline table of one airport page.

    Args:
        airport: Airport code spliced into the page URL (e.g. "jfk").

    Returns:
        A dict mapping the link text of each row's first cell (the airline)
        to the text of that row's fifth cell (treated by this script as the
        arrival terminal). The dict is empty when the response body is
        empty, the page contains no table, or no row has the expected shape.

    Raises:
        urllib2.URLError: propagated from ``urllib2.urlopen`` on fetch
            failure.
    """
    terms = {}
    airport_url = ("http://www.panynj.gov/airports/"
                   + airport + "-airlines.html")

    response = urllib2.urlopen(airport_url)
    try:
        data = response.read()
    finally:
        # urllib2 responses are not context managers in Python 2, so close
        # the connection explicitly instead of leaking it.
        response.close()

    if not data:
        # Return an empty dict rather than None so every path yields the
        # same type.
        return terms

    soup = BeautifulSoup(data)
    tables = soup('table')
    if not tables:
        return terms

    table = tables[0]
    # Fall back to the table itself when the markup has no explicit <tbody>;
    # calling None('tr') would otherwise raise TypeError.
    body = table.tbody
    if body is None:
        body = table

    for row in body('tr'):
        tds = row('td')
        # Skip header/spacer rows: they lack the fifth cell or the airline
        # link this scraper relies on.
        if len(tds) < 5 or tds[0].a is None:
            continue
        airline = tds[0].a.string
        arrival_terminal = tds[4].string
        terms[airline] = arrival_terminal
    return terms
# Example usage: build the {airport: {airline: terminal}} mapping.
# NOTE: this executes at module level, so importing this file triggers the
# page fetches performed by getAllTerminals().
resp = getAllTerminals()
# TODO: persist `resp` somewhere (storage step left open by the author).
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment