Skip to content

Instantly share code, notes, and snippets.

@ashaegupta
Created September 28, 2011 17:49
Show Gist options
  • Save ashaegupta/1248644 to your computer and use it in GitHub Desktop.
Save ashaegupta/1248644 to your computer and use it in GitHub Desktop.
Scrapes web for airline arrival terminals
import urllib2
from BeautifulSoup import BeautifulSoup
class Terminals():
    """Scrape airline arrival terminals per airport from panynj.gov.

    After construction, ``self.terminals`` maps each airport code in
    ``self.airports`` to a dict of ``{airline name: arrival terminal}``
    built from that airport's "<code>-airlines.html" page.

    Note: constructing an instance performs one HTTP request per airport
    and prints the collected mapping.
    """

    # Airport codes scraped when the caller does not supply its own list.
    DEFAULT_AIRPORTS = ("jfk", "ewr", "lga")

    def __init__(self, airports=None):
        """Collect terminals for ``airports`` (default: jfk, ewr, lga).

        airports -- optional iterable of airport codes as they appear in
                    the panynj.gov URL; ``None`` selects the defaults.
        """
        self.terminals = {}
        if airports is None:
            airports = self.DEFAULT_AIRPORTS
        self.airports = list(airports)
        self.getAllTerminals()

    def getAllTerminals(self):
        """Fill ``self.terminals`` for every airport, then print the result."""
        for airport in self.airports:
            self.terminals[airport] = self.getTerminals(airport)
        # Single-argument print(...) behaves identically with the Python 2
        # print statement and the Python 3 print function.
        print(self.terminals)

    def getTerminals(self, airport):
        """Return ``{airline: arrival terminal}`` for a single airport.

        airport -- airport code inserted into the page URL (e.g. "jfk").

        The first table on the page is parsed; for each row the link text
        in the first cell is used as the airline name and the text of the
        fifth cell as the arrival terminal. Rows that do not have that
        shape (fewer than five cells, or no link in the first cell) are
        skipped instead of aborting the scrape.

        Raises whatever ``urllib2.urlopen`` raises on network failure, and
        ``IndexError`` if the page contains no table at all.
        """
        airport_html = "http://www.panynj.gov/airports/" + airport + "-airlines.html"

        # Close the HTTP response explicitly so the socket is not leaked.
        response = urllib2.urlopen(airport_html)
        try:
            page = response.read()
        finally:
            response.close()

        soup = BeautifulSoup(page)
        table = soup('table')[0]
        # Not every table wraps its rows in <tbody>; fall back to the table.
        body = table.tbody
        if body is None:
            body = table

        terms = {}
        for row in body('tr'):
            tds = row('td')
            if len(tds) < 5:
                # Header/spacer rows lack the arrival-terminal column.
                continue
            link = tds[0].a
            if link is None:
                # No airline link in the first cell: nothing to key on.
                continue
            terms[link.string] = tds[4].string
        return terms
@shreyansb
Copy link

you can get rid of line 12 since line 15 does the same thing
otherwise this looks good!

@shreyansb
Copy link

where are you saving the stuff?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment