Skip to content

Instantly share code, notes, and snippets.

@ttjoseph
Created July 3, 2010 15:59
Show Gist options
  • Save ttjoseph/462655 to your computer and use it in GitHub Desktop.
Save ttjoseph/462655 to your computer and use it in GitHub Desktop.
Finds the cheapest place for a bunch of people to fly to by screen-scraping ITA Software's Matrix fare search site
#!/usr/bin/python
import urllib, urllib2
import re, cookielib
import os, time, sys
from BeautifulSoup import BeautifulSoup
# File the cookie jar is loaded from at startup (when it exists).
COOKIE_FILE = 'cookies.dat'

# Endpoints on the ITA Matrix site; every URL hangs off BASE_URL.
BASE_URL = "http://matrix1.itasoftware.com"
INIT_URL = BASE_URL + "/cvg/dispatch"
MAIN_URL = BASE_URL + "/cvg/dispatch/guest"
SUBMIT_URL = BASE_URL + "/cvg/dispatch/prego/submit"

# Form fields posted with every fare query. find_flights() copies this dict
# and overrides the airport and date entries before submitting.
default_params = {
    # --- query kind ---
    'cvgQuery.pogoClient': 0,
    'cvgQuery.tripType': 1,
    'cvgQuery.queryType': 0,

    # --- origin side of the outbound slice ---
    'cvgQuery.outboundSlice.originSet.userInput': '',
    'cvgQuery.outboundSlice.originSet.radiusMiles': '0',
    'cvgQuery.outboundSlice.originSet.userMonth': 6,  # Months start from 0
    'cvgQuery.outboundSlice.originSet.userDay': 6,  # Days start from 1
    'cvgQuery.outboundSlice.originSet.dayWindowTag': '0%0',
    'cvgQuery.outboundSlice.originSet.modeTag': 'depart',  # arrive
    'cvgQuery.outboundSlice.originSet.timeWindowTag': '00%24',

    # --- destination side of the outbound slice ---
    'cvgQuery.outboundSlice.destinationSet.userInput': '',
    'cvgQuery.outboundSlice.destinationSet.radiusMiles': '0',
    'cvgQuery.outboundSlice.destinationSet.userMonth': 6,
    'cvgQuery.outboundSlice.destinationSet.userDay': 8,
    'cvgQuery.outboundSlice.destinationSet.dayWindowTag': '0%0',  # -1%0, 0%1
    'cvgQuery.outboundSlice.destinationSet.modeTag': 'depart',
    'cvgQuery.outboundSlice.destinationSet.timeWindowTag': '00%24',

    # --- passenger counts ---
    'cvgQuery.adultCount': 1,
    'cvgQuery.seniorCount': 0,
    'cvgQuery.youthCount': 0,
    'cvgQuery.childCount': 0,
    'cvgQuery.infantInSeatCount': 0,
    'cvgQuery.infantOnLapCount': 0,

    # --- search options ---
    'cvgQuery.maximumNumberOfStops': -1,  # -1 is unlimited
    'cvgQuery.salesCity': 'BOS',
    'cvgQuery.serviceClass': 3,  # 3=any incl. Y, 2=C/J, 1=F
    'cvgQuery.checkAvailability': 'true',
    'cvgQuery.allowAirportChanges': 'true',
}

# Filled in by login_as_guest() once a guest session has been opened.
session_id = None
def priorities(p):
    '''Return a query-string fragment naming every parameter in p.

    For each key yielded by iterating p (a dict or any iterable of names),
    one "&priority-normal=<key>" pair is emitted, in iteration order. The
    fragment starts with "&" so it can be appended directly to an
    already-encoded parameter string. An empty p yields ''.

    ITA really seems to want that.
    '''
    # Build the fragment in one join instead of repeated string concatenation.
    return ''.join('&priority-normal=%s' % name for name in p)
def login_as_guest():
    '''Open a guest session and tag the module-level URLs with its id.

    Fetches INIT_URL, looks for anchors whose alt text is
    "Log in as a guest", and takes the jsessionid from the href of the last
    one that carries it. The id is stored in the global session_id and
    appended to the globals MAIN_URL and SUBMIT_URL as ";jsessionid=<id>".
    MAIN_URL is then fetched once and the response body discarded.

    If no such anchor is found, session_id is left as '' and the URLs get an
    empty ";jsessionid=" suffix.
    '''
    global session_id, MAIN_URL, SUBMIT_URL
    soup = BeautifulSoup(urllib2.urlopen(INIT_URL))

    session_id = ''
    for link in soup('a', alt='Log in as a guest'):
        found = re.search('jsessionid=(.+)', link.get('href', ''))
        # An anchor without a jsessionid in its href is skipped rather than
        # crashing on a None match.
        if found:
            session_id = found.group(1)

    suffix = ';jsessionid=' + session_id
    # Drop any suffix left by an earlier call so a repeated login replaces
    # the session id instead of stacking a second one onto the URL.
    MAIN_URL = MAIN_URL.split(';jsessionid=')[0] + suffix
    SUBMIT_URL = SUBMIT_URL.split(';jsessionid=')[0] + suffix

    urllib2.urlopen(MAIN_URL).read()
# Dollar amount, with or without thousands separators ("$950", "$1,234").
_FARE_RE = re.compile(r'\$([0-9]{1,3}(?:,[0-9]{3})+|[0-9]+)')


def _refresh_target(soup):
    '''Return the absolute URL of the last meta-refresh tag in soup, or None.'''
    target = None
    for meta in soup('meta'):
        # Tags without http-equiv, or whose content has no URL= part, are
        # not redirects and are ignored.
        if not meta.get('http-equiv'):
            continue
        found = re.search('URL=(.+)', meta.get('content', ''))
        if found:
            target = BASE_URL + found.group(1)
    return target


def find_flights(orig, dest, depmonth, depday, retmonth, retday):
    '''Submit a fare query and scrape the fares and carriers from the result.

    orig, dest       -- values for the origin/destination userInput fields
    depmonth, depday -- written to the originSet userMonth/userDay fields
    retmonth, retday -- written to the destinationSet userMonth/userDay fields

    The query is posted to SUBMIT_URL, then the meta-refresh redirects are
    followed (printing a dot per request and sleeping one second between
    polls) until the fetched URL contains "split-page-frameset". The frame
    named "solution-pane" is then fetched and every table cell containing a
    dollar amount contributes one fare.

    Returns a (fares, carriers) pair of parallel lists: fares as ints, in
    page order, and the text of the cell following each fare cell. Both are
    empty when the frameset has no "solution-pane" frame.

    Raises RuntimeError if the submit response contains no redirect to poll.
    '''
    fares = []
    carriers = []

    params = default_params.copy()
    params['cvgQuery.outboundSlice.originSet.userInput'] = orig
    params['cvgQuery.outboundSlice.destinationSet.userInput'] = dest
    # NOTE(review): default_params comments say months start from 0, while the
    # module-level caller passes 7 and prints it as "7/17". The "+ 1" offset is
    # preserved as found -- confirm which month numbering the site expects.
    params['cvgQuery.outboundSlice.originSet.userMonth'] = depmonth + 1
    params['cvgQuery.outboundSlice.originSet.userDay'] = depday
    params['cvgQuery.outboundSlice.destinationSet.userMonth'] = retmonth + 1
    # The return day belongs in userDay; storing it under userMonth would
    # overwrite the month assigned on the previous line.
    params['cvgQuery.outboundSlice.destinationSet.userDay'] = retday

    page = urllib2.urlopen(SUBMIT_URL,
                           urllib.urlencode(params) + priorities(params))
    soup = BeautifulSoup(page)

    # Wait for the query results page to come up
    url = None
    while True:
        # When a page carries no redirect, poll the previous URL again.
        url = _refresh_target(soup) or url
        if url is None:
            raise RuntimeError('no meta refresh redirect in the query response')
        page = urllib2.urlopen(url)
        soup = BeautifulSoup(page)
        sys.stdout.write('. ')
        sys.stdout.flush()
        if re.search('split-page-frameset', page.geturl()):
            break
        time.sleep(1)

    # Extract fare information
    solution_url = None
    for frame in soup('frame'):
        if frame.get('name') == 'solution-pane':
            solution_url = frame['src']
    if solution_url is None:
        return fares, carriers

    results = BeautifulSoup(urllib2.urlopen(solution_url))
    for cell in results.findAll('td'):
        price = _FARE_RE.search(str(cell))
        if price:
            fares.append(int(price.group(1).replace(',', '')))
            # The carrier is taken from the cell right after the fare cell.
            carriers.append(cell.findNextSibling('td').contents[0].strip())
    return fares, carriers
# Set up cookie jar and route every urllib2 request through it.
cj = cookielib.LWPCookieJar()
if os.path.isfile(COOKIE_FILE):
    cj.load(COOKIE_FILE)
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
urllib2.install_opener(opener)

# Each print below takes one pre-formatted string in parentheses, which the
# print statement accepts unchanged.
print("Searching %s." % BASE_URL)
print("Logging in as guest.")
login_as_guest()

origins = ['NYC']
destinations = ['HAN', 'ICN', 'NRT', 'CTS', 'BKK', 'KUL', 'BKI']

# Departure and return dates
depmonth, depday = 7, 17
retmonth, retday = 7, 24
print("Departure on %d/%d, return on %d/%d." % (depmonth, depday, retmonth, retday))

# totals[dest] accumulates the first fare found from every origin to dest.
totals = {}
for dest in destinations:
    totals[dest] = 0
    for orig in origins:
        if orig == dest:
            continue
        fares, carriers = find_flights(orig, dest, depmonth, depday, retmonth, retday)
        if not fares:
            print("Can't find a fare for %s-%s. Giving up." % (orig, dest))
            # Non-zero status so callers of the script can tell it failed.
            sys.exit(1)
        print("Best price for %s-%s is $%d on %s" % (orig, dest, fares[0], carriers[0]))
        totals[dest] += fares[0]

print("Destinations sorted by total price:")
# Sort on (total, destination) pairs so the cheapest destination comes first.
for total, place in sorted((v, k) for (k, v) in totals.items()):
    print("%s: $%d" % (place, total))

# Save the cookie jar. Loading it next time allows us to skip the login step.
# cj.save(COOKIE_FILE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment