Created
November 17, 2009 20:58
-
-
Save lkarsten/237258 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# mjaff | |
import re, sys, os, time | |
from mechanize import Browser | |
from pysqlite2 import dbapi2 as sqlite | |
import codecs | |
# Codec handed out by the catch-all search function registered below.
z = codecs.lookup("iso8859-1")


def fo(s):
    """Codec search function that answers every request with ISO-8859-1.

    The requested encoding name ``s`` is ignored; the pre-fetched codec
    ``z`` is returned unconditionally.
    """
    return z


# Install the catch-all in the interpreter's codec search path.
codecs.register(fo)
import urllib2 | |
import cookielib | |
# Module-level placeholder; get_listing() binds its own local ``br``.
br = None


def dl_link_from_details(url):
    """Unimplemented stub: accept ``url``, do nothing and return None."""
    return None
# Link patterns on the result page.  Raw strings keep the regex escapes
# (``\&`` and ``\)``) out of Python's own string-escape processing.
_HOTEL_CODE_RE = re.compile(r'p-hot=(.{3})\&')
_OFFER_REF_RE = re.compile(r'ref=(.*)\);')

# _parse_offer reads fields up to index 10 of the ';'-separated reference.
_MIN_REF_FIELDS = 11


def _collect_hotel_names(links):
    """Map each three-character ``p-hot`` code to the text of its first link."""
    hotels = {}
    for link in links:
        match = _HOTEL_CODE_RE.search(link.url)
        if match:
            # setdefault keeps the first link text seen for a given code.
            hotels.setdefault(match.group(1), link.text)
    return hotels


def _parse_offer(link, hotels):
    """Build an offer dict from one price link, or return None if malformed."""
    match = _OFFER_REF_RE.search(link.url)
    if not match:
        return None
    fields = match.group(1).split(";")
    if len(fields) < _MIN_REF_FIELDS:
        # Too short to index safely; treated like a link without a ref.
        return None
    return {
        "hotelcode": fields[9],
        "hotelkey": "%s:%s" % (fields[8], fields[9]),
        "roomtype": fields[10],
        "hotelname": hotels.get(fields[9]),
        "startdate": fields[3],
        "length": fields[6],
        "price": link.text,
    }


def get_listing():
    """Scrape the booking site and return the offers found on the result page.

    The search criteria are hard-coded in the query string below.

    Returns:
        A list of dicts with the keys ``hotelcode``, ``hotelkey``,
        ``roomtype``, ``hotelname``, ``startdate``, ``length`` and
        ``price``.  ``hotelname`` is None when no link on the page carried
        the offer's hotel code; ``price`` is the raw text of the price link.
        Price links that cannot be parsed are reported on stdout and skipped.
    """
    import ClientForm

    # NOTE(review): this URL says p-valuta=NOK / p-fortag while the search URL
    # below says p-valuta=NO / p-foretag; both are kept exactly as they were -
    # confirm against the site before "fixing" either.
    url = "http://ksb.apollo.no/cgi-bin/bokning.cgi/b_ds111.p?p-valuta=NOK&p-fortag=APO"

    br = Browser()
    br.open(url)

    # The start page is requested a second time through urllib2 so that
    # ClientForm can parse it; only the value of control 'x' is needed.
    response = urllib2.urlopen(urllib2.Request(url))
    try:
        forms = ClientForm.ParseResponse(response, backwards_compat=False)
    finally:
        # The parsed forms no longer need the socket; do not leak it.
        response.close()
    x_value = forms[0]['x']

    search_url = (
        'http://ksb.apollo.no/cgi-bin/bokning.cgi/b_ds111.p'
        '?p-val=1&p-avrort=TRD&p-datum=20090606&month=20096&day=6&s_date='
        '&p-destLand=GR&p-ort=NAX&p-resl=14'
        '&p-antpers=2&p-antung=0&p-antbarn=0&p-antinf=0'
        '&x=' + x_value +
        '&p-agent=&p-saljare=&p-agkontakt=&p-foretag=APO&p-oldvaldavrort=TRD'
        '&p-size=&ef=n&p-fr=&p-valuta=NO&p-val='
        '&p-topage=%2Fcgi-bin%2Fbokning.cgi%2Fb_dscalc.p'
    )
    br.open(search_url)

    # Submitting the form at index 2 leads to the page that lists the offers.
    br.select_form(nr=2)
    br.submit()

    hotels = _collect_hotel_names(br.links())

    offers = []
    for link in br.links(url_regex='price'):
        offer = _parse_offer(link, hotels)
        if offer is None:
            print("meh")
            print(link)
            continue
        offers.append(offer)
    return offers
def create_db(con, cursor):
    """Create the ``ap_naxos`` table and its timestamp trigger, then commit.

    Both statements use ``IF NOT EXISTS``, so calling this on a database
    that already has the schema is a no-op instead of an error.

    Args:
        con: Open database connection; used only to commit.
        cursor: Cursor on ``con`` used to run the DDL statements.
    """
    cursor.execute(
        "CREATE TABLE IF NOT EXISTS ap_naxos ("
        "id INTEGER PRIMARY KEY, reg_date DATE, "
        "hotelkey VARCHAR(15), startdate INTEGER, roomtype VARCHAR(5), "
        "price INTEGER, length INTEGER, hotelname VARCHAR(60))"
    )
    # Stamp every new row with its insertion time.  'now' is single-quoted:
    # a double-quoted "NOW" is an identifier in standard SQL and is only
    # accepted as a string by SQLite builds that keep that legacy fallback.
    cursor.execute(
        "CREATE TRIGGER IF NOT EXISTS insert_reg_date AFTER INSERT ON ap_naxos "
        "BEGIN "
        "UPDATE ap_naxos SET reg_date = DATETIME('now') "
        "WHERE rowid = new.rowid; "
        "END;"
    )
    con.commit()
def main():
    """Fetch the current offers and append them to the ``ap_naxos`` table.

    Exits without touching the database when no offers were scraped.  The
    schema is created on demand when the table is missing, which also
    covers a database file left empty by an interrupted first run.
    """
    offers = get_listing()
    if not offers:
        sys.exit()

    db_name = 'reisepriser.db'
    con = sqlite.connect(db_name)
    try:
        cursor = con.cursor()

        # Key schema creation on the table, not on the file: connect()
        # creates the file even if the run dies before the table exists.
        cursor.execute(
            "SELECT 1 FROM sqlite_master "
            "WHERE type = 'table' AND name = 'ap_naxos'"
        )
        if cursor.fetchone() is None:
            create_db(con, cursor)

        rows = [
            (o['hotelkey'], o['startdate'], o['roomtype'],
             o['price'], o['length'], o['hotelname'])
            for o in offers
        ]
        cursor.executemany(
            "INSERT INTO ap_naxos (hotelkey, startdate, roomtype, price, length, hotelname) VALUES (?, ?, ?, ?, ?, ?)",
            rows,
        )
        con.commit()
    finally:
        # Release the database handle even if a statement above failed.
        con.close()


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment