@richardjpope
Last active December 10, 2015 15:09
Oyster Card backup script for ScraperWiki.com Vault
# This is a very basic script to back up Oyster card data to a ScraperWiki vault
# Notes:
# 1) You need an Oyster card that has been registered on tfl.gov.uk
# 2) This script requires you to enter your username and password (think about what that means before proceeding, and do so at your own risk)
# 3) Run this script in a PRIVATE SCRAPERWIKI VAULT ONLY (https://scraperwiki.com/pricing/), not a public scraper, or the world will know your password
import scraperwiki
import mechanize
import lxml.html
from lxml.etree import tostring
import csv
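# note: mechanize, lxml and the scraperwiki library are assumed to be preinstalled
# in the ScraperWiki environment; elsewhere you would need to install them yourself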
username = 'YOUR TFL USERNAME'
password = 'YOUR TFL PASSWORD'
#setup browser
br = mechanize.Browser()
#br.set_all_readonly(False) # allow everything to be written to
br.set_handle_robots(False) # no robots
br.set_handle_refresh(False) # can sometimes hang without this
br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
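# open the Oyster login page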
response = br.open('https://oyster.tfl.gov.uk/oyster/entry.do')
#get the login form
br.form = list(br.forms())[0]
username_input = br.form.find_control("j_username")
password_input = br.form.find_control("j_password")
#enter password and submit
username_input.value = username
password_input.value = password
response = br.submit()
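# note: there is no explicit check that the login succeeded; if the credentials
# are wrong, the 'Journey history' link below simply won't be found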
#find the journey history link
journey_history_link = None
for link in br.links():
    if link.text == 'Journey history':
        journey_history_link = link
        break
#if no history link, then raise an exception
if journey_history_link is None:
    raise Exception('Failed to find journey history link')
#go to the journey history page
response = br.follow_link(journey_history_link)
# mechanize doesn't seem to like the HTML here, so use lxml to find the download link
root = lxml.html.fromstring(response.read())
download_link = root.cssselect("form#jhDownloadForm input")[0]
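# the download control has no plain href: its onclick handler sets the form action
# and submits it, so strip the surrounding JavaScript to recover the URL path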
download_href = download_link.attrib['onclick'].replace('document.jhDownloadForm.action="', '').replace('";document.jhDownloadForm.submit();', '')
download_href = 'https://oyster.tfl.gov.uk' + download_href
# download the csv
response = br.open(download_href)
# read and save
csv_reader = csv.DictReader(response.read().splitlines())
for row in csv_reader:
    row['Journey Action'] = row['Journey/Action'] # rename this column, as the database objects to the slash
    del row['Journey/Action']
    scraperwiki.sqlite.save(unique_keys=['Start Time', 'End Time', 'Date', 'Journey Action'], data=row)
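# the unique keys mean this behaves as an upsert: re-running the script should
# update existing journeys in the vault rather than creating duplicates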