# davetromp/plus500tickers.py

## plus500tickers.py
import re
import time
import mechanize


# Scrape plus500.com for its instrument ticker symbols and, for each symbol,
# the external data-service link shown on that instrument's page.
#
# Outputs (both written to the current working directory):
#   plus500Tickers.txt        one ticker symbol per line
#   plus500TickersSource.csv  "<link href>,<link text>,<ticker symbol>" per line

br = mechanize.Browser()
br.set_handle_robots(False)   # ignore robots
br.set_handle_refresh(False)  # can sometimes hang without this
br.addheaders = [('User-agent', 'Firefox')]

# first get all ticker symbols as used on plus500.com
f = br.open("http://www.plus500.com/allinstruments/allinstruments.aspx").read()
p = '<td class="symbol">(.*)&nbsp;</td>'
r = re.findall(p, f)
# write those to a file
with open('plus500Tickers.txt', 'w') as fileOut:
    fileOut.write('\n'.join(r))

# then open up each ticker symbol plus500 page
# find out if this page references data from yahoo or google or any other
# service

# The pattern is the same for every instrument page, so build it once.
# Group 1 is the link's href, group 2 is the link's text.
p2 = '<a id="ctl00_ContentPlaceMain1_HyperLinkInstrumentLink" href="(.*)" target="_blank">(.*)</a>'

tickerlist = []
for item in r:
    time.sleep(1)  # pause between requests
    url = "http://www.plus500.com/Instruments/" + item
    try:
        x = br.open(url).read()
    except Exception as e:
        # Best effort: report the failure and move on to the next symbol.
        # Skipping here matters: without it the parse step below would run
        # against the previous iteration's page (or an undefined name on the
        # first iteration) and record a link for the wrong ticker.
        print(str(e))
        print("failed to open url " + url)
        continue

    r2 = re.findall(p2, x)
    if not r2:
        # The page loaded but does not contain the expected link.
        print("failed to parse html from url " + url)
        continue

    # Only the first matching link on the page is recorded.
    r2str = ",".join((str(r2[0][0]), str(r2[0][1]), str(item)))
    tickerlist.append(r2str)
    print(r2str)

# write urls to data services with data service name to file
with open('plus500TickersSource.csv', 'w') as fileOut:
    fileOut.write('\n'.join(tickerlist))