Skip to content

Instantly share code, notes, and snippets.

@davetromp
Last active February 28, 2017 22:38
Show Gist options
  • Save davetromp/c29be580cd03b189d684 to your computer and use it in GitHub Desktop.
Get links to data referenced by plus500.com
import re
import time
import mechanize
# Scrape plus500.com for all instrument ticker symbols, then visit each
# instrument page to discover which external data service (yahoo, google, ...)
# it links to. Results: plus500Tickers.txt (symbols) and
# plus500TickersSource.csv (url,source-name,symbol rows).
br = mechanize.Browser()
br.set_handle_robots(False)   # ignore robots.txt
br.set_handle_refresh(False)  # can sometimes hang without this
br.addheaders = [('User-agent', 'Firefox')]

# First get all ticker symbols as used on plus500.com.
f = br.open("http://www.plus500.com/allinstruments/allinstruments.aspx").read()
p = r'<td class="symbol">(.*)&nbsp;</td>'
r = re.findall(p, f)

# Write those to a file.
with open('plus500Tickers.txt', 'w') as fileOut:
    fileOut.write('\n'.join(r))

# Then open up each ticker symbol's plus500 page and find out if that page
# references data from yahoo or google or any other service.
tickerlist = []
for item in r:
    time.sleep(1)  # throttle: one request per second to be polite
    try:
        url = "http://www.plus500.com/Instruments/" + item
        x = br.open(url).read()
    except Exception as e:
        print(str(e))
        print("failed to open url " + str(url))
        # BUGFIX: skip to the next symbol; without this, `x` is undefined on a
        # first-iteration failure and stale (previous page) on later failures.
        continue
    try:
        p2 = r'<a id="ctl00_ContentPlaceMain1_HyperLinkInstrumentLink" href="(.*)" target="_blank">(.*)</a>'
        r2 = re.findall(p2, x)
        # CSV row: data-service url, data-service name, plus500 ticker symbol.
        r2str = ",".join([str(r2[0][0]), str(r2[0][1]), str(item)])
        tickerlist.append(r2str)
        print(r2str)
    except Exception as e:
        # r2 is empty when the link element is absent -> IndexError lands here.
        print(str(e))
        print("failed to parse html from url " + str(url))

# Write urls to data services with data service name to file.
with open('plus500TickersSource.csv', 'w') as fileOut:
    fileOut.write('\n'.join(tickerlist))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment