Last active
June 25, 2016 00:50
-
-
Save nickrobson/28707e4ade4469fc18c02bde423aa610 to your computer and use it in GitHub Desktop.
Brexit Results Scraper: Gets current voting status.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
import re
import sys
import urllib2

import bs4
# Common prefix of the BBC EU-referendum local-results pages; get_urls()
# appends a single lowercase letter to it to form each page URL.
BASE_URL = 'http://www.bbc.co.uk/news/politics/eu_referendum/results/local/'

# Matches a run of digits with optional comma-separated groups, e.g. "42" or
# "1,234,567"; used to pull the vote figure out of a result cell's text.
VOTES = re.compile(r'[0-9]+(?:,[0-9]+)*')
def get_urls():
    """Return the list of results-page URLs, one per letter 'a' to 'z'.

    Each URL is BASE_URL with a single lowercase ASCII letter appended,
    in alphabetical order.
    """
    return [BASE_URL + letter for letter in 'abcdefghijklmnopqrstuvwxyz']
def _votes_text(result, party_class):
    """Return the stripped votes text for one side of a result bar, or None.

    ``party_class`` is the CSS class of the side's container div. None is
    returned when either that container or its votes cell is absent.
    """
    party = result.find('div', class_=party_class)
    if party is None:
        return None
    cell = party.find('div', class_='eu-ref-result-bar__votes')
    if cell is None:
        return None
    return cell.get_text().strip()


def _parse_votes(text):
    """Return ``(display, count)`` for the text of one votes cell.

    When ``text`` contains a figure matched by VOTES, ``display`` is that
    figure as written and ``count`` is its integer value. Otherwise the text
    is passed through unchanged with a count of 0, so a cell without a
    figure is still printed but adds nothing to the totals.
    """
    match = VOTES.search(text)
    if match is None:
        return text, 0
    figure = match.group(0)
    return figure, int(figure.replace(',', ''))


# Running totals across every page; read by the summary printed at the end
# of the script.
TOTAL_LEAVE = 0
TOTAL_REMAIN = 0

# Single-argument print(...) behaves the same as a Python 2 print statement
# and as the Python 3 print function.
print('## Format: [place] | [leave votes] | [remain votes]')
print('')

for url in get_urls():
    # Only the fetch is guarded. A page that answers with an HTTP error is
    # still skipped (best effort), but the skip is reported on stderr so a
    # partial total is not mistaken for a complete one; stdout stays
    # table-only.
    try:
        response = urllib2.urlopen(url)
    except urllib2.HTTPError as err:
        sys.stderr.write('Skipping %s: HTTP %s\n' % (url, err.code))
        continue
    try:
        content = response.read()
    finally:
        # Release the connection even if the read fails.
        response.close()

    soup = bs4.BeautifulSoup(content, 'html.parser')
    for result in soup.find_all('div', class_='eu-ref-result-bar'):
        heading = result.find('h3')
        leave_text = _votes_text(result, 'eu-ref-result-bar__party--leave')
        remain_text = _votes_text(result, 'eu-ref-result-bar__party--remain')
        if heading is None or leave_text is None or remain_text is None:
            # find() returns None for a missing element; skip this bar
            # instead of letting an AttributeError abort the whole run.
            sys.stderr.write('Skipping malformed result on %s\n' % url)
            continue

        lvotes, leave_count = _parse_votes(leave_text)
        rvotes, remain_count = _parse_votes(remain_text)
        TOTAL_LEAVE += leave_count
        TOTAL_REMAIN += remain_count
        print('%s | %s | %s' % (heading.get_text().strip(), lvotes, rvotes))
def prettify(n):
    """Return integer ``n`` as a string with commas between thousands groups.

    Values whose magnitude is below 1000 are returned without any comma.
    Negative values keep their sign and are grouped like positive ones.

    >>> prettify(999)
    '999'
    >>> prettify(1234567)
    '1,234,567'
    >>> prettify(-1500)
    '-1,500'
    """
    # The ',' format option (Python 2.7+ / 3.1+) groups the digits and keeps
    # the sign in front, which a "n < 1000" shortcut gets wrong for negatives.
    return format(n, ',')
# Blank separator line, then the grand totals in the same pipe-delimited
# layout as the per-place rows above.
print('')
print(' '.join([
    'Total Votes |',
    prettify(TOTAL_LEAVE),
    '|',
    prettify(TOTAL_REMAIN),
]))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment