# Created January 17, 2015 13:50
# Functions for scraping UK general election results from the web.
# Point "processAYear" at the index page for a year (e.g. "").
# The function returns a list of dictionary objects representing the results for each constituency.
from bs4 import BeautifulSoup
import requests
def processAYear(url):
    """Scrape every results page linked from a year's index page.

    Args:
        url: address of the index page. Each matching link's href is
            appended directly to this string, so it is expected to end
            in a way that makes ``url + href`` a valid page address.

    Returns:
        A single flat list containing the constituency dictionaries
        returned by processAPage for every linked page, in link order.
        An index page with no matching links yields an empty list.
    """
    page = BeautifulSoup(requests.get(url).text)
    # Only links whose href begins with "i" are followed. Anchors with no
    # href (or an empty one) are skipped instead of raising.
    hrefs = (a.get("href") for a in page.findAll("a"))
    tables = [h for h in hrefs if h and h.startswith("i")]
    results = []
    for t in tables:
        # extend() flattens the per-page lists without needing reduce(),
        # which is not a builtin on Python 3 and fails on an empty list.
        results.extend(processAPage(url + t))
    return results
def processAPage(url):
    """Download one results page and parse each constituency on it.

    Args:
        url: address of a single results page.

    Returns:
        A list with one dictionary (as built by getConstituancyResults)
        per "<!-- result -->" fragment found on the page.
    """
    page = requests.get(url)
    # Keep only the text before the first end marker, then cut it at every
    # start marker; the piece before the first start marker is discarded.
    con = page.text.split("<!-- endresult -->")[0].split("<!-- result -->")[1:]
    # NOTE(review): the loop body was missing from the source; restored as
    # parsing each fragment with getConstituancyResults - confirm.
    areas = [getConstituancyResults(BeautifulSoup(c)) for c in con]
    return areas
def getConstituancyResults(bs):
    """Build the result dictionary for one constituency fragment.

    Args:
        bs: parsed HTML fragment exposing ``.b``, ``.p`` and
            ``findAll("tr")`` (a BeautifulSoup object in this file).

    Returns:
        A dict with keys "area" (text of the first <b>), "electorate"
        (int), "turnOut" (float) and "candidates" (one getCandidate dict
        per <tr>). When the summary paragraph cannot be parsed, both
        "electorate" and "turnOut" are set to "".
    """
    constituancyResults = {}
    constituancyResults["area"] = bs.b.getText()
    try:
        # The first <p> holds two ";"-separated fields. The fixed slices
        # drop the leading characters of each field (presumably its label)
        # and the last character of the turnout field.
        summary = bs.p.getText().split(";")
        electorate = int(summary[0][12:].replace(",", ""))
        turnOut = float(summary[1][9:-1])
    except (AttributeError, IndexError, ValueError):
        # Fallback only when parsing fails; previously these values
        # unconditionally overwrote the parsed ones.
        electorate = ""
        turnOut = ""
    constituancyResults["electorate"] = electorate
    constituancyResults["turnOut"] = turnOut
    # NOTE(review): the loop body was missing from the source; restored as
    # collecting one getCandidate result per table row - confirm.
    constituancyResults["candidates"] = [
        getCandidate(row) for row in bs.findAll("tr")
    ]
    return constituancyResults
def getCandidate(bs):
    """Build the dictionary for one candidate row.

    Args:
        bs: parsed table row exposing ``findAll("td")`` (a BeautifulSoup
            tag in this file); cells are read as name, party, votes.

    Returns:
        A dict with keys "name", "party" and "votes". "votes" is an int
        with thousands separators removed, or -1 when the third cell is
        missing or not numeric.

    Raises:
        IndexError: if the row has fewer than two <td> cells.
    """
    cells = bs.findAll("td")
    candidate = {}
    candidate["name"] = cells[0].getText()
    candidate["party"] = cells[1].getText()
    try:
        candidate["votes"] = int(cells[2].getText().replace(",", ""))
    except (IndexError, ValueError):
        # -1 marks an unparseable count; previously it unconditionally
        # replaced the parsed value.
        candidate["votes"] = -1
    return candidate
