Instantly share code, notes, and snippets.

Embed
What would you like to do?
Functions for scraping election results from http://www.politicsresources.net/area/uk/edates.htm.
#Functions for scraping UK general election results from http://www.politicsresources.net/area/uk/edates.htm.
#Point "processAYear" at the index page for a year (e.g. "http://www.politicsresources.net/area/uk/ge50/").
#The function returns a list of dictionary objects representing the results for each constituency.
from bs4 import BeautifulSoup
import requests
def processAYear(url):
    """Scrape every constituency result for one general-election year.

    ``url`` is the index page for the year, e.g.
    "http://www.politicsresources.net/area/uk/ge50/".  It is used as a plain
    string prefix for the links found on that page, so it needs to end with
    "/" for those links to resolve.

    Returns one flat list containing the constituency dictionaries that
    processAPage produced for every linked page, in link order.  The list is
    empty when the index page links to no such pages.
    """
    # NOTE: no parser is named, so bs4 picks the best one installed; pin one
    # here if results must be identical across machines.
    page = BeautifulSoup(requests.get(url).text)

    # Only links whose href starts with "i" are followed.  Anchors with a
    # missing or empty href are skipped instead of raising.
    tables = []
    for anchor in page.findAll("a"):
        href = anchor.get("href")
        if href and href[0] == "i":
            tables.append(href)

    # extend() flattens as it goes: no dependency on the Python-2-only
    # built-in reduce(), and an empty link list simply yields [].
    results = []
    for t in tables:
        results.extend(processAPage(url + t))
    return results
def processAPage(url):
    """Download one results page and parse each constituency block on it.

    Returns a list with one dictionary per block, as built by
    getConstituancyResults, in the order the blocks appear in the page.
    """
    html = requests.get(url).text

    # Everything from the first "<!-- endresult -->" marker onwards is ignored.
    body = html.split("<!-- endresult -->")[0]

    # Each "<!-- result -->" marker starts a block; the text ahead of the
    # first marker is dropped.
    fragments = body.split("<!-- result -->")[1:]

    return [getConstituancyResults(BeautifulSoup(fragment)) for fragment in fragments]
def getConstituancyResults(bs):
    """Build the result dictionary for one parsed constituency block.

    ``bs`` is the parsed markup of a single block.  Its first ``<b>`` supplies
    the area name, its first ``<p>`` the "electorate; turnout" summary and
    each ``<tr>`` one candidate.

    Returns a dict with the keys:
      "area"       - text of the first <b> element
      "electorate" - int, or "" when the summary could not be parsed
      "turnOut"    - float, or "" when the summary could not be parsed
      "candidates" - list of getCandidate() dictionaries, one per <tr>

    Raises AttributeError when the block has no <b> element.
    """
    constituancyResults = {}
    constituancyResults["area"] = bs.b.getText()

    try:
        summary = bs.p.getText().split(";")
        # The slices strip fixed-width labels: 12 characters before the
        # electorate figure, 9 before the turnout figure plus one trailing
        # character after it.
        electorate = int(summary[0][12:].replace(",", ""))
        turnOut = float(summary[1][9:-1])
    except (AttributeError, IndexError, ValueError):
        # No <p>, no ";" separator, or non-numeric text.  Both fields are
        # blanked together, even if only one of them failed to parse.
        electorate = ""
        turnOut = ""
    constituancyResults["electorate"] = electorate
    constituancyResults["turnOut"] = turnOut

    constituancyResults["candidates"] = [getCandidate(row) for row in bs.findAll("tr")]
    return constituancyResults
def getCandidate(bs):
    """Turn one results-table row into a candidate dictionary.

    ``bs`` is the parsed ``<tr>`` element.  Its first ``<td>`` is taken as the
    candidate's name, the second as the party and the third as the vote count
    (commas used as thousands separators are removed).

    Returns a dict with the keys "name", "party" and "votes".  "votes" is -1
    when the row has no third cell or that cell is not a whole number.

    Raises IndexError when the row has fewer than two ``<td>`` cells.
    """
    cells = bs.findAll("td")  # look the cells up once, not once per field

    candidate = {}
    candidate["name"] = cells[0].getText()
    candidate["party"] = cells[1].getText()
    try:
        candidate["votes"] = int(cells[2].getText().replace(",", ""))
    except (IndexError, ValueError):
        # Missing or non-numeric vote cell; -1 marks "no count available".
        candidate["votes"] = -1
    return candidate
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment