Functions for scraping election results from http://www.politicsresources.net/area/uk/edates.htm.
# Functions for scraping UK general election results from http://www.politicsresources.net/area/uk/edates.htm.
# Point "processAYear" at the index page for a year (e.g. "http://www.politicsresources.net/area/uk/ge50/").
# The function returns a list of dictionary objects representing the results for each constituency.
from functools import reduce

from bs4 import BeautifulSoup
import requests


def processAYear(url):
    """Scrape every constituency result linked from a year's index page."""
    page = BeautifulSoup(requests.get(url).text, "html.parser")
    # Constituency result pages are linked with relative hrefs starting with "i".
    tables = [x.get("href") for x in page.find_all("a")
              if x.get("href") and x.get("href")[0] == "i"]
    results = []
    for t in tables:
        results.append(processAPage(url + t))
    # Flatten the per-page lists into a single list of constituency dictionaries.
    return reduce(lambda x, y: x + y, results, [])


def processAPage(url):
    """Scrape all constituency results from a single results page."""
    page = requests.get(url)
    # Constituency blocks are delimited by "<!-- result -->" / "<!-- endresult -->" HTML comments.
    con = page.text.split("<!-- endresult -->")[0].split("<!-- result -->")[1:]
    areas = []
    for c in con:
        areas.append(getConstituancyResults(BeautifulSoup(c, "html.parser")))
    return areas


def getConstituancyResults(bs):
    """Extract the area name, electorate, turnout and candidate rows for one constituency."""
    constituancyResults = {}
    constituancyResults["area"] = bs.b.getText()
    try:
        constituancyResults["electorate"] = int(bs.p.getText().split(";")[0][12:].replace(",", ""))
        constituancyResults["turnOut"] = float(bs.p.getText().split(";")[1][9:-1])
    except (AttributeError, IndexError, ValueError):
        # Some pages omit the electorate/turnout line; leave the fields blank.
        constituancyResults["electorate"] = ""
        constituancyResults["turnOut"] = ""
    constituancyResults["candidates"] = []
    for c in bs.find_all("tr"):
        constituancyResults["candidates"].append(getCandidate(c))
    return constituancyResults


def getCandidate(bs):
    """Extract name, party and vote count from a single candidate table row."""
    candidate = {}
    cells = bs.find_all("td")
    candidate["name"] = cells[0].getText()
    candidate["party"] = cells[1].getText()
    try:
        candidate["votes"] = int(cells[2].getText().replace(",", ""))
    except (IndexError, ValueError):
        # Rows without a parseable vote count get -1 as a sentinel.
        candidate["votes"] = -1
    return candidate
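

# A minimal usage sketch, assuming the 1950 index URL from the header comment is still
# reachable and the page layout is unchanged (the printed counts are illustrative only).
if __name__ == "__main__":
    results = processAYear("http://www.politicsresources.net/area/uk/ge50/")
    print("Scraped %d constituencies" % len(results))
    # Each entry is a dict with "area", "electorate", "turnOut" and a "candidates" list
    # of {"name", "party", "votes"} dicts.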