Created
January 17, 2015 13:50
-
-
Save ijmbarr/a54cd05a01195875f8b1 to your computer and use it in GitHub Desktop.
Functions for scraping election results from http://www.politicsresources.net/area/uk/edates.htm.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Functions for scraping UK general election results from http://www.politicsresources.net/area/uk/edates.htm.
# Point "processAYear" at the index page for a year (e.g. "http://www.politicsresources.net/area/uk/ge50/").
# The function returns a list of dictionaries, one per constituency, holding that constituency's results.
from bs4 import BeautifulSoup | |
import requests | |
def processAYear(url):
    """Scrape every constituency result linked from a year's index page.

    The index page at ``url`` is downloaded and each ``<a>`` whose ``href``
    starts with ``"i"`` is treated as a results page. The ``href`` is appended
    directly to ``url`` (no separator is inserted), so ``url`` is expected to
    end with ``"/"`` as in the example ``.../area/uk/ge50/``.

    Args:
        url: Address of the index page for one election year.

    Returns:
        A flat list containing the constituency dictionaries produced by
        ``processAPage`` for every linked results page, in link order. The
        list is empty when the index page has no matching links.
    """
    page = BeautifulSoup(requests.get(url).text)
    results = []
    for link in page.findAll("a"):
        href = link.get("href")
        # Anchors without an href yield None (and may be empty); skip them
        # instead of crashing on href[0].
        if href and href.startswith("i"):
            # extend() flattens as we go, so no reduce() is needed; reduce is
            # not a builtin on Python 3 and fails on an empty sequence.
            results.extend(processAPage(url + href))
    return results
def processAPage(url):
    """Download one results page and parse each constituency on it.

    Only the text before the first ``<!-- endresult -->`` marker is used.
    That text is cut at every ``<!-- result -->`` marker; the piece before
    the first marker is dropped and each remaining piece is parsed as one
    constituency.

    Args:
        url: Address of a single results page.

    Returns:
        A list with one dictionary per constituency, as returned by
        ``getConstituancyResults``.
    """
    html = requests.get(url).text
    # Discard everything from the first end marker onwards.
    body = html.partition("<!-- endresult -->")[0]
    # The chunk ahead of the first result marker is page preamble.
    chunks = body.split("<!-- result -->")[1:]
    return [getConstituancyResults(BeautifulSoup(chunk)) for chunk in chunks]
def getConstituancyResults(bs):
    """Build the result dictionary for a single constituency.

    Args:
        bs: Parsed HTML fragment for one constituency. Its first ``<b>``
            element supplies the area name, its first ``<p>`` element the
            electorate/turnout text, and each ``<tr>`` one candidate.

    Returns:
        A dict with the keys ``"area"``, ``"electorate"``, ``"turnOut"`` and
        ``"candidates"``. ``electorate`` is an int and ``turnOut`` a float
        when the ``<p>`` text can be parsed; otherwise both are ``""``.
        ``candidates`` is a list of the dicts returned by ``getCandidate``.
    """
    constituancyResults = {}
    constituancyResults["area"] = bs.b.getText()
    try:
        # The <p> text is split on ";": part 0 has 12 leading characters
        # before the electorate figure; part 1 has 9 leading characters and
        # one trailing character around the turnout figure.
        fields = bs.p.getText().split(";")
        electorate = int(fields[0][12:].replace(",", ""))
        turnOut = float(fields[1][9:-1])
    except (AttributeError, IndexError, ValueError):
        # AttributeError: no <p> element; IndexError: no ";" separator;
        # ValueError: text is not numeric. Both fields are blanked together,
        # so a row never carries only one of the two figures.
        electorate = ""
        turnOut = ""
    constituancyResults["electorate"] = electorate
    constituancyResults["turnOut"] = turnOut
    constituancyResults["candidates"] = [
        getCandidate(row) for row in bs.findAll("tr")
    ]
    return constituancyResults
def getCandidate(bs):
    """Extract one candidate's name, party and vote count from a table row.

    Args:
        bs: Parsed table row whose ``<td>`` cells hold, in order, the
            candidate's name, party and number of votes.

    Returns:
        A dict with the keys ``"name"``, ``"party"`` and ``"votes"``.
        ``votes`` is an int with thousands separators removed, or ``-1``
        when the third cell is missing or is not a whole number.

    Raises:
        IndexError: If the row has fewer than two ``<td>`` cells.
    """
    # Look the cells up once instead of re-querying the row for each field.
    cells = bs.findAll("td")
    candidate = {}
    candidate["name"] = cells[0].getText()
    candidate["party"] = cells[1].getText()
    try:
        candidate["votes"] = int(cells[2].getText().replace(",", ""))
    except (IndexError, ValueError):
        # IndexError: no votes cell; ValueError: non-numeric cell text.
        candidate["votes"] = -1
    return candidate
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment