ijmbarr/GEscraping

## GEscraping
#Functions for scraping UK general election results from http://www.politicsresources.net/area/uk/edates.htm.
#Point "processAYear" at the index page for a year (e.g. "http://www.politicsresources.net/area/uk/ge50/").
#The function returns a list of dictionary objects representing the results for each constituency.

from bs4 import BeautifulSoup
import requests

def processAYear(url):
    """Scrape every constituency result linked from one year's index page.

    Downloads the index page at ``url``, picks out each link whose ``href``
    begins with "i", and parses every linked page with ``processAPage``.

    Args:
        url: Address of the year's index page. Link targets are appended to
            it verbatim, so it should end with "/".

    Returns:
        One flat list holding the constituency dictionaries from every
        linked page, in page order; an empty list when no link matches.
    """
    page = BeautifulSoup(requests.get(url).text)
    # Anchors with a missing or empty href are skipped; indexing [0] on
    # None or "" would raise instead.
    hrefs = [a.get("href") for a in page.findAll("a")]
    tables = [href for href in hrefs if href and href[0] == "i"]
    # Flatten with a comprehension: reduce() is not a builtin on Python 3 and
    # raises on an empty sequence when no initial value is supplied.
    return [area for t in tables for area in processAPage(url + t)]

def processAPage(url):
    """Fetch one results page and parse each constituency section on it.

    Only the text ahead of the first "<!-- endresult -->" marker is used.
    That text is split on "<!-- result -->"; the piece before the first
    marker is dropped and each remaining piece is handed to
    ``getConstituancyResults``.

    Args:
        url: Address of the results page to download.

    Returns:
        A list with one constituency dictionary per section, in page order.
    """
    html = requests.get(url).text
    body, _, _ = html.partition("<!-- endresult -->")
    sections = body.split("<!-- result -->")[1:]
    return [getConstituancyResults(BeautifulSoup(section)) for section in sections]

def getConstituancyResults(bs):
    """Build the result dictionary for one constituency section.

    Args:
        bs: Parsed markup for one constituency. Its first ``<b>`` element
            supplies the area name, its first ``<p>`` element the
            ";"-separated electorate/turnout summary, and each ``<tr>``
            element one candidate.

    Returns:
        A dict with keys "area" (str), "electorate" (int), "turnOut"
        (float) and "candidates" (list of dicts from ``getCandidate``).
        When the summary paragraph is absent or cannot be parsed, both
        "electorate" and "turnOut" are set to "".
    """
    constituancyResults = {"area": bs.b.getText()}
    try:
        summary = bs.p.getText().split(";")
        # Drop the 12-character label ahead of the electorate figure
        # (presumably "Electorate: " -- confirm against the site markup).
        electorate = int(summary[0][12:].replace(",", ""))
        # Drop the 9-character label and the final character around the
        # turnout figure (presumably " Turnout:" and a trailing "%").
        turnOut = float(summary[1][9:-1])
    except (AttributeError, IndexError, ValueError):
        # Handle only the expected parse failures (no <p> element, no ";"
        # separator, non-numeric text). A bare except would also swallow
        # KeyboardInterrupt and unrelated programming errors.
        electorate = turnOut = ""
    constituancyResults["electorate"] = electorate
    constituancyResults["turnOut"] = turnOut
    constituancyResults["candidates"] = [getCandidate(row) for row in bs.findAll("tr")]

    return constituancyResults

def getCandidate(bs):
    """Extract one candidate's name, party and vote count from a table row.

    Args:
        bs: Parsed ``<tr>`` element; its first three ``<td>`` cells are read
            as name, party and votes respectively.

    Returns:
        A dict with keys "name" (str), "party" (str) and "votes" (int).
        "votes" is -1 when the third cell is missing or is not a number.

    Raises:
        IndexError: If the row has fewer than two ``<td>`` cells.
    """
    cells = bs.findAll("td")  # look the cells up once rather than three times
    candidate = {
        "name": cells[0].getText(),
        "party": cells[1].getText(),
    }
    try:
        # Vote counts use "," as a thousands separator.
        candidate["votes"] = int(cells[2].getText().replace(",", ""))
    except (IndexError, ValueError):
        # Catch only a missing cell or non-numeric text; a bare except would
        # also hide KeyboardInterrupt and genuine programming errors.
        candidate["votes"] = -1

    return candidate
	#Functions for scraping UK general election results from http://www.politicsresources.net/area/uk/edates.htm.
	#Point "processAYear" at the index page for a year (e.g. "http://www.politicsresources.net/area/uk/ge50/").
	#The function returns a list of dictionary objects representing the results for each constituency.

	from bs4 import BeautifulSoup
	import requests

	def processAYear(url):
	    """Scrape every constituency result linked from one year's index page.

	    Downloads the index page at ``url``, picks out each link whose
	    ``href`` begins with "i", and parses every linked page with
	    ``processAPage``.

	    Args:
	        url: Address of the year's index page. Link targets are appended
	            to it verbatim, so it should end with "/".

	    Returns:
	        One flat list holding the constituency dictionaries from every
	        linked page, in page order; an empty list when no link matches.
	    """
	    page = BeautifulSoup(requests.get(url).text)
	    # Anchors with a missing or empty href are skipped; indexing [0] on
	    # None or "" would raise instead.
	    hrefs = [a.get("href") for a in page.findAll("a")]
	    tables = [href for href in hrefs if href and href[0] == "i"]
	    # Flatten with a comprehension: reduce() is not a builtin on Python 3
	    # and raises on an empty sequence when no initial value is supplied.
	    return [area for t in tables for area in processAPage(url + t)]

	def processAPage(url):
	    """Fetch one results page and parse each constituency section on it.

	    Only the text ahead of the first "<!-- endresult -->" marker is used.
	    That text is split on "<!-- result -->"; the piece before the first
	    marker is dropped and each remaining piece is handed to
	    ``getConstituancyResults``.

	    Args:
	        url: Address of the results page to download.

	    Returns:
	        A list with one constituency dictionary per section, in page
	        order.
	    """
	    page = requests.get(url)
	    con = page.text.split("<!-- endresult -->")[0].split("<!-- result -->")[1:]
	    # The loop body is indented under the loop so that the block parses.
	    areas = []
	    for c in con:
	        areas.append(getConstituancyResults(BeautifulSoup(c)))
	    return areas

	def getConstituancyResults(bs):
	    """Build the result dictionary for one constituency section.

	    Args:
	        bs: Parsed markup for one constituency. Its first ``<b>`` element
	            supplies the area name, its first ``<p>`` element the
	            ";"-separated electorate/turnout summary, and each ``<tr>``
	            element one candidate.

	    Returns:
	        A dict with keys "area" (str), "electorate" (int), "turnOut"
	        (float) and "candidates" (list of dicts from ``getCandidate``).
	        When the summary paragraph is absent or cannot be parsed, both
	        "electorate" and "turnOut" are set to "".
	    """
	    constituancyResults = {"area": bs.b.getText()}
	    try:
	        summary = bs.p.getText().split(";")
	        # Drop the 12-character label ahead of the electorate figure
	        # (presumably "Electorate: " -- confirm against the site markup).
	        electorate = int(summary[0][12:].replace(",", ""))
	        # Drop the 9-character label and the final character around the
	        # turnout figure (presumably " Turnout:" and a trailing "%").
	        turnOut = float(summary[1][9:-1])
	    except (AttributeError, IndexError, ValueError):
	        # Handle only the expected parse failures (no <p> element, no ";"
	        # separator, non-numeric text). A bare except would also swallow
	        # KeyboardInterrupt and unrelated programming errors.
	        electorate = turnOut = ""
	    constituancyResults["electorate"] = electorate
	    constituancyResults["turnOut"] = turnOut
	    constituancyResults["candidates"] = [getCandidate(row) for row in bs.findAll("tr")]

	    return constituancyResults

	def getCandidate(bs):
	    """Extract one candidate's name, party and vote count from a table row.

	    Args:
	        bs: Parsed ``<tr>`` element; its first three ``<td>`` cells are
	            read as name, party and votes respectively.

	    Returns:
	        A dict with keys "name" (str), "party" (str) and "votes" (int).
	        "votes" is -1 when the third cell is missing or is not a number.

	    Raises:
	        IndexError: If the row has fewer than two ``<td>`` cells.
	    """
	    cells = bs.findAll("td")  # look the cells up once rather than three times
	    candidate = {
	        "name": cells[0].getText(),
	        "party": cells[1].getText(),
	    }
	    try:
	        # Vote counts use "," as a thousands separator.
	        candidate["votes"] = int(cells[2].getText().replace(",", ""))
	    except (IndexError, ValueError):
	        # Catch only a missing cell or non-numeric text; a bare except
	        # would also hide KeyboardInterrupt and genuine programming errors.
	        candidate["votes"] = -1

	    return candidate