Skip to content

Instantly share code, notes, and snippets.

@njwilson23
Created May 12, 2017 05:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save njwilson23/e13f1b8f48a6ce579e8124db71b9f346 to your computer and use it in GitHub Desktop.
Save njwilson23/e13f1b8f48a6ce579e8124db71b9f346 to your computer and use it in GitHub Desktop.
Quick scraping code for British Columbia MLA (Member of the Legislative Assembly) voting records
import requests
import bs4
import json
import re
from concurrent.futures import ThreadPoolExecutor, as_completed
def parse_index(timeout=30):
    """Return the set of sub-pages that hold voting data.

    Downloads the hard-coded votes index page from www.leg.bc.ca, decodes
    the body as UTF-16 and collects the ``href`` of every ``<a>`` element
    whose target matches the ``2015VOTES?.htm`` pattern used below.

    Args:
        timeout: Seconds to wait for the server, passed straight to
            ``requests.get``. Pass ``None`` to wait indefinitely (the
            behaviour before this parameter existed).

    Returns:
        A set of href strings with any ``#fragment`` removed, so each
        sub-page appears once no matter how many anchors point into it.

    Raises:
        IOError: If the server answers with a status other than 200; the
            status code is the exception argument. ``requests`` may also
            raise its own exceptions, e.g. when the timeout expires.
    """
    url = "https://www.leg.bc.ca/content-hansard/Index/40th4th/2015votesmhds.htm"
    # Without a timeout a stalled connection would block the script forever.
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise IOError(response.status_code)

    # The body is decoded explicitly as UTF-16 before it is handed to the parser.
    soup = bs4.BeautifulSoup(response.content.decode("utf-16"), "html5lib")
    vote_links = soup.find_all(
        "a", attrs={"href": re.compile(r"2015VOTES.\.htm.*")}
    )
    # Many links differ only in their fragment; keep just the page part.
    return {a.attrs["href"].split("#")[0] for a in vote_links}
def parse_page(page, timeout=30):
    """Download one votes sub-page and parse every MLA section on it.

    Args:
        page: File name of the sub-page, appended to the hard-coded
            ``.../content-hansard/Index/40th4th/`` base URL.
        timeout: Seconds to wait for the server, passed straight to
            ``requests.get``. Pass ``None`` to wait indefinitely (the
            behaviour before this parameter existed).

    Returns:
        A list with one ``parse_mla_section`` result per ``<div>`` whose
        ``title`` attribute contains a semicolon.

    Raises:
        IOError: If the server answers with a status other than 200; the
            status code is the exception argument. ``requests`` may also
            raise its own exceptions, e.g. when the timeout expires.
    """
    url = "https://www.leg.bc.ca/content-hansard/Index/40th4th/{}".format(page)
    # Without a timeout a stalled connection would pin a worker thread forever.
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise IOError(response.status_code)

    # The body is decoded explicitly as UTF-16 before it is handed to the parser.
    soup = bs4.BeautifulSoup(response.content.decode("utf-16"), "html5lib")
    # Match every <div> that carries a title attribute at all.
    sections = soup.find_all("div", attrs={"title": re.compile(".*")})
    # Only titles containing ";" can be split by parse_mla_section, so the
    # remaining titled divs are skipped.
    return [
        parse_mla_section(section)
        for section in sections
        if ";" in section["title"]
    ]
def parse_mla_section(div):
    """Extract one MLA's recorded votes from a section element.

    Args:
        div: An element supporting ``div["title"]`` and
            ``div.find_all("li")`` (a BeautifulSoup tag in this script).
            The title must contain a semicolon; the text before the first
            one is taken as the MLA name and the rest as the section name.

    Returns:
        A dict with the keys ``"mla_name"`` (text before the semicolon,
        unmodified), ``"section"`` (text after it, whitespace-stripped) and
        ``"motions"`` (a dict mapping motion text to ``"Y"`` or ``"N"``).
        List items containing neither "Yea" nor "Nay" are ignored.

    Raises:
        ValueError: If the title contains no semicolon.
    """
    mla_name, section_name = div["title"].split(";", 1)
    motions = {}
    for li in div.find_all("li"):
        contents = " ".join(li.strings).strip()
        # "Yea" is tested first, so an item containing both words is
        # recorded as a yes vote.
        for marker, vote in (("Yea", "Y"), ("Nay", "N")):
            motion, found, _ = contents.partition(marker)
            if found:
                # The motion text is whatever precedes the marker, minus the
                # separating spaces and commas.
                motions[motion.strip(" ,")] = vote
                break
    return {
        "mla_name": mla_name,
        "section": section_name.strip(),
        "motions": motions,
    }
# Let 'er rip: fetch the index, scrape every sub-page on a pool of worker
# threads, and write the combined records to vote-data.json.
pages = parse_index()
votes = []
# The with-block shuts the pool down once all results are collected (or an
# exception escapes), instead of leaving the executor open.
with ThreadPoolExecutor(max_workers=8) as executor:
    futures = [executor.submit(parse_page, page) for page in pages]
    # Results are gathered in completion order, so the order of records in
    # the output can differ between runs. result() re-raises any exception
    # from the worker, which aborts the script.
    for future in as_completed(futures):
        votes.extend(future.result())
with open("vote-data.json", "w") as f:
    json.dump(votes, f)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment