Skip to content

Instantly share code, notes, and snippets.

@tomncooper
Last active March 25, 2019 10:25
Show Gist options
  • Save tomncooper/6142e7696d6173443ed68102193ed4c9 to your computer and use it in GitHub Desktop.
Save tomncooper/6142e7696d6173443ed68102193ed4c9 to your computer and use it in GitHub Desktop.
Script to extract signature data from the UK Government Petitions website (Python 3.4+)
import json
import os
import datetime as dt
from urllib.request import urlopen
from csv import DictWriter
from argparse import ArgumentParser
# URL template for a petition's JSON document; the "{}" placeholder is filled
# with the petition ID number via str.format().
PETITION_URL = "https://petition.parliament.uk/petitions/{}.json"
def get_petition_data(petition_number):
    """Get a dictionary version of the JSON string returned by the petition API.

    Arguments:
        petition_number (str): The petition ID number. This will be the number
            at the end of the petition page's URL.

    Returns:
        A dictionary version of the full JSON string returned by the petition API.

    Raises:
        RuntimeError: If the API returns a non 200 status code for the supplied
            petition ID.
        urllib.error.URLError: Raised by ``urlopen`` itself when the request
            fails; this includes ``HTTPError`` for HTTP error responses, which
            is raised before the status check below is reached.
    """
    url_str = PETITION_URL.format(petition_number)
    print("Fetching data from petition URL: {}".format(url_str))

    # Use the response as a context manager so the underlying connection is
    # always closed, including when the status check raises.
    with urlopen(url_str) as raw_result:
        if raw_result.code != 200:
            raise RuntimeError(
                (
                    "Failed to get results from petition URL {}. Got {} "
                    "response from server"
                ).format(url_str, raw_result.code)
            )

        # read() returns bytes. json.loads() only accepts bytes from Python 3.6
        # onwards, so decode explicitly to keep the advertised 3.4+ support.
        json_result = json.loads(raw_result.read().decode("utf-8"))

    return json_result
def _save_csv(data_list, fieldnames, filepath):
    """Helper function to save the supplied list of dictionaries to a csv file.

    Arguments:
        data_list (list): The dictionaries to write, one per CSV row.
        fieldnames (list): The column names, in output order. These are written
            as the header row and used to pick the values out of each dictionary.
        filepath (str): Path of the CSV file to create (or overwrite).

    Raises:
        ValueError: If a dictionary contains a key that is not in fieldnames
            (the DictWriter default behaviour).
    """
    # newline="" hands line-ending control to the csv module; without it each
    # row ends in "\r\r\n" on platforms that translate "\n" on write (Windows).
    # An explicit UTF-8 encoding keeps non-ASCII names writable regardless of
    # the platform's default encoding.
    with open(filepath, "w", newline="", encoding="utf-8") as output_file:
        writer = DictWriter(output_file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(data_list)
def save_petition_data(petition_id, output_dir=None):
    """Save the country and constituency breakdown of the petition to csv files.

    Two files are written, named
    "<petition ID>_signatures_by_country_<datetime>.csv" and
    "<petition ID>_signatures_by_constituency_<datetime>.csv".

    Arguments:
        petition_id (str): The petition ID number. This will be the number at
            the end of the petition page's URL.
        output_dir (str): Optional output directory to save the output CSVs to.
            A leading "~" is expanded and the directory is created if it does
            not exist. When omitted, files are written using the bare filename
            (i.e. relative to the current working directory).

    Raises:
        RuntimeError: If the API returns a non 200 status code for the supplied
            petition ID.
        OSError: If the output directory cannot be created.
    """
    out_path = None
    if output_dir:
        out_path = os.path.expanduser(output_dir)
        if os.path.isdir(out_path):
            print("Saving output files in existing directory:{}".format(output_dir))
        else:
            print(
                "Supplied directory: {} does not exist. Will create it now.".format(
                    output_dir
                )
            )
            # exist_ok guards against the directory appearing between the
            # check above and this call. Any other failure (e.g. permission
            # denied) is left to propagate instead of being reported as
            # "directory exists".
            os.makedirs(out_path, exist_ok=True)

    data = get_petition_data(petition_id)
    attributes = data["data"]["attributes"]

    # A single timestamp is shared by both files so they can be paired up later.
    date_str = dt.datetime.now().strftime("%Y-%m-%d_%H-%M")

    # (breakdown name, CSV columns) for each file that gets written.
    breakdowns = (
        ("country", ["name", "code", "signature_count"]),
        ("constituency", ["name", "ons_code", "mp", "signature_count"]),
    )

    for breakdown, fieldnames in breakdowns:
        rows = attributes["signatures_by_{}".format(breakdown)]
        filename = "{}_signatures_by_{}_{}.csv".format(
            petition_id, breakdown, date_str
        )
        # Always write to the full path so both files honour output_dir.
        filepath = os.path.join(out_path, filename) if out_path else filename
        _save_csv(rows, fieldnames, filepath)
        print("Wrote {} breakdown to: {}".format(breakdown, filepath))
if __name__ == "__main__":
    # Command line entry point: one positional petition ID plus an optional
    # output directory, both handed straight to save_petition_data().
    DESCRIPTION = (
        "Script to extract signature data from the UK Government Petitions website "
        "(https://petition.parliament.uk/) and places output in CSV files; one for "
        "signatures by country, the other for signatures by constituency."
    )
    PETITION_ID_HELP = (
        "The petition ID number, this will be the number at the end of the URL on "
        "the petition website"
    )
    OUT_DIR_HELP = (
        "Output directory in which to place the CSV files. If this does not "
        "exist it will be created. If not supplied files will be placed in the same "
        "directory in which the script is run"
    )

    PARSER = ArgumentParser(description=DESCRIPTION)
    PARSER.add_argument("petition_id", help=PETITION_ID_HELP)
    PARSER.add_argument("-o", "--out_dir", required=False, help=OUT_DIR_HELP)

    ARGS = PARSER.parse_args()
    save_petition_data(ARGS.petition_id, output_dir=ARGS.out_dir)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment