Last active
March 25, 2019 10:25
-
-
Save tomncooper/6142e7696d6173443ed68102193ed4c9 to your computer and use it in GitHub Desktop.
Script to extract signature data from the UK Government Petitions website (Python 3.4+)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import os | |
import datetime as dt | |
from urllib.request import urlopen | |
from csv import DictWriter | |
from argparse import ArgumentParser | |
# URL template for the petition API; "{}" is replaced with the petition ID number.
PETITION_URL = "https://petition.parliament.uk/petitions/{}.json"


def get_petition_data(petition_number):
    """ Gets a dictionary version of the JSON string returned by the petition API.

    Arguments:
        petition_number (str): The petition ID number. This will be the number at the
                               end of the petition page's URL.

    Returns:
        A dictionary version of the full JSON string returned by the petition API.

    Raises:
        RuntimeError: If the API returns a non 200 status code for the supplied petition
                      ID. The original HTTPError (if any) is chained as the cause.
        urllib.error.URLError: If the server could not be reached at all.
    """
    # Local import: only this function needs HTTPError, so the module's import
    # block does not have to change.
    from urllib.error import HTTPError

    url_str = PETITION_URL.format(petition_number)
    print("Fetching data from petition URL: {}".format(url_str))

    failure_msg = (
        "Failed to get results from petition URL {}. Got {} "
        "response from server"
    )

    # urlopen raises HTTPError for 4xx/5xx responses instead of returning them, so
    # translate that into the RuntimeError this function documents.
    try:
        response = urlopen(url_str)
    except HTTPError as err:
        raise RuntimeError(failure_msg.format(url_str, err.code)) from err

    # The context manager guarantees the connection is closed, even on error.
    with response:
        # Any other non-200 status that urlopen did hand back is rejected too.
        if response.code != 200:
            raise RuntimeError(failure_msg.format(url_str, response.code))
        payload = response.read()

    # Decode explicitly: json.loads only accepts bytes from Python 3.6 onwards.
    return json.loads(payload.decode("utf-8"))
def _save_csv(data_list, fieldnames, filepath): | |
""" Helper function to save the supplied list of dictionaries to a csv file.""" | |
with open(filepath, "w") as output_file: | |
writer = DictWriter(output_file, fieldnames=fieldnames) | |
writer.writeheader() | |
writer.writerows(data_list) | |
def save_petition_data(petition_id, output_dir=None):
    """ Saves the country and constituency breakdown of the petition to separate csv
    files named "<petition ID>_signatures_by_country_<datetime>.csv" and
    "<petition ID>_signatures_by_constituency_<datetime>.csv".

    Arguments:
        petition_id (str): The petition ID number. This will be the number at the end of
                           the petition page's URL.
        output_dir (str): Optional output directory to save the output CSVs to. It is
                          created if it does not exist. If not supplied, the files
                          are written to the current working directory.

    Raises:
        RuntimeError: If the API returns a non 200 status code for the supplied petition
                      ID.
        OSError: If the output directory cannot be created.
    """
    # Resolve (and if necessary create) the output directory
    if output_dir:
        out_path = os.path.expanduser(output_dir)
        if os.path.isdir(out_path):
            print("Saving output files in existing directory:{}".format(output_dir))
        else:
            print(
                "Supplied directory: {} does not exist. Will create it now.".format(
                    output_dir
                )
            )
            # Genuine failures (permissions, path is a file, ...) propagate
            # rather than being mistaken for "directory already exists".
            os.makedirs(out_path, exist_ok=True)
    else:
        # Joining with an empty directory leaves the bare filename, i.e. the
        # current working directory.
        out_path = ""

    data = get_petition_data(petition_id)
    date_str = dt.datetime.now().strftime("%Y-%m-%d_%H-%M")
    attributes = data["data"]["attributes"]

    # (breakdown name, CSV columns) for each file to write. The name selects both
    # the "signatures_by_<name>" key in the API response and the output filename.
    breakdowns = (
        ("country", ["name", "code", "signature_count"]),
        ("constituency", ["name", "ons_code", "mp", "signature_count"]),
    )

    for kind, fieldnames in breakdowns:
        rows = attributes["signatures_by_{}".format(kind)]
        filename = "{}_signatures_by_{}_{}.csv".format(petition_id, kind, date_str)
        filepath = os.path.join(out_path, filename)
        # Always write to the full path so both files honour output_dir.
        _save_csv(rows, fieldnames, filepath)
        print("Wrote {} breakdown to: {}".format(kind, filepath))
if __name__ == "__main__":
    # Command line entry point: parse the petition ID and optional output
    # directory, then fetch the petition and write both CSV breakdowns.
    DESCRIPTION = (
        "Script to extract signature data from the UK Government Petitions "
        "website (https://petition.parliament.uk/) and places output in CSV "
        "files; one for signatures by country, the other for signatures by "
        "constituency."
    )
    PETITION_ID_HELP = (
        "The petition ID number, this will be the number at the end of the "
        "URL on the petition website"
    )
    OUT_DIR_HELP = (
        "Output directory in which to place the CSV files. If this does "
        "not exist it will be created. If not supplied files will be placed "
        "in the same directory in which the script is run"
    )

    PARSER = ArgumentParser(description=DESCRIPTION)
    # Positional: the petition to download
    PARSER.add_argument("petition_id", help=PETITION_ID_HELP)
    # Optional: where to write the CSV files
    PARSER.add_argument("-o", "--out_dir", required=False, help=OUT_DIR_HELP)

    ARGS = PARSER.parse_args()
    save_petition_data(ARGS.petition_id, output_dir=ARGS.out_dir)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment