Skip to content

Instantly share code, notes, and snippets.

@lorinc
Created March 8, 2022 12:36
Show Gist options
  • Save lorinc/cd8bbc376753b60325c4c61424005eaf to your computer and use it in GitHub Desktop.
Save lorinc/cd8bbc376753b60325c4c61424005eaf to your computer and use it in GitHub Desktop.
IPython notebook that I used to prototype the generation of the 3200 formal delegation letters
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"id": "f4f25983-fff1-43b6-967b-8ba3bf5ec285",
"metadata": {
"tags": []
},
"source": [
"## 1. installing required python libraries"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cdf271cd-aca3-414c-acb2-bf1963911292",
"metadata": {},
"outputs": [],
"source": [
"%%capture\n",
"%pip install jinja2 jsonschema"
]
},
{
"cell_type": "markdown",
"id": "94c0206b-b530-46a8-8633-6343a8293d01",
"metadata": {
"tags": []
},
"source": [
"## 2. generating documents"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6365bc98-5dd2-413d-af60-375bc9775db4",
"metadata": {},
"outputs": [],
"source": [
"# merging user data and template into html files\n",
"\n",
"import json\n",
"from jinja2 import Template\n",
"\n",
"################################################################# read\n",
"\n",
"def get_delegation_data(file_str='../input/delegate.json'):\n",
"    \"\"\"Load the JSON dump at `file_str` and return its delegation processes.\n",
"\n",
"    The returned value is whatever the parsed document stores under\n",
"    ``data -> delegation_processes``.\n",
"    \"\"\"\n",
"    with open(file_str, mode='r', encoding='utf-8') as dump_file:\n",
"        payload = json.load(dump_file)\n",
"    return payload['data']['delegation_processes']\n",
"\n",
"\n",
"def drop_old_runs(delegation_data):\n",
"    \"\"\"Yield only the entries that belong to the latest delegation round.\n",
"\n",
"    Args:\n",
"        delegation_data: iterable of dicts, each carrying a 'round' key.\n",
"\n",
"    Yields:\n",
"        The entries whose 'round' equals the highest 'round' found in the\n",
"        input, in their original order. Empty input yields nothing.\n",
"    \"\"\"\n",
"    # Materialise once: the data is scanned twice (max, then filter), which\n",
"    # would silently yield nothing if a one-shot iterator were passed in.\n",
"    runs = list(delegation_data)\n",
"    if not runs:\n",
"        # max() of an empty sequence raises ValueError; no data means no runs\n",
"        return\n",
"    latest_round = max(run['round'] for run in runs)\n",
"    for run in runs:\n",
"        if run['round'] == latest_round:\n",
"            yield run\n",
"\n",
"################################################################# validate\n",
"\n",
"def test(json_ref):\n",
"    \"\"\"Return `json_ref` unchanged, or None when it is an empty value.\n",
"\n",
"    Empty means equal to None, '', [] or {}.\n",
"    \"\"\"\n",
"    is_blank = json_ref in (None, '', [], {})\n",
"    return None if is_blank else json_ref\n",
"\n",
"\n",
"def is_valid_user(user_json):\n",
"    \"\"\"very-very forgiving data quality check\n",
"\n",
"    Returns True only when the legal name, the personal identity number and\n",
"    the first address (including its postal code, city and street name) are\n",
"    all present and non-empty. Incomplete or malformed records give False.\n",
"    \"\"\"\n",
"    try:\n",
"        detail = user_json['user_detail']\n",
"        address = user_json['user_addresses'][0]\n",
"        return all([\n",
"            test(detail['legal_name']),\n",
"            test(detail['personal_identity_num']),\n",
"            test(address),\n",
"            test(address['postal_cd']),\n",
"            test(address['city_name']),\n",
"            test(address['street_name']),\n",
"        ])\n",
"    except (KeyError, IndexError, TypeError):\n",
"        # a missing key, an empty address list or a None/non-dict record is\n",
"        # bad data; anything else (e.g. KeyboardInterrupt) must propagate\n",
"        return False\n",
"\n",
" \n",
"def is_valid_analog(analog_json):\n",
"    \"\"\"very-very forgiving data quality check\n",
"\n",
"    Returns True when the full name, the identity number and the address\n",
"    are all present and non-empty. A missing record (None) or a record\n",
"    lacking one of these keys gives False, the same way is_valid_user\n",
"    treats incomplete data.\n",
"    \"\"\"\n",
"    try:\n",
"        return all([\n",
"            test(analog_json['full_name']),\n",
"            test(analog_json['identity_num']),\n",
"            test(analog_json['address']),\n",
"        ])\n",
"    except (KeyError, TypeError):\n",
"        # None/non-dict record or absent key: invalid data, not a crash\n",
"        return False\n",
" \n",
"\n",
"def user_address_builder(user_json):\n",
"    \"\"\"building a string from things that might or might not be there\n",
"\n",
"    Uses the first entry of user_json['user_addresses'] and renders it as\n",
"    '<postal_cd> <city_name>, <street_name> <street type comment> <street_num>'.\n",
"    Parts that are missing or empty are left out, so the result carries no\n",
"    space before the comma and no doubled spaces.\n",
"    \"\"\"\n",
"    address = user_json['user_addresses'][0]\n",
"    # street_type may be absent or None; an empty dict keeps the lookup simple\n",
"    street_type = address.get('street_type') or {}\n",
"\n",
"    def _join(parts):\n",
"        # keep only parts that carry a value; str() tolerates numeric values\n",
"        return ' '.join(str(part) for part in parts if test(part))\n",
"\n",
"    locality = _join([address.get('postal_cd'), address.get('city_name')])\n",
"    street = _join([\n",
"        address.get('street_name'),\n",
"        street_type.get('comment'),\n",
"        address.get('street_num'),\n",
"    ])\n",
"    return ', '.join(chunk for chunk in (locality, street) if chunk)\n",
"\n",
"################################################################# build\n",
"\n",
"def doc_builder(delegation_run_data):\n",
"    \"\"\"Yield one letter payload per town of the given delegation runs.\n",
"\n",
"    Each payload is a dict with 'filename' (\"<town_id>.html\"), 'name' (the\n",
"    town name) and 'delegees': one dict (name, id, address, email_address,\n",
"    phone_num, zone) per entry assigned to a voting location of the town.\n",
"    A valid registered user is preferred; otherwise a valid analog user\n",
"    record is used. Entries with neither are skipped and logged.\n",
"    \"\"\"\n",
"    import logging  # function-scope import: the cell's import block stays as is\n",
"\n",
"    log = logging.getLogger(__name__)\n",
"\n",
"    for run_town in delegation_run_data:\n",
"        town = run_town['town']\n",
"        fileload = {\n",
"            'filename': f\"{run_town['town_id']}.html\",\n",
"            'name': town['town_name'],\n",
"            'delegees': [],\n",
"        }\n",
"\n",
"        for voting_location in town['voting_locations']:\n",
"            # `or []`: a location without assignments adds no delegees\n",
"            for user_location in voting_location['user_2_voting_locations'] or []:\n",
"                user = user_location['user']\n",
"                if is_valid_user(user):\n",
"                    fileload['delegees'].append({\n",
"                        'name': user['user_detail']['legal_name'],\n",
"                        'id': user['user_detail']['personal_identity_num'],\n",
"                        'address': user_address_builder(user),\n",
"                        'email_address': user['email_address'],\n",
"                        'phone_num': user['phone_num'],\n",
"                        'zone': voting_location['location_number'],\n",
"                    })\n",
"                    continue\n",
"\n",
"                # looked up only now, so registered users need no such key\n",
"                analog_user = user_location['analog_user']\n",
"                # the truthiness guard keeps a missing (None) record from\n",
"                # reaching the validator\n",
"                if analog_user and is_valid_analog(analog_user):\n",
"                    fileload['delegees'].append({\n",
"                        'name': analog_user['full_name'],\n",
"                        'id': analog_user['identity_num'],\n",
"                        'address': analog_user['address'],\n",
"                        'email_address': analog_user['email_address'],\n",
"                        'phone_num': analog_user['phone_num'],\n",
"                        'zone': voting_location['location_number'],\n",
"                    })\n",
"                else:\n",
"                    # no personal data in the message, only where to look\n",
"                    log.warning(\n",
"                        'skipping delegee without usable data (town_id=%s, zone=%s)',\n",
"                        run_town['town_id'],\n",
"                        voting_location.get('location_number'),\n",
"                    )\n",
"\n",
"        yield fileload\n",
"\n",
"################################################################# merge\n",
"\n",
"def read_letter_template(file_str='../input/megbizolevel_jinja_template.html'):\n",
"    \"\"\"Read the HTML file at `file_str` and return it as a jinja2 Template.\n",
"\n",
"    Autoescaping is switched on because the template is filled with\n",
"    user-entered names and addresses: characters such as '<' or '&' in that\n",
"    data must not end up as markup in the generated letter.\n",
"    \"\"\"\n",
"    with open(file_str, mode='r', encoding='utf-8') as template_file:\n",
"        markup = template_file.read()\n",
"    return Template(markup, autoescape=True)\n",
"\n",
"\n",
"def generate_documents(fileload, jinja_template, path=\"../output/\"):\n",
"    \"\"\"Render one HTML letter per town that has delegees and write it to `path`.\n",
"\n",
"    Args:\n",
"        fileload: iterable of town payloads (dicts with 'filename' and\n",
"            'delegees'); each one is handed to the template as `town`.\n",
"        jinja_template: object whose render(town=...) returns the document text.\n",
"        path: output directory; created when it does not exist yet.\n",
"    \"\"\"\n",
"    import os  # function-scope import: the cell's import block stays as is\n",
"\n",
"    os.makedirs(path, exist_ok=True)\n",
"    for town in fileload:\n",
"        if not town['delegees']:\n",
"            continue  # no delegees, no letter for this town\n",
"        document = jinja_template.render(town=town)\n",
"        target = os.path.join(path, town['filename'])\n",
"        with open(target, 'w', encoding='utf-8') as f:\n",
"            f.write(document)\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ee212b73-77e3-4f43-8c3e-4ee2b2044728",
"metadata": {},
"outputs": [],
"source": [
"# full pipeline: read dump, keep latest round, build payloads, render letters\n",
"generate_documents(\n",
"    fileload=doc_builder(drop_old_runs(get_delegation_data())),\n",
"    jinja_template=read_letter_template(),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a1727c07-4bdb-4fc8-9589-06c708708dce",
"metadata": {},
"outputs": [],
"source": [
"# transforming the json array to a tabular report\n",
"\n",
"import pandas as pd\n",
"\n",
"# one row per town; the 'delegees' column still holds the list of delegee dicts\n",
"report_towns = pd.json_normalize(\n",
"    doc_builder(drop_old_runs(get_delegation_data()))\n",
")\n",
"\n",
"# one row per (town, delegee); a town without delegees keeps one NaN row\n",
"report_exploded = report_towns.explode('delegees')\n",
"town_index = report_exploded.index  # repeated town positions, kept for the CSV\n",
"\n",
"# Flatten the delegee dicts and attach them row by row. Joining on the\n",
"# repeated town index instead would pair every delegee of a town with every\n",
"# other one, which then has to be undone by drop_duplicates.\n",
"delegee_columns = pd.json_normalize(report_exploded['delegees'].tolist())\n",
"\n",
"report = (\n",
"    report_exploded\n",
"    .drop(columns=['delegees'])\n",
"    .reset_index(drop=True)\n",
"    .join(delegee_columns, rsuffix='_usr')  # delegee 'name' becomes 'name_usr'\n",
")\n",
"report.index = town_index\n",
"report = report.drop_duplicates()\n",
"\n",
"report.to_csv('../output/delegalas_report.csv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "887fc299-7ca0-4b20-b641-5aff5f97b5a6",
"metadata": {},
"outputs": [],
"source": [
"# 399 entries, but not all of them have delegees\n",
"%ls /home/jupyter/20k_docgen/output/ | wc -l"
]
},
{
"cell_type": "markdown",
"id": "c18b5333-c620-4113-80d2-6f9a4c32f700",
"metadata": {
"tags": []
},
"source": [
"## 3. installing libreoffice writer for pdf conversion"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4a282c4c-69c1-4930-b847-7a36eef4a0ce",
"metadata": {},
"outputs": [],
"source": [
"%%capture\n",
"!apt-get -qq install -y libreoffice-writer"
]
},
{
"cell_type": "markdown",
"id": "50bc0d4c-dc6a-4ddb-b181-8c2efa96e33d",
"metadata": {
"tags": []
},
"source": [
"## 4. converting HTMLs to PDFs and zipping them for download\n",
"*(Did you clean the output folder before running the generation script? Files left over from earlier runs are converted and zipped as well.)*"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ea44aa72-9766-4e20-9340-91130dd8bd57",
"metadata": {},
"outputs": [],
"source": [
"%%bash\n",
"# stop if the output folder is missing: otherwise the loop below would\n",
"# convert whatever *.html files happen to be in the current directory\n",
"cd /home/jupyter/20k_docgen/output/ || exit 1\n",
"# convert every generated letter to PDF, one LibreOffice run per file\n",
"for f in *.html ; do lowriter --headless --convert-to pdf \"$f\" ; done\n",
"# bundle the PDFs for download\n",
"zip /home/jupyter/20k_docgen/pub/500_megbizo_html.zip /home/jupyter/20k_docgen/output/*.pdf"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment