Skip to content

Instantly share code, notes, and snippets.

@adam704a
Last active May 19, 2020 13:33
Show Gist options
  • Save adam704a/37d9e11adacecbafc076724583e9aad6 to your computer and use it in GitHub Desktop.
Save adam704a/37d9e11adacecbafc076724583e9aad6 to your computer and use it in GitHub Desktop.
Import ESPEN Forecast API Data
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Overview\n",
"This notebook uses the *new* ESPEN Forecast API. There are 3 data elements that are being imported:\n",
"\n",
"* MDA Forecasted \n",
"* IA Forecasted\n",
"* IA Type\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"import csv\n",
"\n",
"TOKEN=\"1234\" # use this later\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# looks at the json result and determines if the estimate has been provided (for any of the years)\n",
"def has_ia_estimates(result):\n",
" if 'IA_2020' in result:\n",
" if result['IA_2020'] is not None \\\n",
" or result['IA_2021'] is not None \\\n",
" or result['IA_2022'] is not None \\\n",
" or result['IA_2023'] is not None \\\n",
" or result['IA_2024'] is not None \\\n",
" or result['IA_2025'] is not None \\\n",
" or result['IA_2026'] is not None \\\n",
" or result['IA_2027'] is not None \\\n",
" or result['IA_2028'] is not None \\\n",
" or result['IA_2029'] is not None \\\n",
" or result['IA_2030'] is not None :\n",
" return True\n",
" else:\n",
" return False\n",
" # funny thing is the sch and trachoma return ia keys without an underscore, so checking here \n",
" # TODO: ask standard code\n",
" elif 'IA2020' in result:\n",
" if result['IA2020'] is not None \\\n",
" or result['IA2021'] is not None \\\n",
" or result['IA2022'] is not None \\\n",
" or result['IA2023'] is not None \\\n",
" or result['IA2024'] is not None \\\n",
" or result['IA2025'] is not None \\\n",
" or result['IA2026'] is not None \\\n",
" or result['IA2027'] is not None \\\n",
" or result['IA2028'] is not None \\\n",
" or result['IA2029'] is not None \\\n",
" or result['IA2030'] is not None :\n",
" return True\n",
" else:\n",
" return False\n",
" else:\n",
" return False\n",
"\n",
"# see if any of the years has estimates for mda\n",
"def has_mda_estimates(result):\n",
" if result['MDA_2020'] is not None \\\n",
" or result['MDA_2021'] is not None \\\n",
" or result['MDA_2022'] is not None \\\n",
" or result['MDA_2023'] is not None \\\n",
" or result['MDA_2024'] is not None \\\n",
" or result['MDA_2025'] is not None \\\n",
" or result['MDA_2026'] is not None \\\n",
" or result['MDA_2027'] is not None \\\n",
" or result['MDA_2028'] is not None \\\n",
" or result['MDA_2029'] is not None \\\n",
" or result['MDA_2030'] is not None:\n",
" return True\n",
" else:\n",
" return False\n",
"\n",
"# look up cateogry option, these are from what is set up in D\n",
"def get_disease_category_option(disease):\n",
" \n",
" if disease == \"lf\":\n",
" return \"V0BMspy4wZa\"\n",
" \n",
" elif disease == \"sth\":\n",
" return \"kZdL7Ru8FXQ\"\n",
" \n",
" elif disease == \"sch\":\n",
" return \"VvPBXlaJZU7\"\n",
" \n",
" elif disease == \"oncho\":\n",
" return \"tTPp1nAkGP7\"\n",
" \n",
" elif disease == \"trachoma\":\n",
" return \"HSnNEBpKtmU\"\n",
" \n",
" else:\n",
" return \"missing\"\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# from here: https://www.mikulskibartosz.name/how-to-display-a-progress-bar-in-jupyter-notebook/\n",
"import time, sys\n",
"from IPython.display import clear_output\n",
"\n",
"def update_progress(progress):\n",
" bar_length = 20\n",
" if isinstance(progress, int):\n",
" progress = float(progress)\n",
" if not isinstance(progress, float):\n",
" progress = 0\n",
" if progress < 0:\n",
" progress = 0\n",
" if progress >= 1:\n",
" progress = 1\n",
" \n",
" block = int(round(bar_length * progress))\n",
" \n",
" clear_output(wait = True)\n",
" text = \"Progress: [{0}] {1:.1f}%\".format( \"#\" * block + \"-\" * (bar_length - block), progress * 100)\n",
" print(text)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Step 1: Import data from ESPEN API\n",
"Not all responses from the Forecast API will include the MDA and IA information, which is why only certain records are retained here as *total_mda_results* and *total_ia_results* respectively."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"there are 937 records for lf where 937 districts have MDA estimates and 79 districts have IU estimates\n",
"there are 937 records for sth where 937 districts have MDA estimates and 937 districts have IU estimates\n",
"there are 937 records for sch where 937 districts have MDA estimates and 937 districts have IU estimates\n",
"there are 937 records for oncho where 937 districts have MDA estimates and 937 districts have IU estimates\n",
"there are 937 records for trachoma where 63 districts have MDA estimates and 57 districts have IU estimates\n",
"--------\n",
"there are 4685 records across diseases\n",
"there are 3811 total_mda_results and 2947 total_ia_results results\n"
]
}
],
"source": [
"diseases = ['lf','sth', 'sch', 'oncho','trachoma'] # this can be lf, sth, sch, trachoma, oncho\n",
"\n",
"total_results = []\n",
"total_ia_results = []\n",
"total_mda_results = []\n",
"\n",
"for disease in diseases: # for each of the diseases\n",
" \n",
" r1 = requests.get('https://admin.espen.afro.who.int/api/data?iso2=ET&disease='+ disease +'&level=iu&type=forecast')\n",
" iu_results = r1.json()\n",
" \n",
" # add to total\n",
" total_results = total_results + iu_results\n",
" \n",
" # initialize disease specific estimates\n",
" ia_estimates = []\n",
" mda_estimates = []\n",
" \n",
" # go through each IU\n",
" for index, iu_result in enumerate(iu_results):\n",
" \n",
" # add disease variable\n",
" iu_result['disease'] = disease\n",
" \n",
" # keep track of impact assessments and MDA estimates seperatey\n",
" if has_ia_estimates(iu_result): \n",
" ia_estimates.append(iu_result)\n",
" \n",
" if has_mda_estimates(iu_result): \n",
" mda_estimates.append(iu_result)\n",
" \n",
" print(\"there are {}\".format(len(iu_results))+\" records for \"+ disease +\" where {}\".format(len(mda_estimates)) + \" districts have MDA \\\n",
"estimates and {}\".format(len(ia_estimates)) +\" districts have IU estimates\")\n",
" \n",
" total_ia_results = total_ia_results + ia_estimates\n",
" total_mda_results = total_mda_results + mda_estimates \n",
"\n",
"print(\"--------\")\n",
"print(\"there are {}\".format(len(total_results))+\" records across diseases\")\n",
"print(\"there are {}\".format(len(total_mda_results))+\" total_mda_results and {}\".format(len(total_ia_results)) + \" total_ia_results results\")\n"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {},
"outputs": [],
"source": [
"# write out to these to files\n",
"total_mda_df = pd.DataFrame(total_mda_results)\n",
"total_mda_df.to_csv('mda-export.csv', index=True)\n",
"\n",
"total_ia_df = pd.DataFrame(total_ia_results)\n",
"total_ia_df.to_csv('ia-export.csv', index=True) "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 2: Load in current orgs from database.\n",
" This will be used later to determine there is a match in the [NTDDB](https://ethiopia.integratedntddb.org/)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"ntd_orgs = pd.read_excel('../integrated_db_org_list.xlsx')"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Woreda</th>\n",
" <th>Woreda ID</th>\n",
" <th>HMIS Woreda Name</th>\n",
" <th>Zone</th>\n",
" <th>Zone ID</th>\n",
" <th>HMIS Zone ID</th>\n",
" <th>Region</th>\n",
" <th>Region ID</th>\n",
" <th>HMIS Region ID</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Addis Ketema Subcity</td>\n",
" <td>ORG00000009</td>\n",
" <td>NaN</td>\n",
" <td>Addis Ababa</td>\n",
" <td>ORG00000002</td>\n",
" <td>NaN</td>\n",
" <td>Addis Ababa</td>\n",
" <td>ORG00000001</td>\n",
" <td>yY9BLUUegel</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Akaki Kaliti Subcity</td>\n",
" <td>ORG00000008</td>\n",
" <td>NaN</td>\n",
" <td>Addis Ababa</td>\n",
" <td>ORG00000002</td>\n",
" <td>NaN</td>\n",
" <td>Addis Ababa</td>\n",
" <td>ORG00000001</td>\n",
" <td>yY9BLUUegel</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Arada Subcity</td>\n",
" <td>ORG00000005</td>\n",
" <td>NaN</td>\n",
" <td>Addis Ababa</td>\n",
" <td>ORG00000002</td>\n",
" <td>NaN</td>\n",
" <td>Addis Ababa</td>\n",
" <td>ORG00000001</td>\n",
" <td>yY9BLUUegel</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Bolle Subcity</td>\n",
" <td>ORG00000007</td>\n",
" <td>NaN</td>\n",
" <td>Addis Ababa</td>\n",
" <td>ORG00000002</td>\n",
" <td>NaN</td>\n",
" <td>Addis Ababa</td>\n",
" <td>ORG00000001</td>\n",
" <td>yY9BLUUegel</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Gulelie Subcity</td>\n",
" <td>ORG00000003</td>\n",
" <td>NaN</td>\n",
" <td>Addis Ababa</td>\n",
" <td>ORG00000002</td>\n",
" <td>NaN</td>\n",
" <td>Addis Ababa</td>\n",
" <td>ORG00000001</td>\n",
" <td>yY9BLUUegel</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Kirkose Subcity</td>\n",
" <td>ORG00000012</td>\n",
" <td>NaN</td>\n",
" <td>Addis Ababa</td>\n",
" <td>ORG00000002</td>\n",
" <td>NaN</td>\n",
" <td>Addis Ababa</td>\n",
" <td>ORG00000001</td>\n",
" <td>yY9BLUUegel</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Kolfie Keranio Subcity</td>\n",
" <td>ORG00000006</td>\n",
" <td>NaN</td>\n",
" <td>Addis Ababa</td>\n",
" <td>ORG00000002</td>\n",
" <td>NaN</td>\n",
" <td>Addis Ababa</td>\n",
" <td>ORG00000001</td>\n",
" <td>yY9BLUUegel</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Lideta Subcity</td>\n",
" <td>ORG00000010</td>\n",
" <td>NaN</td>\n",
" <td>Addis Ababa</td>\n",
" <td>ORG00000002</td>\n",
" <td>NaN</td>\n",
" <td>Addis Ababa</td>\n",
" <td>ORG00000001</td>\n",
" <td>yY9BLUUegel</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Nifase Silk Lafto Subcity</td>\n",
" <td>ORG00000004</td>\n",
" <td>NaN</td>\n",
" <td>Addis Ababa</td>\n",
" <td>ORG00000002</td>\n",
" <td>NaN</td>\n",
" <td>Addis Ababa</td>\n",
" <td>ORG00000001</td>\n",
" <td>yY9BLUUegel</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Yeka Subcity</td>\n",
" <td>ORG00000011</td>\n",
" <td>NaN</td>\n",
" <td>Addis Ababa</td>\n",
" <td>ORG00000002</td>\n",
" <td>NaN</td>\n",
" <td>Addis Ababa</td>\n",
" <td>ORG00000001</td>\n",
" <td>yY9BLUUegel</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>Abala</td>\n",
" <td>ORG00000029</td>\n",
" <td>NaN</td>\n",
" <td>Awsa</td>\n",
" <td>ORG00000014</td>\n",
" <td>jsn4NYnJdnI</td>\n",
" <td>Afar</td>\n",
" <td>ORG00000013</td>\n",
" <td>UFtGyqJMEZh</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>Adar</td>\n",
" <td>ORG00000018</td>\n",
" <td>NaN</td>\n",
" <td>Awsa</td>\n",
" <td>ORG00000014</td>\n",
" <td>jsn4NYnJdnI</td>\n",
" <td>Afar</td>\n",
" <td>ORG00000013</td>\n",
" <td>UFtGyqJMEZh</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>Afambo</td>\n",
" <td>ORG00000026</td>\n",
" <td>NaN</td>\n",
" <td>Awsa</td>\n",
" <td>ORG00000014</td>\n",
" <td>jsn4NYnJdnI</td>\n",
" <td>Afar</td>\n",
" <td>ORG00000013</td>\n",
" <td>UFtGyqJMEZh</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>Afdera</td>\n",
" <td>ORG00000023</td>\n",
" <td>NaN</td>\n",
" <td>Awsa</td>\n",
" <td>ORG00000014</td>\n",
" <td>jsn4NYnJdnI</td>\n",
" <td>Afar</td>\n",
" <td>ORG00000013</td>\n",
" <td>UFtGyqJMEZh</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>Amibara</td>\n",
" <td>ORG00000020</td>\n",
" <td>NaN</td>\n",
" <td>Awsa</td>\n",
" <td>ORG00000014</td>\n",
" <td>jsn4NYnJdnI</td>\n",
" <td>Afar</td>\n",
" <td>ORG00000013</td>\n",
" <td>UFtGyqJMEZh</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>Aregoba</td>\n",
" <td>ORG00000017</td>\n",
" <td>NaN</td>\n",
" <td>Awsa</td>\n",
" <td>ORG00000014</td>\n",
" <td>jsn4NYnJdnI</td>\n",
" <td>Afar</td>\n",
" <td>ORG00000013</td>\n",
" <td>UFtGyqJMEZh</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>Awash fentale</td>\n",
" <td>ORG00000021</td>\n",
" <td>NaN</td>\n",
" <td>Awsa</td>\n",
" <td>ORG00000014</td>\n",
" <td>jsn4NYnJdnI</td>\n",
" <td>Afar</td>\n",
" <td>ORG00000013</td>\n",
" <td>UFtGyqJMEZh</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>Awash Town</td>\n",
" <td>ORG00000015</td>\n",
" <td>NaN</td>\n",
" <td>Awsa</td>\n",
" <td>ORG00000014</td>\n",
" <td>jsn4NYnJdnI</td>\n",
" <td>Afar</td>\n",
" <td>ORG00000013</td>\n",
" <td>UFtGyqJMEZh</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>Awra</td>\n",
" <td>ORG00000028</td>\n",
" <td>NaN</td>\n",
" <td>Awsa</td>\n",
" <td>ORG00000014</td>\n",
" <td>jsn4NYnJdnI</td>\n",
" <td>Afar</td>\n",
" <td>ORG00000013</td>\n",
" <td>UFtGyqJMEZh</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>Aysaeta</td>\n",
" <td>ORG00000027</td>\n",
" <td>NaN</td>\n",
" <td>Awsa</td>\n",
" <td>ORG00000014</td>\n",
" <td>jsn4NYnJdnI</td>\n",
" <td>Afar</td>\n",
" <td>ORG00000013</td>\n",
" <td>UFtGyqJMEZh</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Woreda Woreda ID HMIS Woreda Name Zone \\\n",
"0 Addis Ketema Subcity ORG00000009 NaN Addis Ababa \n",
"1 Akaki Kaliti Subcity ORG00000008 NaN Addis Ababa \n",
"2 Arada Subcity ORG00000005 NaN Addis Ababa \n",
"3 Bolle Subcity ORG00000007 NaN Addis Ababa \n",
"4 Gulelie Subcity ORG00000003 NaN Addis Ababa \n",
"5 Kirkose Subcity ORG00000012 NaN Addis Ababa \n",
"6 Kolfie Keranio Subcity ORG00000006 NaN Addis Ababa \n",
"7 Lideta Subcity ORG00000010 NaN Addis Ababa \n",
"8 Nifase Silk Lafto Subcity ORG00000004 NaN Addis Ababa \n",
"9 Yeka Subcity ORG00000011 NaN Addis Ababa \n",
"10 Abala ORG00000029 NaN Awsa \n",
"11 Adar ORG00000018 NaN Awsa \n",
"12 Afambo ORG00000026 NaN Awsa \n",
"13 Afdera ORG00000023 NaN Awsa \n",
"14 Amibara ORG00000020 NaN Awsa \n",
"15 Aregoba ORG00000017 NaN Awsa \n",
"16 Awash fentale ORG00000021 NaN Awsa \n",
"17 Awash Town ORG00000015 NaN Awsa \n",
"18 Awra ORG00000028 NaN Awsa \n",
"19 Aysaeta ORG00000027 NaN Awsa \n",
"\n",
" Zone ID HMIS Zone ID Region Region ID HMIS Region ID \n",
"0 ORG00000002 NaN Addis Ababa ORG00000001 yY9BLUUegel \n",
"1 ORG00000002 NaN Addis Ababa ORG00000001 yY9BLUUegel \n",
"2 ORG00000002 NaN Addis Ababa ORG00000001 yY9BLUUegel \n",
"3 ORG00000002 NaN Addis Ababa ORG00000001 yY9BLUUegel \n",
"4 ORG00000002 NaN Addis Ababa ORG00000001 yY9BLUUegel \n",
"5 ORG00000002 NaN Addis Ababa ORG00000001 yY9BLUUegel \n",
"6 ORG00000002 NaN Addis Ababa ORG00000001 yY9BLUUegel \n",
"7 ORG00000002 NaN Addis Ababa ORG00000001 yY9BLUUegel \n",
"8 ORG00000002 NaN Addis Ababa ORG00000001 yY9BLUUegel \n",
"9 ORG00000002 NaN Addis Ababa ORG00000001 yY9BLUUegel \n",
"10 ORG00000014 jsn4NYnJdnI Afar ORG00000013 UFtGyqJMEZh \n",
"11 ORG00000014 jsn4NYnJdnI Afar ORG00000013 UFtGyqJMEZh \n",
"12 ORG00000014 jsn4NYnJdnI Afar ORG00000013 UFtGyqJMEZh \n",
"13 ORG00000014 jsn4NYnJdnI Afar ORG00000013 UFtGyqJMEZh \n",
"14 ORG00000014 jsn4NYnJdnI Afar ORG00000013 UFtGyqJMEZh \n",
"15 ORG00000014 jsn4NYnJdnI Afar ORG00000013 UFtGyqJMEZh \n",
"16 ORG00000014 jsn4NYnJdnI Afar ORG00000013 UFtGyqJMEZh \n",
"17 ORG00000014 jsn4NYnJdnI Afar ORG00000013 UFtGyqJMEZh \n",
"18 ORG00000014 jsn4NYnJdnI Afar ORG00000013 UFtGyqJMEZh \n",
"19 ORG00000014 jsn4NYnJdnI Afar ORG00000013 UFtGyqJMEZh "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ntd_orgs.head(20)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# the old way\n",
"def find_admin3(district):\n",
" for index, ntd_row in ntd_orgs.iterrows():\n",
" if ntd_row['Woreda'].lower() == district.lower():\n",
" return ntd_row['Woreda ID']"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# the newer way. exact match by woreda name. Use a dictionary for looking up woredas\n",
"org_dictionary = {}\n",
"\n",
"for index, ntd_row in ntd_orgs.iterrows():\n",
" org_dictionary[ntd_row['Woreda'].lower()] = ntd_row['Woreda ID']\n",
" \n",
" \n",
"def find_admin3(district):\n",
" \n",
" if district.lower() in org_dictionary:\n",
" return org_dictionary[district.lower()]\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# fuzzy match by woreda name\n",
"# returns a dict with the matched ou and the highest score \n",
"\n",
"from fuzzywuzzy import fuzz\n",
"from fuzzywuzzy import process\n",
"\n",
"\n",
"# This is REALLY imported used in step 4 and 5 below\n",
"ou_match_threshold = 86\n",
"\n",
"# using fuzzy\n",
"def find_admin3(district):\n",
" highscore = {\"uid\": \"00000000\",\"score\":0}\n",
" for index, ntd_row in ntd_orgs.iterrows(): \n",
" score = fuzz.ratio(ntd_row['Woreda'].lower(), district.lower())\n",
" if score >= highscore['score']:\n",
" highscore = {\"uid\":ntd_row['Woreda ID'],\"score\":score}\n",
" \n",
" return highscore\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"there with a match score of 91\n"
]
}
],
"source": [
"# test this out\n",
"a=find_admin3(\"Gumer\")\n",
"if a:\n",
" print('there with a match score of {}'.format(a['score']))\n",
"else:\n",
" print('not there')\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"86"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fuzz.ratio(\"Annalemo\".lower(), \"Anlemo\".lower())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 3: Write out OU mapping file\n",
"Determine how many IUs can be mapped to org units in the database"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Progress: [####################] 100.0%\n"
]
}
],
"source": [
"# Write out to a CSV file\n",
"with open('espen-forecasted-ous.csv', 'w') as analysis_data_export:\n",
" \n",
" fieldnames = ['admin1_name','admin2_name', 'admin3_name', 'disease', 'NTD UID','Match Ratio']\n",
" writer = csv.DictWriter(analysis_data_export, fieldnames=fieldnames)\n",
" writer.writeheader()\n",
" \n",
" for index,record in enumerate(total_results):\n",
" \n",
" match = find_admin3(record['IU_Name'])\n",
" \n",
" writer.writerow({\n",
" 'admin1_name': record['Country'],\n",
" 'admin2_name': record['Province_Region'],\n",
" 'admin3_name' : record['IU_Name'],\n",
" 'disease' : record['disease'],\n",
" 'NTD UID': match['uid'],\n",
" 'Match Ratio' : match['score']\n",
" })\n",
" \n",
" update_progress(index / len(total_results))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 4: Create the import file for MDA Forecasted\n",
" The idea is to create a file that looks like this:\n",
"\n",
"\n",
"```xml\n",
"{\n",
" \"dataValues\": [\n",
" {\n",
" \"dataElement\": \"hygwN3AetL9\", // MDA Forecasted\n",
" \"period\": \"2020\", \n",
" \"orgUnit\": \"lgZ6HfZaj3f\", \n",
" \"value\": \"True\", \n",
" \"categoryOptionCombo\" : \"pepMNQRIOA0\" \n",
" }, \n",
" {\n",
" \"dataElement\": \"hygwN3AetL9\", // MDA Forecasted\n",
" \"period\": \"2021\", \n",
" \"orgUnit\": \"zHa2ohFrpPM\", \n",
" \"value\": \"high-endemicity\", \n",
" \"categoryOptionCombo\" : \"pepMNQRIOA0\" \n",
" }, \n",
" ```"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [],
"source": [
"# these are the currently forecasted years\n",
"mda_lables = ['MDA_2020','MDA_2021','MDA_2022','MDA_2023','MDA_2024','MDA_2025','MDA_2026','MDA_2027','MDA_2028','MDA_2029','MDA_2030']"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'MDA', 'NE', None}"
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Get all of the possible MDA values\n",
"temp = set()\n",
"for n in total_mda_results:\n",
" for i in mda_lables:\n",
" if i in n : #if 'IA2024' in n and n['IA2024'] is not '':\n",
" temp.add(n[i])\n",
" \n",
"temp"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Progress: [####################] 100.0%\n"
]
}
],
"source": [
"datavalues = []\n",
"unmaped_ous = set()\n",
"\n",
"for index, record in enumerate(total_mda_results):\n",
" \n",
" # only import data for woredas that can be found\n",
" match = find_admin3(record['IU_Name'])\n",
" if match['score'] >= ou_match_threshold:\n",
" \n",
" for label in mda_lables:\n",
" \n",
" # make sure that this record includes this data, because sometimes it doesn't\n",
" if label in record and record[label] is not None: # and ignore empty values (i.e. IU_NAME: None)\n",
"\n",
" # MDA is forecasted when set to 'MDA', ignore for NE\n",
" if record[label]==\"MDA\":\n",
" \n",
" element = {\n",
" \"dataElement\": \"hygwN3AetL9\",\n",
" \"period\": label[-4:], \n",
" \"orgUnit\": match['uid'],\n",
" \"value\": \"True\",\n",
" \"categoryOptionCombo\" : get_disease_category_option(record['disease'] )\n",
" }\n",
" datavalues.append(element)\n",
"\n",
" \n",
" else:\n",
" unmaped_ous.add(record['IU_Name'])\n",
" \n",
" update_progress(index / len(total_mda_results))\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Of the potential 3811 records from the API to be loaded in, there are 8860 yearly forecasts to be imported and 127 woredas that could not be mapped\n"
]
}
],
"source": [
"# print what we know\n",
"print(\"Of the potential {}\".format(len(total_mda_results)) +\" records from the API to be loaded in, there are {}\"\\\n",
" .format(len(datavalues)) + \" yearly forecasts to be imported and {}\"\\\n",
" .format(len(unmaped_ous)) + \" woredas that could not be mapped\")\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 75,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"\n",
"# Add these things\n",
"data = {}\n",
"data['dataValues'] = datavalues\n",
"\n",
"with open('mda-results-importing.json', 'w') as outfile:\n",
" json.dump(data, outfile, ensure_ascii=False, indent=4)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 5: Create the import file for IA Forecasted\n",
" The idea is to create a file that looks like this:\n",
"\n",
"\n",
"```xml\n",
"{\n",
" \"dataValues\": [\n",
" {\n",
" \"dataElement\": \"nAEO6eb7MdX\", // IA Forecasted\n",
" \"period\": \"2020\", \n",
" \"orgUnit\": \"lgZ6HfZaj3f\", \n",
" \"value\": \"True\", \n",
" \"categoryOptionCombo\" : \"V0BMspy4wZa\" \n",
" }, \n",
" {\n",
" \"dataElement\": \"nAEO6eb7MdX\", // IA Forecasted\n",
" \"period\": \"2021\", \n",
" \"orgUnit\": \"lgZ6HfZaj3f\", \n",
" \"value\": \"True\", \n",
" \"categoryOptionCombo\" : \"V0BMspy4wZa\" \n",
" }, \n",
" ```\n",
" \n",
" "
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# these are the currently forecasted years\n",
"ia_lables = ['IA_2020','IA_2021','IA_2022','IA_2023','IA_2024','IA_2025','IA_2026','IA_2027','IA_2028','IA_2029','IA_2030','IA2020','IA2021','IA2022','IA2023','IA2024','IA2025','IA2026','IA2027','IA2028','IA2029','IA2030']\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2020 should be 2020\n",
"2020 should also be 2020\n"
]
}
],
"source": [
"# IA_2020 should return 2020 and IA2020 should also return 2020\n",
"print(\"{}\".format('IA2020'[-4:])+\" should be 2020\")\n",
"print(\"{}\".format('IA_2020'[-4:])+\" should also be 2020\")"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'IA', 'Mapping', 'Pre-TAS', 'Surveillance', 'TAS 1', 'TAS 2', 'TAS 3', 'TIS'}"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Get all of the possible IA values\n",
"temp = set()\n",
"for n in total_ia_results:\n",
" for i in ia_lables:\n",
" if i in n \\\n",
" and n[i] is not None \\\n",
" and n[i] is not '': #if 'IA2024' in n and n['IA2024'] is not '':\n",
" temp.add(n[i])\n",
" \n",
"temp"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Progress: [####################] 100.0%\n"
]
}
],
"source": [
"datavalues = []\n",
"unmaped_ous = set()\n",
"\n",
"for index, record in enumerate(total_ia_results):\n",
" \n",
" # only import data for woredas that can be found\n",
" match = find_admin3(record['IU_Name'])\n",
" if match['score'] >= ou_match_threshold:\n",
" \n",
" for label in ia_lables:\n",
"\n",
" # make sure that this record includes this data, because sometimes it doesn't\n",
" if label in record and record[label] is not None: # and ignore empty values (i.e. IU_NAME: None)\n",
"\n",
" element = {\n",
" \"dataElement\": \"nAEO6eb7MdX\",\n",
" \"period\": label[-4:], \n",
" \"orgUnit\": match['uid'],\n",
" \"value\": \"True\",\n",
" \"categoryOptionCombo\" : get_disease_category_option(record['disease'] )\n",
" }\n",
" datavalues.append(element)\n",
" \n",
" else:\n",
" unmaped_ous.add(record['IU_Name'])\n",
" \n",
" update_progress(index / len(total_ia_results))\n",
" \n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Of the potential 2947 records from the API to be loaded in, there are 25635 yearly forecasts to be imported and 127 woredas that could not be mapped\n"
]
}
],
"source": [
"# print what we know\n",
"print(\"Of the potential {}\".format(len(total_ia_results)) +\" records from the API to be loaded in, there are {}\"\\\n",
" .format(len(datavalues)) + \" yearly forecasts to be imported and {}\"\\\n",
" .format(len(unmaped_ous)) + \" woredas that could not be mapped\")\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"\n",
"# Add these things\n",
"data = {}\n",
"data['dataValues'] = datavalues\n",
"\n",
"with open('ia-results-importing.json', 'w') as outfile:\n",
" json.dump(data, outfile, ensure_ascii=False, indent=4)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 6: Create the import file for IA Forecasted Type\n",
" The idea is to create a file that looks like this:\n",
"\n",
"\n",
"```xml\n",
"{\n",
" \"dataValues\": [\n",
" {\n",
" \"dataElement\": \"nAEO6eb7MdX\", // IA Forecasted\n",
" \"period\": \"2020\", \n",
" \"orgUnit\": \"lgZ6HfZaj3f\", \n",
" \"value\": \"True\", \n",
" \"categoryOptionCombo\" : \"V0BMspy4wZa\" \n",
" }, \n",
" {\n",
" \"dataElement\": \"nAEO6eb7MdX\", // IA Forecasted\n",
" \"period\": \"2021\", \n",
" \"orgUnit\": \"lgZ6HfZaj3f\", \n",
" \"value\": \"True\", \n",
" \"categoryOptionCombo\" : \"V0BMspy4wZa\" \n",
" }, \n",
" ```\n",
" \n",
" "
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"# returns the option value that DHIS2 needs\n",
"def mapAssessmentType(value):\n",
" if value == \"IA\":\n",
" return \"IA\"\n",
" elif value == \"Mapping\":\n",
" return \"Mapping\"\n",
" elif value == \"Pre-TAS\":\n",
" return \"Pre TAS\"\n",
" elif value == \"TAS 1\":\n",
" return \"TAS 1\"\n",
" elif value == \"TAS 2\":\n",
" return \"TAS 2\"\n",
" elif value == \"TAS 3\":\n",
" return \"TAS 3\"\n",
" elif value == \"Surveillance\":\n",
" return \"Surveillance\"\n",
" elif value == \"TIS\":\n",
" return \"TIS\"\n",
" else:\n",
" return value+\" does not exist\"\n",
" \n",
" "
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Progress: [####################] 100.0%\n",
"there are 2984 elements added\n"
]
}
],
"source": [
"datavalues = []\n",
"unmaped_ous = set()\n",
"\n",
"for index, record in enumerate(total_ia_results):\n",
" \n",
" # only import data for woredas that can be found\n",
" match = find_admin3(record['IU_Name'])\n",
" if match['score'] >= ou_match_threshold:\n",
" \n",
" for label in ia_lables:\n",
" \n",
" # make sure that this record includes this data, because sometimes it doesn't\n",
" if label in record \\\n",
" and record[label] is not None \\\n",
" and record[label] is not '': # and ignore empty values (i.e. IU_NAME: '')\n",
"\n",
" element = {\n",
" \"dataElement\": \"ja31hcRGTci\",\n",
" \"period\": label[-4:], \n",
" \"orgUnit\": match['uid'],\n",
" \"value\": mapAssessmentType(record[label]),\n",
" \"categoryOptionCombo\" : get_disease_category_option(record['disease'] )\n",
" }\n",
" datavalues.append(element)\n",
" \n",
" else:\n",
" unmaped_ous.add(record['IU_Name'])\n",
" \n",
" update_progress(index / len(total_ia_results))\n",
" \n",
"\n",
"print(\"there are {}\".format(len(datavalues))+ \" elements added\")"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"\n",
"# Add these things\n",
"data = {}\n",
"data['dataValues'] = datavalues\n",
"\n",
"with open('ia-types-importing.json', 'w') as outfile:\n",
" json.dump(data, outfile, ensure_ascii=False, indent=4)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "rti",
"language": "python",
"name": "rti"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.10"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment