adam704a/Forecast API.ipynb

## Forecast API.ipynb
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Overview\n",
    "This notebook uses the *new* ESPEN Forecast API. There are 3 data elements that are being imported:\n",
    "\n",
    "* MDA Forecasted \n",
    "* IA Forecasted\n",
    "* IA Type\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "import csv\n",
    "\n",
    "TOKEN=\"1234\" # use this later\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Years covered by the forecast keys (e.g. 'MDA_2020' through 'MDA_2030')\n",
    "FORECAST_YEARS = range(2020, 2031)\n",
    "\n",
    "\n",
    "def has_ia_estimates(result):\n",
    "    \"\"\"Report whether a forecast record has an IA estimate for any year.\n",
    "\n",
    "    Args:\n",
    "        result: dict for one record of the forecast API's json result.\n",
    "\n",
    "    Returns:\n",
    "        True if at least one of the 2020-2030 IA keys holds a non-None value,\n",
    "        False otherwise (including when the record has no IA keys at all).\n",
    "    \"\"\"\n",
    "    # funny thing is that sch and trachoma return IA keys without an underscore\n",
    "    # ('IA2020' instead of 'IA_2020'), so work out which spelling this record uses\n",
    "    # TODO: ask whether the key names can be standardized\n",
    "    if 'IA_2020' in result:\n",
    "        prefix = 'IA_'\n",
    "    elif 'IA2020' in result:\n",
    "        prefix = 'IA'\n",
    "    else:\n",
    "        return False\n",
    "\n",
    "    # .get() treats a year that is absent from the record as \"no estimate\"\n",
    "    # instead of raising KeyError part-way through the check\n",
    "    return any(result.get('{}{}'.format(prefix, year)) is not None\n",
    "               for year in FORECAST_YEARS)\n",
    "\n",
    "\n",
    "def has_mda_estimates(result):\n",
    "    \"\"\"Report whether a forecast record has an MDA estimate for any year.\n",
    "\n",
    "    Args:\n",
    "        result: dict for one record of the forecast API's json result.\n",
    "\n",
    "    Returns:\n",
    "        True if at least one of the 'MDA_2020' ... 'MDA_2030' keys holds a\n",
    "        non-None value; False otherwise. Missing keys count as no estimate.\n",
    "    \"\"\"\n",
    "    return any(result.get('MDA_{}'.format(year)) is not None\n",
    "               for year in FORECAST_YEARS)\n",
    "\n",
    "\n",
    "# Category option UID for each disease. The original note says these come from\n",
    "# what is \"set up in D\" (the note is cut off; presumably DHIS2 - TODO confirm).\n",
    "DISEASE_CATEGORY_OPTIONS = {\n",
    "    \"lf\": \"V0BMspy4wZa\",\n",
    "    \"sth\": \"kZdL7Ru8FXQ\",\n",
    "    \"sch\": \"VvPBXlaJZU7\",\n",
    "    \"oncho\": \"tTPp1nAkGP7\",\n",
    "    \"trachoma\": \"HSnNEBpKtmU\",\n",
    "}\n",
    "\n",
    "\n",
    "def get_disease_category_option(disease):\n",
    "    \"\"\"Return the category option UID for `disease`, or \"missing\" if it is unknown.\"\"\"\n",
    "    return DISEASE_CATEGORY_OPTIONS.get(disease, \"missing\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# from here: https://www.mikulskibartosz.name/how-to-display-a-progress-bar-in-jupyter-notebook/\n",
    "import time, sys\n",
    "from IPython.display import clear_output\n",
    "\n",
    "def update_progress(progress):\n",
    "    \"\"\"Redraw a 20-character text progress bar in the cell output.\n",
    "\n",
    "    `progress` is the fraction of work done. It is clamped to the range 0..1,\n",
    "    and anything that is neither an int nor a float is shown as 0.\n",
    "    \"\"\"\n",
    "    bar_length = 20\n",
    "\n",
    "    if isinstance(progress, (int, float)):\n",
    "        fraction = min(max(float(progress), 0.0), 1.0)\n",
    "    else:\n",
    "        fraction = 0.0\n",
    "\n",
    "    filled = int(round(bar_length * fraction))\n",
    "    bar = \"#\" * filled + \"-\" * (bar_length - filled)\n",
    "\n",
    "    # replace the previous bar instead of printing a new line for every update\n",
    "    clear_output(wait = True)\n",
    "    print(\"Progress: [{0}] {1:.1f}%\".format(bar, fraction * 100))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 1: Import data from ESPEN API\n",
    "Not all responses from the Forecast API include the MDA and IA information, which is why only the records that do are retained here, as *total_mda_results* and *total_ia_results* respectively."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "there are 937 records for lf where 937 districts have MDA estimates and 79 districts have IU estimates\n",
      "there are 937 records for sth where 937 districts have MDA estimates and 937 districts have IU estimates\n",
      "there are 937 records for sch where 937 districts have MDA estimates and 937 districts have IU estimates\n",
      "there are 937 records for oncho where 937 districts have MDA estimates and 937 districts have IU estimates\n",
      "there are 937 records for trachoma where 63 districts have MDA estimates and 57 districts have IU estimates\n",
      "--------\n",
      "there are 4685 records across diseases\n",
      "there are 3811 total_mda_results and 2947 total_ia_results results\n"
     ]
    }
   ],
   "source": [
    "diseases = ['lf', 'sth', 'sch', 'oncho', 'trachoma']  # this can be lf, sth, sch, trachoma, oncho\n",
    "\n",
    "total_results = []\n",
    "total_ia_results = []\n",
    "total_mda_results = []\n",
    "\n",
    "for disease in diseases:  # for each of the diseases\n",
    "\n",
    "    r1 = requests.get(\n",
    "        'https://admin.espen.afro.who.int/api/data',\n",
    "        params={'iso2': 'ET', 'disease': disease, 'level': 'iu', 'type': 'forecast'},\n",
    "    )\n",
    "    # fail loudly on an HTTP error instead of trying to parse an error page as JSON\n",
    "    r1.raise_for_status()\n",
    "    iu_results = r1.json()\n",
    "\n",
    "    # add the disease to every IU record; total_results holds these same dicts,\n",
    "    # so the later steps can read record['disease'] from it\n",
    "    for iu_result in iu_results:\n",
    "        iu_result['disease'] = disease\n",
    "\n",
    "    # add to total\n",
    "    total_results.extend(iu_results)\n",
    "\n",
    "    # keep track of impact assessment (IA) and MDA estimates separately\n",
    "    ia_estimates = [iu_result for iu_result in iu_results if has_ia_estimates(iu_result)]\n",
    "    mda_estimates = [iu_result for iu_result in iu_results if has_mda_estimates(iu_result)]\n",
    "\n",
    "    # the second count is the IA one (the message used to call it \"IU\")\n",
    "    print(\"there are {} records for {} where {} districts have MDA estimates and {} districts have IA estimates\".format(\n",
    "        len(iu_results), disease, len(mda_estimates), len(ia_estimates)))\n",
    "\n",
    "    total_ia_results.extend(ia_estimates)\n",
    "    total_mda_results.extend(mda_estimates)\n",
    "\n",
    "print(\"--------\")\n",
    "print(\"there are {} records across diseases\".format(len(total_results)))\n",
    "print(\"there are {} total_mda_results and {} total_ia_results results\".format(\n",
    "    len(total_mda_results), len(total_ia_results)))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [],
   "source": [
    "# pandas is otherwise only imported further down (Step 2), so import it here too;\n",
    "# without this the cell raises NameError on Restart & Run All\n",
    "import pandas as pd\n",
    "\n",
    "# write these out to files\n",
    "total_mda_df = pd.DataFrame(total_mda_results)\n",
    "total_mda_df.to_csv('mda-export.csv', index=True)\n",
    "\n",
    "total_ia_df = pd.DataFrame(total_ia_results)\n",
    "total_ia_df.to_csv('ia-export.csv', index=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 2: Load in current orgs from database.\n",
    " This will be used later to determine whether there is a match in the [NTDDB](https://ethiopia.integratedntddb.org/)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "ntd_orgs = pd.read_excel('../integrated_db_org_list.xlsx')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Woreda</th>\n",
       "      <th>Woreda ID</th>\n",
       "      <th>HMIS Woreda Name</th>\n",
       "      <th>Zone</th>\n",
       "      <th>Zone ID</th>\n",
       "      <th>HMIS Zone ID</th>\n",
       "      <th>Region</th>\n",
       "      <th>Region ID</th>\n",
       "      <th>HMIS Region ID</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Addis Ketema Subcity</td>\n",
       "      <td>ORG00000009</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Addis Ababa</td>\n",
       "      <td>ORG00000002</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Addis Ababa</td>\n",
       "      <td>ORG00000001</td>\n",
       "      <td>yY9BLUUegel</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Akaki Kaliti Subcity</td>\n",
       "      <td>ORG00000008</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Addis Ababa</td>\n",
       "      <td>ORG00000002</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Addis Ababa</td>\n",
       "      <td>ORG00000001</td>\n",
       "      <td>yY9BLUUegel</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Arada Subcity</td>\n",
       "      <td>ORG00000005</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Addis Ababa</td>\n",
       "      <td>ORG00000002</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Addis Ababa</td>\n",
       "      <td>ORG00000001</td>\n",
       "      <td>yY9BLUUegel</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Bolle Subcity</td>\n",
       "      <td>ORG00000007</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Addis Ababa</td>\n",
       "      <td>ORG00000002</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Addis Ababa</td>\n",
       "      <td>ORG00000001</td>\n",
       "      <td>yY9BLUUegel</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Gulelie Subcity</td>\n",
       "      <td>ORG00000003</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Addis Ababa</td>\n",
       "      <td>ORG00000002</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Addis Ababa</td>\n",
       "      <td>ORG00000001</td>\n",
       "      <td>yY9BLUUegel</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Kirkose Subcity</td>\n",
       "      <td>ORG00000012</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Addis Ababa</td>\n",
       "      <td>ORG00000002</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Addis Ababa</td>\n",
       "      <td>ORG00000001</td>\n",
       "      <td>yY9BLUUegel</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Kolfie Keranio Subcity</td>\n",
       "      <td>ORG00000006</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Addis Ababa</td>\n",
       "      <td>ORG00000002</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Addis Ababa</td>\n",
       "      <td>ORG00000001</td>\n",
       "      <td>yY9BLUUegel</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Lideta Subcity</td>\n",
       "      <td>ORG00000010</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Addis Ababa</td>\n",
       "      <td>ORG00000002</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Addis Ababa</td>\n",
       "      <td>ORG00000001</td>\n",
       "      <td>yY9BLUUegel</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Nifase Silk Lafto Subcity</td>\n",
       "      <td>ORG00000004</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Addis Ababa</td>\n",
       "      <td>ORG00000002</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Addis Ababa</td>\n",
       "      <td>ORG00000001</td>\n",
       "      <td>yY9BLUUegel</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Yeka Subcity</td>\n",
       "      <td>ORG00000011</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Addis Ababa</td>\n",
       "      <td>ORG00000002</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Addis Ababa</td>\n",
       "      <td>ORG00000001</td>\n",
       "      <td>yY9BLUUegel</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>Abala</td>\n",
       "      <td>ORG00000029</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Awsa</td>\n",
       "      <td>ORG00000014</td>\n",
       "      <td>jsn4NYnJdnI</td>\n",
       "      <td>Afar</td>\n",
       "      <td>ORG00000013</td>\n",
       "      <td>UFtGyqJMEZh</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>Adar</td>\n",
       "      <td>ORG00000018</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Awsa</td>\n",
       "      <td>ORG00000014</td>\n",
       "      <td>jsn4NYnJdnI</td>\n",
       "      <td>Afar</td>\n",
       "      <td>ORG00000013</td>\n",
       "      <td>UFtGyqJMEZh</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>Afambo</td>\n",
       "      <td>ORG00000026</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Awsa</td>\n",
       "      <td>ORG00000014</td>\n",
       "      <td>jsn4NYnJdnI</td>\n",
       "      <td>Afar</td>\n",
       "      <td>ORG00000013</td>\n",
       "      <td>UFtGyqJMEZh</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>Afdera</td>\n",
       "      <td>ORG00000023</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Awsa</td>\n",
       "      <td>ORG00000014</td>\n",
       "      <td>jsn4NYnJdnI</td>\n",
       "      <td>Afar</td>\n",
       "      <td>ORG00000013</td>\n",
       "      <td>UFtGyqJMEZh</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>Amibara</td>\n",
       "      <td>ORG00000020</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Awsa</td>\n",
       "      <td>ORG00000014</td>\n",
       "      <td>jsn4NYnJdnI</td>\n",
       "      <td>Afar</td>\n",
       "      <td>ORG00000013</td>\n",
       "      <td>UFtGyqJMEZh</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>Aregoba</td>\n",
       "      <td>ORG00000017</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Awsa</td>\n",
       "      <td>ORG00000014</td>\n",
       "      <td>jsn4NYnJdnI</td>\n",
       "      <td>Afar</td>\n",
       "      <td>ORG00000013</td>\n",
       "      <td>UFtGyqJMEZh</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>Awash fentale</td>\n",
       "      <td>ORG00000021</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Awsa</td>\n",
       "      <td>ORG00000014</td>\n",
       "      <td>jsn4NYnJdnI</td>\n",
       "      <td>Afar</td>\n",
       "      <td>ORG00000013</td>\n",
       "      <td>UFtGyqJMEZh</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>Awash Town</td>\n",
       "      <td>ORG00000015</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Awsa</td>\n",
       "      <td>ORG00000014</td>\n",
       "      <td>jsn4NYnJdnI</td>\n",
       "      <td>Afar</td>\n",
       "      <td>ORG00000013</td>\n",
       "      <td>UFtGyqJMEZh</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>Awra</td>\n",
       "      <td>ORG00000028</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Awsa</td>\n",
       "      <td>ORG00000014</td>\n",
       "      <td>jsn4NYnJdnI</td>\n",
       "      <td>Afar</td>\n",
       "      <td>ORG00000013</td>\n",
       "      <td>UFtGyqJMEZh</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>Aysaeta</td>\n",
       "      <td>ORG00000027</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Awsa</td>\n",
       "      <td>ORG00000014</td>\n",
       "      <td>jsn4NYnJdnI</td>\n",
       "      <td>Afar</td>\n",
       "      <td>ORG00000013</td>\n",
       "      <td>UFtGyqJMEZh</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                       Woreda    Woreda ID  HMIS Woreda Name         Zone  \\\n",
       "0        Addis Ketema Subcity  ORG00000009               NaN  Addis Ababa   \n",
       "1        Akaki Kaliti Subcity  ORG00000008               NaN  Addis Ababa   \n",
       "2               Arada Subcity  ORG00000005               NaN  Addis Ababa   \n",
       "3               Bolle Subcity  ORG00000007               NaN  Addis Ababa   \n",
       "4             Gulelie Subcity  ORG00000003               NaN  Addis Ababa   \n",
       "5             Kirkose Subcity  ORG00000012               NaN  Addis Ababa   \n",
       "6      Kolfie Keranio Subcity  ORG00000006               NaN  Addis Ababa   \n",
       "7              Lideta Subcity  ORG00000010               NaN  Addis Ababa   \n",
       "8   Nifase Silk Lafto Subcity  ORG00000004               NaN  Addis Ababa   \n",
       "9                Yeka Subcity  ORG00000011               NaN  Addis Ababa   \n",
       "10                      Abala  ORG00000029               NaN         Awsa   \n",
       "11                       Adar  ORG00000018               NaN         Awsa   \n",
       "12                     Afambo  ORG00000026               NaN         Awsa   \n",
       "13                     Afdera  ORG00000023               NaN         Awsa   \n",
       "14                    Amibara  ORG00000020               NaN         Awsa   \n",
       "15                    Aregoba  ORG00000017               NaN         Awsa   \n",
       "16              Awash fentale  ORG00000021               NaN         Awsa   \n",
       "17                 Awash Town  ORG00000015               NaN         Awsa   \n",
       "18                       Awra  ORG00000028               NaN         Awsa   \n",
       "19                    Aysaeta  ORG00000027               NaN         Awsa   \n",
       "\n",
       "        Zone ID HMIS Zone ID       Region    Region ID HMIS Region ID  \n",
       "0   ORG00000002          NaN  Addis Ababa  ORG00000001    yY9BLUUegel  \n",
       "1   ORG00000002          NaN  Addis Ababa  ORG00000001    yY9BLUUegel  \n",
       "2   ORG00000002          NaN  Addis Ababa  ORG00000001    yY9BLUUegel  \n",
       "3   ORG00000002          NaN  Addis Ababa  ORG00000001    yY9BLUUegel  \n",
       "4   ORG00000002          NaN  Addis Ababa  ORG00000001    yY9BLUUegel  \n",
       "5   ORG00000002          NaN  Addis Ababa  ORG00000001    yY9BLUUegel  \n",
       "6   ORG00000002          NaN  Addis Ababa  ORG00000001    yY9BLUUegel  \n",
       "7   ORG00000002          NaN  Addis Ababa  ORG00000001    yY9BLUUegel  \n",
       "8   ORG00000002          NaN  Addis Ababa  ORG00000001    yY9BLUUegel  \n",
       "9   ORG00000002          NaN  Addis Ababa  ORG00000001    yY9BLUUegel  \n",
       "10  ORG00000014  jsn4NYnJdnI         Afar  ORG00000013    UFtGyqJMEZh  \n",
       "11  ORG00000014  jsn4NYnJdnI         Afar  ORG00000013    UFtGyqJMEZh  \n",
       "12  ORG00000014  jsn4NYnJdnI         Afar  ORG00000013    UFtGyqJMEZh  \n",
       "13  ORG00000014  jsn4NYnJdnI         Afar  ORG00000013    UFtGyqJMEZh  \n",
       "14  ORG00000014  jsn4NYnJdnI         Afar  ORG00000013    UFtGyqJMEZh  \n",
       "15  ORG00000014  jsn4NYnJdnI         Afar  ORG00000013    UFtGyqJMEZh  \n",
       "16  ORG00000014  jsn4NYnJdnI         Afar  ORG00000013    UFtGyqJMEZh  \n",
       "17  ORG00000014  jsn4NYnJdnI         Afar  ORG00000013    UFtGyqJMEZh  \n",
       "18  ORG00000014  jsn4NYnJdnI         Afar  ORG00000013    UFtGyqJMEZh  \n",
       "19  ORG00000014  jsn4NYnJdnI         Afar  ORG00000013    UFtGyqJMEZh  "
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ntd_orgs.head(20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# the old way: scan every row for an exact, case-insensitive woreda name\n",
    "# NOTE: find_admin3 is redefined twice further down; steps 3-5 read match['uid']\n",
    "# and match['score'], so they rely on the last (fuzzy) definition, not this one\n",
    "def find_admin3(district):\n",
    "    \"\"\"Return the 'Woreda ID' of the first row whose 'Woreda' equals `district`\n",
    "    ignoring case, or None when no row matches.\"\"\"\n",
    "    matching_ids = (\n",
    "        org['Woreda ID']\n",
    "        for _, org in ntd_orgs.iterrows()\n",
    "        if org['Woreda'].lower() == district.lower()\n",
    "    )\n",
    "    return next(matching_ids, None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# the newer way. exact match by woreda name. Use a dictionary for looking up woredas\n",
    "# maps lower-cased woreda name -> 'Woreda ID'; if a name repeats, the later row wins\n",
    "org_dictionary = {\n",
    "    org['Woreda'].lower(): org['Woreda ID']\n",
    "    for _, org in ntd_orgs.iterrows()\n",
    "}\n",
    "\n",
    "\n",
    "def find_admin3(district):\n",
    "    \"\"\"Look up `district` (case-insensitively) in org_dictionary.\n",
    "\n",
    "    Returns the woreda ID, or None when the name is not a key.\n",
    "    \"\"\"\n",
    "    return org_dictionary.get(district.lower())\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# fuzzy match by woreda name\n",
    "# returns a dict with the matched ou and the highest score \n",
    "\n",
    "from fuzzywuzzy import fuzz\n",
    "from fuzzywuzzy import process\n",
    "\n",
    "\n",
    "# This is REALLY important: it is used in steps 4 and 5 below.\n",
    "# Minimum fuzz.ratio score for an IU name to count as matched to a woreda.\n",
    "ou_match_threshold = 86\n",
    "\n",
    "# using fuzzy\n",
    "def find_admin3(district):\n",
    "    \"\"\"Find the woreda in ntd_orgs whose name is most similar to `district`.\n",
    "\n",
    "    Args:\n",
    "        district: IU name to look up; compared case-insensitively.\n",
    "\n",
    "    Returns:\n",
    "        dict with 'uid' (the best row's 'Woreda ID') and 'score' (its fuzz.ratio\n",
    "        value). When nothing can be compared, the placeholder\n",
    "        {\"uid\": \"00000000\", \"score\": 0} is returned.\n",
    "    \"\"\"\n",
    "    highscore = {\"uid\": \"00000000\", \"score\": 0}\n",
    "\n",
    "    # a missing (non-string) IU name cannot be matched; report score 0 so the\n",
    "    # caller treats it as unmapped instead of crashing on .lower()\n",
    "    if not isinstance(district, str):\n",
    "        return highscore\n",
    "\n",
    "    wanted = district.lower()  # same for every row, so lower-case it once\n",
    "\n",
    "    # zip over the two columns instead of iterrows(): same row order, less overhead\n",
    "    for woreda, woreda_id in zip(ntd_orgs['Woreda'], ntd_orgs['Woreda ID']):\n",
    "        if not isinstance(woreda, str):\n",
    "            continue  # skip rows without a usable woreda name (e.g. NaN)\n",
    "        score = fuzz.ratio(woreda.lower(), wanted)\n",
    "        # >= keeps the existing tie-break: the last row with the top score wins\n",
    "        if score >= highscore['score']:\n",
    "            highscore = {\"uid\": woreda_id, \"score\": score}\n",
    "\n",
    "    return highscore\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "there with a match score of 91\n"
     ]
    }
   ],
   "source": [
    "# test this out\n",
    "a = find_admin3(\"Gumer\")\n",
    "# find_admin3 always returns a non-empty dict, so `if a:` could never reach the\n",
    "# else branch; compare the score with the threshold instead\n",
    "if a['score'] >= ou_match_threshold:\n",
    "    print('there with a match score of {}'.format(a['score']))\n",
    "else:\n",
    "    print('not there')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "86"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "fuzz.ratio(\"Annalemo\".lower(), \"Anlemo\".lower())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 3: Write out OU mapping file\n",
    "Determine how many IUs can be mapped to org units in the database"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Progress: [####################] 100.0%\n"
     ]
    }
   ],
   "source": [
    "# Write out to a CSV file\n",
    "# newline='' is what the csv module asks for on files it writes to; without it\n",
    "# each row can pick up an extra carriage return on Windows\n",
    "with open('espen-forecasted-ous.csv', 'w', newline='') as analysis_data_export:\n",
    "\n",
    "    fieldnames = ['admin1_name','admin2_name', 'admin3_name', 'disease', 'NTD UID','Match Ratio']\n",
    "    writer = csv.DictWriter(analysis_data_export, fieldnames=fieldnames)\n",
    "    writer.writeheader()\n",
    "\n",
    "    # count from 1 so the progress bar reaches exactly 100% on the last record\n",
    "    for position, record in enumerate(total_results, start=1):\n",
    "\n",
    "        # best fuzzy match for this IU; written out whatever its score so that\n",
    "        # the mapping can be reviewed\n",
    "        match = find_admin3(record['IU_Name'])\n",
    "\n",
    "        writer.writerow({\n",
    "            'admin1_name': record['Country'],\n",
    "            'admin2_name': record['Province_Region'],\n",
    "            'admin3_name' : record['IU_Name'],\n",
    "            'disease' : record['disease'],\n",
    "            'NTD UID': match['uid'],\n",
    "            'Match Ratio' : match['score']\n",
    "        })\n",
    "\n",
    "        update_progress(position / len(total_results))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 4: Create the import file for MDA Forecasted\n",
    " The idea is to create a file that looks like this:\n",
    "\n",
    "\n",
    "```json\n",
    "{\n",
    "  \"dataValues\": [\n",
    "    {\n",
    "      \"dataElement\": \"hygwN3AetL9\", // MDA Forecasted\n",
    "      \"period\": \"2020\", \n",
    "      \"orgUnit\": \"lgZ6HfZaj3f\", \n",
    "      \"value\": \"True\",   \n",
    "      \"categoryOptionCombo\" : \"pepMNQRIOA0\" \n",
    "    }, \n",
    "    {\n",
    "      \"dataElement\": \"hygwN3AetL9\", // MDA Forecasted\n",
    "      \"period\": \"2021\", \n",
    "      \"orgUnit\": \"zHa2ohFrpPM\", \n",
    "      \"value\": \"high-endemicity\", \n",
    "      \"categoryOptionCombo\" : \"pepMNQRIOA0\" \n",
    "    }, \n",
    " ```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [],
   "source": [
    "# these are the currently forecasted years: 'MDA_2020' through 'MDA_2030'\n",
    "mda_lables = ['MDA_{}'.format(year) for year in range(2020, 2031)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'MDA', 'NE', None}"
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Get all of the possible MDA values\n",
    "# (every distinct value found under any MDA year key, None included)\n",
    "temp = {\n",
    "    record[label]\n",
    "    for record in total_mda_results\n",
    "    for label in mda_lables\n",
    "    if label in record\n",
    "}\n",
    "\n",
    "temp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Progress: [####################] 100.0%\n"
     ]
    }
   ],
   "source": [
    "datavalues = []\n",
    "unmaped_ous = set()\n",
    "\n",
    "# count from 1 so the progress bar reaches exactly 100% on the last record\n",
    "for position, record in enumerate(total_mda_results, start=1):\n",
    "\n",
    "    # only import data for woredas that can be found\n",
    "    match = find_admin3(record['IU_Name'])\n",
    "    if match['score'] >= ou_match_threshold:\n",
    "\n",
    "        for label in mda_lables:\n",
    "\n",
    "            # MDA is forecasted when the year is set to 'MDA'. Anything else\n",
    "            # ('NE', None, or a year missing from the record) is skipped.\n",
    "            if record.get(label) == \"MDA\":\n",
    "                datavalues.append({\n",
    "                    \"dataElement\": \"hygwN3AetL9\",  # MDA Forecasted\n",
    "                    \"period\": label[-4:],  # 'MDA_2020' -> '2020'\n",
    "                    \"orgUnit\": match['uid'],\n",
    "                    \"value\": \"True\",\n",
    "                    \"categoryOptionCombo\": get_disease_category_option(record['disease']),\n",
    "                })\n",
    "\n",
    "    else:\n",
    "        unmaped_ous.add(record['IU_Name'])\n",
    "\n",
    "    update_progress(position / len(total_mda_results))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Of the potential 3811 records from the API to be loaded in, there are 8860 yearly forecasts to be imported and 127 woredas that could not be mapped\n"
     ]
    }
   ],
   "source": [
    "# print what we know: records considered, data values built, names left unmapped\n",
    "print(\n",
    "    (\n",
    "        \"Of the potential {} records from the API to be loaded in, \"\n",
    "        \"there are {} yearly forecasts to be imported \"\n",
    "        \"and {} woredas that could not be mapped\"\n",
    "    ).format(len(total_mda_results), len(datavalues), len(unmaped_ous))\n",
    ")\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "\n",
    "# wrap the collected data values under the top-level 'dataValues' key\n",
    "data = {'dataValues': datavalues}\n",
    "\n",
    "with open('mda-results-importing.json', 'w') as outfile:\n",
    "    json.dump(data, outfile, ensure_ascii=False, indent=4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 5: Create the import file for IA Forecasted\n",
    " The idea is to create a file that looks like this:\n",
    "\n",
    "\n",
    "```json\n",
    "{\n",
    "  \"dataValues\": [\n",
    "    {\n",
    "      \"dataElement\": \"nAEO6eb7MdX\", // IA Forecasted\n",
    "      \"period\": \"2020\", \n",
    "      \"orgUnit\": \"lgZ6HfZaj3f\", \n",
    "      \"value\": \"True\",   \n",
    "      \"categoryOptionCombo\" : \"V0BMspy4wZa\" \n",
    "    }, \n",
    "    {\n",
    "      \"dataElement\": \"nAEO6eb7MdX\", // IA Forecasted\n",
    "      \"period\": \"2021\", \n",
    "      \"orgUnit\": \"lgZ6HfZaj3f\", \n",
    "      \"value\": \"True\", \n",
    "      \"categoryOptionCombo\" : \"V0BMspy4wZa\" \n",
    "    }, \n",
    " ```\n",
    " \n",
    " "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# these are the currently forecasted years, in both key spellings:\n",
    "# with an underscore ('IA_2020') first, then without ('IA2020')\n",
    "ia_lables = (['IA_{}'.format(year) for year in range(2020, 2031)]\n",
    "             + ['IA{}'.format(year) for year in range(2020, 2031)])\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2020 should be 2020\n",
      "2020 should also be 2020\n"
     ]
    }
   ],
   "source": [
    "# Sanity check: the last four characters of a label are the year for both key\n",
    "# spellings, i.e. IA2020 -> 2020 and IA_2020 -> 2020\n",
    "print('IA2020'[-4:] + \" should be 2020\")\n",
    "print('IA_2020'[-4:] + \" should also be 2020\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'IA', 'Mapping', 'Pre-TAS', 'Surveillance', 'TAS 1', 'TAS 2', 'TAS 3', 'TIS'}"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Get all of the possible IA values, skipping labels that are missing from a\n",
    "# record as well as None and empty-string values\n",
    "temp = set()\n",
    "for n in total_ia_results:\n",
    "    for i in ia_lables:\n",
    "        if i not in n:\n",
    "            continue\n",
    "        # Compare with != instead of `is not`: an identity check against a string\n",
    "        # literal only works when the interpreter happens to intern the value and\n",
    "        # raises a SyntaxWarning on Python 3.8+.\n",
    "        if n[i] is not None and n[i] != '':\n",
    "            temp.add(n[i])\n",
    "\n",
    "temp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Progress: [####################] 100.0%\n"
     ]
    }
   ],
   "source": [
    "datavalues = []\n",
    "unmaped_ous = set()\n",
    "\n",
    "for index, record in enumerate(total_ia_results):\n",
    "\n",
    "    # only import data for woredas that can be found\n",
    "    match = find_admin3(record['IU_Name'])\n",
    "    if match['score'] >= ou_match_threshold:\n",
    "        # one \"True\" data value for every forecast label this record carries; the\n",
    "        # period is the trailing four characters of the label (IA_2020 / IA2020 -> 2020)\n",
    "        datavalues.extend(\n",
    "            {\n",
    "                \"dataElement\": \"nAEO6eb7MdX\",\n",
    "                \"period\": label[-4:],\n",
    "                \"orgUnit\": match['uid'],\n",
    "                \"value\": \"True\",\n",
    "                \"categoryOptionCombo\": get_disease_category_option(record['disease']),\n",
    "            }\n",
    "            for label in ia_lables\n",
    "            # skip labels that are absent from the record or explicitly None\n",
    "            if label in record and record[label] is not None\n",
    "        )\n",
    "    else:\n",
    "        # remember the names that could not be matched so they can be reported\n",
    "        unmaped_ous.add(record['IU_Name'])\n",
    "\n",
    "    update_progress(index / len(total_ia_results))\n",
    "        \n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Of the potential 2947 records from the API to be loaded in, there are 25635 yearly forecasts to be imported and 127 woredas that could not be mapped\n"
     ]
    }
   ],
   "source": [
    "# print what we know: records received, yearly data values built, unmatched woredas\n",
    "print(\n",
    "    (\n",
    "        \"Of the potential {} records from the API to be loaded in, there are {}\"\n",
    "        \" yearly forecasts to be imported and {} woredas that could not be mapped\"\n",
    "    ).format(len(total_ia_results), len(datavalues), len(unmaped_ous))\n",
    ")\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "\n",
    "# Wrap the collected data values in the top-level \"dataValues\" key of the import file\n",
    "data = {'dataValues': datavalues}\n",
    "\n",
    "# The encoding is explicit because ensure_ascii=False writes non-ASCII characters as-is;\n",
    "# with the platform default encoding that can fail or produce a file that is not UTF-8.\n",
    "with open('ia-results-importing.json', 'w', encoding='utf-8') as outfile:\n",
    "    json.dump(data, outfile, ensure_ascii=False, indent=4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 6: Create the import file for IA Forecasted Type\n",
    " The idea is to create a file that looks like this:\n",
    "\n",
    "\n",
    "```json\n",
    "{\n",
    "  \"dataValues\": [\n",
    "    {\n",
    "      \"dataElement\": \"ja31hcRGTci\", // IA Forecasted Type\n",
    "      \"period\": \"2020\", \n",
    "      \"orgUnit\": \"lgZ6HfZaj3f\", \n",
    "      \"value\": \"IA\",   \n",
    "      \"categoryOptionCombo\" : \"V0BMspy4wZa\" \n",
    "    }, \n",
    "    {\n",
    "      \"dataElement\": \"ja31hcRGTci\", // IA Forecasted Type\n",
    "      \"period\": \"2021\", \n",
    "      \"orgUnit\": \"lgZ6HfZaj3f\", \n",
    "      \"value\": \"TAS 1\", \n",
    "      \"categoryOptionCombo\" : \"V0BMspy4wZa\" \n",
    "    }, \n",
    " ```\n",
    " \n",
    " "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Assessment labels as they appear in the forecast results -> option values used\n",
    "# in the import file. Only \"Pre-TAS\" is spelled differently (\"Pre TAS\").\n",
    "_ASSESSMENT_TYPE_OPTIONS = {\n",
    "    \"IA\": \"IA\",\n",
    "    \"Mapping\": \"Mapping\",\n",
    "    \"Pre-TAS\": \"Pre TAS\",\n",
    "    \"TAS 1\": \"TAS 1\",\n",
    "    \"TAS 2\": \"TAS 2\",\n",
    "    \"TAS 3\": \"TAS 3\",\n",
    "    \"Surveillance\": \"Surveillance\",\n",
    "    \"TIS\": \"TIS\",\n",
    "}\n",
    "\n",
    "\n",
    "# returns the option value that DHIS2 needs\n",
    "def mapAssessmentType(value):\n",
    "    \"\"\"Translate an assessment label into the option value written to the import file.\n",
    "\n",
    "    Known labels are looked up in _ASSESSMENT_TYPE_OPTIONS. Anything else yields the\n",
    "    marker string '<value> does not exist' so that unexpected labels stay visible in\n",
    "    the generated file.\n",
    "    \"\"\"\n",
    "    try:\n",
    "        return _ASSESSMENT_TYPE_OPTIONS[value]\n",
    "    except (KeyError, TypeError):\n",
    "        # KeyError: unknown label. TypeError: unhashable value.\n",
    "        # format() is used instead of string concatenation so that non-string\n",
    "        # values such as None produce the marker instead of raising TypeError.\n",
    "        return \"{} does not exist\".format(value)\n",
    "    \n",
    "         "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Progress: [####################] 100.0%\n",
      "there are 2984 elements added\n"
     ]
    }
   ],
   "source": [
    "datavalues = []\n",
    "unmaped_ous = set()\n",
    "\n",
    "for index, record in enumerate(total_ia_results):\n",
    "    \n",
    "    # only import data for woredas that can be found\n",
    "    match = find_admin3(record['IU_Name'])\n",
    "    if match['score'] >= ou_match_threshold:\n",
    "    \n",
    "        for label in ia_lables:\n",
    "\n",
    "            # make sure that this record includes this data, because sometimes it doesn't\n",
    "            if label not in record:\n",
    "                continue\n",
    "\n",
    "            # ignore empty values (None or ''). The empty-string check uses == rather\n",
    "            # than `is`: identity against a string literal only works when the value\n",
    "            # happens to be interned and raises a SyntaxWarning on Python 3.8+.\n",
    "            assessment = record[label]\n",
    "            if assessment is None or assessment == '':\n",
    "                continue\n",
    "\n",
    "            element = {\n",
    "                \"dataElement\": \"ja31hcRGTci\",\n",
    "                # the period is the trailing four characters of the label (the year)\n",
    "                \"period\": label[-4:], \n",
    "                \"orgUnit\": match['uid'],\n",
    "                \"value\": mapAssessmentType(assessment),\n",
    "                \"categoryOptionCombo\" : get_disease_category_option(record['disease'] )\n",
    "            }\n",
    "            datavalues.append(element)\n",
    "           \n",
    "    else:\n",
    "        # remember the names that could not be matched\n",
    "        unmaped_ous.add(record['IU_Name'])\n",
    "        \n",
    "    update_progress(index / len(total_ia_results))\n",
    "        \n",
    "\n",
    "print(\"there are {}\".format(len(datavalues))+ \" elements added\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "\n",
    "# Wrap the collected data values in the top-level \"dataValues\" key of the import file\n",
    "data = {'dataValues': datavalues}\n",
    "\n",
    "# The encoding is explicit because ensure_ascii=False writes non-ASCII characters as-is;\n",
    "# with the platform default encoding that can fail or produce a file that is not UTF-8.\n",
    "with open('ia-types-importing.json', 'w', encoding='utf-8') as outfile:\n",
    "    json.dump(data, outfile, ensure_ascii=False, indent=4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "rti",
   "language": "python",
   "name": "rti"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}