Created
April 4, 2020 20:38
-
-
Save aflaxman/a88ec3c46ccbe750ed7e72ceaf4479bc to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Sat Apr 4 13:36:42 PDT 2020\r\n" | |
] | |
} | |
], | |
"source": [ | |
"import numpy as np, matplotlib.pyplot as plt, pandas as pd\n", | |
"pd.set_option('display.max_rows', 8)\n", | |
"!date\n", | |
"\n", | |
"%load_ext autoreload\n", | |
"%autoreload 2" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Extract and transform IHME COVID-19 Projections so they can be used as input for the ICL COVID-19 Model\n", | |
"\n", | |
"Target format: csv with columns `Date, Deaths, Cases, Infection-Fatality Ratio`" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"/snfs1/Project/simulation_science/covid/data\n" | |
] | |
} | |
], | |
"source": [ | |
"%cd /home/j/Project/simulation_science/covid/data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"--2020-04-02 20:12:21-- https://ihmecovid19storage.blob.core.windows.net/latest/ihme-covid19.zip\n", | |
"Resolving ihmecovid19storage.blob.core.windows.net (ihmecovid19storage.blob.core.windows.net)... 52.239.236.68\n", | |
"Connecting to ihmecovid19storage.blob.core.windows.net (ihmecovid19storage.blob.core.windows.net)|52.239.236.68|:443... connected.\n", | |
"HTTP request sent, awaiting response... 200 OK\n", | |
"Length: 669460 (654K) [application/octet-stream]\n", | |
"Saving to: ‘ihme-covid19.zip’\n", | |
"\n", | |
"100%[======================================>] 669,460 --.-K/s in 0.03s \n", | |
"\n", | |
"2020-04-02 20:12:21 (23.5 MB/s) - ‘ihme-covid19.zip’ saved [669460/669460]\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"# -N (timestamping): on re-run, refresh ihme-covid19.zip in place rather than\n", | |
"# saving a second copy as ihme-covid19.zip.1 that the unzip step would never read.\n", | |
"!wget -N https://ihmecovid19storage.blob.core.windows.net/latest/ihme-covid19.zip" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Archive: ihme-covid19.zip\n", | |
" creating: 2020_04_01.2/\n", | |
" inflating: 2020_04_01.2/Hospitalization_all_locs.csv \n", | |
" inflating: 2020_04_01.2/IHME_COVID_19_Data_Release_Information_Sheet_II.pdf \n" | |
] | |
} | |
], | |
"source": [ | |
"# -o: overwrite already-extracted files without prompting, so the cell can be\n", | |
"# re-run non-interactively (Restart & Run All).\n", | |
"!unzip -o ihme-covid19.zip" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>V1</th>\n", | |
" <th>location</th>\n", | |
" <th>date</th>\n", | |
" <th>allbed_mean</th>\n", | |
" <th>allbed_lower</th>\n", | |
" <th>allbed_upper</th>\n", | |
" <th>ICUbed_mean</th>\n", | |
" <th>ICUbed_lower</th>\n", | |
" <th>ICUbed_upper</th>\n", | |
" <th>InvVen_mean</th>\n", | |
" <th>...</th>\n", | |
" <th>totdea_mean</th>\n", | |
" <th>totdea_lower</th>\n", | |
" <th>totdea_upper</th>\n", | |
" <th>bedover_mean</th>\n", | |
" <th>bedover_lower</th>\n", | |
" <th>bedover_upper</th>\n", | |
" <th>icuover_mean</th>\n", | |
" <th>icuover_lower</th>\n", | |
" <th>icuover_upper</th>\n", | |
" <th>location_name</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>Wyoming</td>\n", | |
" <td>2020-02-06</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>...</td>\n", | |
" <td>0.000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.00</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Wyoming</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2</td>\n", | |
" <td>Wyoming</td>\n", | |
" <td>2020-02-07</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>...</td>\n", | |
" <td>0.000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.00</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Wyoming</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>3</td>\n", | |
" <td>Wyoming</td>\n", | |
" <td>2020-02-08</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>...</td>\n", | |
" <td>0.000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.00</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Wyoming</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>4</td>\n", | |
" <td>Wyoming</td>\n", | |
" <td>2020-02-09</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>...</td>\n", | |
" <td>0.000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.00</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Wyoming</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9951</th>\n", | |
" <td>178</td>\n", | |
" <td>Alabama</td>\n", | |
" <td>2020-08-01</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>...</td>\n", | |
" <td>5516.046</td>\n", | |
" <td>849.0</td>\n", | |
" <td>9624.05</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Alabama</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9952</th>\n", | |
" <td>179</td>\n", | |
" <td>Alabama</td>\n", | |
" <td>2020-08-02</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>...</td>\n", | |
" <td>5516.046</td>\n", | |
" <td>849.0</td>\n", | |
" <td>9624.05</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Alabama</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9953</th>\n", | |
" <td>180</td>\n", | |
" <td>Alabama</td>\n", | |
" <td>2020-08-03</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>...</td>\n", | |
" <td>5516.046</td>\n", | |
" <td>849.0</td>\n", | |
" <td>9624.05</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Alabama</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9954</th>\n", | |
" <td>181</td>\n", | |
" <td>Alabama</td>\n", | |
" <td>2020-08-04</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>...</td>\n", | |
" <td>5516.046</td>\n", | |
" <td>849.0</td>\n", | |
" <td>9624.05</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>Alabama</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>9955 rows × 31 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" V1 location date allbed_mean allbed_lower allbed_upper \\\n", | |
"0 1 Wyoming 2020-02-06 0.0 0.0 0.0 \n", | |
"1 2 Wyoming 2020-02-07 0.0 0.0 0.0 \n", | |
"2 3 Wyoming 2020-02-08 0.0 0.0 0.0 \n", | |
"3 4 Wyoming 2020-02-09 0.0 0.0 0.0 \n", | |
"... ... ... ... ... ... ... \n", | |
"9951 178 Alabama 2020-08-01 0.0 0.0 0.0 \n", | |
"9952 179 Alabama 2020-08-02 0.0 0.0 0.0 \n", | |
"9953 180 Alabama 2020-08-03 0.0 0.0 0.0 \n", | |
"9954 181 Alabama 2020-08-04 0.0 0.0 0.0 \n", | |
"\n", | |
" ICUbed_mean ICUbed_lower ICUbed_upper InvVen_mean ... totdea_mean \\\n", | |
"0 0.0 0.0 0.0 0.0 ... 0.000 \n", | |
"1 0.0 0.0 0.0 0.0 ... 0.000 \n", | |
"2 0.0 0.0 0.0 0.0 ... 0.000 \n", | |
"3 0.0 0.0 0.0 0.0 ... 0.000 \n", | |
"... ... ... ... ... ... ... \n", | |
"9951 0.0 0.0 0.0 0.0 ... 5516.046 \n", | |
"9952 0.0 0.0 0.0 0.0 ... 5516.046 \n", | |
"9953 0.0 0.0 0.0 0.0 ... 5516.046 \n", | |
"9954 0.0 0.0 0.0 0.0 ... 5516.046 \n", | |
"\n", | |
" totdea_lower totdea_upper bedover_mean bedover_lower bedover_upper \\\n", | |
"0 0.0 0.00 0.0 0.0 0.0 \n", | |
"1 0.0 0.00 0.0 0.0 0.0 \n", | |
"2 0.0 0.00 0.0 0.0 0.0 \n", | |
"3 0.0 0.00 0.0 0.0 0.0 \n", | |
"... ... ... ... ... ... \n", | |
"9951 849.0 9624.05 0.0 0.0 0.0 \n", | |
"9952 849.0 9624.05 0.0 0.0 0.0 \n", | |
"9953 849.0 9624.05 0.0 0.0 0.0 \n", | |
"9954 849.0 9624.05 0.0 0.0 0.0 \n", | |
"\n", | |
" icuover_mean icuover_lower icuover_upper location_name \n", | |
"0 0.0 0.0 0.0 Wyoming \n", | |
"1 0.0 0.0 0.0 Wyoming \n", | |
"2 0.0 0.0 0.0 Wyoming \n", | |
"3 0.0 0.0 0.0 Wyoming \n", | |
"... ... ... ... ... \n", | |
"9951 0.0 0.0 0.0 Alabama \n", | |
"9952 0.0 0.0 0.0 Alabama \n", | |
"9953 0.0 0.0 0.0 Alabama \n", | |
"9954 0.0 0.0 0.0 Alabama \n", | |
"\n", | |
"[9955 rows x 31 columns]" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df = pd.read_csv('2020_04_01.2/Hospitalization_all_locs.csv')\n", | |
"df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>cumulative_deaths</th>\n", | |
" <th>cases</th>\n", | |
" <th>deaths</th>\n", | |
" <th>infection_fatality_ratio</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>date</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>2020-02-06</th>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.01</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2020-02-07</th>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.01</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2020-02-08</th>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.01</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2020-02-09</th>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.01</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2020-08-01</th>\n", | |
" <td>978.3</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.01</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2020-08-02</th>\n", | |
" <td>978.3</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.01</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2020-08-03</th>\n", | |
" <td>978.3</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.01</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2020-08-04</th>\n", | |
" <td>978.3</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.01</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>181 rows × 4 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" cumulative_deaths cases deaths infection_fatality_ratio\n", | |
"date \n", | |
"2020-02-06 0.0 0.0 0.0 0.01\n", | |
"2020-02-07 0.0 0.0 0.0 0.01\n", | |
"2020-02-08 0.0 0.0 0.0 0.01\n", | |
"2020-02-09 0.0 0.0 0.0 0.01\n", | |
"... ... ... ... ...\n", | |
"2020-08-01 978.3 0.0 0.0 0.01\n", | |
"2020-08-02 978.3 0.0 0.0 0.01\n", | |
"2020-08-03 978.3 0.0 0.0 0.01\n", | |
"2020-08-04 978.3 0.0 0.0 0.01\n", | |
"\n", | |
"[181 rows x 4 columns]" | |
] | |
}, | |
"execution_count": 20, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"def etl_state(location, data=None):\n", | |
"    \"\"\"Build the ICL-model input table for one location of the IHME projections.\n", | |
"\n", | |
"    Parameters\n", | |
"    ----------\n", | |
"    location : str\n", | |
"        Value to match in the `location` column, e.g. 'New York'.\n", | |
"    data : pandas.DataFrame, optional\n", | |
"        Projection table with `location`, `date`, `totdea_mean` and\n", | |
"        `allbed_mean` columns. Defaults to the notebook-level `df`.\n", | |
"\n", | |
"    Returns\n", | |
"    -------\n", | |
"    pandas.DataFrame\n", | |
"        Indexed by `date`, with columns `cumulative_deaths`, `cases`,\n", | |
"        `deaths` and `infection_fatality_ratio`.\n", | |
"\n", | |
"    Raises\n", | |
"    ------\n", | |
"    ValueError\n", | |
"        If no row of `data` matches `location`.\n", | |
"    \"\"\"\n", | |
"    if data is None:\n", | |
"        data = df\n", | |
"\n", | |
"    # Filter on the requested location (previously hard-coded to 'Washington').\n", | |
"    rows = data[data.location == location]\n", | |
"    if rows.empty:\n", | |
"        raise ValueError(f'no rows found for location {location!r}')\n", | |
"\n", | |
"    t = rows.filter(['date', 'totdea_mean', 'allbed_mean']).set_index('date')\n", | |
"    # NOTE(review): `allbed_mean` is relabelled as `cases`; confirm this proxy is intended.\n", | |
"    t.columns = ['cumulative_deaths', 'cases']\n", | |
"    # Daily deaths are the first difference of the cumulative series; the first day has no\n", | |
"    # predecessor, so its NaN is replaced by 0.\n", | |
"    t['deaths'] = t.cumulative_deaths.diff().fillna(0)\n", | |
"    # NOTE(review): constant ratio applied to every date; source of the 0.01 value is not recorded here.\n", | |
"    t['infection_fatality_ratio'] = 0.01\n", | |
"\n", | |
"    return t\n", | |
"t = etl_state('New York')\n", | |
"t" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"t.to_csv('ny_projection.csv')" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "dismod_mr", | |
"language": "python", | |
"name": "dismod_mr" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.7" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment