Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aflaxman/a88ec3c46ccbe750ed7e72ceaf4479bc to your computer and use it in GitHub Desktop.
Save aflaxman/a88ec3c46ccbe750ed7e72ceaf4479bc to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Sat Apr 4 13:36:42 PDT 2020\r\n"
]
}
],
"source": [
"import numpy as np, matplotlib.pyplot as plt, pandas as pd\n",
"pd.set_option('display.max_rows', 8)\n",
"!date\n",
"\n",
"%load_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Extract and transform IHME COVID-19 Projections so they can be used as input for the ICL COVID-19 Model\n",
"\n",
"Target format: csv with columns `Date, Deaths, Cases, Infection-Fatality Ratio`"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/snfs1/Project/simulation_science/covid/data\n"
]
}
],
"source": [
"%cd /home/j/Project/simulation_science/covid/data"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"--2020-04-02 20:12:21-- https://ihmecovid19storage.blob.core.windows.net/latest/ihme-covid19.zip\n",
"Resolving ihmecovid19storage.blob.core.windows.net (ihmecovid19storage.blob.core.windows.net)... 52.239.236.68\n",
"Connecting to ihmecovid19storage.blob.core.windows.net (ihmecovid19storage.blob.core.windows.net)|52.239.236.68|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 669460 (654K) [application/octet-stream]\n",
"Saving to: ‘ihme-covid19.zip’\n",
"\n",
"100%[======================================>] 669,460 --.-K/s in 0.03s \n",
"\n",
"2020-04-02 20:12:21 (23.5 MB/s) - ‘ihme-covid19.zip’ saved [669460/669460]\n",
"\n"
]
}
],
"source": [
"# -O pins the output name so re-running this cell overwrites the archive\n",
"# instead of saving a second copy as ihme-covid19.zip.1\n",
"!wget -O ihme-covid19.zip https://ihmecovid19storage.blob.core.windows.net/latest/ihme-covid19.zip"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Archive: ihme-covid19.zip\n",
" creating: 2020_04_01.2/\n",
" inflating: 2020_04_01.2/Hospitalization_all_locs.csv \n",
" inflating: 2020_04_01.2/IHME_COVID_19_Data_Release_Information_Sheet_II.pdf \n"
]
}
],
"source": [
"# -o overwrites previously extracted files without prompting, so the cell\n",
"# can be re-run non-interactively after a fresh download\n",
"!unzip -o ihme-covid19.zip"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>V1</th>\n",
" <th>location</th>\n",
" <th>date</th>\n",
" <th>allbed_mean</th>\n",
" <th>allbed_lower</th>\n",
" <th>allbed_upper</th>\n",
" <th>ICUbed_mean</th>\n",
" <th>ICUbed_lower</th>\n",
" <th>ICUbed_upper</th>\n",
" <th>InvVen_mean</th>\n",
" <th>...</th>\n",
" <th>totdea_mean</th>\n",
" <th>totdea_lower</th>\n",
" <th>totdea_upper</th>\n",
" <th>bedover_mean</th>\n",
" <th>bedover_lower</th>\n",
" <th>bedover_upper</th>\n",
" <th>icuover_mean</th>\n",
" <th>icuover_lower</th>\n",
" <th>icuover_upper</th>\n",
" <th>location_name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>Wyoming</td>\n",
" <td>2020-02-06</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.000</td>\n",
" <td>0.0</td>\n",
" <td>0.00</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Wyoming</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>Wyoming</td>\n",
" <td>2020-02-07</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.000</td>\n",
" <td>0.0</td>\n",
" <td>0.00</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Wyoming</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>Wyoming</td>\n",
" <td>2020-02-08</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.000</td>\n",
" <td>0.0</td>\n",
" <td>0.00</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Wyoming</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>Wyoming</td>\n",
" <td>2020-02-09</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.000</td>\n",
" <td>0.0</td>\n",
" <td>0.00</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Wyoming</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9951</th>\n",
" <td>178</td>\n",
" <td>Alabama</td>\n",
" <td>2020-08-01</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>5516.046</td>\n",
" <td>849.0</td>\n",
" <td>9624.05</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Alabama</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9952</th>\n",
" <td>179</td>\n",
" <td>Alabama</td>\n",
" <td>2020-08-02</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>5516.046</td>\n",
" <td>849.0</td>\n",
" <td>9624.05</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Alabama</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9953</th>\n",
" <td>180</td>\n",
" <td>Alabama</td>\n",
" <td>2020-08-03</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>5516.046</td>\n",
" <td>849.0</td>\n",
" <td>9624.05</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Alabama</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9954</th>\n",
" <td>181</td>\n",
" <td>Alabama</td>\n",
" <td>2020-08-04</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>5516.046</td>\n",
" <td>849.0</td>\n",
" <td>9624.05</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Alabama</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>9955 rows × 31 columns</p>\n",
"</div>"
],
"text/plain": [
" V1 location date allbed_mean allbed_lower allbed_upper \\\n",
"0 1 Wyoming 2020-02-06 0.0 0.0 0.0 \n",
"1 2 Wyoming 2020-02-07 0.0 0.0 0.0 \n",
"2 3 Wyoming 2020-02-08 0.0 0.0 0.0 \n",
"3 4 Wyoming 2020-02-09 0.0 0.0 0.0 \n",
"... ... ... ... ... ... ... \n",
"9951 178 Alabama 2020-08-01 0.0 0.0 0.0 \n",
"9952 179 Alabama 2020-08-02 0.0 0.0 0.0 \n",
"9953 180 Alabama 2020-08-03 0.0 0.0 0.0 \n",
"9954 181 Alabama 2020-08-04 0.0 0.0 0.0 \n",
"\n",
" ICUbed_mean ICUbed_lower ICUbed_upper InvVen_mean ... totdea_mean \\\n",
"0 0.0 0.0 0.0 0.0 ... 0.000 \n",
"1 0.0 0.0 0.0 0.0 ... 0.000 \n",
"2 0.0 0.0 0.0 0.0 ... 0.000 \n",
"3 0.0 0.0 0.0 0.0 ... 0.000 \n",
"... ... ... ... ... ... ... \n",
"9951 0.0 0.0 0.0 0.0 ... 5516.046 \n",
"9952 0.0 0.0 0.0 0.0 ... 5516.046 \n",
"9953 0.0 0.0 0.0 0.0 ... 5516.046 \n",
"9954 0.0 0.0 0.0 0.0 ... 5516.046 \n",
"\n",
" totdea_lower totdea_upper bedover_mean bedover_lower bedover_upper \\\n",
"0 0.0 0.00 0.0 0.0 0.0 \n",
"1 0.0 0.00 0.0 0.0 0.0 \n",
"2 0.0 0.00 0.0 0.0 0.0 \n",
"3 0.0 0.00 0.0 0.0 0.0 \n",
"... ... ... ... ... ... \n",
"9951 849.0 9624.05 0.0 0.0 0.0 \n",
"9952 849.0 9624.05 0.0 0.0 0.0 \n",
"9953 849.0 9624.05 0.0 0.0 0.0 \n",
"9954 849.0 9624.05 0.0 0.0 0.0 \n",
"\n",
" icuover_mean icuover_lower icuover_upper location_name \n",
"0 0.0 0.0 0.0 Wyoming \n",
"1 0.0 0.0 0.0 Wyoming \n",
"2 0.0 0.0 0.0 Wyoming \n",
"3 0.0 0.0 0.0 Wyoming \n",
"... ... ... ... ... \n",
"9951 0.0 0.0 0.0 Alabama \n",
"9952 0.0 0.0 0.0 Alabama \n",
"9953 0.0 0.0 0.0 Alabama \n",
"9954 0.0 0.0 0.0 Alabama \n",
"\n",
"[9955 rows x 31 columns]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.read_csv('2020_04_01.2/Hospitalization_all_locs.csv')\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>cumulative_deaths</th>\n",
" <th>cases</th>\n",
" <th>deaths</th>\n",
" <th>infection_fatality_ratio</th>\n",
" </tr>\n",
" <tr>\n",
" <th>date</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2020-02-06</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2020-02-07</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2020-02-08</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2020-02-09</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2020-08-01</th>\n",
" <td>978.3</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2020-08-02</th>\n",
" <td>978.3</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2020-08-03</th>\n",
" <td>978.3</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2020-08-04</th>\n",
" <td>978.3</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.01</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>181 rows × 4 columns</p>\n",
"</div>"
],
"text/plain": [
" cumulative_deaths cases deaths infection_fatality_ratio\n",
"date \n",
"2020-02-06 0.0 0.0 0.0 0.01\n",
"2020-02-07 0.0 0.0 0.0 0.01\n",
"2020-02-08 0.0 0.0 0.0 0.01\n",
"2020-02-09 0.0 0.0 0.0 0.01\n",
"... ... ... ... ...\n",
"2020-08-01 978.3 0.0 0.0 0.01\n",
"2020-08-02 978.3 0.0 0.0 0.01\n",
"2020-08-03 978.3 0.0 0.0 0.01\n",
"2020-08-04 978.3 0.0 0.0 0.01\n",
"\n",
"[181 rows x 4 columns]"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def etl_state(location):\n",
"    \"\"\"Extract the projection rows for one location and rename them for export.\n",
"\n",
"    Reads the module-level ``df`` (loaded from Hospitalization_all_locs.csv).\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    location : str\n",
"        Value matched exactly against ``df.location``, e.g. 'New York'.\n",
"\n",
"    Returns\n",
"    -------\n",
"    pandas.DataFrame\n",
"        Indexed by ``date`` with columns ``cumulative_deaths``, ``cases``,\n",
"        ``deaths`` and ``infection_fatality_ratio``.\n",
"\n",
"    Raises\n",
"    ------\n",
"    ValueError\n",
"        If no row of ``df`` has the requested location.\n",
"    \"\"\"\n",
"    # Filter on the requested location; this was previously hard-coded to\n",
"    # 'Washington', so every call returned Washington regardless of argument.\n",
"    rows = df[df.location == location]\n",
"    if rows.empty:\n",
"        raise ValueError(f'no rows found for location {location!r}')\n",
"\n",
"    # NOTE(review): 'cases' is populated from the allbed_mean column, not from\n",
"    # a case count -- confirm this proxy is what the downstream model expects.\n",
"    t = rows.filter(['date', 'totdea_mean', 'allbed_mean']).set_index('date')\n",
"    t.columns = ['cumulative_deaths', 'cases']\n",
"    # Daily deaths are the day-over-day change in the cumulative series; the\n",
"    # first date has no predecessor, so its NaN difference is set to 0.\n",
"    t['deaths'] = t.cumulative_deaths.diff().fillna(0)\n",
"    # The same constant ratio is written to every date.\n",
"    t['infection_fatality_ratio'] = 0.01\n",
"\n",
"    return t\n",
"t = etl_state('New York')\n",
"t"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"t.to_csv('ny_projection.csv')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "dismod_mr",
"language": "python",
"name": "dismod_mr"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment