Last active
July 26, 2021 10:53
-
-
Save xenatisch/5af06e87438426cb6bc4b7d9d3557f74 to your computer and use it in GitHub Desktop.
Calculates daily time series using the cumulative `value_metric` columns in data downloaded from the UK Coronavirus Dashboard - APIv2.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "4aad1530", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from pandas import read_csv" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "245369bd", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def cumulative2daily(dt, value_metric, record_metrics=list()):\n", | |
" \"\"\"\n", | |
" Calculates daily time series using the cumulative `value_metric` columns\n", | |
" in data downloaded from the UK Coronavirus Dashboard - APIv2.\n", | |
" \n", | |
" Records are grouped based on \n", | |
" \n", | |
" areaType, areaCode, date\n", | |
" \n", | |
" columns and any additional metrics - e.g. age - as defined in `record_metrics`.\n", | |
" \n", | |
" \n", | |
" Parameters\n", | |
" ----------\n", | |
" dt: DataFrame\n", | |
" Original data.\n", | |
" \n", | |
" value_metric: str\n", | |
" Name of the cumulative metric.\n", | |
" \n", | |
" record_metrics: List[str]\n", | |
" Name of the metrics, other than `areaType`, `areaCode`, `areaName`, and \n", | |
" `date`, based on which data should be grouped. \n", | |
" \n", | |
" For instance, for age demographics, this may be set to `[\"age\"]`.\n", | |
" \n", | |
" Returns\n", | |
" -------\n", | |
" DataFrame\n", | |
" Original dataset with an additional column. The new column will have the \n", | |
" same name as the `value_metric`, with a `new` prefix. For instance, when \n", | |
" `value_metric = 'cumCasesBySpecimenDate'`, the new column will be called\n", | |
" `newCumCasesBySpecimenDate`.\n", | |
" \"\"\"\n", | |
" metric_name = \"new\" + value_metric[0].upper() + value_metric[1:]\n", | |
"\n", | |
" dd = dt.copy(deep=True)\n", | |
" \n", | |
" dd.loc[:, [\"areaType\", \"areaCode\", \"date\", *record_metrics, value_metric]] = (\n", | |
" dd\n", | |
" .loc[:, [\"areaType\", \"areaCode\", \"date\", *record_metrics, value_metric]]\n", | |
" .drop_duplicates(keep=\"first\")\n", | |
" .sort_values([\"date\", *record_metrics])\n", | |
" .groupby([\"areaType\", \"areaCode\", *record_metrics])\n", | |
" .rolling(window=2, on=\"date\", axis=0)\n", | |
" .apply(lambda dr: dr.iloc[1] - dr.iloc[0])\n", | |
" .reset_index()\n", | |
" )\n", | |
" \n", | |
" dd = (\n", | |
" dd\n", | |
" .rename(columns={value_metric: metric_name})\n", | |
" .assign(**{value_metric: dt.loc[:, value_metric]})\n", | |
" )\n", | |
" \n", | |
" return dd" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "d9c9549d", | |
"metadata": {}, | |
"source": [ | |
"## Examples" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "63b556e6", | |
"metadata": {}, | |
"source": [ | |
"### Case demographics by sex" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "2541d515", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"url = \"https://api.coronavirus.data.gov.uk/v2/data?areaType=nation&areaCode=E92000001&metric=maleCases&format=csv\"" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"id": "6df9b85e", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>areaCode</th>\n", | |
" <th>areaName</th>\n", | |
" <th>areaType</th>\n", | |
" <th>date</th>\n", | |
" <th>age</th>\n", | |
" <th>rate</th>\n", | |
" <th>value</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>E92000001</td>\n", | |
" <td>England</td>\n", | |
" <td>nation</td>\n", | |
" <td>2021-07-24</td>\n", | |
" <td>5_to_9</td>\n", | |
" <td>4268.2</td>\n", | |
" <td>77360</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>E92000001</td>\n", | |
" <td>England</td>\n", | |
" <td>nation</td>\n", | |
" <td>2021-07-24</td>\n", | |
" <td>65_to_69</td>\n", | |
" <td>4879.7</td>\n", | |
" <td>66012</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>E92000001</td>\n", | |
" <td>England</td>\n", | |
" <td>nation</td>\n", | |
" <td>2021-07-24</td>\n", | |
" <td>10_to_14</td>\n", | |
" <td>7709.4</td>\n", | |
" <td>132560</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>E92000001</td>\n", | |
" <td>England</td>\n", | |
" <td>nation</td>\n", | |
" <td>2021-07-24</td>\n", | |
" <td>75_to_79</td>\n", | |
" <td>4662.2</td>\n", | |
" <td>41993</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>E92000001</td>\n", | |
" <td>England</td>\n", | |
" <td>nation</td>\n", | |
" <td>2021-07-24</td>\n", | |
" <td>40_to_44</td>\n", | |
" <td>10163.3</td>\n", | |
" <td>172796</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9951</th>\n", | |
" <td>E92000001</td>\n", | |
" <td>England</td>\n", | |
" <td>nation</td>\n", | |
" <td>2020-01-30</td>\n", | |
" <td>0_to_4</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9952</th>\n", | |
" <td>E92000001</td>\n", | |
" <td>England</td>\n", | |
" <td>nation</td>\n", | |
" <td>2020-01-30</td>\n", | |
" <td>60_to_64</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9953</th>\n", | |
" <td>E92000001</td>\n", | |
" <td>England</td>\n", | |
" <td>nation</td>\n", | |
" <td>2020-01-30</td>\n", | |
" <td>90+</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9954</th>\n", | |
" <td>E92000001</td>\n", | |
" <td>England</td>\n", | |
" <td>nation</td>\n", | |
" <td>2020-01-30</td>\n", | |
" <td>35_to_39</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9955</th>\n", | |
" <td>E92000001</td>\n", | |
" <td>England</td>\n", | |
" <td>nation</td>\n", | |
" <td>2020-01-30</td>\n", | |
" <td>15_to_19</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>9956 rows × 7 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" areaCode areaName areaType date age rate value\n", | |
"0 E92000001 England nation 2021-07-24 5_to_9 4268.2 77360\n", | |
"1 E92000001 England nation 2021-07-24 65_to_69 4879.7 66012\n", | |
"2 E92000001 England nation 2021-07-24 10_to_14 7709.4 132560\n", | |
"3 E92000001 England nation 2021-07-24 75_to_79 4662.2 41993\n", | |
"4 E92000001 England nation 2021-07-24 40_to_44 10163.3 172796\n", | |
"... ... ... ... ... ... ... ...\n", | |
"9951 E92000001 England nation 2020-01-30 0_to_4 0.0 0\n", | |
"9952 E92000001 England nation 2020-01-30 60_to_64 0.0 0\n", | |
"9953 E92000001 England nation 2020-01-30 90+ 0.0 0\n", | |
"9954 E92000001 England nation 2020-01-30 35_to_39 0.0 0\n", | |
"9955 E92000001 England nation 2020-01-30 15_to_19 0.0 0\n", | |
"\n", | |
"[9956 rows x 7 columns]" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"original_data = read_csv(url)\n", | |
"\n", | |
"original_data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"id": "f91fae21", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>areaCode</th>\n", | |
" <th>areaName</th>\n", | |
" <th>areaType</th>\n", | |
" <th>date</th>\n", | |
" <th>age</th>\n", | |
" <th>rate</th>\n", | |
" <th>newValue</th>\n", | |
" <th>value</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>E92000001</td>\n", | |
" <td>England</td>\n", | |
" <td>nation</td>\n", | |
" <td>2020-01-30</td>\n", | |
" <td>0_to_4</td>\n", | |
" <td>4268.2</td>\n", | |
" <td>NaN</td>\n", | |
" <td>77360</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>E92000001</td>\n", | |
" <td>England</td>\n", | |
" <td>nation</td>\n", | |
" <td>2020-02-05</td>\n", | |
" <td>0_to_4</td>\n", | |
" <td>4879.7</td>\n", | |
" <td>0.0</td>\n", | |
" <td>66012</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>E92000001</td>\n", | |
" <td>England</td>\n", | |
" <td>nation</td>\n", | |
" <td>2020-02-08</td>\n", | |
" <td>0_to_4</td>\n", | |
" <td>7709.4</td>\n", | |
" <td>0.0</td>\n", | |
" <td>132560</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>E92000001</td>\n", | |
" <td>England</td>\n", | |
" <td>nation</td>\n", | |
" <td>2020-02-09</td>\n", | |
" <td>0_to_4</td>\n", | |
" <td>4662.2</td>\n", | |
" <td>0.0</td>\n", | |
" <td>41993</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>E92000001</td>\n", | |
" <td>England</td>\n", | |
" <td>nation</td>\n", | |
" <td>2020-02-11</td>\n", | |
" <td>0_to_4</td>\n", | |
" <td>10163.3</td>\n", | |
" <td>0.0</td>\n", | |
" <td>172796</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9951</th>\n", | |
" <td>E92000001</td>\n", | |
" <td>England</td>\n", | |
" <td>nation</td>\n", | |
" <td>2021-07-20</td>\n", | |
" <td>90+</td>\n", | |
" <td>0.0</td>\n", | |
" <td>15.0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9952</th>\n", | |
" <td>E92000001</td>\n", | |
" <td>England</td>\n", | |
" <td>nation</td>\n", | |
" <td>2021-07-21</td>\n", | |
" <td>90+</td>\n", | |
" <td>0.0</td>\n", | |
" <td>15.0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9953</th>\n", | |
" <td>E92000001</td>\n", | |
" <td>England</td>\n", | |
" <td>nation</td>\n", | |
" <td>2021-07-22</td>\n", | |
" <td>90+</td>\n", | |
" <td>0.0</td>\n", | |
" <td>14.0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9954</th>\n", | |
" <td>E92000001</td>\n", | |
" <td>England</td>\n", | |
" <td>nation</td>\n", | |
" <td>2021-07-23</td>\n", | |
" <td>90+</td>\n", | |
" <td>0.0</td>\n", | |
" <td>14.0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9955</th>\n", | |
" <td>E92000001</td>\n", | |
" <td>England</td>\n", | |
" <td>nation</td>\n", | |
" <td>2021-07-24</td>\n", | |
" <td>90+</td>\n", | |
" <td>0.0</td>\n", | |
" <td>5.0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>9956 rows × 8 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" areaCode areaName areaType date age rate newValue \\\n", | |
"0 E92000001 England nation 2020-01-30 0_to_4 4268.2 NaN \n", | |
"1 E92000001 England nation 2020-02-05 0_to_4 4879.7 0.0 \n", | |
"2 E92000001 England nation 2020-02-08 0_to_4 7709.4 0.0 \n", | |
"3 E92000001 England nation 2020-02-09 0_to_4 4662.2 0.0 \n", | |
"4 E92000001 England nation 2020-02-11 0_to_4 10163.3 0.0 \n", | |
"... ... ... ... ... ... ... ... \n", | |
"9951 E92000001 England nation 2021-07-20 90+ 0.0 15.0 \n", | |
"9952 E92000001 England nation 2021-07-21 90+ 0.0 15.0 \n", | |
"9953 E92000001 England nation 2021-07-22 90+ 0.0 14.0 \n", | |
"9954 E92000001 England nation 2021-07-23 90+ 0.0 14.0 \n", | |
"9955 E92000001 England nation 2021-07-24 90+ 0.0 5.0 \n", | |
"\n", | |
" value \n", | |
"0 77360 \n", | |
"1 66012 \n", | |
"2 132560 \n", | |
"3 41993 \n", | |
"4 172796 \n", | |
"... ... \n", | |
"9951 0 \n", | |
"9952 0 \n", | |
"9953 0 \n", | |
"9954 0 \n", | |
"9955 0 \n", | |
"\n", | |
"[9956 rows x 8 columns]" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"processed_data = cumulative2daily(original_data, value_metric=\"value\", record_metrics=[\"age\"])\n", | |
"\n", | |
"processed_data" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3.7.7 64-bit ('anaconda3': virtualenv)", | |
"language": "python", | |
"name": "python37764bitanaconda3virtualenv170a8a6454fc493eac0c047df6a6a6c0" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.7" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment