Last active
January 12, 2021 19:20
-
-
Save sambrightman/fd455b129da5730e72dd81bf79ae14a0 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
requests | |
pandas |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import requests\n", | |
"import json\n", | |
"import pandas\n", | |
"from urllib import parse\n", | |
"\n", | |
"API_URL = 'https://coronavirus.data.gov.uk/api/'\n", | |
"INITIAL_PATH = '/v1/data?filters=areaType=nation&structure=%7B%22areaType%22:%22areaType%22,%22areaName%22:%22areaName%22,%22areaCode%22:%22areaCode%22,%22date%22:%22date%22,%22cumPeopleVaccinatedFirstDoseByPublishDate%22:%22cumPeopleVaccinatedFirstDoseByPublishDate%22,%22cumPeopleVaccinatedSecondDoseByPublishDate%22:%22cumPeopleVaccinatedSecondDoseByPublishDate%22%7D&format=json'" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def fetch(path):\n", | |
" url = parse.urljoin(API_URL, path)\n", | |
" response = requests.get(url)\n", | |
" response.raise_for_status()\n", | |
"\n", | |
" response_json = response.json()\n", | |
" return response_json['length'], response_json['data'], response_json['pagination']['current'], response_json['pagination']['last'], response_json['pagination']['next']\n", | |
"\n", | |
"\n", | |
"def fetch_all():\n", | |
" next_path = INITIAL_PATH\n", | |
" while True:\n", | |
" length, data, current_path, last_path, next_path = fetch(next_path[1:])\n", | |
" if len(data) == length:\n", | |
" break\n", | |
" elif current_path == last_path:\n", | |
" raise Exception('reached end of pages') \n", | |
" elif next_path is None:\n", | |
" raise Exception('no next page')\n", | |
" return pandas.DataFrame(data).set_index('date').rename({\n", | |
" 'cumPeopleVaccinatedFirstDoseByPublishDate': 'cumFirstDoses',\n", | |
" 'cumPeopleVaccinatedSecondDoseByPublishDate': 'cumSecondDoses',\n", | |
" }, axis='columns')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df = fetch_all()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>cumFirstDoses</th>\n", | |
" <th>cumSecondDoses</th>\n", | |
" <th>cumTotal</th>\n", | |
" <th>numFirstDoses</th>\n", | |
" <th>numSecondDoses</th>\n", | |
" <th>numTotal</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>date</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>2021-01-10</th>\n", | |
" <td>2286572</td>\n", | |
" <td>391399</td>\n", | |
" <td>2677971</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2021-01-11</th>\n", | |
" <td>2431648</td>\n", | |
" <td>412167</td>\n", | |
" <td>2843815</td>\n", | |
" <td>145076.0</td>\n", | |
" <td>20768.0</td>\n", | |
" <td>165844.0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" cumFirstDoses cumSecondDoses cumTotal numFirstDoses \\\n", | |
"date \n", | |
"2021-01-10 2286572 391399 2677971 NaN \n", | |
"2021-01-11 2431648 412167 2843815 145076.0 \n", | |
"\n", | |
" numSecondDoses numTotal \n", | |
"date \n", | |
"2021-01-10 NaN NaN \n", | |
"2021-01-11 20768.0 165844.0 " | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df['cumTotal'] = df['cumFirstDoses'] + df['cumSecondDoses']\n", | |
"grouped = df.groupby('date').sum()\n", | |
"diffs = grouped.diff().rename(lambda name: 'n{}'.format(name[1:]), axis='columns')\n", | |
"grouped.merge(diffs, on='date')" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.8" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import json | |
import pandas | |
from urllib import parse | |
# Base endpoint of the dashboard API; request paths are joined onto this.
API_URL = 'https://coronavirus.data.gov.uk/api/'

# Path and query string of the first page to request.  The `structure`
# parameter is a percent-encoded JSON object (%7B = '{', %22 = '"', %7D = '}')
# mapping each output field name to the metric of the same name.  The literal
# is split across lines only for readability; adjacent string literals are
# concatenated into a single value.
INITIAL_PATH = (
    '/v1/data?filters=areaType=nation'
    '&structure=%7B'
    '%22areaType%22:%22areaType%22,'
    '%22areaName%22:%22areaName%22,'
    '%22areaCode%22:%22areaCode%22,'
    '%22date%22:%22date%22,'
    '%22cumPeopleVaccinatedFirstDoseByPublishDate%22:%22cumPeopleVaccinatedFirstDoseByPublishDate%22,'
    '%22cumPeopleVaccinatedSecondDoseByPublishDate%22:%22cumPeopleVaccinatedSecondDoseByPublishDate%22'
    '%7D'
    '&format=json'
)
def fetch(path, timeout=30):
    """Fetch a single page of results from the API.

    Args:
        path: Path plus query string, resolved against ``API_URL`` with
            ``urllib.parse.urljoin``.  It should not start with ``/``:
            urljoin treats a leading slash as host-absolute, which would
            drop the ``/api/`` prefix of ``API_URL``.
        timeout: Seconds to wait for the server, forwarded to
            ``requests.get``.  Without a timeout a stalled connection
            would block the caller indefinitely.

    Returns:
        A 5-tuple ``(length, data, current, last, next)``: the top-level
        ``length`` and ``data`` entries of the JSON response followed by
        the ``current``, ``last`` and ``next`` entries of its
        ``pagination`` object.

    Raises:
        requests.HTTPError: The response carried a 4xx/5xx status.
        requests.Timeout: The server did not answer within ``timeout``.
        KeyError: The JSON body lacks one of the expected keys.
    """
    url = parse.urljoin(API_URL, path)
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    payload = response.json()
    length, data = payload['length'], payload['data']
    pagination = payload['pagination']
    return length, data, pagination['current'], pagination['last'], pagination['next']
def fetch_all():
    """Download every page of the query and return it as one DataFrame.

    Starts at ``INITIAL_PATH`` and follows the ``next`` link reported by
    each page, collecting the rows of all pages.  Previously each page
    replaced the rows of the one before it, so at most a single page ever
    reached the DataFrame.

    NOTE(review): the earlier loop stopped when ``len(data) == length``.
    ``length`` appears to be the per-page record count (confirm against the
    API guide), in which case that test matched on the first page and no
    further page was requested.  Following ``next`` does not depend on the
    meaning of ``length``.

    Returns:
        A ``pandas.DataFrame`` indexed by ``date`` in which the two
        cumulative vaccination columns are renamed to ``cumFirstDoses``
        and ``cumSecondDoses``.

    Raises:
        KeyError: No rows were returned, so there is no ``date`` column
            to index by.
        Exception: Anything raised by ``fetch`` propagates unchanged.
    """
    rows = []
    next_path = INITIAL_PATH
    while next_path is not None:
        # Pagination paths start with '/'; drop it so that fetch() resolves
        # the path below API_URL's '/api/' prefix instead of replacing it.
        _length, data, current_path, last_path, next_path = fetch(next_path[1:])
        rows.extend(data)
        if current_path == last_path:
            # Last page reached; stop even if a 'next' link is still given.
            break

    return pandas.DataFrame(rows).set_index('date').rename({
        'cumPeopleVaccinatedFirstDoseByPublishDate': 'cumFirstDoses',
        'cumPeopleVaccinatedSecondDoseByPublishDate': 'cumSecondDoses',
    }, axis='columns')
# Download the per-nation cumulative first/second dose counts.
df = fetch_all()
df['cumTotal'] = df['cumFirstDoses'] + df['cumSecondDoses']

# Add the nations together for each publish date.  The dose columns are
# selected explicitly: the frame also holds text columns (areaType, areaName,
# areaCode), and pandas 2.x no longer drops those silently, so summing them
# would concatenate strings and make the diff() below fail.
dose_columns = ['cumFirstDoses', 'cumSecondDoses', 'cumTotal']
grouped = df.groupby('date')[dose_columns].sum()

# Day-over-day change of each cumulative column, renamed cumX -> numX.
# The first date has no predecessor, so its differences are NaN.
diffs = grouped.diff().rename(lambda name: name.replace('cum', 'num', 1), axis='columns')

# Cumulative and daily figures side by side, keyed by date (shown as the
# cell result when run in a notebook).
grouped.merge(diffs, on='date')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment