Skip to content

Instantly share code, notes, and snippets.

@sambrightman
Last active January 12, 2021 19:20
Show Gist options
  • Save sambrightman/fd455b129da5730e72dd81bf79ae14a0 to your computer and use it in GitHub Desktop.
Save sambrightman/fd455b129da5730e72dd81bf79ae14a0 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"import json\n",
"import pandas\n",
"from urllib import parse\n",
"\n",
"API_URL = 'https://coronavirus.data.gov.uk/api/'\n",
"INITIAL_PATH = '/v1/data?filters=areaType=nation&structure=%7B%22areaType%22:%22areaType%22,%22areaName%22:%22areaName%22,%22areaCode%22:%22areaCode%22,%22date%22:%22date%22,%22cumPeopleVaccinatedFirstDoseByPublishDate%22:%22cumPeopleVaccinatedFirstDoseByPublishDate%22,%22cumPeopleVaccinatedSecondDoseByPublishDate%22:%22cumPeopleVaccinatedSecondDoseByPublishDate%22%7D&format=json'"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"def fetch(path):\n",
" url = parse.urljoin(API_URL, path)\n",
" response = requests.get(url)\n",
" response.raise_for_status()\n",
"\n",
" response_json = response.json()\n",
" return response_json['length'], response_json['data'], response_json['pagination']['current'], response_json['pagination']['last'], response_json['pagination']['next']\n",
"\n",
"\n",
"def fetch_all():\n",
" next_path = INITIAL_PATH\n",
" while True:\n",
" length, data, current_path, last_path, next_path = fetch(next_path[1:])\n",
" if len(data) == length:\n",
" break\n",
" elif current_path == last_path:\n",
" raise Exception('reached end of pages') \n",
" elif next_path is None:\n",
" raise Exception('no next page')\n",
" return pandas.DataFrame(data).set_index('date').rename({\n",
" 'cumPeopleVaccinatedFirstDoseByPublishDate': 'cumFirstDoses',\n",
" 'cumPeopleVaccinatedSecondDoseByPublishDate': 'cumSecondDoses',\n",
" }, axis='columns')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"df = fetch_all()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>cumFirstDoses</th>\n",
" <th>cumSecondDoses</th>\n",
" <th>cumTotal</th>\n",
" <th>numFirstDoses</th>\n",
" <th>numSecondDoses</th>\n",
" <th>numTotal</th>\n",
" </tr>\n",
" <tr>\n",
" <th>date</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2021-01-10</th>\n",
" <td>2286572</td>\n",
" <td>391399</td>\n",
" <td>2677971</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2021-01-11</th>\n",
" <td>2431648</td>\n",
" <td>412167</td>\n",
" <td>2843815</td>\n",
" <td>145076.0</td>\n",
" <td>20768.0</td>\n",
" <td>165844.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" cumFirstDoses cumSecondDoses cumTotal numFirstDoses \\\n",
"date \n",
"2021-01-10 2286572 391399 2677971 NaN \n",
"2021-01-11 2431648 412167 2843815 145076.0 \n",
"\n",
" numSecondDoses numTotal \n",
"date \n",
"2021-01-10 NaN NaN \n",
"2021-01-11 20768.0 165844.0 "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['cumTotal'] = df['cumFirstDoses'] + df['cumSecondDoses']\n",
"grouped = df.groupby('date').sum()\n",
"diffs = grouped.diff().rename(lambda name: 'n{}'.format(name[1:]), axis='columns')\n",
"grouped.merge(diffs, on='date')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.8"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
import requests
import json
import pandas
from urllib import parse
# Base URL that every request path is resolved against (see fetch()).
API_URL = 'https://coronavirus.data.gov.uk/api/'

# Path and query string of the first page to request.  The `structure`
# parameter is a percent-encoded JSON object (`%7B`/`%7D` are braces, `%22` is
# a double quote) naming the fields requested for each row.  The literal is
# split into adjacent string pieces purely for readability; Python joins them
# into one string at compile time.
INITIAL_PATH = (
    '/v1/data?filters=areaType=nation'
    '&structure=%7B'
    '%22areaType%22:%22areaType%22,'
    '%22areaName%22:%22areaName%22,'
    '%22areaCode%22:%22areaCode%22,'
    '%22date%22:%22date%22,'
    '%22cumPeopleVaccinatedFirstDoseByPublishDate%22:%22cumPeopleVaccinatedFirstDoseByPublishDate%22,'
    '%22cumPeopleVaccinatedSecondDoseByPublishDate%22:%22cumPeopleVaccinatedSecondDoseByPublishDate%22'
    '%7D&format=json'
)
def fetch(path, timeout=30):
    """Fetch one page of API results and unpack the fields the caller needs.

    Args:
        path: Path (or URL) resolved against ``API_URL`` with
            ``urllib.parse.urljoin``.  A path with a leading ``/`` would
            replace the ``/api/`` prefix of ``API_URL``, so callers pass it
            without one.
        timeout: Seconds to wait for the server before giving up; forwarded to
            ``requests.get``.  Without a timeout a stalled connection would
            block forever.

    Returns:
        A 5-tuple ``(length, data, current, last, next)`` taken from the
        ``length``, ``data`` and ``pagination.{current,last,next}`` keys of the
        JSON response body.

    Raises:
        requests.HTTPError: If the response status indicates an error.
        requests.Timeout: If the server does not answer within ``timeout``.
        KeyError: If the response body lacks one of the expected keys.
    """
    url = parse.urljoin(API_URL, path)
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    payload = response.json()
    length = payload['length']
    data = payload['data']
    pagination = payload['pagination']
    return (
        length,
        data,
        pagination['current'],
        pagination['last'],
        pagination['next'],
    )
def fetch_all():
    """Download every page of results and return them as one DataFrame.

    Starting from ``INITIAL_PATH``, pages are requested with ``fetch`` and
    their rows are accumulated until the response reports no ``next`` page
    (or the current page is the last one).  Previously each page overwrote
    the rows of the page before it, so multi-page results were never combined.

    Returns:
        A ``pandas.DataFrame`` indexed by ``date`` with the two vaccination
        columns renamed to ``cumFirstDoses`` and ``cumSecondDoses``.

    Raises:
        Whatever ``fetch`` raises for a failed request, and ``KeyError`` if
        the combined rows have no ``date`` column (e.g. no rows at all).
    """
    rows = []
    next_path = INITIAL_PATH
    while next_path is not None:
        # Strip leading slashes so urljoin keeps API_URL's '/api/' prefix.
        # NOTE(review): assumes the pagination paths returned by the API are
        # relative to that prefix, like INITIAL_PATH -- confirm.
        _length, data, current_path, last_path, next_path = fetch(
            next_path.lstrip('/')
        )
        rows.extend(data)
        # Guard against a response that still advertises a next page after
        # the last one, which would otherwise loop forever.
        if current_path == last_path:
            break

    column_names = {
        'cumPeopleVaccinatedFirstDoseByPublishDate': 'cumFirstDoses',
        'cumPeopleVaccinatedSecondDoseByPublishDate': 'cumSecondDoses',
    }
    frame = pandas.DataFrame(rows).set_index('date')
    return frame.rename(column_names, axis='columns')
# Download the per-nation cumulative vaccination figures.
df = fetch_all()

# Combined cumulative doses (first + second) for each row.
df['cumTotal'] = df['cumFirstDoses'] + df['cumSecondDoses']

# Total per date across all rows.  numeric_only=True drops the non-numeric
# columns explicitly: older pandas discarded them silently, but pandas >= 2.0
# would sum (concatenate) the strings and the diff() below would then fail.
grouped = df.groupby('date').sum(numeric_only=True)

# Day-over-day change of each cumulative column.  Dropping the first letter
# and prefixing 'n' turns 'cumX' into 'numX' (e.g. cumTotal -> numTotal).
diffs = grouped.diff().rename(lambda name: 'n{}'.format(name[1:]), axis='columns')

# Cumulative and daily figures side by side; as the last expression of a
# notebook cell this is what gets displayed.
grouped.merge(diffs, on='date')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment