This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bs4 import BeautifulSoup | |
import datetime as dt | |
import io | |
import matplotlib.dates as mdates | |
import matplotlib.pyplot as plt | |
import numpy as np | |
import pandas as pd | |
import requests | |
import seaborn as sns | |
import sys |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Simple example of my attempt to gistify
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bs4 import BeautifulSoup | |
import datetime as dt | |
import io | |
import matplotlib.dates as mdates | |
import matplotlib.pyplot as plt | |
import numpy as np | |
import pandas as pd | |
import requests | |
import seaborn as sns | |
import sys |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Hacky workaround: the ONS website returns 403 to requests that don't look
# like they come from a real browser, so spoof typical browser headers.
# Described here:
# https://stackoverflow.com/questions/13303449/urllib2-httperror-http-error-403-forbidden
hdr = {
    'User-Agent': ('Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 '
                   '(KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11'),
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
    'Accept-Encoding': 'none',
    'Accept-Language': 'en-US,en;q=0.8',
    'Connection': 'keep-alive',
}
# name the urls - ONS_DEATH_STRING comically named
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Now that we know the URLs to request the spreadsheets from, we need to:
#   - request them
#   - get the weekly deaths tab in each spreadsheet
#   - parse into pandas and manipulate to get the numbers we want
#   - handle formatting changes across the last 10 years
#   - create homogeneously named columns so each year of data can be concatenated
# England/Wales: list holding one DataFrame of weekly deaths per year.
ew_dfs = []
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sanity check: peek at the first rows of the second year's DataFrame.
# NOTE(review): assumes ew_dfs was populated by a download/parse cell not
# shown here — with ew_dfs still empty this would raise IndexError.
ew_dfs[1].head()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Form the query string for 2021, then request the workbook.
file_name = [x for x in ENG_WALES_XLS if str(2021) in x][0]
file_name = ONS_ROOT + file_name
# Pass the browser-like headers defined above: the ONS site bounces
# header-less requests with a 403, which is exactly why hdr exists.
r = requests.get(file_name, headers=hdr)
# Wrap the response bytes in a BytesIO buffer — pd.ExcelFile expects a
# path or file-like object, not a raw bytes blob.
xl_obj = pd.ExcelFile(io.BytesIO(r.content))
# Form a df of the 2021 deaths data: pull two rows out of the sheet and
# transpose them into columns (presumably week-ending dates and total
# deaths — TODO confirm against the sheet layout).
df_ew_21 = pd.read_excel(xl_obj, sheet_name='Weekly figures 2021', header=4)
df_ew_21 = pd.concat([df_ew_21.iloc[0], df_ew_21.iloc[3]], axis=1)
df_ew_21 = df_ew_21.reset_index()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# 2014 quirk: a change in ICD coding that year split respiratory deaths
# across two different columns, so merge the two incomplete columns with
# np.where (':' marks a missing entry in the IRIS-coded column).
iris_col = 'All respiratory diseases (ICD-10 J00-J99)\nICD-10 v 2013 (IRIS)'
nchs_col = 'ICD-10 v 2010 (NCHS)'
df_14 = ew_dfs[4]
df_14[iris_col] = np.where(df_14[iris_col] == ':', df_14[nchs_col], df_14[iris_col])
ew_dfs[4] = df_14.drop(columns=[nchs_col])
# Give every year the same column names, then stack all years together.
for annual_df in ew_dfs:
    annual_df.columns = ['Week', 'week_end', 'Deaths', 'RespiratoryDeaths', 'Year']
df_ew_ed = pd.concat(ew_dfs + [df_ew_21])
# add country
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sanity check: inspect the last rows of the combined England & Wales
# weekly-deaths DataFrame built in the cell above.
df_ew_ed.tail()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Plot: (left) weekly total and respiratory death counts over time;
# (right) respiratory deaths as a share of total deaths.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(24, 7))
df_ew_ed[['week_end', 'Deaths', 'RespiratoryDeaths']].set_index('week_end').plot(ax=ax[0]);
ax[0].set_title('Weekly Respiratory and Total Death Count in England & Wales', fontdict={'fontsize': 14})
ax[0].set_xlabel('Time');
ax[0].set_ylabel('Death Count');
# Pin the current tick locations with set_yticks before overriding the
# labels — calling set_yticklabels on auto-generated ticks triggers a
# UserWarning on recent matplotlib and can mislabel if the locator
# later re-computes the ticks.
yticks = ax[0].get_yticks().tolist()
ax[0].set_yticks(yticks)
ax[0].set_yticklabels(['{:,}'.format(int(t)) for t in yticks]);
pd.DataFrame({'Respiratory Deaths as % of Total Deaths': df_ew_ed['RespiratoryDeaths'].divide(df_ew_ed['Deaths'])}).set_index(df_ew_ed['week_end']).plot(ax=ax[1]);
OlderNewer