Skip to content

Instantly share code, notes, and snippets.

View mjam03's full-sized avatar

Mark Jamison mjam03

  • Belfast / London
View GitHub Profile
@mjam03
mjam03 / .py
Created February 21, 2021 13:34
import_example_corona
from bs4 import BeautifulSoup
import datetime as dt
import io
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns
import sys
Simple example of
my attempt to gistify
from bs4 import BeautifulSoup
import datetime as dt
import io
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns
import sys
# hacky way described here to circumvent the ons website bouncing you because it knows you're not a human
# https://stackoverflow.com/questions/13303449/urllib2-httperror-http-error-403-forbidden
hdr = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
'Accept-Encoding': 'none',
'Accept-Language': 'en-US,en;q=0.8',
'Connection': 'keep-alive'}
# name the urls - ONS_DEATH_STRING comically named
# now we know the urls to request the spreadsheets, we need to:
# - request them
# - get the weekyl deaths tab in the spreadsheet
# - parse into pandas and manipulate their to get the numbers we want
# - handle formatting changed throughout last 10 years
# - create homogenously named columns to concat each year of data together
# england/wales list of annual dfs of weekly deaths
ew_dfs = []
ew_dfs[1].head()
# form query string for 2021 then request
file_name = [x for x in ENG_WALES_XLS if str(2021) in x][0]
file_name = ONS_ROOT + file_name
r = requests.get(file_name)
xl_obj = pd.ExcelFile(r.content)
# form df of deaths data
df_ew_21 = pd.read_excel(xl_obj, sheet_name='Weekly figures 2021', header=4)
df_ew_21 = pd.concat([df_ew_21.iloc[0], df_ew_21.iloc[3]], axis=1)
df_ew_21 = df_ew_21.reset_index()
# handle 2014 by using np.where to merge 2 incomplete cols (change in ICD coding that year results in 2 diff cols)
ew_dfs[4]['All respiratory diseases (ICD-10 J00-J99)\nICD-10 v 2013 (IRIS)'] = np.where(ew_dfs[4]['All respiratory diseases (ICD-10 J00-J99)\nICD-10 v 2013 (IRIS)'] == ':', ew_dfs[4]['ICD-10 v 2010 (NCHS)'], ew_dfs[4]['All respiratory diseases (ICD-10 J00-J99)\nICD-10 v 2013 (IRIS)'])
ew_dfs[4] = ew_dfs[4].drop(columns=['ICD-10 v 2010 (NCHS)'])
# rename all cols then concat
for df in ew_dfs:
df.columns = ['Week', 'week_end', 'Deaths', 'RespiratoryDeaths', 'Year']
df_ew_ed = pd.concat(ew_dfs+[df_ew_21])
# add country
df_ew_ed.tail()
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(24,7))
df_ew_ed[['week_end', 'Deaths', 'RespiratoryDeaths']].set_index('week_end').plot(ax=ax[0]);
ax[0].set_title('Weekly Respiratory and Total Death Count in England & Wales', fontdict={'fontsize': 14})
ax[0].set_xlabel('Time');
ax[0].set_ylabel('Death Count');
ax[0].set_yticklabels(['{:,}'.format(int(x)) for x in ax[0].get_yticks().tolist()]);
pd.DataFrame({'Respiratory Deaths as % of Total Deaths': df_ew_ed['RespiratoryDeaths'].divide(df_ew_ed['Deaths'])}).set_index(df_ew_ed['week_end']).plot(ax=ax[1]);