Skip to content

Instantly share code, notes, and snippets.

View mjam03's full-sized avatar

Mark Jamison mjam03

  • Belfast / London
View GitHub Profile
df_s_ed_21 = df_s_cod[(df_s_cod.Year == 2021) & (df_s_cod.Cause == 'All')].drop(columns=['Cause'])
df_s_ed = pd.concat([df_s_ed, df_s_ed_21])
df_s_ed.tail(10)
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(20,10))
df_s_ed[['week_start', 'Deaths']].set_index('week_start').rename(columns={'Deaths': 'Weekly Deaths'}).plot(ax=ax);
df_s_ed[['week_start', 'Deaths']].set_index('week_start').rename(columns={'Deaths': 'Quarterly MA Deaths'}).rolling(12).mean().plot(ax=ax);
df_s_ed[['week_start', 'Deaths']].set_index('week_start').rename(columns={'Deaths': 'Annual MA Deaths'}).rolling(52).mean().plot(ax=ax);
ax.set_title('Weekly Total Death Count in Scotland', fontdict={'fontsize': 14})
ax.set_xlabel('Time');
ax.set_ylabel('Death Count');
ax.set_yticklabels(['{:,}'.format(int(x)) for x in ax.get_yticks().tolist()]);
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(20,7))
# (df_s_cod[(df_s_cod.Cause.isin(['COVID-19']))][['week_start', 'Deaths']].set_index('week_start') / df_s_cod[(df_s_cod.Cause.isin(['COVID Mention']))][['week_start', 'Deaths']].set_index('week_start')).plot(kind='line', ax=ax, marker='d', secondary_y=True);
pd.pivot_table(df_s_cod[(df_s_cod.Cause.isin(['COVID Mention', 'COVID-19']))][['week_start', 'Cause', 'Deaths']], columns='Cause', index='week_start', values='Deaths').plot(kind='bar', ax=ax)
ax.set_title('Weekly Count of COVID COD vs COVID Mention', fontdict={'fontsize': 14})
ax.set_xlabel('Time');
ax.set_ylabel('Death Count');
ax.set_yticklabels(['{:,}'.format(int(x)) for x in ax.get_yticks().tolist()]);
# 2010-2020 weekly data - request and form xl object
NI_URL = 'https://www.nisra.gov.uk/sites/nisra.gov.uk/files/publications/Weekly%20Deaths%20by%20Age%20and%20Respiratory%20Deaths%202011-2020.xls'
r = requests.get(NI_URL)
if r.status_code == 200:
xl_obj = pd.ExcelFile(r.content)
else:
print("Request failed with error code: {}".format(r.status_code))
# iterate through tabs and strip relevant data
ni_dfs = []
for sn in xl_obj.sheet_names:
df_ni = pd.read_excel(xl_obj, sheet_name=sn, header=2)
df_ni = df_ni.dropna().iloc[:,[0,1,2,3,7]]
df_ni.columns = ['Week', 'week_start', 'week_end', 'Deaths', 'RespiratoryDeaths']
ni_dfs.append(df_ni)
# name the urls - ONS_DEATH_STRING comically named
NISRA_ROOT = 'https://www.nisra.gov.uk/publications/weekly-deaths'
NISRA_DEATH_STRING = 'Weekly_Deaths'
# request website, parse and identify from the html only the url link elements
req = Request(NISRA_ROOT, headers=hdr)
html_page = urlopen(req)
soup = BeautifulSoup(html_page, "lxml")
links = []
for link in soup.findAll('a'):
l = link.get('href')
# request the 2021 spreadsheet
r = requests.get(NI_XLS)
xl_obj = pd.ExcelFile(r.content)
# read, format and rename cols
df_ni_21 = pd.read_excel(xl_obj, sheet_name='Table 1', header=4)
df_ni_21 = df_ni_21.iloc[:,[0,1,2]].dropna()
df_ni_21.columns = ['Week', 'week_end', 'Deaths']
# force date types and ensure numeric data types
# concat 2010-2020 and the 2021 data together now we have homogenised the cols/data formats
df_ni = pd.concat([df_ni, df_ni_21])
df_ni['Country'] = 'Northern Ireland'
df_ni['Week'] = pd.to_numeric(df_ni['Week'].apply(lambda x: str(x).replace('a','').replace('b','')))
df_ni.tail()
# align dates with ons date map
df_ni = pd.merge_asof(left=df_ni.sort_values('week_start'), right=df_week_map[df_week_map.Year >2010], on=['week_start'], suffixes=['_DROP', ''])
df_ni = df_ni.drop(columns=[x for x in df_ni.columns if 'DROP' in x])
df_ni['week_start'] = df_ni['week_end'] + dt.timedelta(days=-6)
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(24,7))
df_ni[['week_end', 'Deaths', 'RespiratoryDeaths']].set_index('week_end').plot(ax=ax[0]);
ax[0].set_title('Weekly Respiratory and Total Death Count in Northern Ireland', fontdict={'fontsize': 14})
ax[0].set_xlabel('Time');
ax[0].set_ylabel('Death Count');
ax[0].set_yticklabels(['{:,}'.format(int(x)) for x in ax[0].get_yticks().tolist()]);
pd.DataFrame({'Respiratory Death Cerificate Mentions as % of Total Deaths': df_ni['RespiratoryDeaths'].divide(df_ni['Deaths'])}).set_index(df_ni['week_end']).plot(ax=ax[1]);