Mark Jamison mjam03

## .py
from bs4 import BeautifulSoup
import datetime as dt
import io
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns
import sys

## 38b0c12c-1657-44cc-8281-8b6e6ac04e80.py
Simple example of
 my attempt to gistify

## 5b02d9bb-5bb6-4ec0-b492-15316ed106a7.py
from bs4 import BeautifulSoup
import datetime as dt
import io
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns
import sys

## e2235136-b18a-4883-be15-e6cdd01e8353.py
# hacky way described here to circumvent the ons website bouncing you because it knows you're not a human
# https://stackoverflow.com/questions/13303449/urllib2-httperror-http-error-403-forbidden
hdr = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
       'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
       'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
       'Accept-Encoding': 'none',
       'Accept-Language': 'en-US,en;q=0.8',
       'Connection': 'keep-alive'}

# name the urls - ONS_DEATH_STRING comically named

## b87ef933-bb04-4916-9d1d-758a3abcca78.py
# now we know the urls to request the spreadsheets, we need to:
# - request them
# - get the weekyl deaths tab in the spreadsheet
# - parse into pandas and manipulate their to get the numbers we want
# - handle formatting changed throughout last 10 years
# - create homogenously named columns to concat each year of data together

# england/wales list of annual dfs of weekly deaths
ew_dfs = []

## 70799526-ef71-4ee4-bb1c-f0bc40f13582.py
ew_dfs[1].head()

## ae66ce2d-fd85-4e8e-97a5-e28e27ff6673.py
# form query string for 2021 then request
file_name = [x for x in ENG_WALES_XLS if str(2021) in x][0]
file_name = ONS_ROOT + file_name
r = requests.get(file_name)
xl_obj = pd.ExcelFile(r.content)

# form df of deaths data
df_ew_21 = pd.read_excel(xl_obj, sheet_name='Weekly figures 2021', header=4)
df_ew_21 = pd.concat([df_ew_21.iloc[0], df_ew_21.iloc[3]], axis=1)
df_ew_21 = df_ew_21.reset_index()

## d76e8a0a-bd52-4d32-9bda-2212e3c61489.py
# handle 2014 by using np.where to merge 2 incomplete cols (change in ICD coding that year results in 2 diff cols)
ew_dfs[4]['All respiratory diseases (ICD-10 J00-J99)\nICD-10 v 2013 (IRIS)'] = np.where(ew_dfs[4]['All respiratory diseases (ICD-10 J00-J99)\nICD-10 v 2013 (IRIS)'] == ':', ew_dfs[4]['ICD-10 v 2010 (NCHS)'], ew_dfs[4]['All respiratory diseases (ICD-10 J00-J99)\nICD-10 v 2013 (IRIS)'])
ew_dfs[4] = ew_dfs[4].drop(columns=['ICD-10 v 2010 (NCHS)'])

# rename all cols then concat
for df in ew_dfs:
    df.columns = ['Week', 'week_end', 'Deaths', 'RespiratoryDeaths', 'Year']
df_ew_ed = pd.concat(ew_dfs+[df_ew_21])

# add country

## a6e5c134-3d98-4633-a382-d08d615161fd.py
df_ew_ed.tail()

## 0ba66efb-a1ff-46d1-b72d-a7edc2232506.py
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(24,7))

df_ew_ed[['week_end', 'Deaths', 'RespiratoryDeaths']].set_index('week_end').plot(ax=ax[0]);
ax[0].set_title('Weekly Respiratory and Total Death Count in England & Wales', fontdict={'fontsize': 14})

ax[0].set_xlabel('Time');
ax[0].set_ylabel('Death Count');
ax[0].set_yticklabels(['{:,}'.format(int(x)) for x in ax[0].get_yticks().tolist()]);

pd.DataFrame({'Respiratory Deaths as % of Total Deaths': df_ew_ed['RespiratoryDeaths'].divide(df_ew_ed['Deaths'])}).set_index(df_ew_ed['week_end']).plot(ax=ax[1]);
	from bs4 import BeautifulSoup
	import datetime as dt
	import io
	import matplotlib.dates as mdates
	import matplotlib.pyplot as plt
	import numpy as np
	import pandas as pd
	import requests
	import seaborn as sns
	import sys
	# hacky way described here to circumvent the ons website bouncing you because it knows you're not a human
	# https://stackoverflow.com/questions/13303449/urllib2-httperror-http-error-403-forbidden
	hdr = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
	'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,/;q=0.8',
	'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
	'Accept-Encoding': 'none',
	'Accept-Language': 'en-US,en;q=0.8',
	'Connection': 'keep-alive'}

	# name the urls - ONS_DEATH_STRING comically named
	# now we know the urls to request the spreadsheets, we need to:
	# - request them
	# - get the weekyl deaths tab in the spreadsheet
	# - parse into pandas and manipulate their to get the numbers we want
	# - handle formatting changed throughout last 10 years
	# - create homogenously named columns to concat each year of data together

	# england/wales list of annual dfs of weekly deaths
	ew_dfs = []
	# form query string for 2021 then request
	file_name = [x for x in ENG_WALES_XLS if str(2021) in x][0]
	file_name = ONS_ROOT + file_name
	r = requests.get(file_name)
	xl_obj = pd.ExcelFile(r.content)

	# form df of deaths data
	df_ew_21 = pd.read_excel(xl_obj, sheet_name='Weekly figures 2021', header=4)
	df_ew_21 = pd.concat([df_ew_21.iloc[0], df_ew_21.iloc[3]], axis=1)
	df_ew_21 = df_ew_21.reset_index()
	# handle 2014 by using np.where to merge 2 incomplete cols (change in ICD coding that year results in 2 diff cols)
	ew_dfs[4]['All respiratory diseases (ICD-10 J00-J99)\nICD-10 v 2013 (IRIS)'] = np.where(ew_dfs[4]['All respiratory diseases (ICD-10 J00-J99)\nICD-10 v 2013 (IRIS)'] == ':', ew_dfs[4]['ICD-10 v 2010 (NCHS)'], ew_dfs[4]['All respiratory diseases (ICD-10 J00-J99)\nICD-10 v 2013 (IRIS)'])
	ew_dfs[4] = ew_dfs[4].drop(columns=['ICD-10 v 2010 (NCHS)'])

	# rename all cols then concat
	for df in ew_dfs:
	df.columns = ['Week', 'week_end', 'Deaths', 'RespiratoryDeaths', 'Year']
	df_ew_ed = pd.concat(ew_dfs+[df_ew_21])

	# add country
	fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(24,7))

	df_ew_ed[['week_end', 'Deaths', 'RespiratoryDeaths']].set_index('week_end').plot(ax=ax[0]);
	ax[0].set_title('Weekly Respiratory and Total Death Count in England & Wales', fontdict={'fontsize': 14})

	ax[0].set_xlabel('Time');
	ax[0].set_ylabel('Death Count');
	ax[0].set_yticklabels(['{:,}'.format(int(x)) for x in ax[0].get_yticks().tolist()]);

	pd.DataFrame({'Respiratory Deaths as % of Total Deaths': df_ew_ed['RespiratoryDeaths'].divide(df_ew_ed['Deaths'])}).set_index(df_ew_ed['week_end']).plot(ax=ax[1]);