Skip to content

Instantly share code, notes, and snippets.

@mjam03
Created February 22, 2021 11:10
Show Gist options
  • Save mjam03/9c58228cef53baa7e9e8b629424e6ba1 to your computer and use it in GitHub Desktop.
Save mjam03/9c58228cef53baa7e9e8b629424e6ba1 to your computer and use it in GitHub Desktop.
test_2
# now we know the urls to request the spreadsheets, we need to:
# - request them
# - get the weekyl deaths tab in the spreadsheet
# - parse into pandas and manipulate their to get the numbers we want
# - handle formatting changed throughout last 10 years
# - create homogenously named columns to concat each year of data together
# england/wales list of annual dfs of weekly deaths
ew_dfs = []
# iterate through each year prior to 2021
for year in range(2010,2021):
# get file name, form URL, request spreadsheet
file_name = [x for x in ENG_WALES_XLS if str(year) in x][0]
file_name = ONS_ROOT + file_name
r = requests.get(file_name)
xl_obj = pd.ExcelFile(r.content)
# find the sheet in the xls file that contains the data
weekly_sheet_name = [x for x in xl_obj.sheet_names if 'Weekly' in x and str(year) in x][0]
# exception handling for format change across years
if year in [2011]:
df_ew_temp = pd.read_excel(xl_obj, sheet_name=weekly_sheet_name, header=3)
df_ew_temp = pd.concat([df_ew_temp.iloc[0], df_ew_temp.iloc[3], df_ew_temp.iloc[9]], axis=1)
elif year in [2014]:
df_ew_temp = pd.read_excel(xl_obj, sheet_name=weekly_sheet_name, header=2)
df_ew_temp = pd.concat([df_ew_temp.iloc[0], df_ew_temp.iloc[3], df_ew_temp.iloc[8], df_ew_temp.iloc[9]], axis=1)
elif year in [2016]:
df_ew_temp = pd.read_excel(xl_obj, sheet_name=weekly_sheet_name, header=3)
df_ew_temp = pd.concat([df_ew_temp.iloc[0], df_ew_temp.iloc[3], df_ew_temp.iloc[8]], axis=1)
elif year in [2020]:
df_ew_temp = pd.read_excel(xl_obj, sheet_name=weekly_sheet_name, header=4)
df_ew_temp = pd.concat([df_ew_temp.iloc[0], df_ew_temp.iloc[3], df_ew_temp.iloc[12]], axis=1)
else:
df_ew_temp = pd.read_excel(xl_obj, sheet_name=weekly_sheet_name, header=3)
df_ew_temp = pd.concat([df_ew_temp.iloc[0], df_ew_temp.iloc[3], df_ew_temp.iloc[8]], axis=1)
# reformat to name cols, remove first row
df_ew_temp = df_ew_temp.reset_index()
df_ew_temp.columns = df_ew_temp.iloc[0]
df_ew_temp = df_ew_temp[df_ew_temp.index != 0]
# if later years then one col needs renamed from nan
if year in [2016, 2017, 2018, 2019, 2020]:
df_ew_temp = df_ew_temp.rename(columns={np.nan: df_ew_temp[np.nan][1]})
df_ew_temp = df_ew_temp.dropna()
df_ew_temp['Year'] = year
ew_dfs.append(df_ew_temp)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment