Created
February 22, 2021 11:10
-
-
Save mjam03/9c58228cef53baa7e9e8b629424e6ba1 to your computer and use it in GitHub Desktop.
test_2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# now we know the urls to request the spreadsheets, we need to: | |
# - request them | |
# - get the weekyl deaths tab in the spreadsheet | |
# - parse into pandas and manipulate their to get the numbers we want | |
# - handle formatting changed throughout last 10 years | |
# - create homogenously named columns to concat each year of data together | |
# england/wales list of annual dfs of weekly deaths | |
ew_dfs = [] | |
# iterate through each year prior to 2021 | |
for year in range(2010,2021): | |
# get file name, form URL, request spreadsheet | |
file_name = [x for x in ENG_WALES_XLS if str(year) in x][0] | |
file_name = ONS_ROOT + file_name | |
r = requests.get(file_name) | |
xl_obj = pd.ExcelFile(r.content) | |
# find the sheet in the xls file that contains the data | |
weekly_sheet_name = [x for x in xl_obj.sheet_names if 'Weekly' in x and str(year) in x][0] | |
# exception handling for format change across years | |
if year in [2011]: | |
df_ew_temp = pd.read_excel(xl_obj, sheet_name=weekly_sheet_name, header=3) | |
df_ew_temp = pd.concat([df_ew_temp.iloc[0], df_ew_temp.iloc[3], df_ew_temp.iloc[9]], axis=1) | |
elif year in [2014]: | |
df_ew_temp = pd.read_excel(xl_obj, sheet_name=weekly_sheet_name, header=2) | |
df_ew_temp = pd.concat([df_ew_temp.iloc[0], df_ew_temp.iloc[3], df_ew_temp.iloc[8], df_ew_temp.iloc[9]], axis=1) | |
elif year in [2016]: | |
df_ew_temp = pd.read_excel(xl_obj, sheet_name=weekly_sheet_name, header=3) | |
df_ew_temp = pd.concat([df_ew_temp.iloc[0], df_ew_temp.iloc[3], df_ew_temp.iloc[8]], axis=1) | |
elif year in [2020]: | |
df_ew_temp = pd.read_excel(xl_obj, sheet_name=weekly_sheet_name, header=4) | |
df_ew_temp = pd.concat([df_ew_temp.iloc[0], df_ew_temp.iloc[3], df_ew_temp.iloc[12]], axis=1) | |
else: | |
df_ew_temp = pd.read_excel(xl_obj, sheet_name=weekly_sheet_name, header=3) | |
df_ew_temp = pd.concat([df_ew_temp.iloc[0], df_ew_temp.iloc[3], df_ew_temp.iloc[8]], axis=1) | |
# reformat to name cols, remove first row | |
df_ew_temp = df_ew_temp.reset_index() | |
df_ew_temp.columns = df_ew_temp.iloc[0] | |
df_ew_temp = df_ew_temp[df_ew_temp.index != 0] | |
# if later years then one col needs renamed from nan | |
if year in [2016, 2017, 2018, 2019, 2020]: | |
df_ew_temp = df_ew_temp.rename(columns={np.nan: df_ew_temp[np.nan][1]}) | |
df_ew_temp = df_ew_temp.dropna() | |
df_ew_temp['Year'] = year | |
ew_dfs.append(df_ew_temp) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment