Skip to content

Instantly share code, notes, and snippets.

@Perishleaf
Created February 2, 2020 04:58
Show Gist options
  • Save Perishleaf/316b6de38cddd4066cd0d4c94b16b431 to your computer and use it in GitHub Desktop.
Save Perishleaf/316b6de38cddd4066cd0d4c94b16b431 to your computer and use it in GitHub Desktop.
# Import xlsx file and store each sheet in to a df list
xl_file = pd.ExcelFile('./data.xlsx',)
dfs = {sheet_name: xl_file.parse(sheet_name)
for sheet_name in xl_file.sheet_names}
# Data from each sheet can be accessed via key
keyList = list(dfs.keys())
# Data cleansing
for key, df in dfs.items():
dfs[key].loc[:,'Confirmed'].fillna(value=0, inplace=True)
dfs[key].loc[:,'Deaths'].fillna(value=0, inplace=True)
dfs[key].loc[:,'Recovered'].fillna(value=0, inplace=True)
dfs[key]=dfs[key].astype({'Confirmed':'int64', 'Deaths':'int64', 'Recovered':'int64'})
# Change as China for coordinate search
dfs[key]=dfs[key].replace({'Country/Region':'Mainland China'}, 'China')
dfs[key]=dfs[key].replace({'Province/State':'Queensland'}, 'Brisbane')
dfs[key]=dfs[key].replace({'Province/State':'New South Wales'}, 'Sydney')
dfs[key]=dfs[key].replace({'Province/State':'Victoria'}, 'Melbourne')
dfs[key]=dfs[key].replace({'Province/State':'South Australia'}, 'Adelaide')
# Add a zero to the date so can be convert by datetime.strptime as 0-padded date
dfs[key]['Last Update'] = '0' + dfs[key]['Last Update']
dfs[key]['Date_last_updated'] = [datetime.strptime(d, '%m/%d/%Y %H:%M') for d in dfs[key]['Last Update']]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment