Skip to content

Instantly share code, notes, and snippets.

@ebergam
Last active April 7, 2020 09:24
Show Gist options
  • Save ebergam/7725f1fd982a6e1078ace700ae99afed to your computer and use it in GitHub Desktop.
Save ebergam/7725f1fd982a6e1078ace700ae99afed to your computer and use it in GitHub Desktop.
import pandas as pd
import os
import matplotlib.pyplot as plt
import missingno as msno
## The script merges and reshapes the data provided by https://github.com/kylemcdonald/covid-mobility-data
## Assumes all tsv files are in a folder called "archive"
## Inputs different files, outputs longform tidy data.
# Load every per-country report found in the "archive" folder.
# File names look like "2020-03-29_ES_Mobility_Report_en.pdf.tsv"; removing the
# date prefix and the report suffix leaves the country identifier (e.g. "ES").
frames = []
for filename in sorted(os.listdir('archive')):  # sorted -> reproducible row order
    if not filename.endswith('.tsv'):
        # Skip stray files (e.g. .DS_Store) that are not mobility reports.
        continue
    df = pd.read_csv(os.path.join('archive', filename), sep='\t')
    n = filename.replace('2020-03-29_', '')
    n = n.replace('_Mobility_Report_en.pdf.tsv', '')
    df['n'] = n
    frames.append(df)

if not frames:
    raise FileNotFoundError("No .tsv files found in the 'archive' folder")

# Concatenate once at the end: DataFrame.append was removed in pandas 2.0 and
# re-copied the whole accumulated frame on every iteration.
bdf = pd.concat(frames, sort=False)

print("Categories: {}".format(len(bdf['Category'].unique())))
print("States: {}".format(len(bdf['n'].unique())))
print("Regions: {}".format(len(bdf['Name'].unique())))

## Stack data from wide to long shape
# Everything that is not an identifier column is a per-date value column.
bdf = bdf.set_index(['Category', 'Name', 'n', 'Kind'])

# Reshape wide to long. The four index levels are named, so after stacking
# the former column labels become the unnamed fifth level, which reset_index
# exposes as 'level_4'; the stacked values land in a column labelled 0.
d = (
    bdf.stack(dropna=False)
    .reset_index()
    .rename(columns={'level_4': 'date', 0: 'val', 'n': 'CountryCode'})
)
d['date'] = pd.to_datetime(d['date'], format='%Y-%m-%d')  # convert dates to datetime
d = d.sort_values(['CountryCode', 'Name', 'date']).reset_index(drop=True)  # sort data
d.to_csv('google_mobility.csv', index=False)  # back it up
## Visualize all combinations for comparison
# Uncomment and create an "all_charts/" folder
#from matplotlib import pyplot as plt; plt.ioff()
#%matplotlib agg
## for r in d.Name.unique(): # this would print *all* combos
#for r in ['Italy', 'Belgium', 'Spain']:
#    for c in d.Category.unique():
#        m = d[(d['Name']==r) & (d['Category']==c)]
#        title = r + ' - ' + c
#        fig = m.plot(x='date', y='val', title=title, style='k-', figsize=(8,5))
#        plt.savefig('all_charts/'+title+'.png', quality=80, dpi=100, bbox_inches='tight')
### Viz to check code
# %matplotlib inline
# msno.matrix(d) #all
# msno.matrix(d[d['Kind']=='region']) #countrylevel
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment