Skip to content

Instantly share code, notes, and snippets.

@tommycarstensen
Created March 31, 2020 01:42
Show Gist options
  • Save tommycarstensen/8f743419f78fb72115948311eca30e43 to your computer and use it in GitHub Desktop.
Save tommycarstensen/8f743419f78fb72115948311eca30e43 to your computer and use it in GitHub Desktop.
Script to generate CoViD-19 choropleth maps
# Tommy Carstensen, March 2020
import argparse
from datetime import datetime
import pandas as pd
import geopandas as gpd
import numpy as np
import math
import matplotlib.pyplot as plt
from matplotlib.colors import Normalize
import imageio
import shutil
import requests
import os
def main():
    """Render daily CoViD-19 choropleth maps and assemble them into GIFs.

    For each of the ``cases`` and ``deaths`` columns, one world map is drawn
    per reporting date, coloured by the cumulative count per one million
    inhabitants (log10-scaled while ``use_log`` is true). The frames are
    written to ``covid19_<column>_<cmap>_log<bool>.gif`` and the most recent
    frame is kept as ``covid19_<column>_<cmap>_log<bool>.png``. Every
    per-date PNG is removed once it has been read into memory.

    The parsed command-line arguments are only forwarded to ``parse_data``.
    """
    args = parse_args()
    df1, df2 = parse_data(args)
    df2['dateRep'] = pd.to_datetime(df2['dateRep'], format='%d/%m/%Y')
    # Cumulated sum by country (alpha3).
    df2 = (
        df2[['alpha3', 'dateRep', 'cases', 'deaths']]
        .groupby(['alpha3', 'dateRep']).sum()
        .groupby(level=[0]).cumsum()
        .reset_index()
    )
    # Fill intermittently missing data for each alpha3 group after
    # resampling the grouped dataframe to one row per day.
    df2 = (
        df2.set_index('dateRep')
        .groupby('alpha3').resample('1D').ffill()
        .reset_index(level='dateRep')
        .reset_index(drop=True)
    )
    df1.rename(columns={'iso_a3': 'alpha3'}, inplace=True)

    use_log = True
    # https://matplotlib.org/examples/color/colormaps_reference.html
    cmap = 'OrRd'
    # Upper end of the colour scale, in counts per one million inhabitants.
    value_max_by_column = {
        'cases': 10000,  # Italy 1224.1
        'deaths': 1000,  # Italy 123.5
    }
    # Lower end of the colour scale in log10 units.
    log_vmin_by_column = {'cases': -2, 'deaths': -3}

    for column in ('cases', 'deaths'):
        valuemax = value_max_by_column[column]
        images = []
        # Only one frame file is kept on disk at a time; the final one is
        # needed after the loop to produce the still image.
        last_frame_path = None
        for date_rep in sorted(df2['dateRep'].unique()):
            try:
                date_string = pd.to_datetime(date_rep).strftime('%Y-%m-%d')
            except ValueError:
                # Presumably guards against dates that cannot be formatted
                # (e.g. NaT); such dates are skipped.
                continue
            print(cmap, column, date_string)
            df = pd.merge(
                df1, df2[df2['dateRep'] == date_rep],
                on=['alpha3'], how='left',
            ).fillna(value={'cases': 0, 'deaths': 0})
            if use_log:
                df['proportion'] = np.log10(
                    10**6 * df[column] / df['pop_est'])
                # log10(0) is -inf; blank it so the country is drawn with
                # the "missing" colour instead.
                df.loc[df['proportion'] == -math.inf, 'proportion'] = None
                vmax = int(math.log10(valuemax))
                vmin = log_vmin_by_column[column]
                label = 'log10 of {} per 1 million'.format(column)
            else:
                df['proportion'] = 10**6 * df[column] / df['pop_est']
                vmin = 0
                # Series.max() skips NaN, unlike the builtin max().
                vmax = 10 ** math.ceil(
                    math.log10(max(0.001, df['proportion'].max())))
                label = '{} per 1 million'.format(column)

            # https://geopandas.org/mapping.html
            fig, ax = plt.subplots(1, 1)
            ax.axis('off')
            ax.set_title(
                'CoViD19 {}\n{}'.format(column, date_string),
                fontsize='large',
            )
            # https://stackoverflow.com/questions/53158096/editing-colorbar-legend-in-geopandas
            df.plot(
                column='proportion',
                cmap=cmap,
                linewidth=0.1,
                ax=ax,
                edgecolor='black',
                legend=True,
                legend_kwds={
                    'label': label,
                    'orientation': "horizontal",
                    # Same range as the map itself (was hard-coded to 0).
                    'norm': Normalize(vmin=vmin, vmax=vmax),
                },
                missing_kwds={'color': 'white'},
                vmin=vmin,
                vmax=vmax,
            )
            path = 'covid19_{}_{}_log{}_{}.png'.format(
                column, cmap, use_log, date_string,
            )
            fig.savefig(path, dpi=100)
            plt.close(fig)
            images.append(imageio.imread(path))
            # Delete the previous frame only now, so that the newest frame
            # still exists on disk when the loop ends.
            if last_frame_path is not None:
                os.remove(last_frame_path)
            last_frame_path = path

        if last_frame_path is None:
            # No frame was rendered; there is nothing to copy or animate.
            continue
        # Keep the most recent map as a still image, then clean up. The
        # copy must happen before the frame file is removed.
        shutil.copyfile(
            last_frame_path,
            'covid19_{}_{}_log{}.png'.format(column, cmap, use_log),
        )
        os.remove(last_frame_path)
        imageio.mimsave(
            'covid19_{}_{}_log{}.gif'.format(column, cmap, use_log),
            images, fps=2,
        )
def parse_data(args):
    """Download the ECDC case table and load the world map geometries.

    The CSV is fetched from the ECDC open-data endpoint and stored in a file
    named ``csv`` in the current working directory before being read with
    pandas. The world map is geopandas' bundled ``naturalearth_lowres``
    dataset, restricted to rows with a positive population estimate and
    excluding Antarctica.

    Several diagnostic ``print`` calls list the map rows without a valid ISO
    code and the country codes that occur in only one of the two tables.

    Args:
        args: Parsed command-line arguments. Not used by this function.

    Returns:
        tuple: ``(df1, df2)`` where ``df1`` is the filtered world
        GeoDataFrame (ISO codes in ``iso_a3``) and ``df2`` is the case
        DataFrame with ``geoId`` renamed to ``alpha2`` and
        ``countryterritoryCode`` renamed to ``alpha3``.

    Raises:
        requests.HTTPError: If the download returns an error status.
        requests.Timeout: If the server does not answer in time.
    """
    url = 'https://opendata.ecdc.europa.eu/covid19/casedistribution/csv'
    # A timeout prevents hanging forever; the status check stops an HTML
    # error page from being written out and parsed as CSV.
    r = requests.get(url, timeout=60)
    r.raise_for_status()
    # Explicit encoding matches pandas' UTF-8 default on read-back;
    # newline='' keeps the downloaded line endings untouched.
    with open('csv', 'w', encoding='utf-8', newline='') as f:
        f.write(r.text)
    df_covid19 = pd.read_csv('csv')
    df_covid19.rename(columns={
        'geoId': 'alpha2',
        'countryterritoryCode': 'alpha3',
    }, inplace=True)

    # NOTE(review): gpd.datasets is reported as deprecated/removed in recent
    # geopandas releases - confirm against the installed version.
    world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
    # Copy so the .loc assignments below modify an independent frame rather
    # than a filtered view of ``world``.
    df1 = world[(world.pop_est > 0) & (world.name != "Antarctica")].copy()

    # Fix error in dataset.
    # https://github.com/geopandas/geopandas/issues/1041
    print(df1[df1['iso_a3'] == '-99']['name'])
    df1.loc[df1['name'] == 'France', 'iso_a3'] = 'FRA'
    df1.loc[df1['name'] == 'Norway', 'iso_a3'] = 'NOR'
    df1.loc[df1['name'] == 'Somaliland', 'iso_a3'] = 'SOM'
    df1.loc[df1['name'] == 'Kosovo', 'iso_a3'] = 'RKS'
    print(df1[df1['iso_a3'] == '-99']['name'])

    # Report codes that appear in only one of the two tables.
    covid_codes = set(df_covid19['alpha3'].values)
    map_codes = set(df1['iso_a3'].values)
    only_in_covid = covid_codes - map_codes
    print(only_in_covid)
    print(map_codes - covid_codes)
    for x in only_in_covid:
        print(df_covid19[df_covid19['alpha3'] == x]['countriesAndTerritories'].unique())

    df2 = df_covid19
    return df1, df2
def parse_args():
    """Parse the command-line options of the script.

    Returns:
        argparse.Namespace: Holds ``date``, a ``YYYY-MM-DD`` string that
        defaults to today's date when ``--date`` is not supplied.
    """
    today_iso = datetime.today().strftime('%Y-%m-%d')
    date_option = {
        'default': today_iso,
        'help': 'Date in ISO 8601 format YYYY-MM-DD',
        'required': False,
    }
    cli = argparse.ArgumentParser()
    cli.add_argument('--date', **date_option)
    return cli.parse_args()
# Run the map generation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment