Skip to content

Instantly share code, notes, and snippets.

@tommycarstensen
Created March 14, 2020 03:26
Show Gist options
  • Save tommycarstensen/fa5be3c3ba94e1c53dfe12cb67bdfe01 to your computer and use it in GitHub Desktop.
Save tommycarstensen/fa5be3c3ba94e1c53dfe12cb67bdfe01 to your computer and use it in GitHub Desktop.
Script to fit COVID19 cases to logistic sigmoid functions.
from datetime import datetime
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import argparse
import os
import requests
import random
parser = argparse.ArgumentParser()
parser.add_argument(
'--countries', nargs='*',
help='e.g. Denmark,Sweden,Norway,Finland,Iceland',
)
parser.add_argument(
'--region',
choices=('Europe', 'EU', 'Scandinavia'),
)
args = parser.parse_args()
dateToday = datetime.today().strftime('%Y-%m-%d')
url = 'https://www.ecdc.europa.eu/sites/default/files/documents/COVID-19-geographic-disbtribution-worldwide-{}.xls'.format(dateToday)
basename = os.path.basename(url)
if not os.path.isfile(basename):
with open(basename, 'wb') as f:
r = requests.get(url)
f.write(r.content)
df = pd.read_excel(basename)
d_region2countries = {
'EU': [
'Austria', 'Belgium', 'Bulgaria', 'Croatia', 'Cyprus', 'Czech Republic',
'Czech republic', 'Denmark', 'Estonia', 'Finland', 'France', 'Germany',
'Greece', 'Hungary', 'Ireland', 'Italy', 'Latvia', 'Lithuania',
'Luxembourg',
'Malta', 'Netherlands', 'Poland', 'Portugal', 'Romania', 'Slovakia',
'Slovenia', 'Spain', 'Sweden',
],
'Asia': (
'China', 'Japan', 'India', 'Indonesia', 'South Korea', 'Singapore',
'Bangladesh',
),
'Scandinavia': ('Denmark', 'Sweden', 'Norway'),
}
if not args.countries is None:
countries = ' '.join(args.countries).split(',')
title = ','.join(countries)
affix = ''.join(countries).replace(' ','')
elif not args.region is None:
countries = d_region2countries[args.region]
title = args.region
affix = args.region
else:
countries = df['CountryExp'].unique()
title = 'World'
affix = 'World'
df = df[df['CountryExp'].isin(countries)]
df2 = df.filter(['NewConfCases', 'DateRep', 'NewDeaths']).groupby('DateRep').sum()
# x = df2.index.strftime('%Y-%m-%d').values
y = df2['NewConfCases'].values.cumsum()
x = list(range(len(y)))
if max(y) < 1000: # Singapore 187
print('Insufficient cumulated cases (n={}) to carry out fitting.'.format(max(y)))
x = df2.index.strftime('%Y-%m-%d').values
print('\n'.join('{}\t{}'.format(x, y) for x,y in zip(x, y)))
exit()
def sigmoid(x, a, b):
y = 1 / (1 + np.exp(-b*(x-a)))
return y
def logistic(x, a, b, c):
# a is maximum
# b is steepness/inclination
# c is midpoint
y = a / (1 + np.exp(-b * (x - c)))
return y
# Seed values for regression.
p0=[10 * max(y), 0.5, 75 + list(y[::-1]).index(0)]
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.curve_fit.html
try:
popt, pcov = curve_fit(logistic, x, y, p0=p0)
except:
print('\n'.join('{}\t{}'.format(x, y) for x,y in zip(x, y)))
exit()
perr = np.sqrt(np.diag(pcov))
if popt[0] < max(y):
print('Insufficient cumulated cases (n={}) to carry out fitting.'.format(max(y)))
print(popt)
exit()
print('maximum', popt[0])
print('midpoint', popt[2] - 3 * perr[2], popt[2] + 3 * perr[2])
print('steepness', popt[0])
plt.xlabel('Days')
plt.ylabel('Cases')
x2 = list(range(2 * len(y)))
for i in range(1000):
a = random.triangular(popt[0] - 3 * perr[0], popt[0] + 3 * perr[0])
b = random.triangular(popt[1] - 3 * perr[1], popt[1] + 3 * perr[1])
c = random.triangular(popt[2] - 3 * perr[2], popt[2] + 3 * perr[2])
if a < max(y):
print('Insufficient cumulated cases (n={}) to carry out fitting.'.format(max(y)))
print(a, b, c)
exit()
assert a >= max(y)
assert b > 0
assert b < 1
assert c > 0
assert c < 2 * len(y)
y2 = [logistic(_, a, b, c) for _ in x2]
plt.plot(x2, y2, 'y', alpha=.05, zorder=1)
plt.scatter(x, y, c='red', label='Present and past confirmed cases', zorder=2)
plt.bar(x, df2['NewConfCases'].values, label='New confirmed cases', color='green', zorder=3)
# plt.bar(x, df2['NewDeaths'].values, label='New deaths', color='orange', zorder=3)
plt.plot(x2, logistic(x2, *popt), 'blue', label='Fitted sigmoid logistic function', zorder=2)
plt.plot(x2, y2, color='yellow', alpha=.05, label='Probable range of outcomes', zorder=2)
title = '{} {}'.format(title, dateToday)
title += '\np={:.3f}, Maximum={:d}, Midpoint={:d}, Steepness={:.2f}'.format(
1, int(popt[0]), int(popt[2]), popt[1])
title += '\nCurrent day={}'.format(len(x))
title += ', Total cases={}'.format(max(y))
title += ', Total deaths={}'.format(max(df2['NewDeaths'].values))
plt.title(title, fontsize='small')
plt.legend()
# plt.yscale('log')
path = 'COVID19_sigmoid_{}_{}.png'.format(affix, dateToday)
plt.savefig(path)
path = 'COVID19_sigmoid_{}.png'.format(affix)
plt.savefig(path)
print(path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment