tommycarstensen/covid19.py

## covid19.py
from datetime import datetime
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import argparse
import os
import requests
import random

parser = argparse.ArgumentParser()
parser.add_argument(
    '--countries', nargs='*',
    help='e.g. Denmark,Sweden,Norway,Finland,Iceland',
    )
parser.add_argument(
    '--region',
    choices=('Europe', 'EU', 'Scandinavia'),
    )
args = parser.parse_args()

dateToday = datetime.today().strftime('%Y-%m-%d')
url = 'https://www.ecdc.europa.eu/sites/default/files/documents/COVID-19-geographic-disbtribution-worldwide-{}.xls'.format(dateToday)
basename = os.path.basename(url)
if not os.path.isfile(basename):
    with open(basename, 'wb') as f:
        r = requests.get(url)
        f.write(r.content)
df = pd.read_excel(basename)

d_region2countries = {
    'EU': [
        'Austria', 'Belgium', 'Bulgaria', 'Croatia', 'Cyprus', 'Czech Republic',
        'Czech republic', 'Denmark', 'Estonia', 'Finland', 'France', 'Germany',
        'Greece', 'Hungary', 'Ireland', 'Italy', 'Latvia', 'Lithuania',
        'Luxembourg',
        'Malta', 'Netherlands', 'Poland', 'Portugal', 'Romania', 'Slovakia',
        'Slovenia', 'Spain', 'Sweden',
        ],
    'Asia': (
        'China', 'Japan', 'India', 'Indonesia', 'South Korea', 'Singapore',
        'Bangladesh',
        ),
    'Scandinavia': ('Denmark', 'Sweden', 'Norway'),
    }

if not args.countries is None:
    countries = ' '.join(args.countries).split(',')
    title = ','.join(countries)
    affix = ''.join(countries).replace(' ','')
elif not args.region is None:
    countries = d_region2countries[args.region]
    title = args.region
    affix = args.region
else:
    countries = df['CountryExp'].unique()
    title = 'World'
    affix = 'World'

df = df[df['CountryExp'].isin(countries)]

df2 = df.filter(['NewConfCases', 'DateRep', 'NewDeaths']).groupby('DateRep').sum()

# x = df2.index.strftime('%Y-%m-%d').values
y = df2['NewConfCases'].values.cumsum()
x = list(range(len(y)))

if max(y) < 1000:  # Singapore 187
    print('Insufficient cumulated cases (n={}) to carry out fitting.'.format(max(y)))
    x = df2.index.strftime('%Y-%m-%d').values
    print('\n'.join('{}\t{}'.format(x, y) for x,y in zip(x, y)))
    exit()

def sigmoid(x, a, b):
     y = 1 / (1 + np.exp(-b*(x-a)))
     return y

def logistic(x, a, b, c):
    # a is maximum
    # b is steepness/inclination
    # c is midpoint
    y = a / (1 + np.exp(-b * (x - c)))
    return y

# Seed values for regression.
p0=[10 * max(y), 0.5, 75 + list(y[::-1]).index(0)]
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.curve_fit.html
try:
    popt, pcov = curve_fit(logistic, x, y, p0=p0)
except:
    print('\n'.join('{}\t{}'.format(x, y) for x,y in zip(x, y)))
    exit()
perr = np.sqrt(np.diag(pcov))
if popt[0] < max(y):
    print('Insufficient cumulated cases (n={}) to carry out fitting.'.format(max(y)))
    print(popt)
    exit()
print('maximum', popt[0])
print('midpoint', popt[2] - 3 * perr[2], popt[2] + 3 * perr[2])
print('steepness', popt[0])

plt.xlabel('Days')
plt.ylabel('Cases')
x2 = list(range(2 * len(y)))
for i in range(1000):
    a = random.triangular(popt[0] - 3 * perr[0], popt[0] + 3 * perr[0])
    b = random.triangular(popt[1] - 3 * perr[1], popt[1] + 3 * perr[1])
    c = random.triangular(popt[2] - 3 * perr[2], popt[2] + 3 * perr[2])
    if a < max(y):
        print('Insufficient cumulated cases (n={}) to carry out fitting.'.format(max(y)))
        print(a, b, c)
        exit()
    assert a >= max(y)
    assert b > 0
    assert b < 1
    assert c > 0
    assert c < 2 * len(y)
    y2 = [logistic(_, a, b, c) for _ in x2]
    plt.plot(x2, y2, 'y', alpha=.05, zorder=1)
plt.scatter(x, y, c='red', label='Present and past confirmed cases', zorder=2)
plt.bar(x, df2['NewConfCases'].values, label='New confirmed cases', color='green', zorder=3)
# plt.bar(x, df2['NewDeaths'].values, label='New deaths', color='orange', zorder=3)
plt.plot(x2, logistic(x2, *popt), 'blue', label='Fitted sigmoid logistic function', zorder=2)
plt.plot(x2, y2, color='yellow', alpha=.05, label='Probable range of outcomes', zorder=2)
title = '{} {}'.format(title, dateToday)
title += '\np={:.3f}, Maximum={:d}, Midpoint={:d}, Steepness={:.2f}'.format(
    1, int(popt[0]), int(popt[2]), popt[1])
title += '\nCurrent day={}'.format(len(x))
title += ', Total cases={}'.format(max(y))
title += ', Total deaths={}'.format(max(df2['NewDeaths'].values))
plt.title(title, fontsize='small')
plt.legend()
# plt.yscale('log')
path = 'COVID19_sigmoid_{}_{}.png'.format(affix, dateToday)
plt.savefig(path)
path = 'COVID19_sigmoid_{}.png'.format(affix)
plt.savefig(path)
print(path)
	from datetime import datetime
	from scipy.optimize import curve_fit
	import matplotlib.pyplot as plt
	import numpy as np
	import pandas as pd
	import argparse
	import os
	import requests
	import random

	parser = argparse.ArgumentParser()
	parser.add_argument(
	'--countries', nargs='*',
	help='e.g. Denmark,Sweden,Norway,Finland,Iceland',
	)
	parser.add_argument(
	'--region',
	choices=('Europe', 'EU', 'Scandinavia'),
	)
	args = parser.parse_args()

	dateToday = datetime.today().strftime('%Y-%m-%d')
	url = 'https://www.ecdc.europa.eu/sites/default/files/documents/COVID-19-geographic-disbtribution-worldwide-{}.xls'.format(dateToday)
	basename = os.path.basename(url)
	if not os.path.isfile(basename):
	with open(basename, 'wb') as f:
	r = requests.get(url)
	f.write(r.content)
	df = pd.read_excel(basename)

	d_region2countries = {
	'EU': [
	'Austria', 'Belgium', 'Bulgaria', 'Croatia', 'Cyprus', 'Czech Republic',
	'Czech republic', 'Denmark', 'Estonia', 'Finland', 'France', 'Germany',
	'Greece', 'Hungary', 'Ireland', 'Italy', 'Latvia', 'Lithuania',
	'Luxembourg',
	'Malta', 'Netherlands', 'Poland', 'Portugal', 'Romania', 'Slovakia',
	'Slovenia', 'Spain', 'Sweden',
	],
	'Asia': (
	'China', 'Japan', 'India', 'Indonesia', 'South Korea', 'Singapore',
	'Bangladesh',
	),
	'Scandinavia': ('Denmark', 'Sweden', 'Norway'),
	}

	if not args.countries is None:
	countries = ' '.join(args.countries).split(',')
	title = ','.join(countries)
	affix = ''.join(countries).replace(' ','')
	elif not args.region is None:
	countries = d_region2countries[args.region]
	title = args.region
	affix = args.region
	else:
	countries = df['CountryExp'].unique()
	title = 'World'
	affix = 'World'

	df = df[df['CountryExp'].isin(countries)]

	df2 = df.filter(['NewConfCases', 'DateRep', 'NewDeaths']).groupby('DateRep').sum()

	# x = df2.index.strftime('%Y-%m-%d').values
	y = df2['NewConfCases'].values.cumsum()
	x = list(range(len(y)))

	if max(y) < 1000: # Singapore 187
	print('Insufficient cumulated cases (n={}) to carry out fitting.'.format(max(y)))
	x = df2.index.strftime('%Y-%m-%d').values
	print('\n'.join('{}\t{}'.format(x, y) for x,y in zip(x, y)))
	exit()

	def sigmoid(x, a, b):
	y = 1 / (1 + np.exp(-b*(x-a)))
	return y

	def logistic(x, a, b, c):
	# a is maximum
	# b is steepness/inclination
	# c is midpoint
	y = a / (1 + np.exp(-b * (x - c)))
	return y

	# Seed values for regression.
	p0=[10 * max(y), 0.5, 75 + list(y[::-1]).index(0)]
	# https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.curve_fit.html
	try:
	popt, pcov = curve_fit(logistic, x, y, p0=p0)
	except:
	print('\n'.join('{}\t{}'.format(x, y) for x,y in zip(x, y)))
	exit()
	perr = np.sqrt(np.diag(pcov))
	if popt[0] < max(y):
	print('Insufficient cumulated cases (n={}) to carry out fitting.'.format(max(y)))
	print(popt)
	exit()
	print('maximum', popt[0])
	print('midpoint', popt[2] - 3 * perr[2], popt[2] + 3 * perr[2])
	print('steepness', popt[0])

	plt.xlabel('Days')
	plt.ylabel('Cases')
	x2 = list(range(2 * len(y)))
	for i in range(1000):
	a = random.triangular(popt[0] - 3 * perr[0], popt[0] + 3 * perr[0])
	b = random.triangular(popt[1] - 3 * perr[1], popt[1] + 3 * perr[1])
	c = random.triangular(popt[2] - 3 * perr[2], popt[2] + 3 * perr[2])
	if a < max(y):
	print('Insufficient cumulated cases (n={}) to carry out fitting.'.format(max(y)))
	print(a, b, c)
	exit()
	assert a >= max(y)
	assert b > 0
	assert b < 1
	assert c > 0
	assert c < 2 * len(y)
	y2 = [logistic(_, a, b, c) for _ in x2]
	plt.plot(x2, y2, 'y', alpha=.05, zorder=1)
	plt.scatter(x, y, c='red', label='Present and past confirmed cases', zorder=2)
	plt.bar(x, df2['NewConfCases'].values, label='New confirmed cases', color='green', zorder=3)
	# plt.bar(x, df2['NewDeaths'].values, label='New deaths', color='orange', zorder=3)
	plt.plot(x2, logistic(x2, *popt), 'blue', label='Fitted sigmoid logistic function', zorder=2)
	plt.plot(x2, y2, color='yellow', alpha=.05, label='Probable range of outcomes', zorder=2)
	title = '{} {}'.format(title, dateToday)
	title += '\np={:.3f}, Maximum={:d}, Midpoint={:d}, Steepness={:.2f}'.format(
	1, int(popt[0]), int(popt[2]), popt[1])
	title += '\nCurrent day={}'.format(len(x))
	title += ', Total cases={}'.format(max(y))
	title += ', Total deaths={}'.format(max(df2['NewDeaths'].values))
	plt.title(title, fontsize='small')
	plt.legend()
	# plt.yscale('log')
	path = 'COVID19_sigmoid_{}_{}.png'.format(affix, dateToday)
	plt.savefig(path)
	path = 'COVID19_sigmoid_{}.png'.format(affix)
	plt.savefig(path)
	print(path)