Santiago Casas santiagocasas

## plotmodels.py
# Scatter y against the index of this country's frame, labelled with the country name
ax.scatter(df_dict[country].index, y, label=country, color=col)

# Curve of the chosen model over the prediction dates
ax.plot(pred_date, mean, label=chosen_plotmodel + " model", color=col)

# Shaded band between the upper and lower limits, in the same colour
ax.fill_between(pred_date, upper_lim, lower_lim, color=col, alpha=0.4)

# Exponential model evaluated point by point on pred_x, drawn dashed
ax.plot(
    pred_date,
    [expo_model(i, p, N0=y0) for i in pred_x],
    '--',
    label="Exponential model",
    color=col,
)

## plotdates.py
# One more than the largest saved 'c_time' entry over the chosen countries
flatasymp = max(
    c_pars[(country, chosen_plotmodel, 'c_time')] for country in countries_list
) + 1

# Earliest first index entry (date of first case) among the chosen countries
mindate = min(df_dict[country].index[0] for country in countries_list)

# Date range for the current country's plot: it starts at that country's
# first index entry and spans flatasymp periods
pred_date = pd.date_range(start=df_dict[country].index[0], periods=flatasymp).values

# using the mindate - 3days and the last predicted date + 3days for the x-limits of the plot

## colors.py
# Sample the Set1 colormap at 9 evenly spaced positions between 0 and 1
color_list = plt.cm.Set1( np.linspace(0.,1.0, 9 ) )
# Iterator over the sampled colours; each next() call yields the following one
color_iter = iter(color_list)
# NOTE(review): the two `...` lines look like placeholders for code elided from this snippet
...
...
# Take the next colour from the iterator
col=next(color_iter)

## prediction.py
chosenmodel = 'Logistic'

# Only the two known model names select a regression function; any other
# value prints nothing and leaves reg_model untouched.
if chosenmodel in ("Logistic", "Gompertz"):
    print("Chosen Model: ", chosenmodel)
    reg_model = logistic_model if chosenmodel == "Logistic" else gompertz_model


## asymp.py
# Evaluate the fitted logistic curve on x and report how well it matches y
y_pred = logistic_model(x, ai, bi, ci)
MSLE = sklm.mean_squared_log_error(y, y_pred)
print("Mean squared log error (MSLE): ", '{:.3f}'.format(MSLE))
print("Exp of RMSLE: ", '{:.3f}'.format(np.exp(np.sqrt(MSLE))))
print("R2 score: ", '{:.3f}'.format(sklm.r2_score(y, y_pred)))

# Fraction of int(ci) at which the curve is considered flat
perc_flat = 0.98
# Solve logistic_model(t) == perc_flat * int(ci) for t, starting the search at bi.
# fsolve returns an array even for a scalar start value, so its single element
# is taken explicitly: int() on an array with ndim > 0 is deprecated in NumPy.
sol = int(fsolve(lambda t: logistic_model(t, ai, bi, ci) - perc_flat * int(ci), bi)[0])
print('Day of flattening of the infection curve')
# Calendar date `sol` days after the first index entry of this country's frame
datesol = datetime.strftime(df_dict[country].index[0] + timedelta(days=sol), ' %d, %b %Y')

## optimize.py
    # Fit the logistic model to (x, y) and store the results in c_pars
    print('>>> Logistic Model')
    # p0 is the starting guess for (a, b, c); maxfev caps the number of function evaluations
    fit_i = curve_fit(logistic_model,x,y,p0=[3,20,5000], maxfev=10000)#, bounds=([0,0,0],[10,100,150000]))
    # fit_i[0] holds the optimal parameters, fit_i[1] their estimated covariance
    ai,bi,ci = fit_i[0]
    # One-standard-deviation errors: square roots of the covariance diagonal
    sigma_ai, sigma_bi, sigma_ci = np.sqrt(np.diag(fit_i[1]))

    # Results are keyed by (country, model name, parameter name)
    c_pars[(country,'Logistic','a')] = ai
    c_pars[(country,'Logistic','b')] = bi
    c_pars[(country,'Logistic','c')] = ci
    c_pars[(country,'Logistic','sga')] = sigma_ai
    c_pars[(country,'Logistic','sgb')] = sigma_bi
    # NOTE(review): sigma_ci is computed above but not stored in the lines visible here — confirm it is saved elsewhere

## loaddata.py
leaveout = -1
# A stop index of 0 would slice away every sample, so 0 is mapped to None
# (keep everything). The check does not depend on the country, so it is done
# once here instead of on every pass through the loop.
if leaveout == 0:
    leaveout = None

for country in countries_list:
    print('----')
    print('Country: ', country)

    # Drop the trailing samples selected by leaveout (the last one for -1)
    # so they are left out of the data used to make the prediction.
    x = np.array(list(df_dict[country]['DayCount'].values))[:leaveout]
    y = np.array(list(df_dict[country]['Cases'].values))[:leaveout]

## import.py
from datetime import datetime,timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn.metrics as sklm
from pandas.plotting import register_matplotlib_converters
from scipy.optimize import curve_fit
from scipy.optimize import fsolve
from sklearn.metrics import mean_squared_error

##needed to properly use datetime in plots
register_matplotlib_converters()

## plotmodels.py
# Draw the three growth models on a common axis of 100 points between 0 and 100
tt = np.linspace(0, 100, 100)
# Legend label corrected from the misspelt 'Logisitc'
plt.plot(tt, logistic_model(tt, 4.8, 50, 100000), c='b', label='Logistic')
plt.plot(tt, expo_model(tt, 0.23), c='g', label='Exponential')
plt.plot(tt, gompertz_model(tt, 12, 49, 100000), c='r', label='Gompertz')
plt.legend()
plt.xlabel('Time')
plt.ylabel('Cases')
# The upper limit sits above the value 100000 passed as c to the logistic and Gompertz curves
plt.ylim(-1000, 150000)

## models.py
def expo_model(x,p,N0=3,x0=1):
    """Exponential growth curve.

    The value is N0 at x == x0 and is multiplied by (1 + p) for every
    unit increase of x.

    Args:
        x: Point(s) at which to evaluate the curve.
        p: Growth rate per unit of x.
        N0: Value of the curve at x == x0.
        x0: Reference point at which the curve equals N0.

    Returns:
        N0 * (1 + p) ** (x - x0).
    """
    growth_factor = 1 + p
    elapsed = x - x0
    return N0 * growth_factor ** elapsed

def logistic_model(x,a,b,c):
    """Logistic (sigmoid) curve.

    Args:
        x: Point(s) at which to evaluate the curve.
        a: Scale of the transition along x.
        b: Midpoint; the curve equals c / 2 at x == b.
        c: Numerator of the curve, i.e. the value approached for large x
            when a is positive.

    Returns:
        c / (1 + exp(-(x - b) / a)).
    """
    exponent = -(x - b) / a
    denominator = 1 + np.exp(exponent)
    return c / denominator

def gompertz_model(x,a,b,c):
    """Gompertz growth curve.

    Args:
        x: Point(s) at which to evaluate the curve.
        a: Scale of the decay of the inner exponential along x.
        b: Multiplier of the inner exponential; the curve equals
            c * exp(-b) at x == 0.
        c: Overall factor, i.e. the value approached for large x when a
            is positive.

    Returns:
        c * exp(-b * exp(-x / a)).
    """
    inner = np.exp(-x / a)
    outer = np.exp(-b * inner)
    return c * outer
	# Scatter y against the index of this country's frame, labelled with the country name
	ax.scatter(df_dict[country].index, y, label=country, color=col)

	# Curve of the chosen model over the prediction dates
	ax.plot(pred_date, mean, label=chosen_plotmodel + " model", color=col)

	# Shaded band between the upper and lower limits, in the same colour
	ax.fill_between(pred_date, upper_lim, lower_lim, color=col, alpha=0.4)

	# Exponential model evaluated point by point on pred_x, drawn dashed
	ax.plot(
		pred_date,
		[expo_model(i, p, N0=y0) for i in pred_x],
		'--',
		label="Exponential model",
		color=col,
	)
	# One more than the largest saved 'c_time' entry over the chosen countries
	flatasymp = max(
		c_pars[(country, chosen_plotmodel, 'c_time')] for country in countries_list
	) + 1

	# Earliest first index entry (date of first case) among the chosen countries
	mindate = min(df_dict[country].index[0] for country in countries_list)

	# Date range for the current country's plot: it starts at that country's
	# first index entry and spans flatasymp periods
	pred_date = pd.date_range(start=df_dict[country].index[0], periods=flatasymp).values

	# using the mindate - 3days and the last predicted date + 3days for the x-limits of the plot
	# Sample the Set1 colormap at 9 evenly spaced positions between 0 and 1
	color_list = plt.cm.Set1( np.linspace(0.,1.0, 9 ) )
	# Iterator over the sampled colours; each next() call yields the following one
	color_iter = iter(color_list)
	# NOTE(review): the two `...` lines look like placeholders for code elided from this snippet
	...
	...
	# Take the next colour from the iterator
	col=next(color_iter)
	chosenmodel = 'Logistic'

	# Bind reg_model to the function matching the chosen model name; any other
	# name prints nothing and leaves reg_model untouched. The branch bodies
	# are indented again so the if/elif parses.
	if chosenmodel == "Logistic":
		print("Chosen Model: ", chosenmodel)
		reg_model = logistic_model
	elif chosenmodel == 'Gompertz':
		print("Chosen Model: ", chosenmodel)
		reg_model = gompertz_model
	# Evaluate the fitted logistic curve on x and report how well it matches y
	y_pred = logistic_model(x, ai, bi, ci)
	MSLE = sklm.mean_squared_log_error(y, y_pred)
	print("Mean squared log error (MSLE): ", '{:.3f}'.format(MSLE))
	print("Exp of RMSLE: ", '{:.3f}'.format(np.exp(np.sqrt(MSLE))))
	print("R2 score: ", '{:.3f}'.format(sklm.r2_score(y, y_pred)))

	# Fraction of int(ci) at which the curve is considered flat
	perc_flat = 0.98
	# Solve logistic_model(t) == perc_flat * int(ci) for t, starting the search at bi.
	# fsolve returns an array even for a scalar start value, so its single element
	# is taken explicitly: int() on an array with ndim > 0 is deprecated in NumPy.
	sol = int(fsolve(lambda t: logistic_model(t, ai, bi, ci) - perc_flat * int(ci), bi)[0])
	print('Day of flattening of the infection curve')
	# Calendar date `sol` days after the first index entry of this country's frame
	datesol = datetime.strftime(df_dict[country].index[0] + timedelta(days=sol), ' %d, %b %Y')
	# Fit the logistic model to (x, y) and store the results in c_pars
	print('>>> Logistic Model')
	# p0 is the starting guess for (a, b, c); maxfev caps the number of function evaluations
	fit_i = curve_fit(logistic_model,x,y,p0=[3,20,5000], maxfev=10000)#, bounds=([0,0,0],[10,100,150000]))
	# fit_i[0] holds the optimal parameters, fit_i[1] their estimated covariance
	ai,bi,ci = fit_i[0]
	# One-standard-deviation errors: square roots of the covariance diagonal
	sigma_ai, sigma_bi, sigma_ci = np.sqrt(np.diag(fit_i[1]))

	# Results are keyed by (country, model name, parameter name)
	c_pars[(country,'Logistic','a')] = ai
	c_pars[(country,'Logistic','b')] = bi
	c_pars[(country,'Logistic','c')] = ci
	c_pars[(country,'Logistic','sga')] = sigma_ai
	c_pars[(country,'Logistic','sgb')] = sigma_bi
	# NOTE(review): sigma_ci is computed above but not stored in the lines visible here — confirm it is saved elsewhere
	leaveout = -1
	# A stop index of 0 would slice away every sample, so 0 is mapped to None
	# (keep everything). The check does not depend on the country, so it is
	# done once here instead of on every pass through the loop.
	if leaveout == 0:
		leaveout = None

	# The loop body is indented again so the for statement parses
	for country in countries_list:
		print('----')
		print('Country: ', country)

		# Drop the trailing samples selected by leaveout (the last one for -1)
		# so they are left out of the data used to make the prediction.
		x = np.array(list(df_dict[country]['DayCount'].values))[:leaveout]
		y = np.array(list(df_dict[country]['Cases'].values))[:leaveout]
	import numpy as np
	import pandas as pd
	import matplotlib.pyplot as plt
	from pandas.plotting import register_matplotlib_converters
	##needed to properly use datetime in plots
	register_matplotlib_converters()
	from datetime import datetime,timedelta
	from sklearn.metrics import mean_squared_error
	from scipy.optimize import curve_fit
	from scipy.optimize import fsolve
	# Draw the three growth models on a common axis of 100 points between 0 and 100
	tt = np.linspace(0, 100, 100)
	# Legend label corrected from the misspelt 'Logisitc'
	plt.plot(tt, logistic_model(tt, 4.8, 50, 100000), c='b', label='Logistic')
	plt.plot(tt, expo_model(tt, 0.23), c='g', label='Exponential')
	plt.plot(tt, gompertz_model(tt, 12, 49, 100000), c='r', label='Gompertz')
	plt.legend()
	plt.xlabel('Time')
	plt.ylabel('Cases')
	# The upper limit sits above the value 100000 passed as c to the logistic and Gompertz curves
	plt.ylim(-1000, 150000)
	def expo_model(x,p,N0=3,x0=1):
		"""Exponential growth curve.

		The value is N0 at x == x0 and is multiplied by (1 + p) for every
		unit increase of x.

		Args:
			x: Point(s) at which to evaluate the curve.
			p: Growth rate per unit of x.
			N0: Value of the curve at x == x0.
			x0: Reference point at which the curve equals N0.

		Returns:
			N0 * (1 + p) ** (x - x0).
		"""
		# The multiplication and power operators are written out explicitly:
		# N0 is a number, not a callable, and the growth is exponential in
		# (x - x0), not linear.
		return N0 * (1 + p) ** (x - x0)

	def logistic_model(x,a,b,c):
		"""Logistic (sigmoid) curve.

		Args:
			x: Point(s) at which to evaluate the curve.
			a: Scale of the transition along x.
			b: Midpoint; the curve equals c / 2 at x == b.
			c: Numerator of the curve, i.e. the value approached for large x
				when a is positive.

		Returns:
			c / (1 + exp(-(x - b) / a)).
		"""
		# Body indented under the def so the function parses
		return c / (1 + np.exp(-(x - b) / a))

	def gompertz_model(x,a,b,c):
		"""Gompertz growth curve.

		Args:
			x: Point(s) at which to evaluate the curve.
			a: Scale of the decay of the inner exponential along x.
			b: Multiplier of the inner exponential; the curve equals
				c * exp(-b) at x == 0.
			c: Overall factor, i.e. the value approached for large x when a
				is positive.

		Returns:
			c * exp(-b * exp(-x / a)).
		"""
		# The products c * np.exp(...) and b * np.exp(...) are written out;
		# the fused names `cnp` and `bnp` do not exist.
		return c * np.exp(-b * np.exp(-x / a))