Skip to content

Instantly share code, notes, and snippets.

@murbard
Last active March 29, 2020 06:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save murbard/c8a5574d1e52940bccc5c111ce6b2898 to your computer and use it in GitHub Desktop.
Save murbard/c8a5574d1e52940bccc5c111ce6b2898 to your computer and use it in GitHub Desktop.
import requests
import pandas
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import ConstantKernel as C, RBF, WhiteKernel as W
import matplotlib
matplotlib.use('TkAgg')
from matplotlib import pyplot as plt
import os
filename = 'dpc-covid19-ita-regioni.csv'
if not os.path.exists(filename):
url = 'https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-regioni/%s' % filename
with open(filename, 'w') as fd:
fd.write(requests.get(url).text)
fd = open(filename, 'r')
df = pandas.read_csv(fd, parse_dates=['data'])
df = df.rename(columns={'totale_casi':'cases', 'data':'date'})
df = df[df['denominazione_regione'] == 'Lombardia'].copy()
df['days'] = (df['date'].dt.date - df['date'].iloc[0].date()).dt.days
df = df.set_index('days')
df['variation'] = df['cases'].diff()
df = df[df['cases'] >= 1]
kernel = C() * RBF() + W()
gp = GaussianProcessRegressor(kernel = kernel, n_restarts_optimizer=100, normalize_y = True)
X = np.array(df.index).reshape(-1,1)
y = np.log(np.array(df['cases']))
gp.fit(X, y)
pred, pred_cov = gp.predict(X, return_cov=True)
df['smoothed_cases'] = np.exp(pred)
variation_std = np.sqrt(pred_cov.diagonal()[:-1] + pred_cov.diagonal()[1:] - 2.0 * pred_cov.diagonal(offset=1))
variation_std = np.insert(variation_std, 0, np.nan)
dp = np.diff(pred, prepend=np.nan)
df['smoothed_returns'] = np.exp(dp) - 1
df['smoothed_returns_plus'] = np.exp(dp + 2.0 * variation_std) - 1
df['smoothed_returns_minus'] = np.exp(dp - 2.0 * variation_std) - 1
df = df[df['smoothed_cases'] >= 15][2:]
plt.scatter(df.index, df['variation'] / df['cases'], color='red')
plt.fill_between(df.index, df['smoothed_returns_minus'], df['smoothed_returns_plus'], alpha=0.2)
plt.plot(df.index, df['smoothed_returns'])
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment