Skip to content

Instantly share code, notes, and snippets.

@Gro-Tsen
Last active April 8, 2020 17:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Gro-Tsen/519cee2fcee431065a46757cf47096a6 to your computer and use it in GitHub Desktop.
Save Gro-Tsen/519cee2fcee431065a46757cf47096a6 to your computer and use it in GitHub Desktop.
import requests
import pandas
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import ConstantKernel as C, RBF, WhiteKernel as W, DotProduct as DP
import matplotlib
matplotlib.use('TkAgg')
from matplotlib import pyplot as plt
import os
## List of departments to sum over:
lstdeps = ['75', '92', '93', '94', '77', '78', '91', '95']
## Emergency-room and SOS Médecins data
## <URL: https://www.data.gouv.fr/fr/datasets/donnees-des-urgences-hospitalieres-et-de-sos-medecins-relatives-a-lepidemie-de-covid-19/ >
fn1 = '/tmp/sursaud-covid19-quotidien-2020-04-08-19h00-departement.csv'
## Read inside a `with` block so the file handle is closed once parsed.
## `dep` and the age-class column are pinned to str because both are
## compared against string codes below; left to type inference, purely
## numeric codes could come back as integers and never match.
with open(fn1, 'r') as fd1:
    df1 = pandas.read_csv(
        fd1,
        parse_dates=['date_de_passage'],
        dtype={'dep': str, 'sursaud_cl_age_corona': str},
    )
## Keep only age class '0' (presumably the all-ages aggregate -- confirm
## against the dataset documentation).
df1 = df1[df1['sursaud_cl_age_corona'] == '0'].copy()
## One array of `nbre_pass_corona` per department, added element-wise.
## This assumes every department lists the same dates in the same order.
tmp_deps = [np.array(df1[df1['dep'] == dep]['nbre_pass_corona']) for dep in lstdeps]
tmp_new_cases = sum(tmp_deps)
tmp_total_cases = tmp_new_cases.cumsum()
## Typical length of a hospital stay: ~10 days.
## Open cases = cumulative total minus the cumulative total 10 days earlier.
tmp_total_cases_shifted = np.insert(tmp_total_cases, 0, np.full(10, 0))
tmp_open_cases = tmp_total_cases - tmp_total_cases_shifted[0:len(tmp_total_cases)]
## Day-to-day change; the first entry is measured against an inserted 0.
tmp_open_cases_variation = np.diff(np.insert(tmp_open_cases, 0, 0))
starting_date = pandas.Timestamp('2020-02-24').date()
## Day offsets from starting_date, taken from the first listed department
## (all departments are assumed to share the same dates, see above).
tmp_days = np.array((df1[df1['dep'] == lstdeps[0]]['date_de_passage'].dt.date - starting_date).dt.days)
data_urgn = pandas.DataFrame({'days': tmp_days, 'new_cases': tmp_new_cases, 'total_cases': tmp_total_cases, 'open_cases': tmp_open_cases, 'open_cases_variation': tmp_open_cases_variation})
data_urgn = data_urgn.set_index('days')
## Explicit copy: columns are added below, and assigning to a filtered
## slice would trigger pandas' SettingWithCopyWarning.
data_urgn = data_urgn[data_urgn['total_cases']>=1].copy()
## Smooth log(open_cases) with a Gaussian process (RBF term plus white noise).
## Alternative kernel with an additional dot-product term:
# kernel = C() * RBF() + C() * DP() + W()
kernel = C() * RBF() + W()
gp = GaussianProcessRegressor(kernel = kernel, n_restarts_optimizer=200, normalize_y = True)
x = np.array(data_urgn.index).reshape(-1,1)
## NOTE(review): a day with open_cases == 0 would give log(0) = -inf here.
y = np.log(np.array(data_urgn['open_cases']))
gp.fit(x, y)
pred = gp.predict(x)
## Back to linear scale; computed once and reused for both columns.
tmp_smoothed = np.exp(pred)
data_urgn['smoothed_open_cases'] = tmp_smoothed
data_urgn['smoothed_open_cases_variation'] = np.diff(np.insert(tmp_smoothed, 0, 0))
## Drop the days where the smoothed count is below 15.
data_urgn = data_urgn[data_urgn['smoothed_open_cases'] >= 15]
## Hospital data
## <URL: https://www.data.gouv.fr/fr/datasets/donnees-hospitalieres-relatives-a-lepidemie-de-covid-19/ >
fn2 = '/tmp/donnees-hospitalieres-covid19-2020-04-08-19h00.csv'
## Read inside a `with` block so the file handle is closed once parsed.
## `dep` is pinned to str because it is compared against the string codes
## in lstdeps; left to type inference, purely numeric codes could come
## back as integers and never match.
with open(fn2, 'r') as fd2:
    df2 = pandas.read_csv(fd2, sep=";", parse_dates=['jour'], dtype={'dep': str})
## Keep only rows with sexe == 0 (presumably both sexes combined -- confirm
## against the dataset documentation).
df2 = df2[df2['sexe'] == 0].copy()
## One array of `hosp` per department, added element-wise.
## This assumes every department lists the same dates in the same order.
tmp_deps = [np.array(df2[df2['dep'] == dep]['hosp']) for dep in lstdeps]
tmp_open_cases = sum(tmp_deps)
## Day-to-day change; the first entry is measured against an inserted 0.
tmp_open_cases_variation = np.diff(np.insert(tmp_open_cases, 0, 0))
## Day offsets from starting_date, taken from the first listed department.
tmp_days = np.array((df2[df2['dep'] == lstdeps[0]]['jour'].dt.date - starting_date).dt.days)
data_hosp = pandas.DataFrame({'days': tmp_days, 'open_cases': tmp_open_cases, 'open_cases_variation': tmp_open_cases_variation})
data_hosp = data_hosp.set_index('days')
## Explicit copy: columns are added below, and assigning to a filtered
## slice would trigger pandas' SettingWithCopyWarning.
data_hosp = data_hosp[data_hosp['open_cases']>=1].copy()
## Smooth log(open_cases) with a Gaussian process (RBF term plus white noise).
## Alternative kernel with an additional dot-product term:
# kernel = C() * RBF() + C() * DP() + W()
kernel = C() * RBF() + W()
gp = GaussianProcessRegressor(kernel = kernel, n_restarts_optimizer=200, normalize_y = True)
x = np.array(data_hosp.index).reshape(-1,1)
y = np.log(np.array(data_hosp['open_cases']))
gp.fit(x, y)
pred = gp.predict(x)
## Back to linear scale; computed once and reused for both columns.
tmp_smoothed = np.exp(pred)
data_hosp['smoothed_open_cases'] = tmp_smoothed
data_hosp['smoothed_open_cases_variation'] = np.diff(np.insert(tmp_smoothed, 0, 0))
## Drop the first row: its smoothed variation is measured against the
## inserted 0 rather than against a previous day.
data_hosp = data_hosp.iloc[1:]
## Relative daily variation of open cases (variation / open cases).
## Raw values are drawn as scatter points: emergency data in orange,
## hospital data in red.
for frame, shade in ((data_urgn, 'orange'), (data_hosp, 'red')):
    raw_ratio = frame['open_cases_variation'] / frame['open_cases']
    plt.scatter(frame.index, raw_ratio, color=shade)
## Smoothed values are drawn as lines: emergency data in cyan,
## hospital data in blue.
for frame, shade in ((data_urgn, 'cyan'), (data_hosp, 'blue')):
    smooth_ratio = frame['smoothed_open_cases_variation'] / frame['smoothed_open_cases']
    plt.plot(frame.index, smooth_ratio, color=shade)
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment