Last active
April 8, 2020 17:39
-
-
Save Gro-Tsen/519cee2fcee431065a46757cf47096a6 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import pandas | |
import numpy as np | |
from sklearn.gaussian_process import GaussianProcessRegressor | |
from sklearn.gaussian_process.kernels import ConstantKernel as C, RBF, WhiteKernel as W, DotProduct as DP | |
import matplotlib | |
matplotlib.use('TkAgg') | |
from matplotlib import pyplot as plt | |
import os | |
## List of departments to sum over:
lstdeps = ['75', '92', '93', '94', '77', '78', '91', '95']

## Hospital emergency-room and SOS Médecins data
## <URL: https://www.data.gouv.fr/fr/datasets/donnees-des-urgences-hospitalieres-et-de-sos-medecins-relatives-a-lepidemie-de-covid-19/ >
fn1 = '/tmp/sursaud-covid19-quotidien-2020-04-08-19h00-departement.csv'
# Parse inside a ``with`` block so the file handle is closed as soon as the
# CSV has been read.  ``dep`` and ``sursaud_cl_age_corona`` are forced to
# ``str`` because both are compared against string literals below; leaving
# them to dtype inference can produce integers for all-digit values.
with open(fn1, 'r') as fd1:
    df1 = pandas.read_csv(
        fd1,
        parse_dates=['date_de_passage'],
        dtype={'dep': str, 'sursaud_cl_age_corona': str},
    )
# Keep only the rows whose age-class code is '0'
# (presumably the all-ages aggregate -- confirm against the dataset notes).
df1 = df1[df1['sursaud_cl_age_corona'] == '0'].copy()

# One array of daily counts per department, added element-wise.  This
# assumes every listed department has the same number of rows, in the same
# date order.
tmp_deps = [np.array(df1[df1['dep'] == dep]['nbre_pass_corona']) for dep in lstdeps]
tmp_new_cases = sum(tmp_deps)
tmp_total_cases = tmp_new_cases.cumsum()

## Typical length of a hospital stay ~10 days
# "Open" cases = cumulative total minus the cumulative total 10 rows earlier,
# i.e. the cases counted over the last 10 rows.
tmp_total_cases_shifted = np.insert(tmp_total_cases, 0, np.full(10, 0))
tmp_open_cases = tmp_total_cases - tmp_total_cases_shifted[0:len(tmp_total_cases)]
# Row-to-row change; because of the inserted leading 0, the first entry is
# simply the first value itself.
tmp_open_cases_variation = np.diff(np.insert(tmp_open_cases, 0, 0))

starting_date = pandas.Timestamp('2020-02-24').date()
# Day offsets from ``starting_date``, read from the first listed department
# (the same department as before while the list starts with '75').  The
# subtraction is done as datetime64 - Timestamp so the result is a
# timedelta64 Series with a working ``.dt`` accessor; subtracting Python
# ``date`` objects goes through object dtype, where ``.dt.days`` is not
# reliably available (pandas >= 2.0 no longer infers the result dtype).
# Assumes ``parse_dates`` produced a datetime64 column.
tmp_days = np.array((df1[df1['dep'] == lstdeps[0]]['date_de_passage'] - pandas.Timestamp(starting_date)).dt.days)

data_urgn = pandas.DataFrame({'days': tmp_days, 'new_cases': tmp_new_cases, 'total_cases': tmp_total_cases, 'open_cases': tmp_open_cases, 'open_cases_variation': tmp_open_cases_variation})
data_urgn = data_urgn.set_index('days')
# Drop the leading rows that precede the first recorded case.
data_urgn = data_urgn[data_urgn['total_cases']>=1]

# Smooth log(open cases) as a function of the day offset with a Gaussian
# process: scaled RBF kernel plus a white-noise term.
# kernel = C() * RBF() + C() * DP() + W()
kernel = C() * RBF() + W()
gp = GaussianProcessRegressor(kernel = kernel, n_restarts_optimizer=200, normalize_y = True)
x = np.array(data_urgn.index).reshape(-1,1)
# NOTE(review): a row with total_cases >= 1 but open_cases == 0 would give
# log(0) = -inf here -- confirm this cannot occur in the input.
y = np.log(np.array(data_urgn['open_cases']))
gp.fit(x, y)
pred = gp.predict(x)
# Back from log scale to counts.
data_urgn['smoothed_open_cases'] = np.exp(pred)
# As above, the first entry is measured against the inserted 0.
data_urgn['smoothed_open_cases_variation'] = np.diff(np.insert(np.exp(pred), 0, 0))
# Keep only rows where the smoothed level has reached 15.
data_urgn = data_urgn[data_urgn['smoothed_open_cases'] >= 15]
## Hospital data
## <URL: https://www.data.gouv.fr/fr/datasets/donnees-hospitalieres-relatives-a-lepidemie-de-covid-19/ >
fn2 = '/tmp/donnees-hospitalieres-covid19-2020-04-08-19h00.csv'
# Parse inside a ``with`` block so the file handle is closed as soon as the
# CSV has been read.  ``dep`` is forced to ``str`` because it is compared
# against the string codes in ``lstdeps``; leaving it to dtype inference can
# produce integers for all-digit values.
with open(fn2, 'r') as fd2:
    df2 = pandas.read_csv(fd2, sep=";", parse_dates=['jour'], dtype={'dep': str})
# Keep only the rows with sexe == 0
# (presumably both sexes combined -- confirm against the dataset notes).
df2 = df2[df2['sexe'] == 0].copy()

# One array of daily ``hosp`` values per department, added element-wise.
# This assumes every listed department has the same number of rows, in the
# same date order.
tmp_deps = [np.array(df2[df2['dep'] == dep]['hosp']) for dep in lstdeps]
tmp_open_cases = sum(tmp_deps)
# Row-to-row change; because of the inserted leading 0, the first entry is
# simply the first value itself.
tmp_open_cases_variation = np.diff(np.insert(tmp_open_cases, 0, 0))
# Day offsets from ``starting_date``, read from the first listed department
# (the same department as before while the list starts with '75').  The
# subtraction is done as datetime64 - Timestamp so the result is a
# timedelta64 Series with a working ``.dt`` accessor; subtracting Python
# ``date`` objects goes through object dtype, where ``.dt.days`` is not
# reliably available (pandas >= 2.0 no longer infers the result dtype).
# Assumes ``parse_dates`` produced a datetime64 column.
tmp_days = np.array((df2[df2['dep'] == lstdeps[0]]['jour'] - pandas.Timestamp(starting_date)).dt.days)

data_hosp = pandas.DataFrame({'days': tmp_days, 'open_cases': tmp_open_cases, 'open_cases_variation': tmp_open_cases_variation})
data_hosp = data_hosp.set_index('days')
# Rows with no open case are dropped; this also keeps log() below finite.
data_hosp = data_hosp[data_hosp['open_cases']>=1]

# Smooth log(open cases) as a function of the day offset with a Gaussian
# process: scaled RBF kernel plus a white-noise term.
# kernel = C() * RBF() + C() * DP() + W()
kernel = C() * RBF() + W()
gp = GaussianProcessRegressor(kernel = kernel, n_restarts_optimizer=200, normalize_y = True)
x = np.array(data_hosp.index).reshape(-1,1)
y = np.log(np.array(data_hosp['open_cases']))
gp.fit(x, y)
pred = gp.predict(x)
# Back from log scale to counts.
data_hosp['smoothed_open_cases'] = np.exp(pred)
data_hosp['smoothed_open_cases_variation'] = np.diff(np.insert(np.exp(pred), 0, 0))
# Drop the first row: its smoothed variation is measured against the
# inserted 0 rather than against a real previous day.
data_hosp = data_hosp.iloc[1:]
# Relative row-to-row change (variation divided by level) for each series,
# from the raw figures and from the Gaussian-process-smoothed ones.
urgn_raw_rate = data_urgn['open_cases_variation'] / data_urgn['open_cases']
hosp_raw_rate = data_hosp['open_cases_variation'] / data_hosp['open_cases']
urgn_smooth_rate = data_urgn['smoothed_open_cases_variation'] / data_urgn['smoothed_open_cases']
hosp_smooth_rate = data_hosp['smoothed_open_cases_variation'] / data_hosp['smoothed_open_cases']

# Raw rates as points, smoothed rates as lines, all on the same axes.
plt.scatter(data_urgn.index, urgn_raw_rate, color='orange')
plt.scatter(data_hosp.index, hosp_raw_rate, color='red')
plt.plot(data_urgn.index, urgn_smooth_rate, color='cyan')
plt.plot(data_hosp.index, hosp_smooth_rate, color='blue')
plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment