Skip to content

Instantly share code, notes, and snippets.

@tizianolattisi
Last active November 27, 2020 18:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save tizianolattisi/2cb6458d929803c9e5d70a68ff4d09df to your computer and use it in GitHub Desktop.
Save tizianolattisi/2cb6458d929803c9e5d70a68ff4d09df to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import pandas as pd
# National COVID-19 trend CSV published in the pcm-dpc/COVID-19 GitHub repository.
url = 'https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-andamento-nazionale/dpc-covid19-ita-andamento-nazionale.csv'
# Only the columns used further down are loaded. The names are Italian:
# 'data' = date, 'tamponi' = swabs, 'nuovi_positivi' = new positives,
# 'deceduti' = deceased.
# NOTE(review): 'tamponi' and 'deceduti' are presumably cumulative totals,
# since the script later takes row-to-row differences of them -- confirm
# against the dataset documentation.
usecols = [
    'data',
    'tamponi',
    'nuovi_positivi',
    'deceduti',
]
df = pd.read_csv(url, usecols=usecols)
# Timestamps are expected on the hour (minutes and seconds fixed to 00:00);
# any other shape makes to_datetime raise instead of being parsed silently.
df['data'] = pd.to_datetime(df['data'], format="%Y-%m-%dT%H:00:00")
# Keep only rows strictly after the cut-off timestamp. The explicit .copy()
# makes the result an independent frame, so the columns added to it later are
# not written into a slice of the unfiltered data (SettingWithCopyWarning).
df = df[df['data'] > '2020-07-31T17:00:00'].copy()
# Look-ahead horizons: targets are built for 1..29 rows into the future.
r = range(1, 30)
# Row count of the filtered frame (kept as a module-level name).
l = len(df)
# Renumber the rows 0..l-1; the previous index is preserved as an 'index'
# column. reset_index() returns a new frame, so the column assignments below
# never write into a filtered slice of another frame.
df = df.reset_index()
# Row-to-row increase of 'tamponi' and 'deceduti'. The first row has no
# predecessor, so its delta is set to 0.0 rather than left as NaN.
for source, delta in (('tamponi', 'delta_tamponi'),
                      ('deceduti', 'delta_deceduti')):
    step = df[source].diff()
    step.iloc[:1] = 0.0
    df[delta] = step
# delta_deceduti_<i> holds the value of delta_deceduti found i rows later.
# The last i rows have no such value and stay missing (NaN), which keeps the
# columns float64 instead of the object dtype a None-initialised column gets.
for i in r:
    df['delta_deceduti_%d' % i] = df['delta_deceduti'].shift(-i)
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score
import numpy as np
# A single scaler instance, re-fitted on the training part of every split.
ss = StandardScaler()
# NOTE: 'models' and 'predictions' are created here but not filled by this loop.
models = []
# scores[k] is the mean R2 obtained for horizon r[k].
scores = []
predictions = []
for i in r:
    target = 'delta_deceduti_%d' % i
    # Drop the rows that have no target value i rows ahead.
    df2 = df.dropna(subset=[target], inplace=False)
    X = df2[['delta_tamponi', 'nuovi_positivi']]
    # The target column may be object dtype (when it was initialised with
    # None); cast so the regression always receives plain floats.
    y = df2[target].astype(float)
    partial_scores = []
    # Average over 100 random 80/20 splits to smooth out split-to-split noise.
    for _ in range(100):
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2)
        # Fit the scaler on the training part only, then apply it to the test
        # part, so no test-set statistics leak into the model.
        X_train = ss.fit_transform(X_train)
        X_test = ss.transform(X_test)
        model = LinearRegression()
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        # r2_score expects (y_true, y_pred). R2 is not symmetric, so swapping
        # the arguments reports a different, incorrect score.
        partial_scores.append(r2_score(y_test, y_pred))
    scores.append(np.mean(partial_scores))
import matplotlib.pyplot as plt
# One bar per look-ahead horizon n, with height equal to that horizon's mean R2.
line = plt.bar(r, scores)
# Label the bar container so that plt.legend() has an entry to show.
line.set_label('scores')
plt.legend()
plt.gca().set(title='R2 score of the regression models', xlabel='n', ylabel='r2 score')
# Without this call nothing is displayed when the file is run as a script.
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment