Skip to content

Instantly share code, notes, and snippets.

@knowblesse
Last active December 22, 2020 03:46
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save knowblesse/1e770aa41fb609189bceaea6f639fd9c to your computer and use it in GitHub Desktop.
# Abort immediately unless the installed scikit-learn is exactly 0.23.2.
import sklearn

if sklearn.__version__ != '0.23.2':
    raise Exception("scikit-learn package version must be 0.23.2")
import os
import numpy as np
from scipy.io import loadmat
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import matplotlib
# Draw all figure text with the 'HCR Batang' font family
# (presumably so Korean text renders correctly - confirm the font is installed).
matplotlib.rc('font', family='HCR Batang')

# Download the Our World in Data COVID-19 table.
df = pd.read_csv('https://covid.ourworldindata.org/data/owid-covid-data.csv')

# Select KOR data: keep three columns for the rows whose iso_code is 'KOR',
# drop rows with a missing value in any of them, and renumber the rows from 0.
kor_rows = df['iso_code'] == 'KOR'
kept_columns = ['date', 'total_cases', 'new_cases']
data_KOR = df.loc[kor_rows, kept_columns]
data_KOR = data_KOR.dropna()
data_KOR = data_KOR.reset_index(drop=True)
# Generate datasets
# Build a sliding-window dataset from the daily new-case series: every row of
# X holds `num_consecutive_data` consecutive values, and the matching entry of
# Y is the value `predict_day` step(s) after the last value of that row.
num_consecutive_data = 7
predict_day = 1
data = np.array(data_KOR['new_cases'], dtype=int)
num_data = data.shape[0]
# Number of windows whose target value still lies inside the series.
num_samples = num_data - num_consecutive_data - predict_day + 1
X = np.zeros([num_samples, num_consecutive_data], dtype=int)
Y = np.zeros([num_samples], dtype=int)
for i in range(num_samples):
    X[i, :] = data[i:i + num_consecutive_data]
    Y[i] = data[i + num_consecutive_data + predict_day - 1]
# Divide Train/Test datasets
# 10% of the samples are held out; no random_state is given, so the split
# differs from run to run.
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.1)

# Generate Regressor
# Multi-layer perceptron with ReLU activations, a constant learning rate and
# one sample per mini-batch, fitted on the training split only.
from sklearn.neural_network import MLPRegressor
reg = MLPRegressor(
    activation='relu',
    max_iter=5000,
    batch_size=1,
    learning_rate='constant',
    learning_rate_init=0.001,
    alpha=1e-2,
)
reg.fit(X_train, Y_train)
# Continuous Graph
# Draw the recorded series (blue) and the model output (red) for two 50-entry
# stretches of the data, one figure per stretch.
tick_length = 30
real = np.array(data_KOR['new_cases'])

# Run the model on every window and left-pad the result with zeros so it can
# be indexed alongside `real`.
# NOTE(review): the padding has num_consecutive_data + predict_day - 2 entries,
# so entry k holds the prediction whose target is entry k + 1 of `real`.
# Confirm that this one-step offset is intended.
front_padding = np.zeros((num_consecutive_data + predict_day - 2,))
aligned_predicted = np.hstack([front_padding, reg.predict(X)])

# Figure 1: entries 100..149.
first_span = np.arange(100, 150)
plt.figure(1, figsize=(5, 5))
plt.clf()
plt.plot(real[first_span], c='b')
plt.plot(aligned_predicted[first_span], c='r')
plt.show()

# Figure 2: entries 200..249, plus a marker at x = 6 for the prediction
# computed directly from entries 200..206.
second_span = np.arange(200, 250)
plt.figure(2, figsize=(5, 5))
plt.clf()
plt.plot(real[second_span], c='b')
plt.plot(aligned_predicted[second_span], c='r')
one_window = real[200:207].reshape(1, -1)
plt.scatter(6, reg.predict(one_window))
plt.show()
# Daily
# Plot `days` recorded values starting at row `start`, then add the recorded
# value and the model output for the following position (x = days).
days = 7
start = 323
tick_length = 1
new_cases = data_KOR['new_cases']
last_day = start + days

plt.figure(2, figsize=(5, 5))
plt.clf()
plt.plot(np.array(new_cases[np.arange(start, last_day)]), c='b')

# Model outputs for the windows starting at first_row .. first_row + days.
# NOTE(review): the last of these windows covers rows start+1 .. start+days,
# so its target is row start+days+1, while it is drawn at x = days next to
# the recorded value of row start+days. Confirm the intended alignment.
first_row = start - num_consecutive_data + 1
predicted = reg.predict(X)[np.arange(first_row, first_row + days + 1)]
print(predicted)
plt.scatter(days, predicted[-1], c='r')

# Dashed segment and marker for the recorded value at x = days; the
# '_nolegend_' label keeps both out of the legend.
plt.plot(
    [days - 1, days],
    np.array(new_cases[np.arange(last_day - 1, last_day + 1)]),
    'b--',
    label='_nolegend_',
)
plt.scatter(days, np.array(new_cases[last_day]), c='b', label='_nolegend_')

plt.title('일일 확진자 수')
plt.ylim([600, 1200])
plt.legend(['실제값', '예측값'])
tick_positions = np.arange(0, days + 1, tick_length)
tick_dates = data_KOR['date'][np.arange(start, last_day + 1, tick_length)]
plt.xticks(tick_positions, labels=tick_dates, rotation=45)
plt.ylabel('일일 확진자 수(명)')

# Numeric labels for the two markers.
recorded_last = np.array(new_cases[last_day])
plt.annotate(str(int(recorded_last)), xy=(days, recorded_last - 30), c='b')
# NOTE(review): with days = 7 the x coordinate days - 40 is -33, far left of
# the plotted range 0..days. Confirm where this label is meant to appear.
plt.annotate(str(int(predicted[-1])), xy=(days - 40, predicted[-1] - 10), c='r')
plt.show()
print(predicted[-1])
import matplotlib.pyplot as plt
# Load .mat data
# Read the first entry that os.listdir returns for BASE_PATH and pull the
# 'X' and 'y' variables out of it.
BASE_PATH = r'C:\VCF\Lobster\data\20JUN'
datalist = os.listdir(BASE_PATH)
loaded_name = datalist[0]
data = loadmat(os.path.join(BASE_PATH, loaded_name))
# Report the file that was actually loaded (the message previously named
# datalist[1] although datalist[0] had been read).
print(loaded_name + ' is loaded \n')
X = data.get('X')
Y = data.get('y')
# Drop the singleton dimensions of the label array.
Y = np.squeeze(Y)
#np.random.shuffle(Y)
Y_label = ['Head Entry', 'Avoidance', 'Escape']
# Half of the samples go to each split, keeping the class proportions of Y.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.5, stratify=Y)
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

# Candidate values for the SVM parameter C: 21 evenly spaced points in [1, 3].
param_grid = {'C' : np.linspace(1,3,21)}
# NOTE(review): `scores` is not referenced anywhere in the visible script.
scores = ['accuracy','precision', 'recall']

## Parameter search
# 5-fold cross-validated grid search over C for an RBF-kernel SVC, scored by
# accuracy. The deprecated `iid` argument is omitted: leaving it out gives the
# same behaviour as the former explicit iid=False, without the deprecation
# warning.
print('Hyper parameter tuning for accuracy')
print()
search = GridSearchCV(
    SVC(kernel='rbf', gamma='auto'),
    param_grid=param_grid,
    cv=5,
    n_jobs=-1,
    scoring='accuracy',
)
search.fit(X_train, Y_train)

# Mean cross-validation score and twice its standard deviation for every
# candidate.
print("Grid scores on development set:")
print()
means = search.cv_results_['mean_test_score']
stds = search.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, search.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r"
          % (mean, std * 2, params))
print()

# Evaluate the search object's predictions on the held-out split.
print("Detailed classification report:")
print()
print("The model is trained on the full development set.")
print("The scores are computed on the full evaluation set.")
print()
Y_true, Y_pred = Y_test, search.predict(X_test)
print(classification_report(Y_true, Y_pred))
print()
print('Best parameter')
print(search.best_params_)
# Classification Result
# Confusion matrix normalised over the true labels: each row is an actual
# class, each column a predicted class.
confusion_mat = confusion_matrix(Y_true, Y_pred, normalize='true')

# Draw the matrix as an annotated heatmap on a fixed 0..1 colour scale.
cmap = sns.cubehelix_palette(start=.5, rot=-.5, as_cmap=True)
f, ax = plt.subplots(figsize=(11, 9))
sns.heatmap(
    confusion_mat,
    cmap=cmap,
    vmin=0,
    vmax=1,
    annot=True,
    square=True,
    linewidths=.5,
    cbar_kws={"shrink": .5},
    xticklabels=Y_label,
    yticklabels=Y_label,
)
ax.set_xlabel('predicted')
ax.set_ylabel('actual')
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment