Skip to content

Instantly share code, notes, and snippets.

@purva91
Last active June 21, 2021 06:09
Show Gist options
  • Save purva91/eb395920be650c018d513dcf35d9db99 to your computer and use it in GitHub Desktop.
Save purva91/eb395920be650c018d513dcf35d9db99 to your computer and use it in GitHub Desktop.
# Plot the ROC curve of the fitted `knn` classifier on the test split and
# compute the area under it.
# Column 1 of predict_proba is the probability of the second entry in
# knn.classes_ (presumably the positive class - confirm against the labels).
y_pred_proba = knn.predict_proba(X_test)[:,1]
fpr, tpr, thresholds = roc_curve(y_test, y_pred_proba)
plt.figure(figsize = (10,8))
# Dashed diagonal: reference line from (0, 0) to (1, 1).
plt.plot([0, 1], [0, 1], 'k--')
plt.plot(fpr, tpr, label='Knn')
plt.xlabel('FPR')
plt.ylabel('TPR')
# Take k from the fitted model so the title cannot drift from the classifier
# that actually produced the scores.
plt.title('Knn(n_neighbors = {}) ROC curve'.format(knn.n_neighbors))
# Without a legend the label passed to plt.plot above is never displayed.
plt.legend()
plt.show()
roc_auc_score(y_test, y_pred_proba)
# Precision-recall curve of the classifier scores in `y_pred_proba` on the
# test split, plus the area under that curve.
# The curve must be computed first: auc() below consumes its outputs.
precision, recall, thresholds = precision_recall_curve(y_test, y_pred_proba)
# calculate precision-recall AUC
auc_prc = auc(recall, precision)
print(auc_prc)
plt.figure(figsize = (10,8))
# Dashed horizontal reference line at precision = 0.5.
# NOTE(review): a no-skill baseline is usually the positive-class prevalence
# rather than a fixed 0.5 - confirm this is the intended reference.
plt.plot([0, 1], [0.5, 0.5],'k--')
plt.plot(recall, precision, label = 'Knn')
plt.xlabel('recall')
plt.ylabel('precision')
# Take k from the fitted model so the title matches the classifier in use.
plt.title('Knn(n_neighbors = {}) PRC curve'.format(knn.n_neighbors))
# Without a legend the label passed to plt.plot above is never displayed.
plt.legend()
plt.show()
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import auc
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
# Relative location of the heart-disease CSV file.
data_file_path = '../input/heart-disease-uci/heart.csv'
# Read the whole CSV into a DataFrame.
data_df = pd.read_csv(filepath_or_buffer = data_file_path)
# Preview the first five rows (head() shows sample rows only, not dtypes or
# entry counts).
data_df.head(n = 5)
#2. Distribution of the target variable: one bar per distinct target value.
# Pass the column through the explicit x=/data= keywords; a bare positional
# Series is deprecated or interpreted as `data` by newer seaborn releases.
sns.countplot(x = 'target', data = data_df)
# Add labels
plt.title('Countplot of Target')
plt.xlabel('target')
plt.ylabel('Patients')
plt.show()
# Build the label vector and the feature table, split them into train and
# test sets, then standardize the features.
y = data_df["target"].values
features_df = data_df.drop(["target"], axis = 1)
#Splitting into train and test
# The split happens BEFORE scaling so the scaler's mean/std are learned from
# the training rows only; fitting on the full table would leak test-set
# statistics into the model.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(features_df, y, test_size = 0.3) # 70% training and 30% test
#Scaling - mandatory for knn
from sklearn.preprocessing import StandardScaler
ss = StandardScaler()
X_train = ss.fit_transform(X_train_raw)
# The test split is only transformed with the statistics learned above.
X_test = ss.transform(X_test_raw)
# Keep `x` available as the scaled full feature matrix, now standardized
# with the training-set statistics.
x = ss.transform(features_df)
# Fit one KNN classifier per candidate k (1..20) and record its accuracy on
# the training and the test split. The three lists stay index-aligned:
# position i holds the results for k = k_vals[i].
train_score = []
test_score = []
k_vals = []
for k in range(1, 21):
    k_vals.append(k)
    knn = KNeighborsClassifier(n_neighbors = k)
    knn.fit(X_train, y_train)
    # Accuracy on the data the model was fitted on.
    tr_score = knn.score(X_train, y_train)
    train_score.append(tr_score)
    # Accuracy on the held-out test split.
    te_score = knn.score(X_test, y_test)
    test_score.append(te_score)
## Best accuracy measured on the testing set only, with every k that reaches it
max_test_score = max(test_score)
# Positions in test_score that tie for the maximum.
test_scores_ind = [pos for pos, score in enumerate(test_score) if score == max_test_score]
# Position i corresponds to k = i + 1 because the search loop starts at k = 1.
print('Max test score {} and k = {}'.format(max_test_score * 100, [pos + 1 for pos in test_scores_ind]))
# Fit the final KNN classifier with 3 neighbors on the training split.
knn = KNeighborsClassifier(n_neighbors = 3).fit(X_train, y_train)
# Accuracy on the test split.
knn.score(X_test, y_test)
# Predicted labels for the test split, reused by every report below.
y_pred = knn.predict(X_test)
# Raw confusion matrix (rows: actual labels, columns: predicted labels).
confusion_matrix(y_true = y_test, y_pred = y_pred)
# The same counts as a labelled table with row/column totals.
pd.crosstab(index = y_test, columns = y_pred, rownames = ['Actual'], colnames = ['Predicted'], margins = True)
# Per-class precision, recall and F1 summary.
print(classification_report(y_true = y_test, y_pred = y_pred))
# Count the missing values in each column of the DataFrame.
data_df.isnull().sum(axis = 0)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment