Skip to content

Instantly share code, notes, and snippets.

Last active June 21, 2021 06:09
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save purva91/eb395920be650c018d513dcf35d9db99 to your computer and use it in GitHub Desktop.
# ROC curve for the fitted `knn` model, evaluated on the held-out test split.
# Column index 1 of predict_proba is taken as the score
# (presumably the positive class - confirm against knn.classes_).
y_pred_proba = knn.predict_proba(X_test)[:, 1]
fpr, tpr, thresholds = roc_curve(y_true=y_test, y_score=y_pred_proba)

plt.figure(figsize=(10, 8))
# Dashed black reference diagonal from (0, 0) to (1, 1).
plt.plot([0, 1], [0, 1], 'k--')
# Model curve: false-positive rate on x, true-positive rate on y.
plt.plot(fpr, tpr, label='Knn')
plt.title('Knn(n_neighbors = 8) ROC curve')

# Area under the ROC curve; the value is neither stored nor printed here.
roc_auc_score(y_true=y_test, y_score=y_pred_proba)
# Precision-recall curve for the KNN scores on the test split.
# precision_recall_curve has to run first: auc() below consumes its outputs,
# and calling auc() before this line raised NameError.
precision, recall, thresholds = precision_recall_curve(y_test, y_pred_proba)
# calculate precision-recall AUC (recall is the x-axis, precision the y-axis)
auc_prc = auc(recall, precision)
plt.figure(figsize = (10,8))
# Dashed horizontal reference line at precision = 0.5.
# NOTE(review): presumably meant as a no-skill baseline, which only sits at
# 0.5 when the classes are balanced - confirm against the target distribution.
plt.plot([0, 1], [0.5, 0.5],'k--')
plt.plot(recall, precision, label = 'Knn')
plt.title('Knn(n_neighbors = 8) PRC curve')
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import auc
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
# Read the heart-disease CSV into a DataFrame.
data_file_path = "../input/heart-disease-uci/heart.csv"
data_df = pd.read_csv(filepath_or_buffer=data_file_path)

# Exploration notes (the matching inspection/plot calls are not present in this section):
#   - information on the number of entries and the datatypes of the features
#   - 2. distribution of the target variable
# Label the target count plot.
plt.title("Countplot of Target")
from sklearn.preprocessing import StandardScaler

# Separate the label column from the feature columns.
y = data_df["target"].values
x = data_df.drop(["target"], axis = 1)

# Splitting into train and test BEFORE scaling, so the scaler's mean/variance
# are never computed from test rows (avoids train/test leakage).
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.3) # 70% training and 30% test

# Scaling - mandatory for knn, which is distance based.
# Fit on the training rows only, then apply the same transform to the test rows.
ss = StandardScaler()
X_train = ss.fit_transform(X_train)
X_test = ss.transform(X_test)

# Keep `x` as a scaled array, as it was before this change, now using the
# train-only statistics.
x = ss.transform(x)
# Fit a KNN classifier for each k in 1..20 and record its accuracy on the
# training and test splits.
train_score = []
test_score = []
k_vals = []
for k in range(1, 21):
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)
    tr_score = knn.score(X_train, y_train)
    te_score = knn.score(X_test, y_test)
    # Record the results; without these appends max() below fails on an empty list.
    k_vals.append(k)
    train_score.append(tr_score)
    test_score.append(te_score)

## score that comes from the testing set only
# NOTE(review): choosing k by test-set score makes the reported score optimistic;
# a separate validation split or cross-validation would avoid that.
max_test_score = max(test_score)
# Indices of every k that ties for the best test score.
test_scores_ind = [i for i, v in enumerate(test_score) if v == max_test_score]
print('Max test score {} and k = {}'.format(max_test_score * 100, [k_vals[i] for i in test_scores_ind]))
# Set up a knn classifier with k neighbors and fit it on the training split.
# NOTE(review): the plot titles elsewhere in this file say n_neighbors = 8,
# while 3 is used here - confirm which k is intended.
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)

# Accuracy on the test split; the value is neither stored nor printed here.
knn.score(X_test, y_test)

# Predicted labels for the test split, tabulated against the actual labels
# (margins=True adds row/column totals); the table itself is not stored.
y_pred = knn.predict(X_test)
pd.crosstab(y_test, y_pred, rownames = ['Actual'], colnames =['Predicted'], margins = True)
print(classification_report(y_test, y_pred))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment