View import
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np # linear algebra | |
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv) | |
from sklearn.model_selection import train_test_split | |
from sklearn.preprocessing import StandardScaler | |
from sklearn.neighbors import KNeighborsClassifier | |
from sklearn.neighbors import KNeighborsRegressor | |
from sklearn.metrics import confusion_matrix | |
from sklearn import metrics | |
import matplotlib.pyplot as plt |
View target_input
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Separate the label from the features: the "Outcome" column becomes the
# target array y, and every remaining column of data_df is kept in x.
y = data_df["Outcome"].values
x = data_df.drop(["Outcome"], axis=1)
View gist:5f1e1c7e7fe997b15dac6d523f620c66
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.preprocessing import StandardScaler

# Standardise the feature matrix x (not data_df): x is what is handed to
# train_test_split below, so scaling anything else never reaches the model,
# and scaling data_df would also rescale the label column and overwrite the
# DataFrame with a plain array.
ss = StandardScaler()
x = ss.fit_transform(x)

# Divide into training and test data
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.3)  # 70% training and 30% test
View loop_k_values
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sweep the number of neighbours k and record, for each k, the classifier's
# score on the training split and on the test split. The three lists stay
# index-aligned: k_vals[i] produced train_score[i] and test_score[i].
train_score = []
test_score = []
k_vals = []

# A KNN query cannot ask for more neighbours than there are fitted samples,
# so the sweep stops at the training-set size if that is below 200.
max_k = min(200, len(X_train))

for k in range(1, max_k + 1, 5):
    k_vals.append(k)
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)
    tr_score = knn.score(X_train, y_train)
    train_score.append(tr_score)
    te_score = knn.score(X_test, y_test)
    test_score.append(te_score)
View plot_k_values
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Plot the recorded training and test scores against the k values that
# produced them (red = training, blue = test).
plt.figure(figsize=(10, 5))
plt.xlabel('Different Values of K')
plt.ylabel('Model score')
plt.plot(k_vals, train_score, color='r', label="training score")
plt.plot(k_vals, test_score, color='b', label='test score')
# The anchor point (1, 1) is expressed in figure coordinates, i.e. the
# top-right corner of the whole figure rather than of the axes.
plt.legend(bbox_to_anchor=(1, 1),
           bbox_transform=plt.gcf().transFigure)
plt.show()
View knn_score
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Train one KNN classifier with a fixed k and evaluate it on the test split.
# NOTE(review): k=14 is not one of the values visited by the k sweep
# (1, 6, 11, 16, ...) — confirm this is the intended choice.
knn = KNeighborsClassifier(n_neighbors=14)
# Fit the model
knn.fit(X_train, y_train)
# Get the score (as a bare expression this is only displayed in a notebook cell)
knn.score(X_test, y_test)
View read_info
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the dataset from CSV into data_df and print a quick overview of it.
data_file_path = '../input/heart-disease-uci/heart.csv'
data_df = pd.read_csv(data_file_path)
# To get information on the number of entries and the datatypes of the features
data_df.info()
# To check for missing values: prints the count of nulls per column
print(data_df.isnull().sum())
View gist:d79869206c52d46885acb0be7f94f88c
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#2. distribution of target variable.
# The column is passed by keyword: newer seaborn releases interpret a lone
# positional argument as `data`, not as the `x` variable.
# NOTE(review): assumes seaborn is imported as `sns` elsewhere — it is not
# among the imports visible in this file.
sns.countplot(x='target', data=data_df)
# Add labels
plt.title('Countplot of Target')
plt.xlabel('target')
plt.ylabel('Patients')
plt.show()
View split_data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Separate the label column "target" from the feature columns.
y = data_df["target"].values
x = data_df.drop(["target"], axis=1)
# Scaling - mandatory for knn: the classifier is distance-based, so the
# features are standardised before being compared.
# NOTE(review): the scaler is fitted on all rows before the split, so the
# test rows contribute to the scaling statistics — consider fitting on the
# training split only.
from sklearn.preprocessing import StandardScaler
ss = StandardScaler()
x = ss.fit_transform(x)
# Splitting into train and test
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.3)  # 70% training and 30% test
View max_test_score
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## score that comes from the testing set only
# Find the best test score and every position in test_score where it occurs.
max_test_score = max(test_score)
test_scores_ind = [i for i, v in enumerate(test_score) if v == max_test_score]
# Translate positions into the k values that produced them. The sweep does
# not visit consecutive k (it steps by 5), so position + 1 is not the k;
# k_vals holds the actual k for each position.
best_k_vals = [k_vals[i] for i in test_scores_ind]
print('Max test score {} and k = {}'.format(max_test_score * 100, best_k_vals))
Older | Newer