Skip to content

Instantly share code, notes, and snippets.

@shedoesdatascience
Created April 9, 2021 00:45
Show Gist options
  • Save shedoesdatascience/f22b56c43d00e142b372aa8ab59291c9 to your computer and use it in GitHub Desktop.
Save shedoesdatascience/f22b56c43d00e142b372aa8ab59291c9 to your computer and use it in GitHub Desktop.
k-nearest neighbours for the iris dataset
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import confusion_matrix
from sklearn import metrics
import matplotlib.pyplot as plt
%matplotlib inline
# Load the iris measurements into a DataFrame and attach the class labels.
iris = datasets.load_iris()
df_iris = pd.DataFrame(iris.data, columns=iris.feature_names)
df_iris['target'] = pd.Series(iris.target)
print(df_iris.head())

# Separate the labels (y) from the feature columns (X).
y = df_iris["target"].values
X = df_iris.drop(["target"], axis=1)

# Divide into training and test data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)  # 70% training and 30% test

# Standardise the features only (never the target). The scaler is fitted on
# the training split alone and then applied to the test split, so no
# statistics from the held-out data leak into training.
ss = StandardScaler()
X_train = ss.fit_transform(X_train)
X_test = ss.transform(X_test)

# Fit one classifier per neighbourhood size and record its accuracy on both
# splits so the two curves can be compared.
k_vals = list(range(1, 21))
train_score = []
test_score = []
for k in k_vals:
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)
    train_score.append(knn.score(X_train, y_train))
    test_score.append(knn.score(X_test, y_test))

# Plot training vs. test accuracy as a function of k.
plt.figure(figsize=(10, 5))
plt.xlabel('Different Values of K')
plt.ylabel('Model score')
plt.plot(k_vals, train_score, color='r', label="training score")
plt.plot(k_vals, test_score, color='b', label='test score')
# Anchor the legend in figure coordinates (top-right corner of the figure).
plt.legend(bbox_to_anchor=(1, 1),
           bbox_transform=plt.gcf().transFigure)
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment