Skip to content

Instantly share code, notes, and snippets.

@puraminy
Last active December 22, 2019 21:00
Show Gist options
  • Save puraminy/f7d1755fc9af7487cd82c4b4cdf385f2 to your computer and use it in GitHub Desktop.
Save puraminy/f7d1755fc9af7487cd82c4b4cdf385f2 to your computer and use it in GitHub Desktop.
classifiers (KNN, Bayes, Parzen)
import itertools
import time

import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sn
from sklearn import preprocessing
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, f1_score
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import RadiusNeighborsClassifier
def plot_heat_map(ax, X, Y, Z, xlabel, ylabel, format='d', title='Heat Map'):
    """Draw matrix ``Z`` as an annotated seaborn heat map on ``ax``.

    Args:
        ax: Matplotlib axes to draw on.
        X: Tick labels for the x axis (columns of ``Z``).
        Y: Tick labels for the y axis (rows of ``Z``).
        Z: 2-D matrix of values to display.
        xlabel: Label for the x axis.
        ylabel: Label for the y axis.
        format: Format spec applied to the cell annotations (e.g. ``'d'``
            or ``'.2f'``).
        title: Title placed above the heat map.
    """
    # Global seaborn setting: scales the font of axis/tick labels.
    sn.set(font_scale=1.4)

    heatmap_options = {
        "annot": True,
        "fmt": format,
        "annot_kws": {"size": 12},  # font size of the per-cell numbers
        "cmap": "YlGnBu",
        "xticklabels": X,
        "yticklabels": Y,
        "ax": ax,
    }
    sn.heatmap(Z, **heatmap_options)

    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
def plot_confusion_matrix(ax, cm, class_names, normalize= False, title='Confusion Matrix'):
    """Plot a confusion matrix as a heat map, optionally row-normalised.

    Args:
        ax: Matplotlib axes to draw on.
        cm: Square confusion matrix (rows = true classes, columns =
            predicted classes). The caller's array is never modified.
        class_names: Tick labels used for both axes.
        normalize: If true, each row is divided by its sum so that cells
            show the fraction of that true class, formatted with two
            decimals; otherwise raw integer counts are shown.
        title: Title placed above the plot.
    """
    fmt = 'd'
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # A class with no true samples has an all-zero row; dividing by 1
        # keeps that row at 0 instead of producing NaN and a runtime warning.
        row_totals[row_totals == 0] = 1
        cm = cm.astype('float') / row_totals
        fmt = '.2f'
    plot_heat_map(ax, class_names, class_names, cm, 'Predicted', 'True Classes', fmt, title)
def preprocess(data):
    """Return ``data`` with every feature rescaled to the range [0, 1].

    A fresh ``MinMaxScaler`` is fitted on ``data`` itself on every call, so
    the minimum and maximum used for scaling come from the array passed in.
    """
    return preprocessing.MinMaxScaler().fit_transform(data)
def KNN():
    """Evaluate k-nearest-neighbour classifiers for several values of K.

    Reads the module-level ``train``, ``train_label``, ``test`` and
    ``test_label``. For each K in (1, 3, 5, 10) a ``KNeighborsClassifier``
    is fitted and used to predict the test set; fit and predict are timed
    with ``time.perf_counter``. Each K fills one row of a 4x2 figure: raw
    confusion counts on the left, row-normalised matrix on the right.
    """
    neighbor_counts = [1, 3, 5, 10]
    _, axes = plt.subplots(len(neighbor_counts), 2, figsize=(20, 40))

    for row, K in enumerate(neighbor_counts):
        classifier = KNeighborsClassifier(n_neighbors=K)

        start = time.perf_counter()
        classifier.fit(train, train_label)
        end = time.perf_counter()

        start_test = time.perf_counter()
        predicted = classifier.predict(test)
        end_test = time.perf_counter()

        # np.round works whether accuracy_score returns a NumPy scalar or a
        # plain Python float (which has no .round() method).
        acc = np.round(accuracy_score(test_label, predicted), 3)
        train_time = np.round(end - start, 4)
        test_time = np.round(end_test - start_test, 4)

        # One matrix serves both plots: normalisation works on a float copy.
        confusion = confusion_matrix(test_label, predicted)
        plot_confusion_matrix(axes[row, 0], confusion, range(10),
                              False, title=f"KNN classifier Confusion K={K}, acc={acc}, train={train_time}, test={test_time}")
        plot_confusion_matrix(axes[row, 1], confusion, range(10),
                              True, title=f"KNN classifier Confidence K={K}, acc={acc}")
def Parzen():
    """Evaluate fixed-radius neighbour classifiers for several radii.

    Reads the module-level ``train``, ``train_label``, ``test`` and
    ``test_label``. For each radius in (0.5, 1, 1.5, 2, 3) a
    ``RadiusNeighborsClassifier`` with uniform weights is fitted and used to
    predict the test set; fit and predict are timed with
    ``time.perf_counter``. Each radius fills one row of a 5x2 figure: raw
    confusion counts on the left, row-normalised matrix on the right.
    """
    radii = [0.5, 1, 1.5, 2, 3]
    _, axes = plt.subplots(len(radii), 2, figsize=(20, 50))

    for row, K in enumerate(radii):
        # outlier_label=5 is the label passed for samples the classifier
        # treats as outliers (presumably those with no training neighbour
        # inside the radius - confirm against the sklearn docs).
        classifier = RadiusNeighborsClassifier(radius=K, weights='uniform', outlier_label=5)

        start = time.perf_counter()
        classifier.fit(train, train_label)
        end = time.perf_counter()

        start_test = time.perf_counter()
        predicted = classifier.predict(test)
        end_test = time.perf_counter()

        # np.round works whether accuracy_score returns a NumPy scalar or a
        # plain Python float (which has no .round() method).
        acc = np.round(accuracy_score(test_label, predicted), 3)
        train_time = np.round(end - start, 4)
        test_time = np.round(end_test - start_test, 4)

        # One matrix serves both plots: normalisation works on a float copy.
        confusion = confusion_matrix(test_label, predicted)
        plot_confusion_matrix(axes[row, 0], confusion, range(10),
                              False, title=f"Parzen classifier Confusion K={K}, acc={acc}, train={train_time}, test={test_time}")
        plot_confusion_matrix(axes[row, 1], confusion, range(10),
                              True, title=f"Parzen classifier Confidence, K={K}, acc={acc}")
def Guassian():
    """Evaluate a Gaussian naive Bayes classifier.

    Reads the module-level ``train``, ``train_label``, ``test`` and
    ``test_label``. Fits ``GaussianNB``, predicts the test set, times both
    steps with ``time.perf_counter`` and draws a 1x2 figure: raw confusion
    counts on the left, row-normalised matrix on the right.
    """
    classifier = GaussianNB()

    start = time.perf_counter()
    classifier.fit(train, train_label)
    end = time.perf_counter()

    start_test = time.perf_counter()
    predicted = classifier.predict(test)
    end_test = time.perf_counter()

    # np.round works whether accuracy_score returns a NumPy scalar or a
    # plain Python float (which has no .round() method).
    acc = np.round(accuracy_score(test_label, predicted), 3)
    train_time = np.round(end - start, 4)
    test_time = np.round(end_test - start_test, 4)

    # One matrix serves both plots: normalisation works on a float copy.
    confusion = confusion_matrix(test_label, predicted)

    _, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10))
    plot_confusion_matrix(ax1, confusion, range(10),
                          False, title=f"Gaussian classifier Confusion acc={acc}, train={train_time}, test={test_time}")
    plot_confusion_matrix(ax2, confusion, range(10),
                          True, title=f"Gaussian classifier Confidence acc={acc}")
import pandas as pd
if __name__ == '__main__':
    # Script entry point: load the pendigits train/test splits, scale the
    # features, then run every classifier experiment and save its figure.

    # The first 16 columns are used as features, column 16 as the class label.
    # header=None keeps the first row as data instead of using it for column
    # names - NOTE(review): assumes the files have no header row (as in the
    # UCI distribution); confirm for the local copies.
    train_data = pd.read_csv("pendigits.tra", header=None)
    train = train_data.iloc[:, 0:16]
    train_label = train_data.iloc[:, 16]

    # Evaluate on the held-out test file, not on the training file again.
    test_data = pd.read_csv("pendigits.tes", header=None)
    test = test_data.iloc[:, 0:16]
    test_label = test_data.iloc[:, 16]

    # NOTE(review): preprocess() fits a new scaler on whatever it is given,
    # so the test set is scaled with its own min/max, not the training
    # set's. Consider fitting once on train and reusing it for test.
    train = preprocess(train)
    test = preprocess(test)

    for method in [KNN, Parzen, Guassian]:
        print("method=", method)
        method()
        # Save while the figure just created is still the current one.
        # Saving after plt.show() writes an empty figure, and a single file
        # cannot hold the three separate figures.
        plt.savefig(f"results_{method.__name__}.png")
    plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment