Last active
November 14, 2017 08:59
Revisions
-
tpgmartin revised this gist
Nov 14, 2017 · 1 changed file with 0 additions and 4 deletions. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -27,10 +27,6 @@ def __closest(self, row): return self.__vote(sorted_distances) def __vote(self, distances): return Counter(x[0] for x in distances[:self.n_neighbors]).most_common(1)[0][0] # test.py -
tpgmartin revised this gist
Nov 7, 2017 · 1 changed file with 55 additions and 46 deletions. There are no files selected for viewing.
# main.py
import heapq
from collections import Counter
from operator import itemgetter

from scipy.spatial import distance


class KNN:
    """k-nearest-neighbours classifier (Euclidean distance, majority vote).

    Args:
        n_neighbors: Number of nearest training samples that vote on each
            prediction. Must be at least 1.

    Raises:
        ValueError: If ``n_neighbors`` is smaller than 1.
    """

    def __init__(self, n_neighbors=1):
        # k == 0 would leave no voters, and a negative k would, through
        # negative slicing, silently drop the farthest sample instead of
        # selecting the nearest ones.
        if n_neighbors < 1:
            raise ValueError(
                "n_neighbors must be at least 1, got {!r}".format(n_neighbors)
            )
        self.n_neighbors = n_neighbors

    def fit(self, X_train, y_train):
        """Store the training samples and their labels.

        Args:
            X_train: Sequence of feature vectors.
            y_train: Sequence of labels, one per row of ``X_train``.

        Raises:
            ValueError: If the inputs are empty or differ in length.
        """
        # len() rather than truthiness: array-like inputs may not define an
        # unambiguous truth value.
        if len(X_train) != len(y_train):
            raise ValueError("X_train and y_train must have the same length")
        if len(X_train) == 0:
            raise ValueError("at least one training sample is required")
        self.X_train = X_train
        self.y_train = y_train

    def predict(self, X_test):
        """Return a list with the predicted label for every row of ``X_test``."""
        return [self.__closest(row) for row in X_test]

    def __closest(self, row):
        """Return the majority label among the training samples nearest to ``row``."""
        labelled = (
            (label, distance.euclidean(row, sample))
            for sample, label in zip(self.X_train, self.y_train)
        )
        # nsmallest is equivalent to sorted(...)[:n], so equidistant samples
        # keep their training order, without sorting the whole training set.
        nearest = heapq.nsmallest(self.n_neighbors, labelled, key=itemgetter(1))
        return self.__vote(nearest)

    def __vote(self, distances):
        """Return the most common label among the first ``n_neighbors`` pairs.

        ``distances`` is a list of ``(label, distance)`` pairs ordered from
        nearest to farthest.
        """
        votes = Counter(label for label, _ in distances[: self.n_neighbors])
        return votes.most_common(1)[0][0]


# test.py
# KNN is defined above in this listing; when the two sections live in
# separate files, add `from main import KNN` here.
import pytest

X_train = [
    [0, 0, 0, 0],
    [1, 1, 1, 1],
    [1, 1, 1, 1],
    [2, 2, 2, 2],
    [2, 2, 2, 2],
    [2, 2, 2, 2],
    [2, 2, 2, 2],
]
y_train = [0, 1, 1, 2, 2, 2, 2]


@pytest.mark.parametrize("n_neighbors", [1, 3, 5])
def test_KNN_should_be_initialised_with_n_neighbors(n_neighbors):
    clf = KNN(n_neighbors)
    clf.fit(X_train, y_train)

    assert clf.n_neighbors == n_neighbors


@pytest.mark.parametrize("n_neighbors", [1, 3, 5])
def test_should_be_able_to_pass_training_data_to_classifier(n_neighbors):
    clf = KNN(n_neighbors)
    clf.fit(X_train, y_train)

    assert clf.X_train == X_train
    assert clf.y_train == y_train


X_test = [[0, 0, 0, 0]]


@pytest.mark.parametrize(
    ("n_neighbors", "y_test"),
    [(1, [0]), (3, [1]), (7, [2])],
)
def test_predict_should_return_label_for_test_data(n_neighbors, y_test):
    clf = KNN(n_neighbors)
    clf.fit(X_train, y_train)
    predictions = clf.predict(X_test)

    assert predictions == y_test
tpgmartin created this gist
Nov 5, 2017. There are no files selected for viewing.
import math
from collections import Counter

from scipy.spatial import distance


def euc(a, b):
    """Return the Euclidean distance between points ``a`` and ``b``."""
    return distance.euclidean(a, b)


class ScrappyKNN:
    """Minimal k-nearest-neighbours classifier using majority vote."""

    def fit(self, X_train, y_train, k):
        """Store the training data and the number of voting neighbours.

        Args:
            X_train: Sequence of feature vectors.
            y_train: Sequence of labels, one per row of ``X_train``.
            k: Number of nearest neighbours that vote; must be at least 1.

        Raises:
            ValueError: If ``k`` is smaller than 1, or the training inputs
                are empty or differ in length.
        """
        if k < 1:
            raise ValueError("k must be at least 1, got {!r}".format(k))
        # len() rather than truthiness: array-like inputs may not define an
        # unambiguous truth value.
        if len(X_train) != len(y_train):
            raise ValueError("X_train and y_train must have the same length")
        if len(X_train) == 0:
            raise ValueError("at least one training sample is required")
        self.X_train = X_train
        self.y_train = y_train
        self.k = k

    def predict(self, X_test):
        """Return a list with the predicted label for every row of ``X_test``."""
        return [self.vote(row) for row in X_test]

    def closest(self, row):
        """Return ``[label, distance]`` pairs for all training samples.

        The pairs are ordered from the nearest sample to the farthest one.
        """
        # Every training sample is a candidate, including the first one.
        pairs = [
            [label, euc(row, sample)]
            for sample, label in zip(self.X_train, self.y_train)
        ]
        return sorted(pairs, key=lambda pair: pair[1])

    def vote(self, row):
        """Return the most common label among the ``k`` samples nearest to ``row``."""
        # Slicing tolerates k larger than the training set: all samples vote.
        nearest = self.closest(row)[: self.k]
        return Counter(label for label, _ in nearest).most_common(1)[0][0]


def main():
    """Train on half of the iris data set and print the test accuracy."""
    # scikit-learn is imported here so the classifier can be imported and
    # reused without it.
    from sklearn import datasets
    from sklearn.metrics import accuracy_score

    try:
        from sklearn.model_selection import train_test_split
    except ImportError:
        # Older scikit-learn releases only provide the original module path.
        from sklearn.cross_validation import train_test_split

    iris = datasets.load_iris()
    X = iris.data
    y = iris.target

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.5, random_state=0
    )

    clf = ScrappyKNN()
    clf.fit(X_train, y_train, 3)
    predictions = clf.predict(X_test)

    print(accuracy_score(y_test, predictions))


if __name__ == "__main__":
    main()