Skip to content

Instantly share code, notes, and snippets.

@jurand71
Created November 16, 2022 08:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jurand71/4d1842f280ce1ad7d64bb3dfaa563fe3 to your computer and use it in GitHub Desktop.
Save jurand71/4d1842f280ce1ad7d64bb3dfaa563fe3 to your computer and use it in GitHub Desktop.
class KNearestNeighbor(object):
""" a kNN classifier with L2 distance """
def __init__(self):
pass
def predict_label(self, dists, k=1):
num_test = dists.shape[0]
y_pred = np.zeros(num_test)
for i in range(num_test):
closest_y = []
closest_y = self.y_train[np.argsort(dists[i])][0:k]
y_pred[i] = np.bincount(closest_y).argmax()
return y_pred
def train(self, X, y):
"""
Train the classifier. For k-nearest neighbors this is just
memorizing the training data.
Inputs:
- X: A numpy array of shape (num_train, D) containing the training data
consisting of num_train samples each of dimension D.
- y: A numpy array of shape (N,) containing the training labels, where
y[i] is the label for X[i].
"""
self.X_train = X
self.y_train = y
def predict(self, X, k=1):
"""
Predict labels for test data using this classifier.
Inputs:
- X: A numpy array of shape (num_test, D) containing test data consisting
of num_test samples each of dimension D.
- k: The number of nearest neighbors that vote for the predicted labels.
- num_loops: Determines which implementation to use to compute distances
between training points and testing points.
Returns:
- y: A numpy array of shape (num_test,) containing predicted labels for the
test data, where y[i] is the predicted label for the test point X[i].
"""
dists = self.compute_distances_no_loops(X)
return self.predict_labels(dists, k=k)
def compute_distances_no_loops(self, X):
"""
Compute the distance between each test point in X and each training point
in self.X_train using no explicit loops.
Input / Output: Same as compute_distances_two_loops
"""
num_test = X.shape[0]
num_train = self.X_train.shape[0]
dists = np.zeros((num_test, num_train))
#########################################################################
# let x = [x1,x2], y =[y1,y2], then
# L2(x,y) = sqrt((x1-y1)^2 + (x2-y2)^2)
# L2(x,y) = sqrt(x1^2-2*x1*y1+y1^2 + x2^2-2*x2*y2 + y2^2)
# L2(x,y) = sqrt((x1^2+x2^2) + (y1^2+y2^2)) - 2*(x1*y1+x2*y2)
# L2(x,y) = sqrt(sum(square(x))+ sum(square(y)) - 2(x dot y))
# dists = np.sqrt((X ** 2).sum(axis=1, keepdims=True) + (self.X_train ** 2).sum(axis=1) - 2 * X.dot(self.X_train.T))
dists = np.sqrt(-2 * np.dot(X, self.X_train.T) +
np.sum(np.square(self.X_train), axis=1) +
np.sum(np.square(X), axis=1)[:, np.newaxis])
return dists
def predict_labels(self, dists, k=1):
"""
Given a matrix of distances between test points and training points,
predict a label for each test point.
Inputs:
- dists: A numpy array of shape (num_test, num_train) where dists[i, j]
gives the distance betwen the ith test point and the jth training point.
Returns:
- y: A numpy array of shape (num_test,) containing predicted labels for the
test data, where y[i] is the predicted label for the test point X[i].
"""
num_test = dists.shape[0]
y_pred = np.zeros(num_test)
for i in range(num_test):
# A list of length k storing the labels of the k nearest neighbors to
# the ith test point.
closest_y = []
closest_y = self.y_train[np.argsort(dists[i])][0:k]
closest_y = closest_y.astype(int)
y_pred[i] = np.bincount(closest_y).argmax()
return y_pred
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment