-
-
Save jurand71/4d1842f280ce1ad7d64bb3dfaa563fe3 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class KNearestNeighbor(object): | |
""" a kNN classifier with L2 distance """ | |
def __init__(self): | |
pass | |
def predict_label(self, dists, k=1): | |
num_test = dists.shape[0] | |
y_pred = np.zeros(num_test) | |
for i in range(num_test): | |
closest_y = [] | |
closest_y = self.y_train[np.argsort(dists[i])][0:k] | |
y_pred[i] = np.bincount(closest_y).argmax() | |
return y_pred | |
def train(self, X, y): | |
""" | |
Train the classifier. For k-nearest neighbors this is just | |
memorizing the training data. | |
Inputs: | |
- X: A numpy array of shape (num_train, D) containing the training data | |
consisting of num_train samples each of dimension D. | |
- y: A numpy array of shape (N,) containing the training labels, where | |
y[i] is the label for X[i]. | |
""" | |
self.X_train = X | |
self.y_train = y | |
def predict(self, X, k=1): | |
""" | |
Predict labels for test data using this classifier. | |
Inputs: | |
- X: A numpy array of shape (num_test, D) containing test data consisting | |
of num_test samples each of dimension D. | |
- k: The number of nearest neighbors that vote for the predicted labels. | |
- num_loops: Determines which implementation to use to compute distances | |
between training points and testing points. | |
Returns: | |
- y: A numpy array of shape (num_test,) containing predicted labels for the | |
test data, where y[i] is the predicted label for the test point X[i]. | |
""" | |
dists = self.compute_distances_no_loops(X) | |
return self.predict_labels(dists, k=k) | |
def compute_distances_no_loops(self, X): | |
""" | |
Compute the distance between each test point in X and each training point | |
in self.X_train using no explicit loops. | |
Input / Output: Same as compute_distances_two_loops | |
""" | |
num_test = X.shape[0] | |
num_train = self.X_train.shape[0] | |
dists = np.zeros((num_test, num_train)) | |
######################################################################### | |
# let x = [x1,x2], y =[y1,y2], then | |
# L2(x,y) = sqrt((x1-y1)^2 + (x2-y2)^2) | |
# L2(x,y) = sqrt(x1^2-2*x1*y1+y1^2 + x2^2-2*x2*y2 + y2^2) | |
# L2(x,y) = sqrt((x1^2+x2^2) + (y1^2+y2^2)) - 2*(x1*y1+x2*y2) | |
# L2(x,y) = sqrt(sum(square(x))+ sum(square(y)) - 2(x dot y)) | |
# dists = np.sqrt((X ** 2).sum(axis=1, keepdims=True) + (self.X_train ** 2).sum(axis=1) - 2 * X.dot(self.X_train.T)) | |
dists = np.sqrt(-2 * np.dot(X, self.X_train.T) + | |
np.sum(np.square(self.X_train), axis=1) + | |
np.sum(np.square(X), axis=1)[:, np.newaxis]) | |
return dists | |
def predict_labels(self, dists, k=1): | |
""" | |
Given a matrix of distances between test points and training points, | |
predict a label for each test point. | |
Inputs: | |
- dists: A numpy array of shape (num_test, num_train) where dists[i, j] | |
gives the distance betwen the ith test point and the jth training point. | |
Returns: | |
- y: A numpy array of shape (num_test,) containing predicted labels for the | |
test data, where y[i] is the predicted label for the test point X[i]. | |
""" | |
num_test = dists.shape[0] | |
y_pred = np.zeros(num_test) | |
for i in range(num_test): | |
# A list of length k storing the labels of the k nearest neighbors to | |
# the ith test point. | |
closest_y = [] | |
closest_y = self.y_train[np.argsort(dists[i])][0:k] | |
closest_y = closest_y.astype(int) | |
y_pred[i] = np.bincount(closest_y).argmax() | |
return y_pred |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment