jurand71/knn_4.py Secret

## knn_4.py
class KNearestNeighbor(object):
    """ a kNN classifier with L2 distance """

    def __init__(self):
        pass

    def predict_label(self, dists, k=1):
        num_test = dists.shape[0]
        y_pred = np.zeros(num_test)
        for i in range(num_test):
            closest_y = []
            closest_y = self.y_train[np.argsort(dists[i])][0:k]
            y_pred[i] = np.bincount(closest_y).argmax()
        return y_pred

    def train(self, X, y):
        """
        Train the classifier. For k-nearest neighbors this is just
        memorizing the training data.

        Inputs:
        - X: A numpy array of shape (num_train, D) containing the training data
          consisting of num_train samples each of dimension D.
        - y: A numpy array of shape (N,) containing the training labels, where
             y[i] is the label for X[i].
        """
        self.X_train = X
        self.y_train = y

    def predict(self, X, k=1):
        """
        Predict labels for test data using this classifier.

        Inputs:
        - X: A numpy array of shape (num_test, D) containing test data consisting
             of num_test samples each of dimension D.
        - k: The number of nearest neighbors that vote for the predicted labels.
        - num_loops: Determines which implementation to use to compute distances
          between training points and testing points.

        Returns:
        - y: A numpy array of shape (num_test,) containing predicted labels for the
          test data, where y[i] is the predicted label for the test point X[i].
        """
        dists = self.compute_distances_no_loops(X)

        return self.predict_labels(dists, k=k)

    def compute_distances_no_loops(self, X):
        """
        Compute the distance between each test point in X and each training point
        in self.X_train using no explicit loops.

        Input / Output: Same as compute_distances_two_loops
        """
        num_test = X.shape[0]
        num_train = self.X_train.shape[0]
        dists = np.zeros((num_test, num_train))
        #########################################################################
        # let x = [x1,x2], y =[y1,y2], then
        # L2(x,y) = sqrt((x1-y1)^2 + (x2-y2)^2)
        # L2(x,y) = sqrt(x1^2-2*x1*y1+y1^2 + x2^2-2*x2*y2 + y2^2)
        # L2(x,y) = sqrt((x1^2+x2^2) + (y1^2+y2^2)) - 2*(x1*y1+x2*y2)
        # L2(x,y) = sqrt(sum(square(x))+ sum(square(y)) - 2(x dot y))
        # dists = np.sqrt((X ** 2).sum(axis=1, keepdims=True) + (self.X_train ** 2).sum(axis=1) - 2 * X.dot(self.X_train.T))

        dists = np.sqrt(-2 * np.dot(X, self.X_train.T) +
                        np.sum(np.square(self.X_train), axis=1) +
                        np.sum(np.square(X), axis=1)[:, np.newaxis])

        return dists

    def predict_labels(self, dists, k=1):
        """
        Given a matrix of distances between test points and training points,
        predict a label for each test point.

        Inputs:
        - dists: A numpy array of shape (num_test, num_train) where dists[i, j]
          gives the distance betwen the ith test point and the jth training point.

        Returns:
        - y: A numpy array of shape (num_test,) containing predicted labels for the
          test data, where y[i] is the predicted label for the test point X[i].
        """
        num_test = dists.shape[0]
        y_pred = np.zeros(num_test)
        for i in range(num_test):
            # A list of length k storing the labels of the k nearest neighbors to
            # the ith test point.
            closest_y = []
            closest_y = self.y_train[np.argsort(dists[i])][0:k]
            closest_y = closest_y.astype(int)
            y_pred[i] = np.bincount(closest_y).argmax()
        return y_pred
	class KNearestNeighbor(object):
	""" a kNN classifier with L2 distance """

	def __init__(self):
	pass

	def predict_label(self, dists, k=1):
	num_test = dists.shape[0]
	y_pred = np.zeros(num_test)
	for i in range(num_test):
	closest_y = []
	closest_y = self.y_train[np.argsort(dists[i])][0:k]
	y_pred[i] = np.bincount(closest_y).argmax()
	return y_pred

	def train(self, X, y):
	"""
	Train the classifier. For k-nearest neighbors this is just
	memorizing the training data.

	Inputs:
	- X: A numpy array of shape (num_train, D) containing the training data
	consisting of num_train samples each of dimension D.
	- y: A numpy array of shape (N,) containing the training labels, where
	y[i] is the label for X[i].
	"""
	self.X_train = X
	self.y_train = y

	def predict(self, X, k=1):
	"""
	Predict labels for test data using this classifier.

	Inputs:
	- X: A numpy array of shape (num_test, D) containing test data consisting
	of num_test samples each of dimension D.
	- k: The number of nearest neighbors that vote for the predicted labels.
	- num_loops: Determines which implementation to use to compute distances
	between training points and testing points.

	Returns:
	- y: A numpy array of shape (num_test,) containing predicted labels for the
	test data, where y[i] is the predicted label for the test point X[i].
	"""
	dists = self.compute_distances_no_loops(X)

	return self.predict_labels(dists, k=k)

	def compute_distances_no_loops(self, X):
	"""
	Compute the distance between each test point in X and each training point
	in self.X_train using no explicit loops.

	Input / Output: Same as compute_distances_two_loops
	"""
	num_test = X.shape[0]
	num_train = self.X_train.shape[0]
	dists = np.zeros((num_test, num_train))
	#########################################################################
	# let x = [x1,x2], y =[y1,y2], then
	# L2(x,y) = sqrt((x1-y1)^2 + (x2-y2)^2)
	# L2(x,y) = sqrt(x1^2-2x1y1+y1^2 + x2^2-2x2y2 + y2^2)
	# L2(x,y) = sqrt((x1^2+x2^2) + (y1^2+y2^2)) - 2(x1y1+x2*y2)
	# L2(x,y) = sqrt(sum(square(x))+ sum(square(y)) - 2(x dot y))
	# dists = np.sqrt((X 2).sum(axis=1, keepdims=True) + (self.X_train 2).sum(axis=1) - 2 * X.dot(self.X_train.T))

	dists = np.sqrt(-2 * np.dot(X, self.X_train.T) +
	np.sum(np.square(self.X_train), axis=1) +
	np.sum(np.square(X), axis=1)[:, np.newaxis])

	return dists

	def predict_labels(self, dists, k=1):
	"""
	Given a matrix of distances between test points and training points,
	predict a label for each test point.

	Inputs:
	- dists: A numpy array of shape (num_test, num_train) where dists[i, j]
	gives the distance betwen the ith test point and the jth training point.

	Returns:
	- y: A numpy array of shape (num_test,) containing predicted labels for the
	test data, where y[i] is the predicted label for the test point X[i].
	"""
	num_test = dists.shape[0]
	y_pred = np.zeros(num_test)
	for i in range(num_test):
	# A list of length k storing the labels of the k nearest neighbors to
	# the ith test point.
	closest_y = []
	closest_y = self.y_train[np.argsort(dists[i])][0:k]
	closest_y = closest_y.astype(int)
	y_pred[i] = np.bincount(closest_y).argmax()
	return y_pred