Skip to content

Instantly share code, notes, and snippets.

Created Sep 5, 2020
What would you like to do?
def knn_predict(X_train, X_test, y_train, y_test, k, p):
    """Predict a label for each test point by k-nearest-neighbour voting.

    For every point in ``X_test`` the distance to every point in ``X_train``
    is computed with ``minkowski_distance``. The labels of the ``k`` closest
    training points are counted and the most frequent label becomes the
    prediction for that test point.

    Args:
        X_train: Training points. Assumed to yield one point per iteration
            step (e.g. the rows of a 2-D array) and to be re-iterable.
        X_test: Points to classify, iterated the same way as ``X_train``.
        y_train: Training labels in the same order as ``X_train``. Labels are
            looked up by position, so a list, an array, or a pandas Series
            with any index all work.
        y_test: Unused; kept so existing callers do not need to change.
        k: Number of nearest neighbours that vote (must be at least 1).
        p: Forwarded to ``minkowski_distance`` as its ``p`` argument.

    Returns:
        A list holding one predicted label per point in ``X_test``.

    Raises:
        ValueError: If ``k`` is smaller than 1 or ``X_train`` has no points.
    """
    # Counter to help with label voting
    from collections import Counter

    if k < 1:
        raise ValueError("k must be at least 1")

    # Materialise the labels once so neighbours can be looked up by position,
    # independent of whatever index y_train may carry.
    labels = list(y_train)

    # Make predictions on the test data
    # Need output of 1 prediction per test data point
    y_hat_test = []

    for test_point in X_test:
        # Distance from this test point to every training point, in order
        distances = [
            minkowski_distance(test_point, train_point, p=p)
            for train_point in X_train
        ]
        if not distances:
            raise ValueError("X_train contains no points")

        # Store distances in a dataframe; the default integer index is the
        # position of each training point.
        df_dists = pd.DataFrame(data=distances, columns=['dist'])

        # Sort distances, and only consider the k closest points.
        # A stable sort keeps equidistant points in training order, so the
        # result is deterministic.
        df_nn = df_dists.sort_values(by=['dist'], axis=0, kind='mergesort')[:k]

        # Create counter object to track the labels of k closest neighbors
        counter = Counter(labels[position] for position in df_nn.index)

        # Get most common label of all the nearest neighbors
        prediction = counter.most_common()[0][0]

        # Append prediction to output list
        y_hat_test.append(prediction)

    return y_hat_test
# Classify every test point with k=5 voting neighbours and distance order p=1
y_hat_test = knn_predict(
    X_train,
    X_test,
    y_train,
    y_test,
    k=5,
    p=1,
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment