Last active
December 6, 2017 11:13
-
-
Save tuhinsherlock/d3cbfc3b911c451405bd6c3b0f853e39 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## K NEAREST NEIGHBOUR MACHINE LEARNING ALGORITHM IN PYTHON ##
def train(X_train, y_train):
    """Placeholder training step; intentionally does nothing.

    The prediction routine in this file works directly from the training
    data handed to it, so there is no model state to build here. The
    function exists only so the workflow has an explicit "train" step.

    Args:
        X_train: Training observations (unused).
        y_train: Training labels (unused).

    Returns:
        None.
    """
    # do nothing
    return
def predict(X_train, y_train, x_test, k):
    """Classify one observation by majority vote of its k nearest neighbours.

    Args:
        X_train: 2-D array-like of training observations, one row per sample.
        y_train: Training labels; ``y_train[i]`` is the label of row ``i``.
        x_test: 1-D array-like holding the observation to classify.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The most common label among the ``k`` training rows with the
        smallest Euclidean distance to ``x_test``. Rows at equal distance
        are taken in row order; labels with equal vote counts are resolved
        by ``Counter.most_common`` (first-encountered label on Python 3.7+,
        i.e. the label of the nearer neighbour).

    Raises:
        ValueError: If ``k`` is smaller than 1 or larger than the number of
            training rows.
    """
    X_train = np.asarray(X_train)
    n_samples = len(X_train)

    # Fail early with a readable message instead of an IndexError deep in
    # the neighbour lookup; ValueError matches kNearestNeighbor's convention.
    if not 1 <= k <= n_samples:
        raise ValueError(
            'k must be between 1 and the number of training samples (%d), '
            'got %r' % (n_samples, k)
        )

    # Euclidean distance from x_test to every training row, computed in one
    # vectorised expression rather than a Python-level loop.
    distances = np.sqrt(np.sum(np.square(x_test - X_train), axis=1))

    # A stable sort keeps rows with equal distance in ascending row order,
    # which is the same order that sorting [distance, index] pairs produced.
    nearest = np.argsort(distances, kind='stable')[:k]

    # Labels of the k nearest neighbours, nearest first.
    targets = [y_train[int(index)] for index in nearest]

    # return most common target
    return Counter(targets).most_common(1)[0][0]
def kNearestNeighbor(X_train, y_train, X_test, predictions, k):
    """Predict a label for every test observation, appending to ``predictions``.

    Args:
        X_train: Training observations, one row per sample.
        y_train: Training labels aligned with the rows of ``X_train``.
        X_test: Test observations, one row per sample to classify.
        predictions: Caller-supplied list; one predicted label per row of
            ``X_test`` is appended to it, in row order.
        k: Number of nearest neighbours that vote on each prediction.

    Returns:
        None. Results are delivered through ``predictions``.

    Raises:
        ValueError: If ``k`` is smaller than 1 or larger than the number of
            training samples.
    """
    n_samples = len(X_train)

    # A vote needs at least one neighbour; reject this up front rather than
    # failing later inside predict.
    if k < 1:
        raise ValueError('k must be at least 1, got %r' % (k,))

    # check if k larger than n
    if k > n_samples:
        raise ValueError(
            'k=%r is larger than the number of training samples (%d)'
            % (k, n_samples)
        )

    # train on the input data
    train(X_train, y_train)

    # Collect every prediction first so that a failure part-way through does
    # not leave the caller's list partially filled.
    results = [predict(X_train, y_train, x_test, k) for x_test in X_test]
    predictions.extend(results)
# making our predictions
predictions = []
try:
    # Only the kNN call is guarded: its ValueError is what signals an
    # invalid number of neighbours for the training set.
    kNearestNeighbor(X_train, y_train, X_test, predictions, 7)
except ValueError:
    print('Can\'t have more neighbors than training samples!!')
else:
    # Success path kept outside the try body so that a ValueError raised
    # while converting or scoring is not misreported as a bad k.
    predictions = np.asarray(predictions)
    # evaluating accuracy
    accuracy = accuracy_score(y_test, predictions) * 100
    print('\nThe accuracy of OUR classifier is %d%%' % accuracy)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment