Skip to content

Instantly share code, notes, and snippets.

@leonaburime
Last active August 29, 2015 14:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save leonaburime/ef94c529cc9b7aa2de89 to your computer and use it in GitHub Desktop.
Save leonaburime/ef94c529cc9b7aa2de89 to your computer and use it in GitHub Desktop.
k-Nearest Neighbors Algorithm
from __future__ import division
from pprint import pprint
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
#References - http://saravananthirumuruganathan.wordpress.com/2010/05/17/a-detailed-introduction-to-k-nearest-neighbor-knn-algorithm/
#http://www.saedsayad.com/k_nearest_neighbors.htm
# Toy credit data set: one row per loan applicant, holding age, loan
# amount, and whether the applicant defaulted ('Y'/'N').
# NOTE: `input` shadows the builtin, but the kNN class's parameter
# defaults reference these module-level names, so they are kept as-is.
_samples = [
    # Age   Loan    Default
    (25,   40000,   'N'),
    (35,   60000,   'N'),
    (45,   80000,   'N'),
    (20,   20000,   'N'),
    (35,  120000,   'N'),
    (52,   18000,   'N'),
    (23,   95000,   'Y'),
    (40,   62000,   'Y'),
    (60,  100000,   'Y'),
    (48,  220000,   'Y'),
    (33,  150000,   'Y'),
]
# Split the table into the feature rows and the label column.
input = [[age, loan] for age, loan, _ in _samples]
output = [label for _, _, label in _samples]
# Query point ([Age, Loan]) whose default outcome we want to predict.
predict = [48, 142000]
class kNN:
    """k-Nearest Neighbors walk-through.

    Classifies a loan applicant described by [age, loan] as defaulting
    ('Y') or not ('N'), first with a hand-rolled implementation
    (min-max normalization + Euclidean distance + majority vote), then
    cross-checks the answer with sklearn's KNeighborsClassifier.
    """

    # Default neighbor count shared by both solvers.
    k = 2

    def __init__(self):
        self.k = 2

    # SKLearn Module's Implementation of k-Nearest Neighbors
    def sklearnNearestNeighbors(self, input, output, norm_predict, predict, k=k):
        """Fit sklearn's k-NN on the normalized data and print its view.

        input        -- normalized feature rows (2-D array-like)
        output       -- labels as a numpy array (fancy-indexed below)
        norm_predict -- normalized query point (1-D)
        predict      -- original, unnormalized query (display only)
        k            -- number of neighbors to consult
        """
        self.skNeighbors = KNeighborsClassifier(n_neighbors=k)
        self.skNeighbors.fit(input, output)
        print("\nSKLearn: Minimum distances with their labels")
        # Modern sklearn requires a 2-D sample matrix, so present the
        # single query point as shape (1, n_features).
        query = np.asarray(norm_predict).reshape(1, -1)
        distances, indices = self.skNeighbors.kneighbors(query)
        # Pair each neighbor distance with its label. ravel() flattens
        # the (1, k) index array so `output` indexing yields flat labels
        # (the original zipped against a nested array, producing a
        # single malformed pair).
        pprint(list(zip(distances[0].tolist(), output[indices.ravel()].tolist())))
        prediction_list = self.skNeighbors.predict(query)
        print("\nSKLearn Prediction of %s with k = %d is %s \n" % (predict, k, prediction_list[0]))

    def solveKNN(self, input=input, output=output, predict=predict, k=k):
        """Hand-rolled k-NN: normalize, rank distances, majority-vote.

        Prints the distance list and the predicted label, then runs the
        sklearn implementation on the same normalized data for comparison.
        """
        # Lets numpify our arrays and make them floats so the
        # normalization below is real division.
        np_input = np.array(input).astype(float)
        np_predict = np.array(predict).astype(float)
        np_output = np.array(output)
        # Per-column max/min for min-max scaling into [0, 1].
        max_value, min_value = np_input.max(axis=0), np_input.min(axis=0)
        # Now we can standardize our arrays.
        norm_input = (np_input - min_value) / (max_value - min_value)
        # Fix: normalize the float numpy copy (np_predict) — the
        # original normalized the raw list and left np_predict unused.
        norm_predict = (np_predict - min_value) / (max_value - min_value)
        # Euclidean distance from the query to every training row.
        distances = np.array([np.linalg.norm(norm_predict - v) for v in norm_input])
        pprint(list(distances))
        # Indices and values of the k smallest distances.
        min_indices, min_values = distances.argsort()[:k], sorted(distances)[:k]
        # Get the k nearest labels.
        nearest = list(np_output[min_indices])
        print("\nMinimum distances with their labels")
        # Fix: the original printed the header above but never the data.
        pprint(list(zip(min_values, nearest)))
        # Majority vote: the most frequent label among the k nearest.
        print("\nPrediction of %s with k=%d is %s " % (predict, k, max(nearest, key=nearest.count)))
        self.sklearnNearestNeighbors(norm_input, np_output, norm_predict, predict, k)
#Comments for tutorials
"""
Steps to run k-Nearest Neighbor
1. import kNN
2. knn = kNN.kNN()
3. knn.solveKNN()
Normalized input should look like
input = [
#Age #Loan
[ 0.125 0.109]
[ 0.375 0.208]
[ 0.625 0.307]
[ 0. 0.01 ]
[ 0.375 0.505]
[ 0.8 0. ]
[ 0.075 0.381]
[ 0.5 0.218]
[ 1. 0.406]
[ 0.7 1. ]
[ 0.325 0.653]
]
The result set you get should look like this using predict = [48, 142000]
distances = [
(0.3159611532261599, 'N'),
(0.3427631575852688, 'N'),
(0.36500829549319175, 'Y'),
(0.37708549916305584, 'Y'),
(0.3861386138613863, 'Y'),
(0.44367484479639735, 'Y'),
(0.5200122747640707, 'N'),
(0.6219532147935409, 'N'),
(0.6669046778427162, 'Y'),
(0.7652450603896376, 'N'),
(0.9245367272230537, 'N')
]
"""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment