Skip to content

Instantly share code, notes, and snippets.

@pearcemc
Created October 14, 2010 18:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pearcemc/626777 to your computer and use it in GitHub Desktop.
Save pearcemc/626777 to your computer and use it in GitHub Desktop.
"""
Quick and dirty experimental implementation of k-nearest neighbours technique
"""
import random
from collections import Counter

import scipy.stats as stats
from scipy import randn

from kdata import *
class KNearest:
    """k-nearest-neighbour classifier over a multi-class dataset.

    The dataset ``ds`` must expose ``ds.classes``, a sequence of objects that
    each carry a ``data`` iterable of points.  A class is identified by its
    position in ``ds.classes``.
    """

    def __init__(self, ds):
        """Remember the dataset that predictions are made against."""
        self.ds = ds

    def predict(self, p1, k=1):
        """Return the modal class index among the ``k`` points nearest ``p1``.

        Args:
            p1: Test point; a sized, iterable sequence of coordinates.
            k: Number of neighbours that vote.  If it exceeds the number of
                known points, every point votes.

        Returns:
            Index into ``self.ds.classes`` of the winning class.  When several
            classes tie on votes, the smallest index wins.

        Raises:
            ValueError: If ``k`` is less than 1 or the dataset has no points.
        """
        if k < 1:
            raise ValueError("k must be at least 1, got %r" % (k,))

        # Rank on (distance, class index) only.  Keeping the point itself in
        # the tuple would make the sort compare points whenever distance and
        # class tie, which fails for array-like or otherwise unorderable
        # points.
        ranked = []
        for label, clas in enumerate(self.ds.classes):
            ranked.extend(
                (self._calc_distance(p1, p2), label) for p2 in clas.data
            )
        if not ranked:
            raise ValueError("cannot predict from a dataset with no points")
        ranked.sort()

        # Majority vote among the k closest points; ties go to the smallest
        # class index.
        votes = Counter(label for _, label in ranked[:k])
        best = max(votes.values())
        return min(label for label, count in votes.items() if count == best)

    def _calc_distance(self, p1, p2):
        """Return the Euclidean distance between points ``p1`` and ``p2``.

        Only the first ``len(p1)`` coordinates of ``p2`` are read, so ``p2``
        must have at least as many coordinates as ``p1``.
        """
        return sum((a - p2[i]) ** 2 for i, a in enumerate(p1)) ** 0.5
class TestPredictor:
    """Estimate a predictor's success rate on freshly generated points.

    The predictor must expose ``ds.classes`` (each class providing a
    ``generate()`` method that returns a point) and ``predict(point, k=...)``
    returning a class index.
    """

    def __init__(self, predictor, times=1000):
        """Store the predictor under test and the number of trials to run."""
        self.predictor = predictor
        self.times = times

    def test(self, k=1):
        """Run ``self.times`` random trials and return the fraction predicted correctly.

        Args:
            k: Neighbour count forwarded to ``predictor.predict``.

        Returns:
            float: Share of trials, between 0.0 and 1.0, in which the
            predicted class index equalled the class the point was generated
            from.

        Raises:
            ZeroDivisionError: If ``self.times`` is less than 1, since no
                trials means the rate is undefined.
        """
        outcomes = [self._trial(k) for _ in range(self.times)]
        if not outcomes:
            # Same exception type the bare division produced, but with a
            # message that names the actual cause.
            raise ZeroDivisionError(
                "success rate is undefined for times=%r (need times >= 1)"
                % (self.times,)
            )
        return float(sum(outcomes)) / float(len(outcomes))

    def _trial(self, k):
        """Generate one point from a random class; return 1 if predicted correctly, else 0."""
        classes = self.predictor.ds.classes
        actual = random.randrange(len(classes))
        point = classes[actual].generate()
        return 1 if self.predictor.predict(point, k=k) == actual else 0
if __name__ == "__main__":
    # Demo: build three sample classes, measure 3-nearest-neighbour accuracy
    # over 100 random trials, then hand the dataset to the plotting helper.
    # NOTE(review): each PDimClass argument looks like one (mean, spread) pair
    # per dimension -- confirm against kdata.
    c = PDimClass([(3, 2), (7, 1), (2, 1)])
    d = PDimClass([(2, 1), (3, 1), (4, 1)])
    e = PDimClass([(0, 1), (0, 1), (0, 2)])
    ds = MultiClassDS([c, d, e], length=120)
    knn = KNearest(ds)
    tester = TestPredictor(knn, times=100)
    # Call form of print works on both Python 2 and Python 3.
    print(tester.test(k=3))
    vis3d(ds)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment