Last active
February 1, 2016 00:44
-
-
Save jogonba2/04a7f8cbe1237fe47090 to your computer and use it in GitHub Desktop.
K nearest neighbours
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
# | |
# KNN.py | |
# | |
# Copyright 2015 Overxflow | |
# | |
""" Vector difference """ | |
def v_difference(x,y): return [abs(x[i]-y[i]) for i in xrange(len(x))] | |
""" Vector difference for weighted euclidean distance """ | |
def v_pond_difference(x,y,w): return [w*abs(x[i]-y[i]) for i in xrange(len(x))] | |
""" Returns index of max first element in list inside another list """ | |
def max_distance(l): | |
i,m,mp = 0,float("-inf"),0 | |
for sl in l: | |
if sl[0]>m: | |
m = sl[0] | |
mp = i | |
i += 1 | |
return (mp,m) | |
""" Defines p-distance family """ | |
def p_distance_function(p,x,y,w): | |
if p==0: return max(map(abs,v_difference(x,y))) | |
else: | |
if w<=0: return (sum(v_difference(x,y)))**(1.0/p) # If w<=0, distance function isn't a metric. | |
else: return (sum(v_difference(x,y,w)))**(1.0/p) | |
""" Wilson prototype edition algorithm """ | |
def wilson(prototypes,p,k,w): | |
error,new_prototypes = 1,[] | |
while error: | |
error = 0 | |
for i in xrange(len(prototypes)): | |
prototype = prototypes[i] | |
c = prototype[1] | |
cp = knn(prototypes,p,k,[prototype[0]],w) | |
if cp[0][1]==c: new_prototypes.append(prototype) | |
else: error=1; prototypes.remove(prototype); break; # Slowly method :( # | |
return prototypes | |
""" Condensed nearest neighbours algorithm """ | |
def cnn(prototypes,p,k,w): | |
S,G = [prototypes[0]],[] | |
# First phase # | |
for i in xrange(1,len(prototypes)): | |
prototype = prototypes[i] | |
c = prototype[1] | |
cp = knn(S,p,k,[prototype[0]],w) | |
if cp[0][1]!=c: S.append(prototype) | |
else: G.append(prototype) | |
# Second phase # | |
error = 1 | |
while G!=[] and error==1: | |
error = 0 | |
for prototype in G: | |
cp = knn(S,p,k,[prototype[0]],w) | |
if cp[0][1]!=prototype[1]: S.append(prototype); G.remove(prototype); error = 1; break; | |
return S | |
""" K nearest neighbours algorithm """ | |
def knn(prototypes,p,k,test_samples,w): | |
classes = [] | |
for y in test_samples: | |
k_nearest,l= [],0 | |
for prototype in prototypes: | |
c = prototype[1] | |
prototype = prototype[0] | |
distance = p_distance_function(p,prototype,y,w) | |
if l<k: k_nearest.append([distance,c]) | |
else: | |
(pos_max,max_dist) = max_distance(k_nearest) | |
if distance<max_dist: | |
k_nearest[pos_max] = [distance,c] | |
l += 1 | |
h = {} | |
for nearest in k_nearest: | |
if nearest[1] not in h: h[nearest[1]] = 1 | |
else: h[nearest[1]] += 1 | |
classes.append([y,max(h,key=h.get)]) | |
return classes | |
""" Core k nearest neighbours """ | |
def core(prototypes,p,k,test_samples,w,wil,cn): | |
# Make Wilson edition # | |
if wil==1: prototypes = wilson(prototypes,p,k,w) | |
# Make CNN # | |
if cn ==1: prototypes = cnn(prototypes,p,k,w) | |
# Classificate KNN # | |
return knn(prototypes,p,k,test_samples,w) | |
""" Stringify output """ | |
def __str__(classified): | |
for sample in classified: print "Class of ",sample[0]," -> ",sample[1] | |
if __name__ == "__main__": | |
prototypes = [([1,2],0),([1,3],0),([2,2],0),([2,3],0),([5,2],1),([5,3],1),([5,4],1),([4,3],1),([4,4],1)] # Train samples # | |
test_samples = [[1,3],[5,2],[25,25],[0,0]] # Test samples # | |
__str__(core(prototypes,2,1,test_samples,0,1,1)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment