Skip to content

Instantly share code, notes, and snippets.

@ivan-krukov
Last active August 29, 2015 14:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ivan-krukov/41928fda0cada44469de to your computer and use it in GitHub Desktop.
Save ivan-krukov/41928fda0cada44469de to your computer and use it in GitHub Desktop.
A simple kNN implementation
1 1 0
1 2 0
1 3 0
2 1 0
2 2 0
2 3 0
4 1 1
4 2 1
4 3 1
5 1 1
5 2 1
5 3 1
import csv
import math
from collections import defaultdict
class point:
"""Representation of a labeled point in N-dimensional space"""
vector = []
label = None
def __init__(self,vector,label):
"""A point is constructed from the data and a label"""
self.vector = list(map(float,vector))
self.label = int(label)
@classmethod
def fromlist(self,l):
"""Construct a point from a list, label being the last element"""
return self(l[:-1],l[-1])
def euclid(a,b):
"""Euclidian distance in N dimensions"""
s = 0
for i in zip(a,b):
s += (i[0]-i[1])**2
return math.sqrt(s)
def vote(measure):
"""Find the most common element in a list"""
count = defaultdict(int)
majority = 0
for m in majority:
count[m] += 1
if count[m] > majority:
majority = m
return majority
def knn(data,point,k,distance_f=euclid):
"""K-nearest-neighbor classifier"""
distances = []
for d in data:
p = distance_f(d.vector,point)
distances.append((p,d.label))
distances.sort() #sort by distance, being the first element
nearest = distances[:k]
return vote([n[1] for n in nearest]) #only look at the labels
if __name__=="__main__":
data = []
with open('data.csv') as csvfile:
reader = csv.reader(csvfile)
for row in reader:
d = point.fromlist(row)
data.append(d)
"""Classify a new point"""
p = [3,0]
l = knn(data,p,3)
print(l)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment