Skip to content

Instantly share code, notes, and snippets.

@Hsankesara
Created January 15, 2019 09:52
Show Gist options
  • Save Hsankesara/34174abb15c1426bad2985c67cae6f9e to your computer and use it in GitHub Desktop.
Save Hsankesara/34174abb15c1426bad2985c67cae6f9e to your computer and use it in GitHub Desktop.
K-Means clustering and KNN classifier from scratch in Python3
import math
def euclidean_dist(p1, p2):
return math.sqrt(math.pow(p1[0] - p2[0], 2) + math.pow(p1[1] - p2[1], 2))
def allocate_class(x, means):
classes = {}
for clas in means:
classes[clas] = []
for idx, point in enumerate(x):
min_dist = None
min_cls = None
for mean in means:
dist = euclidean_dist(means[mean], point)
if min_dist is None or min_dist > dist:
min_dist = dist
min_cls = mean
classes[min_cls].append(idx)
return classes
def compare_classes(class1, class2):
for i in class1:
m = max(len(class1[i]), len(class2[i]))
if len(set(class1[i]).intersection(set(class2[i]))) != m:
return True
return False
def get_mean(x, indx):
m = len(indx)
x_mean = 0
y_mean = 0
for i in indx:
x_mean += x[i][0]
y_mean += x[i][1]
x_mean /= m
y_mean /= m
return (x_mean, y_mean)
def kmeans(x, k, classes):
means = {}
prev_classes = {}
for i in classes:
prev_classes[i] = []
while(compare_classes(classes, prev_classes)):
for i in classes:
means[i] = get_mean(x, classes[i])
print('Means', means)
prev_classes = classes
classes = allocate_class(x, means)
return classes
def main():
points = [(1,1), (2,1), (1,2), (-1, -1), (-2, 1), (-2, -1), (-2, 2)]
k = 3
classes = {
'1' : [0,1,2],
'2' : [3],
'3' : [4,5,6]
}
classes = kmeans(points, k, classes)
print(classes)
if __name__ == "__main__":
main()
pass
import math
import operator
def euclidean_dist(p1, p2):
return math.sqrt(math.pow(p1[0] - p2[0], 2) + math.pow(p1[1] - p2[1], 2))
def knn(x, k, classes, test):
results = []
for t in test:
dist = [None] * len(x)
for i in range(len(x)):
dist[i] = euclidean_dist(x[i], t)
indexes = [i[0] for i in sorted(enumerate(dist), key=lambda x:x[1], reverse=True)]
k_indx = indexes[:k]
cls_count = {}
for p in classes:
cls_count[p] = 0
for idx in k_indx:
for j in classes:
if idx in classes[j]:
cls_count[j] += 1
sorted_x = sorted(cls_count.items(), key=operator.itemgetter(1) )
results.append(sorted_x[0][0])
return results
def main():
train = [(-2, -1), (-2, 1), (-2, 2), (-1, -1), (1, -1), (1, 1), (1, 2), (2, 1)]
k = 3
classes = {
'1' : [0, 3, 4],
'2' : [1, 2],
'3' : [5, 6, 7]
}
test = [(-1,1), (0, 0)]
out = knn(train, k, classes, test)
print(out)
if __name__ == "__main__":
main()
pass
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment