Created
January 15, 2019 09:52
-
-
Save Hsankesara/34174abb15c1426bad2985c67cae6f9e to your computer and use it in GitHub Desktop.
K-Means clustering and KNN classifier from scratch in Python3
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math | |
def euclidean_dist(p1, p2): | |
return math.sqrt(math.pow(p1[0] - p2[0], 2) + math.pow(p1[1] - p2[1], 2)) | |
def allocate_class(x, means): | |
classes = {} | |
for clas in means: | |
classes[clas] = [] | |
for idx, point in enumerate(x): | |
min_dist = None | |
min_cls = None | |
for mean in means: | |
dist = euclidean_dist(means[mean], point) | |
if min_dist is None or min_dist > dist: | |
min_dist = dist | |
min_cls = mean | |
classes[min_cls].append(idx) | |
return classes | |
def compare_classes(class1, class2): | |
for i in class1: | |
m = max(len(class1[i]), len(class2[i])) | |
if len(set(class1[i]).intersection(set(class2[i]))) != m: | |
return True | |
return False | |
def get_mean(x, indx): | |
m = len(indx) | |
x_mean = 0 | |
y_mean = 0 | |
for i in indx: | |
x_mean += x[i][0] | |
y_mean += x[i][1] | |
x_mean /= m | |
y_mean /= m | |
return (x_mean, y_mean) | |
def kmeans(x, k, classes): | |
means = {} | |
prev_classes = {} | |
for i in classes: | |
prev_classes[i] = [] | |
while(compare_classes(classes, prev_classes)): | |
for i in classes: | |
means[i] = get_mean(x, classes[i]) | |
print('Means', means) | |
prev_classes = classes | |
classes = allocate_class(x, means) | |
return classes | |
def main(): | |
points = [(1,1), (2,1), (1,2), (-1, -1), (-2, 1), (-2, -1), (-2, 2)] | |
k = 3 | |
classes = { | |
'1' : [0,1,2], | |
'2' : [3], | |
'3' : [4,5,6] | |
} | |
classes = kmeans(points, k, classes) | |
print(classes) | |
if __name__ == "__main__": | |
main() | |
pass |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math | |
import operator | |
def euclidean_dist(p1, p2): | |
return math.sqrt(math.pow(p1[0] - p2[0], 2) + math.pow(p1[1] - p2[1], 2)) | |
def knn(x, k, classes, test): | |
results = [] | |
for t in test: | |
dist = [None] * len(x) | |
for i in range(len(x)): | |
dist[i] = euclidean_dist(x[i], t) | |
indexes = [i[0] for i in sorted(enumerate(dist), key=lambda x:x[1], reverse=True)] | |
k_indx = indexes[:k] | |
cls_count = {} | |
for p in classes: | |
cls_count[p] = 0 | |
for idx in k_indx: | |
for j in classes: | |
if idx in classes[j]: | |
cls_count[j] += 1 | |
sorted_x = sorted(cls_count.items(), key=operator.itemgetter(1) ) | |
results.append(sorted_x[0][0]) | |
return results | |
def main(): | |
train = [(-2, -1), (-2, 1), (-2, 2), (-1, -1), (1, -1), (1, 1), (1, 2), (2, 1)] | |
k = 3 | |
classes = { | |
'1' : [0, 3, 4], | |
'2' : [1, 2], | |
'3' : [5, 6, 7] | |
} | |
test = [(-1,1), (0, 0)] | |
out = knn(train, k, classes, test) | |
print(out) | |
if __name__ == "__main__": | |
main() | |
pass |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment