Created
January 29, 2017 23:05
-
-
Save raymanfx/413e69fb669daeeb07f38e0ea86b6bba to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math | |
""" | |
k-means algorithm | |
""" | |
data_set = [[2, 2], | |
[2, 1], | |
[1, 1], | |
[0, 0], | |
[-1, 0], | |
[-2, 0], | |
[-1, -1]] | |
c1 = [] | |
c2 = [] | |
c3 = [] | |
cc1 = [1, 1] | |
cc2 = [] | |
cc3 = [] | |
def euclidian_distance(point1=[], point2=[]): | |
x1 = point1[0] | |
x2 = point2[0] | |
y1 = point1[1] | |
y2 = point2[1] | |
return math.sqrt(pow((x1-x2),2) + pow((y1-y2),2)) | |
def assign_point_to_c(point=[]): | |
dist_c1 = euclidian_distance(point, cc1) | |
dist_c2 = euclidian_distance(point, cc2) | |
dist_c3 = euclidian_distance(point, cc3) | |
if dist_c1 < dist_c2 and dist_c1 < dist_c3: | |
c1.append(point) | |
elif dist_c2 < dist_c1 and dist_c2 < dist_c3: | |
c2.append(point) | |
elif dist_c3 < dist_c1 and dist_c3 < dist_c2: | |
c3.append(point) | |
else: | |
c1.append(point) | |
def recompute_ccs(): | |
x_sum = 0 | |
y_sum = 0 | |
count = 0 | |
for i in range(0, len(c1)): | |
point = c1[i] | |
x_sum += point[0] | |
y_sum += point[1] | |
count += 1 | |
x_mean = x_sum / count | |
y_mean = y_sum / count | |
cc1[0] = x_mean | |
cc1[1] = y_mean | |
x_sum = 0 | |
y_sum = 0 | |
count = 0 | |
for i in range(0, len(c2)): | |
point = c2[i] | |
x_sum += point[0] | |
y_sum += point[1] | |
count += 1 | |
x_mean = x_sum / count | |
y_mean = y_sum / count | |
cc2[0] = x_mean | |
cc2[1] = y_mean | |
x_sum = 0 | |
y_sum = 0 | |
count = 0 | |
for i in range(0, len(c3)): | |
point = c3[i] | |
x_sum += point[0] | |
y_sum += point[1] | |
count += 1 | |
x_mean = x_sum / count | |
y_mean = y_sum / count | |
cc3[0] = x_mean | |
cc3[1] = y_mean | |
# find cc2 | |
max_dist = 0 | |
for i in range(0, len(data_set)): | |
point = data_set[i] | |
# skip cc1 | |
if point == cc1: | |
continue | |
dist = euclidian_distance(point, cc1) | |
if dist > max_dist: | |
max_dist = dist | |
cc2 = [point[0], point[1]] | |
print("cc2 is: [" + str(cc2[0]) + ", " + str(cc2[1]) + "]"); | |
# find cc3 | |
max_dist1 = 0 | |
max_dist2 = 0 | |
for i in range(0, len(data_set)): | |
point = data_set[i] | |
# skip cc1 | |
if point == cc1 or point == cc2: | |
continue | |
dist1 = euclidian_distance(point, cc1) | |
dist2 = euclidian_distance(point, cc2) | |
if dist1 > max_dist1 and dist2 > max_dist2: | |
max_dist1 = dist1 | |
max_dist2 = dist2 | |
cc3 = [point[0], point[1]] | |
print("cc3 is: [" + str(cc3[0]) + ", " + str(cc3[1]) + "]"); | |
iterations = 0 | |
abort = False | |
while abort != True: | |
iterations += 1 | |
# reset clusters | |
c1 = [] | |
c2 = [] | |
c3 = [] | |
for i in range(0, len(data_set)): | |
point = data_set[i] | |
assign_point_to_c(point) | |
print("[" + str(iterations) + "] ", end='') | |
print("instances in c1:") | |
for i in range(0, len(c1)): | |
point = c1[i] | |
print("[" + str(iterations) + "] ", end='') | |
print(str(i) + ": [" + str(point[0]) + ", " + str(point[1]) + "]"); | |
print("[" + str(iterations) + "] ", end='') | |
print("instances in c2:") | |
for i in range(0, len(c2)): | |
point = c2[i] | |
print("[" + str(iterations) + "] ", end='') | |
print(str(i) + ": [" + str(point[0]) + ", " + str(point[1]) + "]"); | |
print("[" + str(iterations) + "] ", end='') | |
print("instances in c3:") | |
for i in range(0, len(c3)): | |
point = c3[i] | |
print("[" + str(iterations) + "] ", end='') | |
print(str(i) + ": [" + str(point[0]) + ", " + str(point[1]) + "]"); | |
recompute_ccs() | |
print("[" + str(iterations) + "] ", end='') | |
print("recomputed cluster centers:") | |
print("[" + str(iterations) + "] ", end='') | |
print("cc1" + ": [" + str(cc1[0]) + ", " + str(cc1[1]) + "]"); | |
print("[" + str(iterations) + "] ", end='') | |
print("cc2" + ": [" + str(cc2[0]) + ", " + str(cc2[1]) + "]"); | |
print("[" + str(iterations) + "] ", end='') | |
print("cc3" + ": [" + str(cc3[0]) + ", " + str(cc3[1]) + "]"); | |
if iterations == 4: | |
abort = True |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment