Skip to content

Instantly share code, notes, and snippets.

@grohith327
Last active June 9, 2018 15:05
Show Gist options
  • Save grohith327/05f4e6d6ef6d24a76cbb8f5e38a8acd3 to your computer and use it in GitHub Desktop.
Save grohith327/05f4e6d6ef6d24a76cbb8f5e38a8acd3 to your computer and use it in GitHub Desktop.
## K-Means Algorithm
import random
import numpy as np
## Randomly place the centroids of the three clusters.
## Every centroid has four coordinates; each coordinate is a whole number
## (stored as a float) drawn from that feature's own [low, high) range.
c1, c2, c3 = (
    [
        float(np.random.randint(low, high))
        for low, high in ((4, 8), (1, 5), (1, 7), (0, 3))
    ]
    for _ in range(3)
)
## Refine the centroids for at most 100 passes, stopping early on convergence.
## NOTE(review): train_data is defined elsewhere; this loop only requires that
## it yields points whose first four entries (indices 0..3) are numeric.
epochs = 1
while epochs <= 100:
    ## Assignment step: bucket every point under its nearest centroid
    centroids = (c1, c2, c3)
    members = ([], [], [])
    for point in train_data:
        ## Euclidean distance from the point to each centroid (four features)
        dists = [
            sum((center[k] - point[k]) ** 2 for k in range(4)) ** 0.5
            for center in centroids
        ]
        ## index() returns the first minimum, so a tie goes to the
        ## lower-numbered cluster
        members[dists.index(min(dists))].append(point)
    cluster_1, cluster_2, cluster_3 = (np.array(group) for group in members)

    ## Update step: each centroid moves to the per-feature mean of its members
    updated = []
    for center, group in zip(centroids, (cluster_1, cluster_2, cluster_3)):
        if len(group) == 0:
            ## An empty cluster has no mean; its centroid stays where it was
            updated.append(center)
        else:
            size = float(len(group))
            updated.append([sum(group[:, k]) / size for k in range(4)])
    c1, c2, c3 = updated

    ## If no centroid moved during this pass, the algorithm has converged
    if all(old == new for old, new in zip(centroids, updated)):
        print("Converged")
        break
    print(epochs)
    epochs += 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment