Skip to content

Instantly share code, notes, and snippets.

@tarlanahad
Created January 9, 2020 09:57
Show Gist options
  • Save tarlanahad/8161ab5668f544136081c63b43a0dc24 to your computer and use it in GitHub Desktop.
Save tarlanahad/8161ab5668f544136081c63b43a0dc24 to your computer and use it in GitHub Desktop.
import numpy as np
def get_distance(x1, x2):
    """Return the Euclidean distance between two points.

    Args:
        x1: Coordinates of the first point (any array-like of numbers).
        x2: Coordinates of the second point; must have the same shape
            as ``x1``.

    Returns:
        The distance as a NumPy float (``0.0`` for two empty points).

    Raises:
        ValueError: If ``x1`` and ``x2`` do not have the same shape.
    """
    # Convert to float up front so unsigned-integer inputs cannot wrap
    # around when subtracted.
    a = np.asarray(x1, dtype=float)
    b = np.asarray(x2, dtype=float)
    if a.shape != b.shape:
        # A length mismatch would otherwise either fail late or silently
        # ignore the extra coordinates of the longer point.
        raise ValueError(
            f"points must have the same shape, got {a.shape} and {b.shape}"
        )
    # Reduce over the first axis only, so the result is a scalar for
    # ordinary 1-D points.
    return np.sqrt(np.sum((a - b) ** 2, axis=0))
def kmeans(X, k, max_iters):
    """Cluster the rows of ``X`` with Lloyd's k-means algorithm.

    Initial centroids are ``k`` distinct rows of ``X`` drawn with NumPy's
    global random generator (seed it with ``np.random.seed`` for
    reproducible results). Each iteration assigns every point to its
    nearest centroid and then moves each centroid to the mean of its
    assigned points. Iteration stops when no centroid moves or after
    ``max_iters`` iterations. One progress line is printed per iteration.

    Clusters that end up with no points are dropped, so fewer than ``k``
    centroids may be returned (for example when ``X`` contains duplicate
    rows that were both picked as initial centroids).

    Args:
        X: 2-D array-like of shape ``(n_points, n_features)``.
        k: Number of clusters to start with; ``1 <= k <= n_points``.
        max_iters: Maximum number of iterations; must be at least 1.

    Returns:
        A tuple ``(centroids, stds)``. ``centroids`` is an array with one
        row per surviving cluster. ``stds`` is a list with one scalar per
        cluster: the standard deviation taken over all coordinate values
        of the cluster's points (not per feature).

    Raises:
        ValueError: If ``X`` is not 2-D, ``k < 1``, ``max_iters < 1``, or
            ``k`` exceeds the number of points.
    """
    points = np.asarray(X)
    if points.ndim != 2:
        raise ValueError("X must be a 2-D array of shape (n_points, n_features)")
    if k < 1:
        raise ValueError("k must be at least 1")
    if max_iters < 1:
        # Without at least one assignment step there are no clusters to
        # compute the returned standard deviations from.
        raise ValueError("max_iters must be at least 1")
    if not np.issubdtype(points.dtype, np.floating):
        # Work in floats so integer inputs cannot overflow or wrap around
        # in the distance computation.
        points = points.astype(float)

    centroids = points[np.random.choice(len(points), k, replace=False)]

    for _ in range(max_iters):
        # Squared distances, shape (n_points, n_centroids). The square
        # root is skipped because it does not change which centroid is
        # nearest.
        sq_dists = np.stack(
            [((points - c) ** 2).sum(axis=1) for c in centroids], axis=1
        )
        labels = sq_dists.argmin(axis=1)

        clusters = [points[labels == j] for j in range(len(centroids))]
        # Drop clusters that received no points.
        clusters = [members for members in clusters if len(members)]
        new_centroids = np.array([members.mean(axis=0) for members in clusters])

        # Converged only if every centroid coordinate is unchanged.
        # Comparing the sums of all coordinates would report convergence
        # whenever movements happen to cancel out.
        if new_centroids.shape == centroids.shape:
            shift = float(np.abs(new_centroids - centroids).max())
        else:
            # A cluster was dropped this round, so the centroids changed.
            shift = float('inf')
        centroids = new_centroids

        print('K-MEANS: ', shift)
        if shift == 0.0:
            break

    return centroids, [np.std(members) for members in clusters]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment