Skip to content

Instantly share code, notes, and snippets.

@ImadDabbura
Last active November 25, 2022 13:36
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save ImadDabbura/6e2230b33373991aa3ccdbff6ebb3fd7 to your computer and use it in GitHub Desktop.
Save ImadDabbura/6e2230b33373991aa3ccdbff6ebb3fd7 to your computer and use it in GitHub Desktop.
import numpy as np
from numpy.linalg import norm
class Kmeans:
    '''K-means clustering on a 2-D NumPy array.

    The model alternates between assigning every sample to its nearest
    centroid (squared Euclidean distance) and recomputing each centroid as
    the mean of the samples assigned to it, until the centroids stop
    changing or ``max_iter`` iterations have run.

    Parameters
    ----------
    n_clusters : int
        Number of clusters (and centroids) to form.
    max_iter : int, default 100
        Upper bound on the number of assign/recompute iterations in ``fit``.
    random_state : int, default 123
        Seed for the generator that picks the initial centroids, so that
        repeated fits on the same data start from the same centroids.

    Attributes set by ``fit``
    -------------------------
    centroids : ndarray of shape (n_clusters, n_features)
    labels : ndarray of shape (n_samples,)
    error : float
        Sum of squared distances of samples to their assigned centroid.
    '''

    def __init__(self, n_clusters, max_iter=100, random_state=123):
        self.n_clusters = n_clusters
        self.max_iter = max_iter
        self.random_state = random_state

    def initializ_centroids(self, X):
        '''Return ``n_clusters`` distinct rows of ``X`` as starting centroids.

        The rows are chosen with a private generator seeded from
        ``random_state``; the global NumPy random state is neither read nor
        modified, so the result depends only on ``X`` and the seed.
        '''
        # Keep the seeded generator and draw from it. Creating it and then
        # calling np.random.permutation would ignore the seed entirely.
        rng = np.random.RandomState(self.random_state)
        random_idx = rng.permutation(X.shape[0])
        centroids = X[random_idx[:self.n_clusters]]
        return centroids

    def compute_centroids(self, X, labels):
        '''Return the mean of the samples assigned to each cluster.

        A cluster with no assigned samples keeps its previous centroid
        (``self.centroids``) when one of matching shape exists; otherwise
        its row is NaN, because there is nothing to average.
        '''
        centroids = np.zeros((self.n_clusters, X.shape[1]))
        previous = getattr(self, 'centroids', None)
        has_previous = (
            previous is not None and np.shape(previous) == centroids.shape
        )
        for k in range(self.n_clusters):
            members = X[labels == k, :]
            if members.shape[0] > 0:
                centroids[k, :] = np.mean(members, axis=0)
            elif has_previous:
                # Averaging zero rows would give NaN and corrupt every later
                # distance computation, so the centroid stays where it was.
                centroids[k, :] = previous[k]
            else:
                centroids[k, :] = np.nan
        return centroids

    def compute_distance(self, X, centroids):
        '''Return squared Euclidean distances, shape (n_samples, n_clusters).

        Column ``k`` holds the squared distance of every row of ``X`` to
        ``centroids[k]``.
        '''
        distance = np.zeros((X.shape[0], self.n_clusters))
        for k in range(self.n_clusters):
            row_norm = norm(X - centroids[k, :], axis=1)
            distance[:, k] = np.square(row_norm)
        return distance

    def find_closest_cluster(self, distance):
        '''Return, per sample, the index of the column with the smallest distance.'''
        return np.argmin(distance, axis=1)

    def compute_sse(self, X, labels, centroids):
        '''Return the sum of squared distances of samples to their centroid.'''
        distance = np.zeros(X.shape[0])
        for k in range(self.n_clusters):
            distance[labels == k] = norm(X[labels == k] - centroids[k], axis=1)
        return np.sum(np.square(distance))

    def fit(self, X):
        '''Cluster ``X`` and store ``centroids``, ``labels`` and ``error``.

        Iterates at most ``max_iter`` times and stops early once an
        iteration leaves every centroid exactly unchanged.
        '''
        self.centroids = self.initializ_centroids(X)
        for i in range(self.max_iter):
            old_centroids = self.centroids
            distance = self.compute_distance(X, old_centroids)
            self.labels = self.find_closest_cluster(distance)
            self.centroids = self.compute_centroids(X, self.labels)
            if np.all(old_centroids == self.centroids):
                break
        self.error = self.compute_sse(X, self.labels, self.centroids)

    def predict(self, X):
        '''Return the index of the nearest fitted centroid for each row of ``X``.'''
        distance = self.compute_distance(X, self.centroids)
        return self.find_closest_cluster(distance)
@spatiallysaying
Copy link

At line #53, we are getting the error 'undefined name old_centroids'.

@NassimF
Copy link

NassimF commented Nov 1, 2021

At line #53, we are getting the error 'undefined name old_centroids'.

I got the same error. I guess we should change old_centroids to 'self.centroids'.

@ImadDabbura
Copy link
Author

Thanks for the catch! Yes, it should be self.centroids. I'll fix it shortly.

@NassimF
Copy link

NassimF commented Nov 1, 2021

Thanks for the catch! Yes, it should be self.centroids. I'll fix it shortly.

You're welcome!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment