# bvarghese1/kmeans_cluster.py

## kmeans_cluster.py
import os

import numpy as np
from sklearn.cluster import KMeans

from cluster import Cluster

class KmeansCluster(Cluster):
    """Cluster feature vectors with k-means and persist unit-length centroids."""

    def __init__(self, path):
        """Forward ``path`` to the ``Cluster`` base-class initializer.

        NOTE(review): ``cluster`` reads ``self.path``; presumably the base
        class stores ``path`` under that attribute -- confirm in ``Cluster``.
        """
        super(KmeansCluster, self).__init__(path)

    # Implementation of the base class abstract method
    def cluster(self, features, num_clusters):
        """Fit k-means on ``features`` and save the L2-normalized centroids.

        Each centroid is the mean of the rows of ``features`` assigned to
        that cluster, scaled to unit Euclidean length. The resulting
        ``(num_clusters, features.shape[1])`` array is written with
        ``np.save`` to ``kmeans_centroids`` under ``self.path`` (``np.save``
        appends the ``.npy`` suffix). Nothing is returned.

        Args:
            features: 2-D array with one sample per row; ``features.shape[1]``
                is used as the centroid length.
            num_clusters: Number of clusters, passed to ``KMeans`` as
                ``n_clusters``.
        """
        # Create the clustering algorithm (k-means) and train it on the data
        clustering_algo = KMeans(n_clusters=num_clusters)
        clustering_algo.fit(features)

        # Extract the assigned cluster labels
        labels = clustering_algo.labels_

        # Generate centroids using the features and assigned cluster labels.
        # The empty float32 block keeps the stacked dtype at least float32 and
        # the shape well defined; rows are stacked once after the loop.
        rows = [np.empty((0, features.shape[1]), 'float32')]
        for i in range(num_clusters):
            members = labels == i
            count = np.sum(members)
            total = np.dot(members, features)
            # A cluster without members sums to a zero vector; leave it as is
            # instead of dividing 0 by 0 and storing NaN.
            rows.append(total / count if count else total)
        data = np.vstack(rows)

        # Normalize the centroids row-wise; zero-length rows stay zero
        norms = np.sqrt(np.sum(data * data, axis=1, keepdims=True))
        norms[norms == 0] = 1
        centroids = data / norms

        # Save the centroids
        np.save(os.path.join(self.path, "kmeans_centroids"), centroids)
	import os
	import numpy as np
	from sklearn.cluster import Kmeans

	from cluster import Cluster

	class KmeansCluster(Cluster):
	    """Cluster feature vectors with k-means and persist unit-length centroids."""

	    # NOTE(review): this file already defines ``KmeansCluster`` earlier with
	    # normal indentation; this tab-indented repeat looks like a paste
	    # artifact -- consider deleting it together with the repeated imports.

	    def __init__(self, path):
	        """Forward ``path`` to the ``Cluster`` base-class initializer.

	        NOTE(review): ``cluster`` reads ``self.path``; presumably the base
	        class stores ``path`` under that attribute -- confirm in ``Cluster``.
	        """
	        super(KmeansCluster, self).__init__(path)

	    # Implementation of the base class abstract method
	    def cluster(self, features, num_clusters):
	        """Fit k-means on ``features`` and save the L2-normalized centroids.

	        Each centroid is the mean of the rows of ``features`` assigned to
	        that cluster, scaled to unit Euclidean length. The resulting
	        ``(num_clusters, features.shape[1])`` array is written with
	        ``np.save`` to ``kmeans_centroids`` under ``self.path`` (``np.save``
	        appends the ``.npy`` suffix). Nothing is returned.

	        Args:
	            features: 2-D array with one sample per row; ``features.shape[1]``
	                is used as the centroid length.
	            num_clusters: Number of clusters, passed to ``KMeans`` as
	                ``n_clusters``.
	        """
	        # Create the clustering algorithm (k-means) and train it on the data
	        clustering_algo = KMeans(n_clusters=num_clusters)
	        clustering_algo.fit(features)

	        # Extract the assigned cluster labels
	        labels = clustering_algo.labels_

	        # Generate centroids using the features and assigned cluster labels.
	        # The empty float32 block keeps the stacked dtype at least float32 and
	        # the shape well defined; rows are stacked once after the loop.
	        rows = [np.empty((0, features.shape[1]), 'float32')]
	        for i in range(num_clusters):
	            members = labels == i
	            count = np.sum(members)
	            total = np.dot(members, features)
	            # A cluster without members sums to a zero vector; leave it as is
	            # instead of dividing 0 by 0 and storing NaN.
	            rows.append(total / count if count else total)
	        data = np.vstack(rows)

	        # Normalize the centroids row-wise; zero-length rows stay zero
	        norms = np.sqrt(np.sum(data * data, axis=1, keepdims=True))
	        norms[norms == 0] = 1
	        centroids = data / norms

	        # Save the centroids
	        np.save(os.path.join(self.path, "kmeans_centroids"), centroids)