Last active
May 31, 2022 01:03
-
-
Save j-adamczyk/9312f41892badf9571ad4fd44718b5af to your computer and use it in GitHub Desktop.
K-Means clustering with the faiss library
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import faiss | |
import numpy as np | |
class FaissKMeans:
    """K-means clustering backed by ``faiss.Kmeans`` with a scikit-learn-like API.

    Attributes:
        n_clusters: Number of centroids to learn.
        n_init: Number of restarts of the clustering.
        max_iter: Number of iterations per restart.
        kmeans: The underlying ``faiss.Kmeans`` object, or ``None`` before
            ``fit`` has been called.
        cluster_centers_: Centroids taken from ``kmeans.centroids`` after
            ``fit``; ``None`` before.
        inertia_: Sum of the nearest-centroid distances reported by the
            trained index over all rows passed to ``fit``; ``None`` before.
    """

    def __init__(self, n_clusters=8, n_init=10, max_iter=300):
        """Store the hyper-parameters; nothing is trained until ``fit``.

        Args:
            n_clusters: Passed to ``faiss.Kmeans`` as ``k``.
            n_init: Passed to ``faiss.Kmeans`` as ``nredo``.
            max_iter: Passed to ``faiss.Kmeans`` as ``niter``.
        """
        self.n_clusters = n_clusters
        self.n_init = n_init
        self.max_iter = max_iter
        self.kmeans = None
        self.cluster_centers_ = None
        self.inertia_ = None

    def fit(self, X, y=None):
        """Train the clustering on ``X`` and record centroids and inertia.

        Args:
            X: 2-D array-like of samples (rows) by features (columns).
            y: Ignored. Accepted only so the signature matches the
                scikit-learn estimator convention.

        Returns:
            ``self``, so calls can be chained.
        """
        # faiss expects C-contiguous float32 input; this converts only when
        # needed instead of unconditionally copying.
        data = np.ascontiguousarray(X, dtype=np.float32)

        self.kmeans = faiss.Kmeans(
            d=data.shape[1],
            k=self.n_clusters,
            niter=self.max_iter,
            nredo=self.n_init,
        )
        self.kmeans.train(data)
        self.cluster_centers_ = self.kmeans.centroids

        # Do not use ``kmeans.obj[-1]`` as the inertia: it is the training
        # objective, which faiss may evaluate on a subsample of the data
        # (see ``max_points_per_centroid``), so it is not comparable across
        # different values of ``k``. Measure against every row instead.
        # With the default (non-spherical) index these are squared L2
        # distances.
        nearest_dist, _ = self.kmeans.index.search(data, 1)
        self.inertia_ = float(nearest_dist.sum(dtype=np.float64))
        return self

    def predict(self, X):
        """Return the index of the nearest centroid for each row of ``X``.

        Args:
            X: 2-D array-like with the same number of columns as the
                training data.

        Returns:
            The index array returned by the faiss search for one neighbour,
            i.e. one column per row of ``X`` (shape ``(n_samples, 1)``).

        Raises:
            AttributeError: If called before ``fit``.
        """
        if self.kmeans is None:
            raise AttributeError(
                "FaissKMeans instance is not fitted yet; call fit() first."
            )
        queries = np.ascontiguousarray(X, dtype=np.float32)
        return self.kmeans.index.search(queries, 1)[1]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Could you please check that the inertia is reported correctly for kmeans clustering with faiss? For me it goes up with the number of clusters, which is suspicious... Thanks!