Skip to content

Instantly share code, notes, and snippets.

View GeorgeSeif's full-sized avatar

George GeorgeSeif

View GitHub Profile
# Benchmark: train FAISS k-means with gpu=True and print the wall-clock time.
# Relies on `faiss`, `np`, `time` and the `data` array being in scope.
# The timer is started here so that `e - s` below is defined; it is placed
# before the setup lines, matching the other FAISS benchmark in this file.
s = time.time()
# Number of clusters.
k = 10
# Forwarded to faiss.Kmeans as `nredo`.
n_init = 10
# Forwarded to faiss.Kmeans as `niter`.
max_iter = 300
kmeans = faiss.Kmeans(d=data.shape[1], k=k, niter=max_iter, nredo=n_init, gpu=True)
# train() is handed a float32 copy of the data.
kmeans.train(data.astype(np.float32))
e = time.time()
print("Training time = {}".format(e - s))
# Benchmark: fit scikit-learn's KMeans (10 clusters) on `data` and print the
# wall-clock time of the fit.
clf = KMeans(n_clusters=10)
# The timer starts after the estimator is built, so only fit() is measured.
s = time.time()
clf.fit(data)
e = time.time()
print("Training time = {}".format(e - s))
# Restart the timer ahead of the prediction pass over the same data.
# NOTE(review): no matching end timestamp or print is visible after
# predict() -- the snippet looks cut off here; confirm against the full script.
s = time.time()
clf.predict(data)
import numpy as np
# Synthetic benchmark input: `data_size` rows of 3 values, each column drawn
# from a normal distribution with mean 100 and standard deviation 20.
data_size = 1000
data = np.random.normal(
    loc=(100, 100, 100),
    scale=(20, 20, 20),
    size=(data_size, 3),
)
# Benchmark: train FAISS k-means on `x_train` and print the wall-clock time.
# The timer starts first, so the measurement also covers constructing the
# Kmeans object and the float32 conversion below.
s = time.time()
# Number of clusters.
k = 10
# Forwarded to faiss.Kmeans as `nredo`.
n_init = 10
# Forwarded to faiss.Kmeans as `niter`.
max_iter = 300
kmeans = faiss.Kmeans(d=x_train.shape[1], k=k, niter=max_iter, nredo=n_init)
# train() is handed a float32 copy of the training data.
kmeans.train(x_train.astype(np.float32))
e = time.time()
print("Training time = {}".format(e - s))
# Benchmark: fit scikit-learn's KMeans (10 clusters) on `x_train` and print
# the wall-clock time of the fit.
clf = KMeans(n_clusters=10)
# The timer starts after the estimator is built, so only fit() is measured.
s = time.time()
clf.fit(x_train)
e = time.time()
print("Training time = {}".format(e - s))
# Restart the timer ahead of the prediction pass over `x_test`.
# NOTE(review): no matching end timestamp or print is visible after
# predict() -- the snippet looks cut off here; confirm against the full script.
s = time.time()
clf.predict(x_test)
import numpy as np
import time
from sklearn.cluster import KMeans
from keras.datasets import mnist
# Load the MNIST train/test splits, then flatten each sample into a single
# row, convert to float and divide by 255.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(x_train.shape[0], -1).astype(float) / 255.0
x_test = x_test.reshape(x_test.shape[0], -1).astype(float) / 255.0
# CPU version only
# Installs the faiss-cpu package from the "pytorch" conda channel.
conda install faiss-cpu -c pytorch
# Additional Python packages
# Each package is installed with its own pip3 invocation.
pip3 install numpy
pip3 install scikit-learn
pip3 install tensorflow
pip3 install keras
# Create a conda environment named "faiss", then activate it.
conda create --name faiss
conda activate faiss
from sklearn.decomposition import LatentDirichletAllocation as LDA
# Number of topics (LDA components) to fit.
NUM_TOPICS = 3

# Build the LDA model and fit it in a single expression; fit() hands back the
# fitted estimator, so `lda` is the same object as before.
# `document_word_matrix` is a 2D array: one row per document, one column per
# word, and each cell holds the count of that word within that document.
lda = LDA(
    n_components=NUM_TOPICS,
    n_jobs=-1,
).fit(document_word_matrix)
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
def get_tf_idf(vectorizer, vectors=None):
    """Return the vectorized documents as a DataFrame with one column per term.

    Args:
        vectorizer: Fitted vectorizer exposing ``get_feature_names_out()`` or,
            on older scikit-learn releases, ``get_feature_names()``.
        vectors: Matrix produced by the vectorizer; any object with a
            ``todense()`` method. When omitted, the module-level ``vectors``
            is used, which is what earlier versions of this function read.

    Returns:
        A ``pandas.DataFrame`` built from the dense rows of ``vectors``, with
        the vectorizer's feature names as column labels.

    Raises:
        NameError: If ``vectors`` is omitted and no module-level ``vectors``
            exists.
    """
    if vectors is None:
        # Backward compatibility: callers that pass only the vectorizer keep
        # getting the module-level matrix.
        try:
            vectors = globals()["vectors"]
        except KeyError:
            raise NameError("name 'vectors' is not defined") from None

    # get_feature_names() was removed in scikit-learn 1.2; prefer its
    # replacement and fall back only when the new method is absent.
    names_getter = getattr(vectorizer, "get_feature_names_out", None)
    if names_getter is None:
        names_getter = vectorizer.get_feature_names
    feature_names = names_getter()

    dense_rows = vectors.todense().tolist()
    return pd.DataFrame(dense_rows, columns=feature_names)