Skip to content

Instantly share code, notes, and snippets.

@djokester
Last active August 8, 2018 17:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save djokester/454d7c4f2b93c04026c28770a3b0670e to your computer and use it in GitHub Desktop.
Save djokester/454d7c4f2b93c04026c28770a3b0670e to your computer and use it in GitHub Desktop.
from sklearn.cluster import KMeans
from sklearn import metrics
import pylab as pl
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
# Cluster the Doc2Vec document vectors with k-means and print the document
# tags that fall into each cluster.
#
# NOTE(review): `model` is expected to be defined before this point; it looks
# like a trained gensim Doc2Vec model (it is accessed via `model.docvecs`) --
# confirm against the rest of the file.
kmeans_model = KMeans(n_clusters=60, init='k-means++', max_iter=100)

# Fit exactly once. `fit` returns the estimator itself, so `X` is the fitted
# model (name kept for backward compatibility), not the data.
X = kmeans_model.fit(model.docvecs.doctag_syn0)

# Derive both label containers from the same fit. The previous code called
# `fit_predict` a second time; with no fixed random_state that re-runs the
# clustering and can yield labels inconsistent with `labels_`.
labels = kmeans_model.labels_.tolist()
l = kmeans_model.labels_

# Map each document tag to the id of the cluster it was assigned to.
word_centroid_map = dict(zip(model.docvecs.offset2doctag, l))

# Group the tags by cluster id in a single pass instead of rescanning (and
# re-copying) the whole mapping once per cluster.
tags_by_cluster = [[] for _ in range(kmeans_model.n_clusters)]
for tag, cluster_id in word_centroid_map.items():
    tags_by_cluster[cluster_id].append(tag)

# Print Cluster List -- iterate over the clusters that actually exist rather
# than a hard-coded 100, which printed empty lists for ids 60..99.
for cluster, words in enumerate(tags_by_cluster):
    print("\nCluster %d" % cluster)
    print(words)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment