Skip to content

Instantly share code, notes, and snippets.

@avriiil
Last active January 30, 2023 15:16
Show Gist options
  • Save avriiil/c5e72bd0654d10a875ebeb6715869a95 to your computer and use it in GitHub Desktop.
Save avriiil/c5e72bd0654d10a875ebeb6715869a95 to your computer and use it in GitHub Desktop.
# import the topic-coherence scorer (CoherenceModel) from gensim
from gensim.models import CoherenceModel
# define function to get words in topics
def get_topics_lists(model, top_clusters, n_words):
    """Return the highest-scoring words of each selected cluster.

    Args:
        model: Object exposing ``cluster_word_distribution``, which is indexed
            by cluster and yields a mapping of word -> numeric score
            (presumably a fitted GSDMM model holding per-cluster word
            counts -- confirm against the caller).
        top_clusters: Iterable of cluster indices to extract topics for.
        n_words: Maximum number of words to keep per cluster.

    Returns:
        A list with one entry per cluster in ``top_clusters``, in the same
        order. Each entry is a list of at most ``n_words`` words sorted by
        descending score; words with equal scores keep the mapping's
        iteration order because the sort is stable.
    """
    topics = []
    for cluster in top_clusters:
        word_scores = model.cluster_word_distribution[cluster]
        # Rank (word, score) pairs by score, highest first.
        ranked = sorted(
            word_scores.items(), key=lambda pair: pair[1], reverse=True
        )
        # Keep only the words of the top n_words pairs.
        topics.append([word for word, _ in ranked[:n_words]])
    return topics
# Collect the top 20 words of every selected cluster; these word lists are
# the topics handed to the coherence model.
topics = get_topics_lists(gsdmm, top_index, 20)

# Set up the coherence evaluation of the GSDMM topics with the 'c_v' measure.
cm_gsdmm = CoherenceModel(
    topics=topics,
    dictionary=dictionary,
    corpus=bow_corpus,
    texts=docs,
    coherence="c_v",
)

# Compute the coherence value and print it.
coherence_gsdmm = cm_gsdmm.get_coherence()
print(coherence_gsdmm)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment