Skip to content

Instantly share code, notes, and snippets.

@dataman-git
dataman-git / Coherence
Created February 15, 2023 23:31
Coherence
# Score the bag-of-words LDA model with the 'c_v' coherence measure and
# print the resulting value.
from gensim.models.coherencemodel import CoherenceModel

coherence_lda_bow = CoherenceModel(
    model=lda_bow,
    texts=text_tokenized,
    dictionary=gensim_dictionary,
    coherence='c_v',
)
coherence_lda = coherence_lda_bow.get_coherence()
print('\nCoherence Score: ', coherence_lda)
@dataman-git
dataman-git / build_lda_models
Created February 15, 2023 23:31
build_lda_models
# Sweep over candidate topic counts, training and saving one LDA model on
# corpus_tfidf per value of k via build_lda_models().
np.random.seed(42)  # seed NumPy's global RNG once, before the sweep starts
numTopicsList = np.arange(1, 500, 50)  # k = 1, 51, 101, ..., 451
for k in numTopicsList:
    # The loop body must be indented; the flattened original was a SyntaxError.
    build_lda_models(corpus_tfidf, "tfidf", k)
@dataman-git
dataman-git / numTopicsList
Created February 15, 2023 23:30
numTopicsList
np.random.seed(42)


def build_lda_models(input_data, name, k):
    """Train an LDA model with ``k`` topics on ``input_data`` and save it.

    Relies on ``LdaModel`` and ``gensim_dictionary`` being defined at module
    level before this function is called.

    Args:
        input_data: Corpus handed directly to ``LdaModel``.
        name: Label for the corpus variant (the caller in this file passes
            "tfidf"); it becomes part of the saved file name.
        k: Number of topics to fit; also becomes part of the file name.

    Returns:
        None. The trained model is written to
        "/content/gdrive/My Drive/data/gensim/LDA_<name>_<k>".
    """
    from gensim.test.utils import datapath

    # Train the model on the corpus.
    lda = LdaModel(input_data, num_topics=k, id2word=gensim_dictionary)

    # Save the model under a name that encodes the corpus variant and k.
    # NOTE(review): datapath() is a gensim test helper; an absolute path
    # presumably passes through it unchanged -- confirm before relying on it.
    model_path = datapath(f"/content/gdrive/My Drive/data/gensim/LDA_{name}_{k}")
    lda.save(model_path)
@dataman-git
dataman-git / lda_tfidf2
Created February 15, 2023 23:29
lda_tfidf2
# Pretty-print the keyword lists that lda_tfidf.print_topics() reports.
import pprint as pp

pp.PrettyPrinter().pprint(lda_tfidf.print_topics())
#doc_lda = lda_bow[bow_corpus]
@dataman-git
dataman-git / lda_bow2
Created February 15, 2023 23:28
lda_bow2
# Pretty-print the keyword lists that lda_bow.print_topics() reports.
import pprint as pp

pp.PrettyPrinter().pprint(lda_bow.print_topics())
#doc_lda = lda_bow[bow_corpus]
@dataman-git
dataman-git / tfidf_file
Created February 15, 2023 23:27
tfidf_file
from gensim.test.utils import datapath

# Resolve the destination path for each trained model first...
bow_file = datapath("/content/gdrive/My Drive/data/gensim/LDA_bow_AGnews")
tfidf_file = datapath("/content/gdrive/My Drive/data/gensim/LDA_tfidf_AGnews")

# ...then write the BOW model and the TF-IDF model to disk, in that order.
lda_bow.save(bow_file)
lda_tfidf.save(tfidf_file)
@dataman-git
dataman-git / lda_tfidf
Created February 15, 2023 23:27
lda_tfidf
# Fit a 10-topic LDA model on corpus_tfidf, using gensim_dictionary to map
# token ids back to words.
lda_tfidf = LdaModel(corpus=corpus_tfidf, num_topics=10, id2word=gensim_dictionary)
@dataman-git
dataman-git / lda_bow
Created February 15, 2023 23:27
lda_bow
from gensim.models import LdaModel

# Fit a 10-topic LDA model on bow_corpus, using gensim_dictionary to map
# token ids back to words.
lda_bow = LdaModel(corpus=bow_corpus, num_topics=10, id2word=gensim_dictionary)
@dataman-git
dataman-git / id_words[0:3]
Created February 15, 2023 23:26
id_words[0:3]
# Bare expression: evaluates the slice [0:3] of id_words. It has no effect in
# a script; presumably meant to be echoed by an interactive/notebook session.
id_words[0:3]
@dataman-git
dataman-git / bow_corpus[0]
Created February 15, 2023 23:25
bow_corpus[0]
# Bare expression: evaluates element 0 of bow_corpus. It has no effect in a
# script; presumably meant to be echoed by an interactive/notebook session.
bow_corpus[0]