This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Score the LDA model trained on the BOW corpus using the 'c_v' coherence measure.
from gensim.models.coherencemodel import CoherenceModel

coherence_lda_bow = CoherenceModel(
    model=lda_bow,
    texts=text_tokenized,
    dictionary=gensim_dictionary,
    coherence='c_v',
)
coherence_lda = coherence_lda_bow.get_coherence()
print('\nCoherence Score: ', coherence_lda)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Seed NumPy's global RNG once, before the whole sweep starts.
np.random.seed(42)

# Candidate topic counts: 1, 51, 101, ..., 451.
numTopicsList = np.arange(1, 500, 50)

# Build one LDA model on the TF-IDF corpus for every candidate topic count.
for k in numTopicsList:
    build_lda_models(corpus_tfidf, "tfidf", k)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
np.random.seed(42)


def build_lda_models(input_data, name, k, random_state=None):
    """Train an LDA model with ``k`` topics and save it to disk.

    Args:
        input_data: Corpus handed to ``LdaModel`` as training data.
        name: Label for the corpus variant (e.g. ``"tfidf"``); it becomes
            part of the saved file name and must be a string.
        k: Number of topics to train; also part of the saved file name.
        random_state: Optional seed forwarded to ``LdaModel`` so a single
            call can be reproduced on its own. The default ``None`` leaves
            gensim's default seeding untouched, as before.

    Returns:
        The trained ``LdaModel``, so callers can inspect or score it without
        reloading it from disk. Earlier versions returned ``None``.
    """
    lda = LdaModel(input_data, num_topics=k, id2word=gensim_dictionary,
                   random_state=random_state)

    # The destination is an absolute path, so it is used directly.
    # gensim.test.utils.datapath only resolves names relative to gensim's
    # bundled test-data directory and added nothing here.
    # NOTE(review): this location presumably exists only with Google Drive
    # mounted in Colab; confirm before running elsewhere.
    model_path = "/content/gdrive/My Drive/data/gensim/LDA_" + name + "_" + str(k)
    lda.save(model_path)
    return lda
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Pretty-print the keyword weights of the topics in the TF-IDF LDA model
# (the model is trained with num_topics=10).
import pprint as pp

pp.pprint(lda_tfidf.print_topics())
# doc_lda = lda_bow[bow_corpus]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Pretty-print the keyword weights of the topics in the BOW LDA model
# (the model is trained with num_topics=10).
import pprint as pp

pp.pprint(lda_bow.print_topics())
# doc_lda = lda_bow[bow_corpus]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Not used below any more; the import is kept in case other cells rely on the name.
from gensim.test.utils import datapath

# Both destinations are absolute paths, so they are used directly.
# datapath only resolves names relative to gensim's bundled test-data
# directory and contributed nothing for absolute paths.

# Save the LDA model trained on BOW data.
bow_file = "/content/gdrive/My Drive/data/gensim/LDA_bow_AGnews"
lda_bow.save(bow_file)

# Save the LDA model trained on TF-IDF data.
tfidf_file = "/content/gdrive/My Drive/data/gensim/LDA_tfidf_AGnews"
lda_tfidf.save(tfidf_file)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fit a 10-topic LDA model on corpus_tfidf, mapping token ids to words
# through the shared dictionary.
lda_tfidf = LdaModel(
    corpus=corpus_tfidf,
    num_topics=10,
    id2word=gensim_dictionary,
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fit a 10-topic LDA model on bow_corpus, mapping token ids to words
# through the shared dictionary.
from gensim.models import LdaModel

lda_bow = LdaModel(
    corpus=bow_corpus,
    num_topics=10,
    id2word=gensim_dictionary,
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Notebook display: the first three entries of id_words.
id_words[:3]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Notebook display: the first entry of bow_corpus.
# NOTE(review): presumably the bag-of-words vector of the first document; confirm where bow_corpus is built.
bow_corpus[0]