This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Print each entry of new_texts_vector on its own line.
for doc in new_texts_vector:
    print(doc)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Bare expression: presumably the last line of a notebook cell, where it
# displays new_corpus -- it has no effect when run as a plain script.
new_corpus
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# New texts to run prediction on (per the original note, these are in the
# text dataframe).
new_texts = [
    'The new Windows operating system will be released',
    'The system uses the most difficult technologies and techniques',
    'The U.S. consumer prices dropped for the first time in ten months',
]
from gensim.parsing.preprocessing import preprocess_string | |
new_texts_tokenized = [] | |
for doc in new_texts: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Pretty-print the keywords of one topic: the entry at index 2 of the list
# returned by lda_bow.print_topics().
# NOTE(review): the original comment spoke of "the 10 topics", but only a
# single entry is printed here -- confirm which was intended.
import pprint as pp
pp.pprint(lda_bow.print_topics()[2])
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Create the topic distance visualization from the BOW LDA model, its corpus
# and the gensim dictionary.
pyLDAvis.enable_notebook()
p = gensimvis.prepare(lda_bow, bow_corpus, gensim_dictionary)
# Bare expression: presumably left as the last line of a notebook cell so the
# prepared visualization is rendered inline.
p
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the model trained before.
from gensim.models import LdaModel
from gensim.test.utils import datapath
from gensim.corpora import Dictionary

# Load the LDA model trained on BOW data.
# NOTE(review): datapath() is gensim's test-data path helper; given an
# absolute path it appears to return that path unchanged -- confirm, and
# consider passing the path to LdaModel.load() directly.
bow_file = datapath("/content/gdrive/My Drive/data/gensim/LDA_bow_151")
lda_bow = LdaModel.load(bow_file)
# Load the LDA model trained on TF-IDF data |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!pip install pyLDAvis
import pyLDAvis
# NOTE(review): newer pyLDAvis releases (3.4+) appear to have renamed
# pyLDAvis.sklearn to pyLDAvis.lda_model -- confirm against the installed
# version before relying on this import.
import pyLDAvis.sklearn
import pyLDAvis.gensim_models as gensimvis

# Enable inline rendering of pyLDAvis output in the notebook.
pyLDAvis.enable_notebook()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from matplotlib import pyplot as plt

# Plot coherenceList_UMass against numTopicsList, write the figure to disk,
# then display it. Saving happens before show() so the saved file contains
# the drawn figure.
plt.plot(numTopicsList, coherenceList_UMass)
plt.savefig("/content/gdrive/My Drive/data/gensim/LDA_bow_coherence")
plt.show()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Bare expression: presumably the last line of a notebook cell, where it
# displays coherenceList_UMass -- it has no effect when run as a plain script.
coherenceList_UMass
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from gensim.models import LdaModel | |
from gensim.models.coherencemodel import CoherenceModel | |
from gensim.test.utils import datapath | |
# Seed NumPy's global random number generator with a fixed value.
np.random.seed(42)
def coherence_UMass(corpus, name, k): | |
# Load the model trained before. | |
tempfile = datapath("/content/gdrive/My Drive/data/gensim/LDA_" + name + "_" + str(k)) | |
lda = LdaModel.load(tempfile) | |
coherence = CoherenceModel(model=lda, | |
texts=text_tokenized, |