Skip to content

Instantly share code, notes, and snippets.

@dataman-git
dataman-git / new_texts_vector
Created February 15, 2023 23:37
new_texts_vector
# Print every entry of new_texts_vector, one per line.
for doc in new_texts_vector:
    print(doc)
@dataman-git
dataman-git / new_corpus
Created February 15, 2023 23:37
new_corpus
# Bare expression: as the last line of a notebook cell this presumably
# displays the value of new_corpus.
new_corpus
@dataman-git
dataman-git / predicting
Created February 15, 2023 23:37
predicting
from gensim.parsing.preprocessing import preprocess_string

# New documents to run prediction on, given inline as a plain list of strings.
new_texts = [
    'The new Windows operating system will be released',
    'The system uses the most difficult technologies and techniques',
    'The U.S. consumer prices dropped for the first time in ten months',
]
# Starts empty; presumably filled with one tokenized entry per text by the
# loop that follows.
new_texts_tokenized = []
for doc in new_texts:
@dataman-git
dataman-git / print_topics()
Created February 15, 2023 23:34
print_topics()
# Pretty-print a single entry -- the element at index 2 -- of what
# lda_bow.print_topics() returns when called with its default arguments.
import pprint as pp
pp.pprint(lda_bow.print_topics()[2])
@dataman-git
dataman-git / p
Last active February 18, 2023 16:32
p
# Create the topic distance visualization with pyLDAvis.
# enable_notebook() is called first, presumably so the result renders inline.
pyLDAvis.enable_notebook()
# prepare() receives the LDA model, the corpus and the dictionary, in that order.
p = gensimvis.prepare(lda_bow, bow_corpus, gensim_dictionary)
# Bare expression: presumably shows the prepared visualization as the cell output.
p
@dataman-git
dataman-git / dict_file2
Last active February 18, 2023 16:31
dict_file2
# Load the model trained before.
from gensim.models import LdaModel
from gensim.test.utils import datapath
from gensim.corpora import Dictionary
# Load the LDA model trained on BOW data
# NOTE(review): datapath() comes from gensim.test.utils, i.e. it is a helper
# for gensim's own test data. With an absolute path like the one below it
# presumably returns the path unchanged -- confirm, and consider passing the
# path to LdaModel.load() directly.
bow_file = datapath("/content/gdrive/My Drive/data/gensim/LDA_bow_151")
lda_bow = LdaModel.load(bow_file)
# Load the LDA model trained on TF-IDF data
@dataman-git
dataman-git / pyLDAvis
Last active February 18, 2023 16:30
pyLDAvis
# Install step, left commented out.
#!pip install pyLDAvis
import pyLDAvis
# NOTE(review): newer pyLDAvis releases reportedly renamed this module to
# pyLDAvis.lda_model -- verify against the installed version. It is not used
# by name anywhere in this snippet.
import pyLDAvis.sklearn
# gensimvis is the alias under which the gensim adapter is used (gensimvis.prepare).
import pyLDAvis.gensim_models as gensimvis
# Switch pyLDAvis to notebook mode (presumably so visualizations render inline).
pyLDAvis.enable_notebook()
@dataman-git
dataman-git / plt.plot(numTopicsList
Created February 15, 2023 23:32
plt.plot(numTopicsList
from matplotlib import pyplot as plt
# Plot coherenceList_UMass (y) against numTopicsList (x); presumably one
# UMass coherence score per candidate number of topics.
plt.plot(numTopicsList, coherenceList_UMass)
# Save before calling show(): depending on the backend, show() may leave the
# current figure empty, so the order of these two calls matters.
plt.savefig("/content/gdrive/My Drive/data/gensim/LDA_bow_coherence")
plt.show()
@dataman-git
dataman-git / coherenceList_UMass2
Created February 15, 2023 23:32
coherenceList_UMass2
# Bare expression: as the last line of a notebook cell this presumably
# displays the value of coherenceList_UMass.
coherenceList_UMass
@dataman-git
dataman-git / coherence_UMass
Created February 15, 2023 23:32
coherence_UMass
from gensim.models import LdaModel
from gensim.models.coherencemodel import CoherenceModel
from gensim.test.utils import datapath
# Seed NumPy's global random number generator with a fixed value.
# NOTE(review): `np` is not imported in this snippet; it must already be in
# scope (e.g. `import numpy as np` in an earlier cell) or this line raises
# NameError.
np.random.seed(42)
def coherence_UMass(corpus, name, k):
# Load the model trained before.
tempfile = datapath("/content/gdrive/My Drive/data/gensim/LDA_" + name + "_" + str(k))
lda = LdaModel.load(tempfile)
coherence = CoherenceModel(model=lda,
texts=text_tokenized,