## dataman-git

## new_texts_vector
# Print each entry of new_texts_vector on its own line.
for doc in new_texts_vector:
    print(doc)

## new_corpus
# Bare expression: when run as a notebook cell this echoes the value of
# new_corpus; it has no effect when executed as a plain script.
new_corpus

## predicting
# Example sentences used as new input text for prediction.
new_texts = [
    "The new Windows operating system will be released",
    "The system uses the most difficult technologies and techniques",
    "The U.S. consumer prices dropped for the first time in ten months",
]

from gensim.parsing.preprocessing import preprocess_string
new_texts_tokenized = []
for doc in new_texts:

## print_topics()
# Pretty-print the entry at index 2 of lda_bow.print_topics().
# NOTE(review): the earlier comment said "the 10 topics", but only a single
# entry is printed here — confirm whether all topics were meant to be shown.
import pprint as pp
pp.pprint(lda_bow.print_topics()[2])

## p
# Create the topic-distance visualization.
# Turns on pyLDAvis notebook rendering, then prepares the visualization data
# from the BOW LDA model, the BOW corpus and the gensim dictionary.
# Relies on `pyLDAvis` and `gensimvis` having been imported by another cell.
pyLDAvis.enable_notebook()
p = gensimvis.prepare(lda_bow, bow_corpus, gensim_dictionary)
# Bare expression so the notebook displays the prepared visualization.
p

## dict_file2
# Load the model trained before.
from gensim.models import LdaModel
from gensim.test.utils import datapath
from gensim.corpora import Dictionary

# Load the LDA model trained on BOW data from the mounted Drive path.
# NOTE(review): `datapath` is imported from gensim.test.utils, which looks like
# a helper for gensim's own test data — confirm it is appropriate for an
# absolute path, or pass the path string to LdaModel.load directly.
bow_file = datapath("/content/gdrive/My Drive/data/gensim/LDA_bow_151")
lda_bow = LdaModel.load(bow_file)

# Load the LDA model trained on TF-IDF data

## pyLDAvis
#!pip install pyLDAvis
# pyLDAvis plus its gensim adapter (aliased as `gensimvis`).
# NOTE(review): pyLDAvis.sklearn is not used in the visible cells, and newer
# pyLDAvis releases reportedly ship it as pyLDAvis.lda_model — confirm against
# the installed version.
import pyLDAvis
import pyLDAvis.sklearn
import pyLDAvis.gensim_models as gensimvis
# Enable pyLDAvis output rendering inside the notebook.
pyLDAvis.enable_notebook()

## plt.plot(numTopicsList
# Plot coherenceList_UMass against numTopicsList, write the figure to the
# Drive folder, then display it. The figure is saved before show() is called.
from matplotlib import pyplot as plt
plt.plot(numTopicsList, coherenceList_UMass)
# The target file name has no extension; presumably matplotlib falls back to
# its default image format — TODO confirm the resulting file name.
plt.savefig("/content/gdrive/My Drive/data/gensim/LDA_bow_coherence")
plt.show()

## coherenceList_UMass2
# Bare expression: echoes coherenceList_UMass when run as a notebook cell.
coherenceList_UMass

## coherence_UMass
from gensim.models import LdaModel
from gensim.models.coherencemodel import CoherenceModel
from gensim.test.utils import datapath
# Seed NumPy's global random generator with a fixed value.
# NOTE(review): `np` is not imported in the visible part of this file —
# confirm that `import numpy as np` runs before this line.
np.random.seed(42)
def coherence_UMass(corpus, name, k):
   # Load the model trained before.
   tempfile = datapath("/content/gdrive/My Drive/data/gensim/LDA_" + name + "_" + str(k))
   lda = LdaModel.load(tempfile)
   coherence = CoherenceModel(model=lda,
                              texts=text_tokenized,
	# predicting new text which is in text dataframe
	new_texts = [
	'The new Windows operating system will be released',
	'The system uses the most difficult technologies and techniques',
	'The U.S. consumer prices dropped for the first time in ten months'
	]

	from gensim.parsing.preprocessing import preprocess_string
	new_texts_tokenized = []
	for doc in new_texts:
	# Print the Keyword in the 10 topics
	import pprint as pp
	pp.pprint(lda_bow.print_topics()[2])
	#Creating Topic Distance Visualization
	pyLDAvis.enable_notebook()
	p = gensimvis.prepare(lda_bow, bow_corpus, gensim_dictionary)
	p
	# Load the model trained before.
	from gensim.models import LdaModel
	from gensim.test.utils import datapath
	from gensim.corpora import Dictionary

	# Load the LDA model trained on BOW data
	bow_file = datapath("/content/gdrive/My Drive/data/gensim/LDA_bow_151")
	lda_bow = LdaModel.load(bow_file)

	# Load the LDA model trained on TF-IDF data
	#!pip install pyLDAvis
	import pyLDAvis
	import pyLDAvis.sklearn
	import pyLDAvis.gensim_models as gensimvis
	pyLDAvis.enable_notebook()
	from matplotlib import pyplot as plt
	plt.plot(numTopicsList, coherenceList_UMass)
	plt.savefig("/content/gdrive/My Drive/data/gensim/LDA_bow_coherence")
	plt.show()
	from gensim.models import LdaModel
	from gensim.models.coherencemodel import CoherenceModel
	from gensim.test.utils import datapath
	np.random.seed(42)
	def coherence_UMass(corpus, name, k):
	# Load the model trained before.
	tempfile = datapath("/content/gdrive/My Drive/data/gensim/LDA_" + name + "_" + str(k))
	lda = LdaModel.load(tempfile)
	coherence = CoherenceModel(model=lda,
	texts=text_tokenized,