kajal yadav techykajal

## matrix.py
# Load the coherence scores saved in 'coherence_matrix_10.csv'.
mat = pd.read_csv('coherence_matrix_10.csv')
# reset_index returns a new DataFrame; assign it back so the call is not a no-op.
mat = mat.reset_index(drop=True)
mat

## LDA.py
# Reload the saved 16-topic model and prepare its pyLDAvis visualisation.
# NOTE(review): the coherence-plot comment in this file says 4 topics scored
# highest, yet the 16-topic model is loaded here -- confirm which is intended.
LDA = gensim.models.ldamulticore.LdaMulticore
# Plain string literal: the file name has no placeholders, so no f-prefix is needed.
ldamodel = LDA.load("ldamodel_for_16topics_Run_10")
pyLDAvis.gensim.prepare(ldamodel, doc_term_matrix, dictionary)

## visual_coherence.py
# Plot the coherence score stored in `mat` against the number of topics.
# x covers the same topic counts (2..24) that the training/scoring loops use.
x = range(2, 25)
plt.plot(x, mat['coherence_score'])
plt.xlabel("Num Topics")
plt.ylabel("Coherence score")
# The labels must be a sequence: ("coherence_values") is just a string, which
# legend() would consume character by character and label the line "c".
plt.legend(("coherence_values",), loc='best')
plt.show()  # Author's observation: Num Topics = 4 has the highest coherence score.

## mat_read.py
# Load the coherence scores saved in 'coherence_matrix_10.csv'.
mat = pd.read_csv('coherence_matrix_10.csv')
# reset_index returns a new DataFrame; assign it back so the call is not a no-op.
mat = mat.reset_index(drop=True)
mat

## dataframe.py
# Write the collected coherence tuples to CSV, one row per model, without the index column.
coherence_columns = ['LDA_Model', 'alpha', 'eta', 'coherence_score']
coherence_frame = pd.DataFrame(coherence, columns=coherence_columns)
coherence_frame.to_csv('coherence_matrix_10.csv', index=False)

## coherence_score.py
# Compute the c_v coherence of every saved model (2..24 topics) and collect
# one (num_topics, alpha, eta, score) tuple per model.
coherence = []
LDA = gensim.models.ldamulticore.LdaMulticore
for k in range(2, 25):
    ldamodel = LDA.load(f"ldamodel_for_{k}topics_Run_10")
    cm = gensim.models.coherencemodel.CoherenceModel(
        model=ldamodel,
        texts=Complete_Content,
        dictionary=dictionary,
        coherence='c_v',
    )
    score = cm.get_coherence()
    coherence.append((k, 'default', 'default', score))

## prepare_LDA_model.py
# Train one LDA model for each topic count k in 2..24, save it to disk and
# print its topics.
LDA = gensim.models.ldamulticore.LdaMulticore
for k in range(2, 25):
    print(f'Round: {k}')
    ldamodel = LDA(
        doc_term_matrix,
        num_topics=k,
        id2word=dictionary,
        passes=20,
        iterations=100,
        chunksize=10000,
        eval_every=10,
        random_state=20,  # fixed seed, same value for every k
    )
    ldamodel.save(f"ldamodel_for_{k}topics_Run_10")
    pprint(ldamodel.print_topics())


## load_dict.py
# function to load dictionary and doc to term matrix from the file
def load_dict_and_docterm_matirx(dict_path, matrix_path):
    """
    This function will load and return
    dictionary and doc term matrix

    Arguments:
        dict_path: path to corpus dictionary
        matrix_path: path to corpus document to term matrix


## create_dictionary.py
# define the function to create dictionary and document to term matrix
def create_dic_and_docterm_matrix(Complete_Content, dict_file_path, matrix_file_path):
    """
    This function will create corpus dictionary and document to term matrix

    Argument:
        Complete_Content: tokenized text corpus
        dict_file_path: file path to save dictionary
        matrix_file_path: file path to save matrix
    returns:

## total_tokens.py
# Count every token in the corpus by flattening the per-article word lists.
tokens = []
for article in DF['Updated_content']:
    tokens.extend(article)
len(tokens)  # Author's note: about 150,000 (1.5 lakh) tokens in the corpus.
	# Load the coherence scores saved in 'coherence_matrix_10.csv'.
	mat = pd.read_csv('coherence_matrix_10.csv')
	# reset_index returns a new DataFrame; assign it back so the call is not a no-op.
	mat = mat.reset_index(drop=True)
	mat
	# Reload the saved 16-topic model and prepare its pyLDAvis visualisation.
	LDA = gensim.models.ldamulticore.LdaMulticore
	# Plain string literal: the file name has no placeholders, so no f-prefix is needed.
	ldamodel = LDA.load("ldamodel_for_16topics_Run_10")
	pyLDAvis.gensim.prepare(ldamodel, doc_term_matrix, dictionary)
	# Plot the coherence score stored in `mat` against the number of topics (2..24).
	x = range(2, 25)
	plt.plot(x, mat['coherence_score'])
	plt.xlabel("Num Topics")
	plt.ylabel("Coherence score")
	# The labels must be a sequence: ("coherence_values") is just a string, which
	# legend() would consume character by character and label the line "c".
	plt.legend(("coherence_values",), loc='best')
	plt.show()  # Author's observation: Num Topics = 4 has the highest coherence score.
	# Compute the c_v coherence of every saved model (2..24 topics) and collect
	# one (num_topics, alpha, eta, score) tuple per model.
	coherence = []
	for k in range(2, 25):
		# Loop body re-indented: without nesting the `for` statement is a syntax error.
		LDA = gensim.models.ldamulticore.LdaMulticore
		ldamodel = LDA.load(f"ldamodel_for_{k}topics_Run_10")
		cm = gensim.models.coherencemodel.CoherenceModel(model=ldamodel, texts=Complete_Content, dictionary=dictionary, coherence='c_v')
		coherence.append((k, 'default', 'default', cm.get_coherence()))
	# Train one LDA model for each topic count k in 2..24, save it to disk and
	# print its topics.
	for k in range(2, 25):
		# Loop body re-indented: without nesting the `for` statement is a syntax error.
		print('Round: '+str(k))
		LDA = gensim.models.ldamulticore.LdaMulticore
		ldamodel = LDA(doc_term_matrix, num_topics=k, id2word=dictionary, passes=20, iterations=100,
			chunksize=10000, eval_every=10, random_state=20)
		ldamodel.save(f"ldamodel_for_{k}topics_Run_10")
		pprint(ldamodel.print_topics())
	# function to load dictionary and doc to term matrix from the file
	def load_dict_and_docterm_matirx(dict_path, matrix_path):
	"""
	This function will load and return
	dictionary and doc term matrix

	Arguments:
	dict_path: path to corpus dictionary
	matrix_path: path to corpus document to term matrix
	# define the function to create dictionary and document to term matrix
	def create_dic_and_docterm_matrix(Complete_Content, dict_file_path, matrix_file_path):
	"""
	This function will create corpus dictionary and document to term matrix

	Argument:
	Complete_Content: tokenized text corpus
	dict_file_path: file path to save dictionary
	matrix_file_path: file path to save matrix
	returns:
	# Count every token in the corpus by walking each article's word list.
	tokens = []
	for article in DF['Updated_content']:
		# Nested loop bodies re-indented: the flattened form is a syntax error.
		for word in article:
			tokens.append(word)
	len(tokens)  # Author's note: about 150,000 (1.5 lakh) tokens in the corpus.