# CyrilRJK/artm_determine_topics.py Secret

## artm_determine_topics.py
def determine_optimal_artm_topics(data,
                                  tokenized_documents,
                                  batch_vectorizer,
                                  min_topics,
                                  max_topics):
    """Return the candidate topic count whose ARTM model scores the highest coherence.

    For every ``n_topics`` in ``range(min_topics, max_topics)`` a fresh model
    is built with ``get_artm_model``, trained with ``fit_offline`` for 10
    collection passes, and scored with ``get_coherence``.

    Args:
        data: Not used by this function; kept so existing callers keep working.
        tokenized_documents: Forwarded unchanged to ``get_coherence``.
        batch_vectorizer: Its ``dictionary`` attribute is passed to
            ``get_artm_model``; the object itself is passed to ``fit_offline``.
        min_topics: First candidate topic count (inclusive).
        max_topics: End of the candidate range (exclusive, as with ``range``).

    Returns:
        The candidate with the highest coherence (the first one on ties), or
        ``0`` when no candidate produced a comparable score, e.g. when the
        range is empty.
    """
    # Start below every real score: beginning at 0 meant that zero or negative
    # coherence values never won, leaving the result unbound at the return.
    best_coherence = float('-inf')
    best_n_topics = 0

    for n_topics in range(min_topics, max_topics):
        topic_names = ['topic_{}'.format(i) for i in range(n_topics)]  # one name per topic
        model_artm = get_artm_model(topic_names, batch_vectorizer.dictionary)  # fresh model per candidate
        model_artm.fit_offline(batch_vectorizer=batch_vectorizer, num_collection_passes=10)  # train model
        topics = get_artm_topics(model_artm)  # topic representations to be scored
        coherence = get_coherence(topics, tokenized_documents)

        # Strict comparison keeps the earliest (smallest) candidate on ties.
        if coherence > best_coherence:
            best_coherence = coherence
            best_n_topics = n_topics

    return best_n_topics
	def determine_optimal_artm_topics(
			data,
			tokenized_documents,
			batch_vectorizer,
			min_topics,
			max_topics):
		"""Return the candidate topic count whose ARTM model scores the highest coherence.

		For every ``n_topics`` in ``range(min_topics, max_topics)`` a fresh model
		is built with ``get_artm_model``, trained with ``fit_offline`` for 10
		collection passes, and scored with ``get_coherence``.

		Args:
			data: Not used by this function; kept so existing callers keep working.
			tokenized_documents: Forwarded unchanged to ``get_coherence``.
			batch_vectorizer: Its ``dictionary`` attribute is passed to
				``get_artm_model``; the object itself is passed to ``fit_offline``.
			min_topics: First candidate topic count (inclusive).
			max_topics: End of the candidate range (exclusive, as with ``range``).

		Returns:
			The candidate with the highest coherence (the first one on ties), or
			``0`` when no candidate produced a comparable score, e.g. when the
			range is empty.
		"""
		# Start below every real score: beginning at 0 meant that zero or negative
		# coherence values never won, leaving the result unbound at the return.
		best_coherence = float('-inf')
		best_n_topics = 0

		for n_topics in range(min_topics, max_topics):
			topic_names = ['topic_{}'.format(i) for i in range(n_topics)]  # one name per topic
			model_artm = get_artm_model(topic_names, batch_vectorizer.dictionary)  # fresh model per candidate
			model_artm.fit_offline(batch_vectorizer=batch_vectorizer, num_collection_passes=10)  # train model
			topics = get_artm_topics(model_artm)  # topic representations to be scored
			coherence = get_coherence(topics, tokenized_documents)

			# Strict comparison keeps the earliest (smallest) candidate on ties.
			if coherence > best_coherence:
				best_coherence = coherence
				best_n_topics = n_topics

		return best_n_topics