-
-
Save CyrilRJK/a215353cd66b9244d9ea9866294b6b2d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def determine_optimal_artm_topics(data,
                                  tokenized_documents,
                                  batch_vectorizer,
                                  min_topics,
                                  max_topics):
    """Return the topic count whose ARTM model scores the highest coherence.

    One model is trained for every topic count in
    ``range(min_topics, max_topics)`` (``max_topics`` itself is excluded),
    its topics are scored with ``get_coherence``, and the count with the
    highest score is returned. Ties keep the smallest count.

    Args:
        data: Unused by this function; kept so existing callers keep working.
        tokenized_documents: Passed unchanged to ``get_coherence`` together
            with each model's topics.
        batch_vectorizer: Object exposing a ``dictionary`` attribute; it is
            handed to ``get_artm_model`` and to the model's ``fit_offline``.
        min_topics: First topic count to try (inclusive).
        max_topics: Upper bound of the search (exclusive).

    Returns:
        The topic count with the highest coherence among those tried.
        If no score compares greater than negative infinity (e.g. every
        score is NaN), ``min_topics`` is returned.

    Raises:
        ValueError: If ``min_topics >= max_topics``, i.e. there is nothing
            to evaluate.
    """
    if min_topics >= max_topics:
        raise ValueError(
            'min_topics ({}) must be smaller than max_topics ({})'.format(
                min_topics, max_topics))

    # Start from -inf rather than 0 so that a candidate is still selected
    # when every coherence score is zero or negative.
    best_coherence = float('-inf')
    best_n_topics = min_topics

    for n_topics in range(min_topics, max_topics):
        # Initialize topic names.
        topic_names = ['topic_{}'.format(i) for i in range(n_topics)]
        # Initialize and train the ARTM model.
        model_artm = get_artm_model(topic_names, batch_vectorizer.dictionary)
        model_artm.fit_offline(batch_vectorizer=batch_vectorizer,
                               num_collection_passes=10)
        # Extract topic representations and score them.
        topics = get_artm_topics(model_artm)
        coherence = get_coherence(topics, tokenized_documents)
        # Strict comparison: on ties the earlier (smaller) count wins.
        if coherence > best_coherence:
            best_coherence = coherence
            best_n_topics = n_topics

    return best_n_topics
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment