Skip to content

Instantly share code, notes, and snippets.

@avriiil
Last active November 27, 2021 14:46
Show Gist options
  • Save avriiil/af64ea79cc676a05d35a04560c5d58cb to your computer and use it in GitHub Desktop.
Save avriiil/af64ea79cc676a05d35a04560c5d58cb to your computer and use it in GitHub Desktop.
# importing libraries
import pandas as pd
import numpy as np
import gensim
from gensim import corpora, models
# Build an LDA topic model from tweet text and print the resulting topics.
# NOTE(review): `df` is not defined in this snippet; it is expected to already
# exist with a `tweet_text` column — confirm against the preceding code.

# cast tweets to numpy array
# NOTE(review): gensim's Dictionary and doc2bow operate on tokenized documents
# (a sequence of string tokens per document), so `tweet_text` presumably holds
# pre-tokenized tweets — TODO confirm.
docs = df.tweet_text.to_numpy()

# create dictionary of all words in all documents
dictionary = gensim.corpora.Dictionary(docs)

# filter extreme cases out of dictionary: per the gensim documentation this
# drops tokens appearing in fewer than `no_below` documents or in more than
# the `no_above` fraction of documents, then keeps at most `keep_n` tokens
dictionary.filter_extremes(no_below=15, no_above=0.5, keep_n=100000)

# create bag-of-words corpus (one doc2bow result per document)
bow_corpus = [dictionary.doc2bow(doc) for doc in docs]

# create LDA model using preferred hyperparameters
# random_state is fixed so repeated runs use the same seed
lda_model = gensim.models.LdaMulticore(
    bow_corpus,
    num_topics=5,
    id2word=dictionary,
    passes=4,
    workers=2,
    random_state=21,
)

# Save LDA model to disk
# NOTE(review): placeholder — set this to a real file path before running;
# an empty string is not a usable destination.
path_to_model = ""
lda_model.save(path_to_model)

# for each topic, print words occurring in that topic
# (-1 asks print_topics for every topic rather than a subset)
for idx, topic in lda_model.print_topics(-1):
    print('Topic: {} \nWords: {}'.format(idx, topic))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment