Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
def freq_words(x, terms=30):
    """Plot a bar chart of the most frequent whitespace-separated words.

    All strings in ``x`` are joined with spaces and split on whitespace,
    the resulting tokens are counted with ``nltk.FreqDist``, and the
    ``terms`` most frequent ones are drawn with ``sns.barplot`` (count on
    the x-axis, word on the y-axis).

    Args:
        x: Iterable of strings, e.g. a pandas Series of text.
        terms: Number of top words to include in the plot.
    """
    # str.join accepts any iterable of strings, so no intermediate list copy
    # of x is needed.
    all_words = ' '.join(x).split()
    fdist = nltk.FreqDist(all_words)

    # Only the `terms` most frequent words are plotted, so build the frame
    # from those directly instead of materialising the whole vocabulary and
    # then selecting rows. Ties keep first-seen order, as nlargest did.
    d = pd.DataFrame(fdist.most_common(terms), columns=['word', 'count'])

    # visualize words and frequencies
    ax = sns.barplot(data=d, x="count", y="word")
    ax.set(ylabel='Word')
# plot the 100 most frequent words of the 'clean_plot' column
freq_words(movies_new['clean_plot'], terms=100)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment