Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
# Processing the reviews titles data
# Build one cleaned string per review title: letters only, lower-cased,
# with Portuguese stop words removed.
comments_titles = []
stop_words = set(stopwords.words('portuguese'))
# Matches every run of characters that are NOT letters (non-word characters,
# digits and underscores). For str patterns `\W` and `\d` are Unicode-aware,
# so accented letters are preserved. The earlier "[^a-zA-Z]" pattern replaced
# them with spaces, which split words such as "não" into fragments that no
# longer matched the stop-word list.
non_letters = re.compile(r"[\W\d_]+")
for words in review_data_title:
    only_letters = non_letters.sub(" ", words)
    tokens = nltk.word_tokenize(only_letters)  # tokenize the sentences
    # Lower-case before filtering so capitalised stop words are removed too.
    lower_case = [token.lower() for token in tokens]
    filtered_result = [token for token in lower_case if token not in stop_words]
    comments_titles.append(' '.join(filtered_result))
# Using wordcloud to visualize the comments titles
# All cleaned titles are merged into a single string for WordCloud.generate.
unique_string = " ".join(comments_titles)
wordcloud = WordCloud(width=2000, height=1000, background_color='white').generate(unique_string)
plt.figure(figsize=(20, 12))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")  # hide the axes; only the rendered image is of interest
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment