Skip to content

Instantly share code, notes, and snippets.

@hamletbatista
Last active April 21, 2019 01:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hamletbatista/e2b60c727bc78c83a0abd639e71f044b to your computer and use it in GitHub Desktop.
Save hamletbatista/e2b60c727bc78c83a0abd639e71f044b to your computer and use it in GitHub Desktop.
from collections import Counter
import re
import nltk
from nltk.corpus import stopwords
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
import matplotlib.pyplot as plt
# Make sure the NLTK stopwords corpus is available locally. Only hit the
# network when it is actually missing, so repeated runs (or offline runs with
# the corpus already installed) do not trigger a download attempt.
try:
    nltk.data.find('corpora/stopwords')
except LookupError:
    nltk.download('stopwords')
def create_word_cloud(phrase_list, brand="tommy hilfiger", max_words=25):
    """Plot a word cloud of the most frequent words in ``phrase_list``.

    Each phrase has the brand name stripped out, is split on whitespace, and
    its words are counted after dropping English stopwords and purely numeric
    tokens. The ``max_words`` most common words are then drawn with sizes
    proportional to how often they occur.

    Args:
        phrase_list: Iterable of strings (e.g. search phrases).
        brand: Substring removed from every phrase before counting
            (case-sensitive). Pass an empty string or ``None`` to keep
            phrases untouched.
        max_words: Maximum number of distinct words shown in the cloud.

    Raises:
        ValueError: If no words remain after filtering, since an empty word
            cloud cannot be drawn.
    """
    english_stopwords = set(stopwords.words('english'))

    cnt = Counter()
    for phrase in phrase_list:
        if brand:
            # remove brand to learn what people want
            phrase = phrase.replace(brand, "")
        # str.split() with no argument splits on any run of whitespace and
        # never yields empty strings, so no explicit length check is needed.
        cnt.update(
            word for word in phrase.split()
            if word not in english_stopwords and not word.isdigit()
        )

    if not cnt:
        raise ValueError(
            "create_word_cloud needs at least one word left after filtering"
        )

    # Pass the real counts to WordCloud. Joining the top words into a string
    # would make every word appear exactly once and lose the relative
    # frequencies the cloud is supposed to show.
    frequencies = dict(cnt.most_common(max_words))
    # White background instead of WordCloud's default black one.
    word_cloud_obj = WordCloud(
        max_words=max_words, background_color="white"
    ).generate_from_frequencies(frequencies)

    plt.imshow(word_cloud_obj, interpolation='bilinear')
    plt.axis("off")
    plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment