# hamletbatista/plot_macys_tommy_wordcloud.py

## plot_macys_tommy_wordcloud.py
from collections import Counter
import re

import nltk
from nltk.corpus import stopwords

from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
import matplotlib.pyplot as plt

nltk.download('stopwords')

def create_word_cloud(phrase_list, brand="tommy hilfiger", max_words=25):
  """Plot a word cloud of the most frequent non-brand words in the phrases.

  Each phrase is lowercased, the brand name is stripped out, and the
  remaining words are counted, skipping English stop words and purely
  numeric tokens. The most common words are then drawn with sizes based on
  their counts and shown with matplotlib.

  Args:
    phrase_list: Iterable of phrase strings (e.g. search queries).
    brand: Brand name removed from every phrase before counting, matched
      case-insensitively. Pass an empty string to keep phrases unchanged.
    max_words: Maximum number of distinct words to draw.

  Returns:
    None. The cloud is displayed via ``plt.show()``. Nothing is drawn when
    no words survive the filtering.
  """
  english_stopwords = set(stopwords.words('english'))
  brand = brand.lower() if brand else ""

  cnt = Counter()
  for phrase in phrase_list:
    # Lowercase first: the NLTK stop words are lowercase, and the brand
    # should be removed regardless of how it was capitalised.
    text = phrase.lower()
    if brand:
      text = text.replace(brand, "")  # remove brand to learn what people want
    # str.split() splits on any whitespace run and never yields empty tokens.
    cnt.update(
      word for word in text.split()
      if word not in english_stopwords and not word.isdigit()
    )

  top_words = dict(cnt.most_common(max_words))
  if not top_words:
    # WordCloud raises ValueError when it has no words to place.
    return

  # Feed the counts directly so word size reflects how often a word occurs;
  # white background instead of the default black one.
  word_cloud_obj = WordCloud(
    max_words=max_words, background_color="white"
  ).generate_from_frequencies(top_words)

  plt.imshow(word_cloud_obj, interpolation='bilinear')
  plt.axis("off")
  plt.show()
	from collections import Counter
	import re

	import nltk
	from nltk.corpus import stopwords

	from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
	import matplotlib.pyplot as plt

	nltk.download('stopwords')

	def create_word_cloud(phrase_list, brand="tommy hilfiger", max_words=25):
	  """Plot a word cloud of the most frequent non-brand words in the phrases.

	  Each phrase is lowercased, the brand name is stripped out, and the
	  remaining words are counted, skipping English stop words and purely
	  numeric tokens. The most common words are then drawn with sizes based on
	  their counts and shown with matplotlib.

	  Args:
	    phrase_list: Iterable of phrase strings (e.g. search queries).
	    brand: Brand name removed from every phrase before counting, matched
	      case-insensitively. Pass an empty string to keep phrases unchanged.
	    max_words: Maximum number of distinct words to draw.

	  Returns:
	    None. The cloud is displayed via ``plt.show()``. Nothing is drawn when
	    no words survive the filtering.
	  """
	  english_stopwords = set(stopwords.words('english'))
	  brand = brand.lower() if brand else ""

	  cnt = Counter()
	  for phrase in phrase_list:
	    # Lowercase first: the NLTK stop words are lowercase, and the brand
	    # should be removed regardless of how it was capitalised.
	    text = phrase.lower()
	    if brand:
	      text = text.replace(brand, "")  # remove brand to learn what people want
	    # str.split() splits on any whitespace run and never yields empty tokens.
	    cnt.update(
	      word for word in text.split()
	      if word not in english_stopwords and not word.isdigit()
	    )

	  top_words = dict(cnt.most_common(max_words))
	  if not top_words:
	    # WordCloud raises ValueError when it has no words to place.
	    return

	  # Feed the counts directly so word size reflects how often a word occurs;
	  # white background instead of the default black one.
	  word_cloud_obj = WordCloud(
	    max_words=max_words, background_color="white"
	  ).generate_from_frequencies(top_words)

	  plt.imshow(word_cloud_obj, interpolation='bilinear')
	  plt.axis("off")
	  plt.show()