# Source: GitHub Gist braun-steven/63d367605c82026ee20da47c29793fbc
# Author: @braun-steven -- created June 5, 2017 16:43
import re
import pandas as pd
from pandas import DataFrame
import random
import sys
from os import path
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
from wordcloud import WordCloud, STOPWORDS
def wordcloud():
    """Build, save and display a masked word cloud from a CSV text column.

    Reads ``data/result_1.csv`` (relative to the current working directory),
    joins the last 3000 entries of its ``text`` column, normalises them and
    renders a word cloud shaped by ``../data/mask.png`` (relative to this
    script's directory). The image is written to ``wordcloud.png`` next to
    this script and then shown, followed by a second figure showing the mask.

    Returns:
        None. The function is used for its side effects (file output and
        matplotlib windows).
    """
    X = pd.read_csv('data/result_1.csv')

    # Only the last 3000 rows are used. Missing values are skipped and the
    # rest coerced to str so that str.join never sees a float NaN.
    texts = X['text'].iloc[-3000:].dropna().astype(str)
    joined = '\n'.join(texts).lower()

    # Keep only a-z, 0-9 and '#'; everything else becomes a space. Then
    # collapse runs of spaces with a real regex -- str.replace('\s+', ' ')
    # matches the literal characters "\s+" and therefore never did anything.
    text = re.sub(r'[^a-z0-9#]', ' ', joined)
    text = re.sub(r' +', ' ', text)

    d = path.dirname(__file__)
    mask = np.array(Image.open(path.join(d, "../data/mask.png")))

    # Extend the default stop words with URL fragments and a handle that
    # would otherwise show up in the cloud (presumably the texts are tweets
    # -- confirm against the data source).
    stopwords = set(STOPWORDS) | {'https', 'http', 'realdonaldtrump', 'co'}

    # plt.get_cmap is used instead of plt.cm.get_cmap, which newer
    # Matplotlib releases deprecated and then removed.
    cmap = plt.get_cmap('Dark2')

    wc = WordCloud(background_color="white", max_words=250, mask=mask,
                   stopwords=stopwords, colormap=cmap)

    # generate word cloud
    wc.generate(text)

    # store to file
    wc.to_file(path.join(d, "wordcloud.png"))

    # show the cloud, then the mask in a separate figure
    plt.imshow(wc, interpolation='bilinear')
    plt.axis("off")
    plt.figure()
    plt.imshow(mask, cmap=cmap, interpolation='bilinear')
    plt.axis("off")
    plt.show()
if __name__ == '__main__':
    # Generate the word cloud only when run as a script, not on import.
    wordcloud()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment