Skip to content

Instantly share code, notes, and snippets.

@beta-decay
Created May 10, 2018 16:35
Show Gist options
  • Save beta-decay/04a2c76f3b84fe5bc0f245c8cecbc429 to your computer and use it in GitHub Desktop.
Save beta-decay/04a2c76f3b84fe5bc0f245c8cecbc429 to your computer and use it in GitHub Desktop.
from os import path
from scipy.misc import imread
import matplotlib.pyplot as plt
import random
import re
from wordcloud import WordCloud, STOPWORDS
import numpy as np
from PIL import Image
# Build a word cloud from the text in "combined.txt", shaped by the mask image
# "russia_mask.png", then save it to "putin_word_cloud.png" and display it.

# Read the whole corpus. The read must sit inside the `with` body so that the
# file handle is still open when it runs and is closed right afterwards.
# NOTE(review): no encoding is passed, so the platform default is used —
# confirm the encoding of combined.txt and pass it explicitly if it differs.
with open("combined.txt", "r") as f:
    text = f.read()

# The pattern below only lists lower-case letters, so it is applied to the
# lower-cased text.
text = text.lower()

# Strip URL-like tokens before counting words. The scheme group is optional,
# so any dotted token (e.g. "example.com") is removed too, and every match
# must end in one non-whitespace character (the trailing \S).
text_nourl = re.sub(
    r'(https?:\/\/)?([\da-z\.-]+)\.([a-z\.]{2,6})([\/\w\.-]*)*\/?\S',
    '',
    text,
)

# Start from wordcloud's built-in stop words and add a few extra words that
# should not appear in the cloud.
stopwords = set(STOPWORDS)
stopwords.update({"colleagues", "must", "need", "will"})

# The mask image is loaded as an array and handed to WordCloud.
mask = np.array(Image.open("russia_mask.png"))

# NOTE: wordcloud's documentation states that width/height are ignored when a
# mask is supplied (the mask's shape is used); they are kept here unchanged.
wordcloud = WordCloud(
    background_color="white",
    mask=mask,
    max_words=2000,
    width=3000,
    height=2000,
    stopwords=stopwords,
).generate(text_nourl)

# Render without axes, write the figure to disk, then open the viewer.
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
# savefig is called before show() on the same figure.
plt.savefig('putin_word_cloud.png', dpi=1000)
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment