Created
November 5, 2015 22:47
-
-
Save jarv/3818c5de8c05b74b41a7 to your computer and use it in GitHub Desktop.
Generate a word cloud from the TPP text files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from os import path | |
from scipy.misc import imread | |
import matplotlib.pyplot as plt | |
import random | |
import os | |
from wordcloud import WordCloud, STOPWORDS | |
# Build a word cloud image from every ``.txt`` file in the current directory
# and save it as ``out.png``.

# Read each text file through a context manager so its handle is closed
# promptly, and collect the pieces for one join instead of repeated string
# concatenation.  Sorting the names keeps the input order stable between
# runs, because ``os.listdir`` returns entries in arbitrary order.
chunks = []
for f in sorted(os.listdir(".")):
    if f.endswith(".txt"):
        with open(f) as handle:
            chunks.append(handle.read())
# Join with a newline so the last word of one file cannot fuse with the first
# word of the next when a file has no trailing newline.
text = "\n".join(chunks)

# Start from a copy of the library's stop-word set so the shared default is
# not mutated, then add words that should be kept out of the cloud.
stopwords = STOPWORDS.copy()
stopwords.update([
    "shall",
    "non",
    "article",
    "subject",
    "parties",
    "party",
    "may",
    "provide",
    "version",
    "paragraph",
])
# Iterating a string yields its characters, so this adds every single
# lowercase letter as a stop word (presumably to drop list enumerators such
# as "(a)", "(b)" -- confirm against the input texts).
stopwords.update("abcdefghijklmnopqrstuvwxyz")

# ``random_state=1`` fixes the layout's random seed so repeated runs on the
# same input produce the same image.
wc = WordCloud(
    max_words=1000,
    width=2048,
    height=1024,
    stopwords=stopwords,
    margin=10,
    random_state=1,
).generate(text)

# Render the cloud to an array and draw it on the current matplotlib axes.
# NOTE(review): no ``plt.show()`` appears in this section, so the figure is
# only displayed if something else shows it.
default_colors = wc.to_array()
plt.imshow(default_colors)

# Write the rendered cloud to disk.
wc.to_file("out.png")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment