Skip to content

Instantly share code, notes, and snippets.

@jarv
Created November 5, 2015 22:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jarv/3818c5de8c05b74b41a7 to your computer and use it in GitHub Desktop.
Save jarv/3818c5de8c05b74b41a7 to your computer and use it in GitHub Desktop.
Generate a word cloud from the TPP text files (*.txt) in the current directory
from os import path
from scipy.misc import imread
import matplotlib.pyplot as plt
import random
import os
from wordcloud import WordCloud, STOPWORDS
# Gather the contents of every ".txt" file in the current working directory
# into a single string that is later fed to the word cloud generator.
#
# os.listdir() returns entries in arbitrary order, so the names are sorted to
# make the combined text identical from run to run.
text_parts = []
for f in sorted(os.listdir(".")):
    if f.endswith(".txt"):
        # The context manager closes each file as soon as it has been read
        # instead of leaving the handle open until garbage collection.
        with open(f) as handle:
            text_parts.append(handle.read())

# Join with a newline so the last word of one file cannot fuse with the first
# word of the next file when a file lacks a trailing newline.
text = "\n".join(text_parts)
# Words excluded from the cloud: the library's default stopwords, a handful of
# extra words chosen for this corpus, and every single lowercase ASCII letter.
extra_stopwords = (
    "shall",
    "non",
    "article",
    "subject",
    "parties",
    "party",
    "may",
    "provide",
    "version",
    "paragraph",
)
# union() builds a new set, so the library-level STOPWORDS is left unmodified;
# iterating the alphabet string contributes one entry per letter.
stopwords = STOPWORDS.union(extra_stopwords, "abcdefghijklmnopqrstuvwxyz")
# Settings for the word cloud: at most 1000 words on a 2048x1024 canvas with a
# margin of 10, using the custom stopword set and a fixed random_state.
cloud_options = {
    "max_words": 1000,
    "width": 2048,
    "height": 1024,
    "stopwords": stopwords,
    "margin": 10,
    "random_state": 1,
}
wc = WordCloud(**cloud_options).generate(text)

# Hand the rendered array to matplotlib, then write the same cloud to disk.
default_colors = wc.to_array()
plt.imshow(default_colors)
wc.to_file("out.png")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment