Skip to content

Instantly share code, notes, and snippets.

@icaoberg
Created December 29, 2016 04:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save icaoberg/fefe302e145074011239f536693ceba0 to your computer and use it in GitHub Desktop.
Save icaoberg/fefe302e145074011239f536693ceba0 to your computer and use it in GitHub Desktop.
Word cloud from the text of Little Women
from os import path
import os
from scipy.misc import imread
import matplotlib.pyplot as plt
import random
import urllib
from wordcloud import WordCloud, STOPWORDS
# Output resolution knob; change this to your taste.
# NOTE(review): dpi is not referenced anywhere else in the script as shown.
dpi = 1000
# Total word count of little_women.txt, obtained with the `wc` command-line
# tool. Passed below as WordCloud's max_words.
number_of_words = 188986
def grey_color_func(word, font_size, position, orientation, random_state=None, **kwargs):
    """Return a random light-grey colour as an HSL string.

    The parameters follow the shape of a WordCloud colour callback; only
    ``random_state`` influences the result. The script does not currently
    apply this function to the generated cloud.

    Args:
        word, font_size, position, orientation: accepted for signature
            compatibility and ignored.
        random_state: optional object exposing ``randint(a, b)`` (for example
            a ``random.Random`` instance). When given, it is used as the
            source of randomness so results are reproducible; otherwise the
            module-level ``random`` generator is used.
        **kwargs: ignored.

    Returns:
        A string of the form ``"hsl(0, 0%, N%)"`` with N between 60 and 100
        inclusive.
    """
    # Fall back to the global generator when no seeded generator is supplied.
    rng = random if random_state is None else random_state
    return "hsl(0, 0%%, %d%%)" % rng.randint(60, 100)
# The stencil image is looked up in the current working directory.
d = os.getcwd()

#you can find the original stencil in
#https://openclipart.org/detail/1196/kamma-rahbek-silhouette
# NOTE(review): scipy.misc.imread is no longer available in recent SciPy
# releases; this line needs an older SciPy or a different image reader that
# returns the same 0-255 array the mask expects.
filename = 'stencil.png'
mask = imread(path.join(d, filename))

# little women by louisa may alcott
filename = 'little_women.txt'
gutenberg_url = 'http://www.gutenberg.org/cache/epub/514/pg514.txt'
if not path.isfile(filename):
    # Download the book only when no local copy exists. urlretrieve lives in
    # urllib.request on Python 3 and directly in urllib on Python 2.
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve
    urlretrieve(gutenberg_url, filename)

# Read the whole book; the context manager closes the file handle promptly.
with open(filename) as text_file:
    text = text_file.read()

# Work on a copy of the default stopword set so STOPWORDS itself is never
# mutated; no extra stopwords are added here.
stopwords = STOPWORDS.copy()
wordcloud = WordCloud(
    max_words=number_of_words,
    mask=mask,
    stopwords=stopwords,
    margin=0,
    random_state=1,
).generate(text)

# store default colored image (grey_color_func is not applied here)
image = wordcloud.to_image()
filename = 'little_women.png'
image.save(filename, "PNG")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment