Word cloud & word count
#!/usr/bin/env python | |
from collections import OrderedDict | |
from os import path | |
from PIL import Image | |
import argparse | |
import matplotlib.pyplot as plt | |
import numpy as np | |
import os | |
from wordcloud import WordCloud, STOPWORDS | |
# Command-line switches. These are unofficial and exist for the author's own
# testing.
parser = argparse.ArgumentParser(description='Word cloud and bar graph')

# Mode flags: each is False unless given on the command line.
parser.add_argument(
    '--cloud',
    action='store_true',
    help='Generate and save word cloud',
)
parser.add_argument(
    '--plot',
    action='store_true',
    help='Generate and save the word plot',
)

# Number of top-ranked words to keep; defaults to 20.
parser.add_argument(
    '--topn',
    type=int,
    default=20,
    help='Include the top N words in your plot or cloud',
)

args = parser.parse_args()
# Locate the data directory: the folder containing this script when __file__
# is defined, otherwise the current working directory (needed when the code
# runs inside a generated IPython notebook, where __file__ is absent).
d = path.dirname(__file__) if "__file__" in locals() else os.getcwd()

# Read the whole input text, lower-cased. The context manager closes the file
# handle as soon as the contents are read.
with open(path.join(d, 'words.txt')) as words_file:
    text = words_file.read().lower()

# Start from the library's stock stopword list and add generic
# table-of-contents headings that should not be counted.
# NOTE(review): "related areas" and "open problems" are two-word entries;
# confirm they can actually match anything if words are filtered one token
# at a time.
stopwords = set(STOPWORDS)
stopwords = stopwords.union(("references", "summary", "questions", "notes", "introduction", "illustration", "bibliographical",
                             "remarks", "history", "practical", "considerations", "overview", "conclusions", "definitions",
                             "concepts", "related areas", "open problems", "background", "applications", "terminology",
                             "bibliography", "using"))

# Load the mask image as an array; the image file is closed once the pixel
# data has been copied into the array.
with Image.open(path.join(d, "bender-mask-4k.png")) as mask_image:
    bender_mask = np.array(mask_image)

# Masked cloud on a black background with a grey contour.
# Alternative kept from the original author: drop mask=bender_mask and pass
# height=2000, width=1000 instead to get a plain rectangular cloud.
wc = WordCloud(background_color="black", max_words=2000, mask=bender_mask,
               stopwords=stopwords, contour_width=3, contour_color="grey")
if args.cloud:
    # Word-cloud mode: build the cloud from the text and write the image into
    # the data directory.
    # NOTE(review): args.topn is not consulted in this branch; the number of
    # words drawn is governed by the max_words value wc was constructed with.
    wc.generate(text)
    wc.to_file(path.join(d, "bendercloud.png"))
elif args.plot:
    # Bar-chart mode: count the words, rank them by frequency and chart the
    # most frequent ones.
    word_counts = wc.process_text(text)

    # Ascending sort followed by a reversal gives descending counts while
    # keeping the same ordering among tied counts as before.
    ranked = sorted(word_counts.items(), key=lambda item: item[1])
    ranked.reverse()

    # Slicing already copes with fewer than topn words. Clamp at zero so a
    # negative --topn yields an empty chart instead of silently dropping
    # words from the tail of the ranking.
    ranked = ranked[:max(args.topn, 0)]
    labels = [word for word, _ in ranked]
    vals = [count for _, count in ranked]

    plt.rcdefaults()
    fig, ax = plt.subplots()
    ypos = np.arange(len(labels))
    ax.barh(ypos, vals, align='center', height=0.5)

    # Removes excess vertical space (originally (-5, 25)). The upper limit
    # stays at 25 rows for small charts but grows with the bar count so that
    # bars are not clipped when --topn exceeds 25.
    ax.set_ylim(bottom=-1, top=max(25, len(labels)))
    ax.set_yticks(ypos)
    ax.set_yticklabels(labels)
    ax.invert_yaxis()  # most frequent word at the top
    ax.set_title("Robotics ToCs: Individual Word Counts")
    fig.tight_layout()
    fig.subplots_adjust(top=0.5)  # Compresses the whole plot
    plt.show()
    # NOTE(review): the --plot help text says the plot is saved, but the chart
    # is only displayed. The original save call is left disabled:
    # fig.savefig('plot.png', dpi=fig.dpi)
else:
    print('No options specified')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment