Created
May 6, 2019 02:58
-
-
Save Seanmatthews/9187fd5ffc568138114dfdcaa5d4ba3a to your computer and use it in GitHub Desktop.
Word cloud & word count
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
from collections import OrderedDict | |
from os import path | |
from PIL import Image | |
import argparse | |
import matplotlib.pyplot as plt | |
import numpy as np | |
import os | |
from wordcloud import WordCloud, STOPWORDS | |
# Command-line interface. Not an official interface: these switches exist for
# the author's own testing.
parser = argparse.ArgumentParser(description='Word cloud and bar graph')

# Boolean switches: each is False unless passed, and is stored on the
# namespace under the option's own name (args.cloud, args.plot).
for flag, flag_help in (
    ('--cloud', 'Generate and save word cloud'),
    ('--plot', 'Generate and save the word plot'),
):
    parser.add_argument(flag, action='store_true', help=flag_help)

# Number of highest-count words to keep; parsed as an int, 20 when omitted.
parser.add_argument(
    '--topn',
    type=int,
    default=20,
    help='Include the top N words in your plot or cloud',
)

# Parses the process command line immediately, at module execution time.
args = parser.parse_args()
# Resolve the data directory: the directory containing this script when
# __file__ is defined, otherwise the current working directory (getcwd() is
# needed to support running the example in a generated IPython notebook).
d = path.dirname(__file__) if "__file__" in locals() else os.getcwd()

# Read the whole text and lower-case it. The context manager closes the file
# handle as soon as the contents have been read instead of leaking it.
with open(path.join(d, 'words.txt')) as words_file:
    text = words_file.read().lower()

# Start from wordcloud's built-in stopwords and add headings that are not
# wanted in the counts.
# NOTE(review): "related areas" and "open problems" are two-word entries;
# presumably they never match a single token -- confirm against wordcloud's
# tokenization and split them into separate words if that was the intent.
stopwords = set(STOPWORDS) | {
    "references", "summary", "questions", "notes", "introduction",
    "illustration", "bibliographical", "remarks", "history", "practical",
    "considerations", "overview", "conclusions", "definitions", "concepts",
    "related areas", "open problems", "background", "applications",
    "terminology", "bibliography", "using",
}

# Load the mask image into an array; the image file is closed once the pixel
# data has been copied out.
with Image.open(path.join(d, "bender-mask-4k.png")) as mask_image:
    bender_mask = np.array(mask_image)

# The cloud is drawn inside the mask shape with a grey contour.
wc = WordCloud(background_color="black", max_words=2000, mask=bender_mask,
               stopwords=stopwords, contour_width=3, contour_color="grey")

# Alternative without a mask, using an explicit canvas size instead:
# wc = WordCloud(background_color="black", max_words=2000,
#                height=2000, width=1000,
#                stopwords=stopwords, contour_width=3, contour_color="grey")
# Dispatch on the requested outputs. The two switches are handled
# independently so that passing both --cloud and --plot produces both outputs
# (previously --plot was silently ignored whenever --cloud was given).
if args.cloud:
    #
    # Create word cloud
    #
    wc.generate(text)
    # Save the rendered cloud into the data directory.
    wc.to_file(path.join(d, "bendercloud.png"))

if args.plot:
    #
    # Create plot
    #
    # Word counts come from the same WordCloud instance, so the same
    # stopwords apply to the plot as to the cloud.
    topwords = wc.process_text(text)

    # Ascending sort followed by reverse() gives highest counts first; this
    # form (rather than reverse=True) keeps the original ordering of ties.
    sorted_words = sorted(topwords.items(), key=lambda x: x[1])
    sorted_words.reverse()
    # Slicing is safe when fewer than topn words exist, so no length guard.
    sorted_words = sorted_words[:args.topn]
    od = OrderedDict(sorted_words)

    plt.rcdefaults()
    fig, ax = plt.subplots()
    ypos = np.arange(len(od))
    vals = list(od.values())
    ax.barh(ypos, vals, align='center', height=0.5)
    # Removes excess vertical space (orig: (-5, 25)). The upper limit grows
    # with the number of bars so that --topn values above 25 are not clipped;
    # for 25 bars or fewer the limits are unchanged.
    ax.set_ylim(bottom=-1, top=max(25, len(od)))
    ax.set_yticks(ypos)
    ax.set_yticklabels(list(od.keys()))
    # Put the most frequent word at the top of the chart.
    ax.invert_yaxis()
    ax.set_title("Robotics ToCs: Individual Word Counts")
    fig.tight_layout()
    fig.subplots_adjust(top=0.5)  # Compresses the whole plot
    # The plot is displayed interactively; saving to disk is disabled.
    plt.show()
    # fig.savefig('plot.png', dpi=fig.dpi)

if not (args.cloud or args.plot):
    print('No options specified')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment