Seanmatthews/robowordcloud.py

## robowordcloud.py
#!/usr/bin/env python

from collections import OrderedDict
from os import path
from PIL import Image
import argparse
import matplotlib.pyplot as plt
import numpy as np
import os

from wordcloud import WordCloud, STOPWORDS

# I'm not making this official, these are for my own testing
parser = argparse.ArgumentParser(description='Word cloud and bar graph')

# The two mode switches are plain boolean flags. 'store_true' already
# defaults to False and derives the attribute name from the flag itself,
# so only the flag and its help text need to be spelled out.
for flag, help_text in (
    ('--cloud', 'Generate and save word cloud'),
    ('--plot', 'Generate and save the word plot'),
):
    parser.add_argument(flag, action='store_true', help=help_text)

# Integer option, exposed as args.topn (20 when not given).
parser.add_argument('--topn', type=int, default=20,
                    help='Include the top N words in your plot or cloud')

# Parsed once at module level; the rest of the script reads `args`.
args = parser.parse_args()

# Directory that holds the input/output files: the script's own directory
# when __file__ is defined, otherwise the current working directory (the
# getcwd() fallback supports running inside a generated IPython notebook).
d = path.dirname(__file__) if "__file__" in locals() else os.getcwd()

# Read the whole text and lower-case it. The context manager closes the file
# handle as soon as the read finishes instead of leaving it open until the
# file object happens to be garbage collected.
with open(path.join(d, 'words.txt')) as words_file:
    text = words_file.read().lower()

# Extra words to ignore on top of wordcloud's built-in STOPWORDS.
# NOTE(review): "related areas" and "open problems" contain a space and may
# never match a single-word token -- confirm against wordcloud's tokenizer.
extra_stopwords = {
    "references", "summary", "questions", "notes", "introduction",
    "illustration", "bibliographical", "remarks", "history", "practical",
    "considerations", "overview", "conclusions", "definitions", "concepts",
    "related areas", "open problems", "background", "applications",
    "terminology", "bibliography", "using",
}
stopwords = set(STOPWORDS) | extra_stopwords

# Load the mask image from the data directory as a NumPy array; it is handed
# to WordCloud through the `mask` argument below.
mask_path = path.join(d, "bender-mask-4k.png")
bender_mask = np.array(Image.open(mask_path))

# Single WordCloud instance shared by the --cloud and --plot branches.
# A mask-free variant was tried earlier with height=2000, width=1000 in
# place of mask=bender_mask.
wc = WordCloud(
    mask=bender_mask,
    stopwords=stopwords,
    max_words=2000,
    background_color="black",
    contour_color="grey",
    contour_width=3,
)

if args.cloud:
    # --cloud: render the word cloud from the corpus and write it to
    # bendercloud.png in the data directory.
    # NOTE: --topn is not consulted in this branch.
    wc.generate(text)
    wc.to_file(path.join(d, "bendercloud.png"))

elif args.plot:
    # --plot (only reached when --cloud is absent): horizontal bar chart of
    # the most frequent words.

    # Word -> count mapping produced by WordCloud.process_text.
    topwords = wc.process_text(text)

    # Rank by count, highest first. Sorting ascending and then reversing
    # (rather than reverse=True) keeps the established ordering of ties.
    sorted_words = sorted(topwords.items(), key=lambda x: x[1])
    sorted_words.reverse()

    # Keep the top N entries. A slice already copes with N larger than the
    # list, so no length check is needed. Clamp at zero: a negative --topn
    # would otherwise drop entries from the END of the ranking.
    sorted_words = sorted_words[:max(args.topn, 0)]
    od = OrderedDict(sorted_words)

    plt.rcdefaults()
    fig, ax = plt.subplots()
    ypos = np.arange(len(od))
    vals = list(od.values())
    ax.barh(ypos, vals, align='center', height=0.5)
    # 25 trims excess vertical space for the default --topn (originally the
    # limits were (-5, 25)). Grow the limit with the number of bars so that
    # a larger --topn does not place bars beyond the axis limit.
    ax.set_ylim(bottom=-1, top=max(25, len(od)))
    ax.set_yticks(ypos)
    ax.set_yticklabels(list(od.keys()))
    ax.invert_yaxis()  # index 0 (highest count) ends up at the top
    ax.set_title("Robotics ToCs: Individual Word Counts")
    fig.tight_layout()
    fig.subplots_adjust(top=0.5)  # Compresses the whole plot

    # The figure is only displayed here; nothing is written to disk.
    plt.show()
    #fig.savefig('plot.png', dpi=fig.dpi)

else:
    print('No options specified')
	#!/usr/bin/env python

	from collections import OrderedDict
	from os import path
	from PIL import Image
	import argparse
	import matplotlib.pyplot as plt
	import numpy as np
	import os

	from wordcloud import WordCloud, STOPWORDS

	# I'm not making this official, these are for my own testing
	parser = argparse.ArgumentParser(description='Word cloud and bar graph')

	# The two mode switches are plain boolean flags. 'store_true' already
	# defaults to False and derives the attribute name from the flag itself,
	# so only the flag and its help text need to be spelled out.
	for flag, help_text in (
		('--cloud', 'Generate and save word cloud'),
		('--plot', 'Generate and save the word plot'),
	):
		parser.add_argument(flag, action='store_true', help=help_text)

	# Integer option, exposed as args.topn (20 when not given).
	parser.add_argument('--topn', type=int, default=20,
		help='Include the top N words in your plot or cloud')

	# Parsed once; the rest of the script reads `args`.
	args = parser.parse_args()

	# Directory that holds the input/output files: the script's own directory
	# when __file__ is defined, otherwise the current working directory (the
	# getcwd() fallback supports running inside a generated IPython notebook).
	d = path.dirname(__file__) if "__file__" in locals() else os.getcwd()

	# Read the whole text and lower-case it. The context manager closes the
	# file handle as soon as the read finishes instead of leaving it open
	# until the file object happens to be garbage collected.
	with open(path.join(d, 'words.txt')) as words_file:
		text = words_file.read().lower()

	# Extra words to ignore on top of wordcloud's built-in STOPWORDS.
	# NOTE(review): "related areas" and "open problems" contain a space and
	# may never match a single-word token -- confirm against wordcloud's
	# tokenizer.
	extra_stopwords = {
		"references", "summary", "questions", "notes", "introduction",
		"illustration", "bibliographical", "remarks", "history", "practical",
		"considerations", "overview", "conclusions", "definitions", "concepts",
		"related areas", "open problems", "background", "applications",
		"terminology", "bibliography", "using",
	}
	stopwords = set(STOPWORDS) | extra_stopwords

	# Load the mask image from the data directory as a NumPy array; it is
	# handed to WordCloud through the `mask` argument below.
	mask_path = path.join(d, "bender-mask-4k.png")
	bender_mask = np.array(Image.open(mask_path))

	# Single WordCloud instance shared by the --cloud and --plot branches.
	# A mask-free variant was tried earlier with height=2000, width=1000 in
	# place of mask=bender_mask.
	wc = WordCloud(
		mask=bender_mask,
		stopwords=stopwords,
		max_words=2000,
		background_color="black",
		contour_color="grey",
		contour_width=3,
	)

	if args.cloud:
		# --cloud: render the word cloud from the corpus and write it to
		# bendercloud.png in the data directory.
		# NOTE: --topn is not consulted in this branch.
		wc.generate(text)
		wc.to_file(path.join(d, "bendercloud.png"))

	elif args.plot:
		# --plot (only reached when --cloud is absent): horizontal bar chart
		# of the most frequent words.

		# Word -> count mapping produced by WordCloud.process_text.
		topwords = wc.process_text(text)

		# Rank by count, highest first. Sorting ascending and then reversing
		# (rather than reverse=True) keeps the established ordering of ties.
		sorted_words = sorted(topwords.items(), key=lambda x: x[1])
		sorted_words.reverse()

		# Keep the top N entries. A slice already copes with N larger than
		# the list, so no length check is needed. Clamp at zero: a negative
		# --topn would otherwise drop entries from the END of the ranking.
		sorted_words = sorted_words[:max(args.topn, 0)]
		od = OrderedDict(sorted_words)

		plt.rcdefaults()
		fig, ax = plt.subplots()
		ypos = np.arange(len(od))
		vals = list(od.values())
		ax.barh(ypos, vals, align='center', height=0.5)
		# 25 trims excess vertical space for the default --topn (originally
		# the limits were (-5, 25)). Grow the limit with the number of bars
		# so that a larger --topn does not place bars beyond the axis limit.
		ax.set_ylim(bottom=-1, top=max(25, len(od)))
		ax.set_yticks(ypos)
		ax.set_yticklabels(list(od.keys()))
		ax.invert_yaxis()  # index 0 (highest count) ends up at the top
		ax.set_title("Robotics ToCs: Individual Word Counts")
		fig.tight_layout()
		fig.subplots_adjust(top=0.5)  # Compresses the whole plot

		# The figure is only displayed here; nothing is written to disk.
		plt.show()
		#fig.savefig('plot.png', dpi=fig.dpi)

	else:
		print('No options specified')