# rlvaugh/wordcloud_trivia_game.py

## wordcloud_trivia_game.py
"""Create a wordcloud quiz game from Wikipedia film pages."""

import matplotlib.pyplot as plt
from matplotlib import patches
import requests
from bs4 import BeautifulSoup
from wordcloud import WordCloud, STOPWORDS

# Map each film title to the short link of its Wikipedia page:
urls = {
    'avengers infinity war': 'https://w.wiki/3hxu',
    'avengers end game': 'https://w.wiki/3hHY',
    'deathly hallows 1': 'https://w.wiki/9PuP',
    'deathly hallows 2': 'https://w.wiki/8u8Y',
}

# Copy wordcloud's built-in stopwords into a set for later removal.
# To drop more words, extend the set, e.g.:
# stopwords.update(['us', 'one'])
stopwords = set(STOPWORDS)

def extract_plot_text(url):
    """Extract text from the 'Plot' section of a Wikipedia film page.

    Args:
        url: Address of the film's Wikipedia page.

    Returns:
        The text of every paragraph found between the 'Plot' heading and
        the next second-level heading, one paragraph per line with leading
        and trailing whitespace stripped, or None if the page contains no
        element whose id is 'Plot'.

    Raises:
        requests.HTTPError: If the server answers with an error status.
            Other requests exceptions (e.g. timeouts) propagate unchanged.
    """
    response = requests.get(url, timeout=10)  # 10 second timeout.
    # Fail here rather than go looking for a plot inside an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    # Match on the id alone: older Wikipedia markup puts it on a <span>
    # nested inside the heading, newer markup puts it on the <h2> itself.
    plot_header = soup.find(id='Plot')
    if plot_header is None:
        return None

    paragraphs = []
    next_element = plot_header.find_next()

    # Walk the document in order until the next section heading.
    while next_element is not None and next_element.name != "h2":
        if next_element.name == "p":
            paragraphs.append(next_element.get_text())
        next_element = next_element.find_next()

    return "\n".join(paragraphs).strip()

def make_wordcloud(text):
    """Build and return a word cloud generated from the given text.

    The cloud holds at most 50 words on a white 800x500 canvas, filters
    out the module-level stopwords, and uses a fixed random state.
    """
    settings = {
        'max_words': 50,
        'width': 800,
        'height': 500,
        'relative_scaling': 0.2,
        'mask': None,
        'background_color': 'white',
        'stopwords': stopwords,
        'margin': 5,
        'random_state': 1,
    }
    cloud = WordCloud(**settings)
    return cloud.generate(text)

def add_outline_to_figure(fig):
    """Draw a black border around the given figure."""
    # The rectangle is placed with the figure's own transform, so the unit
    # square anchored at (0, 0) is expressed in figure coordinates.
    border = patches.Rectangle(
        xy=(0, 0),
        width=1,
        height=1,
        transform=fig.transFigure,
        fill=False,
        color='black',
        linewidth=2,
        zorder=1000,
    )
    fig.add_artist(border)

def make_quiz(url_dict):
    """Generate the quiz figures and return the answer key.

    For each entry, the plot text is fetched, turned into a word cloud,
    drawn under a 'Quiz #N' title with a black outline, saved as
    '<key>.png' in the current directory, and shown.

    Args:
        url_dict: Mapping whose keys name the films and whose values are
            the URLs passed to extract_plot_text().

    Returns:
        A list of (quiz number, key) tuples, numbered from 1 in the
        iteration order of url_dict.

    Raises:
        ValueError: If no plot text could be extracted for an entry.
    """
    answers = []

    for number, (key, value) in enumerate(url_dict.items(), start=1):
        plot = extract_plot_text(value)
        if not plot:
            # Stop with a readable message instead of letting a missing
            # plot fail somewhere inside the word cloud library.
            raise ValueError(f"No plot text found for '{key}' at {value}")
        answers.append((number, key))
        wc = make_wordcloud(plot)

        # Convert cloud into NumPy array to use with matplotlib:
        colors = wc.to_array()

        # Make the word cloud figure:
        fig = plt.figure()
        plt.title(f'Quiz #{number}')
        plt.imshow(colors, interpolation="bilinear")
        plt.axis("off")
        plt.tight_layout()

        # Add outline with dimensions of the figure:
        add_outline_to_figure(fig)

        # Save and show figure:
        fig.savefig(f'{key}.png', dpi=600)
        plt.show()

        # Release the figure so that figures do not pile up across films.
        plt.close(fig)

    return answers

# Generate the figures and answer key:
answer_key = make_quiz(urls)

# Save the answers as a text file. The encoding is pinned so that the
# file's contents do not depend on the platform's default:
with open('answer_key.txt', 'w', encoding='utf-8') as f:
    for number, title in answer_key:
        print(f"Quiz {number}: {title}", file=f)
	"""Create a wordcloud quiz game from Wikipedia film pages."""

	import matplotlib.pyplot as plt
	from matplotlib import patches
	import requests
	from bs4 import BeautifulSoup
	from wordcloud import WordCloud, STOPWORDS

	# Map each film title to the short link of its Wikipedia page:
	urls = {
	    'avengers infinity war': 'https://w.wiki/3hxu',
	    'avengers end game': 'https://w.wiki/3hHY',
	    'deathly hallows 1': 'https://w.wiki/9PuP',
	    'deathly hallows 2': 'https://w.wiki/8u8Y',
	}

	# Copy wordcloud's built-in stopwords into a set for later removal.
	# To drop more words, extend the set, e.g.:
	# stopwords.update(['us', 'one'])
	stopwords = set(STOPWORDS)

	def extract_plot_text(url):
	    """Extract text from the 'Plot' section of a Wikipedia film page.

	    Args:
	        url: Address of the film's Wikipedia page.

	    Returns:
	        The text of every paragraph found between the 'Plot' heading and
	        the next second-level heading, one paragraph per line with leading
	        and trailing whitespace stripped, or None if the page contains no
	        element whose id is 'Plot'.

	    Raises:
	        requests.HTTPError: If the server answers with an error status.
	            Other requests exceptions (e.g. timeouts) propagate unchanged.
	    """
	    response = requests.get(url, timeout=10)  # 10 second timeout.
	    # Fail here rather than go looking for a plot inside an error page.
	    response.raise_for_status()
	    soup = BeautifulSoup(response.content, 'html.parser')

	    # Match on the id alone: older Wikipedia markup puts it on a <span>
	    # nested inside the heading, newer markup puts it on the <h2> itself.
	    plot_header = soup.find(id='Plot')
	    if plot_header is None:
	        return None

	    paragraphs = []
	    next_element = plot_header.find_next()

	    # Walk the document in order until the next section heading.
	    while next_element is not None and next_element.name != "h2":
	        if next_element.name == "p":
	            paragraphs.append(next_element.get_text())
	        next_element = next_element.find_next()

	    return "\n".join(paragraphs).strip()

	def make_wordcloud(text):
	    """Build and return a word cloud generated from the given text.

	    The cloud holds at most 50 words on a white 800x500 canvas, filters
	    out the module-level stopwords, and uses a fixed random state.
	    """
	    return WordCloud(max_words=50,
	                     width=800,
	                     height=500,
	                     relative_scaling=0.2,
	                     mask=None,
	                     background_color='white',
	                     stopwords=stopwords,
	                     margin=5,
	                     random_state=1).generate(text)

	def add_outline_to_figure(fig):
	    """Draw a black border around the given figure."""
	    # The rectangle is placed with the figure's own transform, so the unit
	    # square anchored at (0, 0) is expressed in figure coordinates.
	    rect = patches.Rectangle((0, 0), 1, 1,
	                             transform=fig.transFigure,
	                             fill=False,
	                             color='black',
	                             linewidth=2,
	                             zorder=1000)
	    fig.add_artist(rect)

	def make_quiz(url_dict):
	    """Generate the quiz figures and return the answer key.

	    For each entry, the plot text is fetched, turned into a word cloud,
	    drawn under a 'Quiz #N' title with a black outline, saved as
	    '<key>.png' in the current directory, and shown.

	    Args:
	        url_dict: Mapping whose keys name the films and whose values are
	            the URLs passed to extract_plot_text().

	    Returns:
	        A list of (quiz number, key) tuples, numbered from 1 in the
	        iteration order of url_dict.

	    Raises:
	        ValueError: If no plot text could be extracted for an entry.
	    """
	    answers = []

	    for number, (key, value) in enumerate(url_dict.items(), start=1):
	        plot = extract_plot_text(value)
	        if not plot:
	            # Stop with a readable message instead of letting a missing
	            # plot fail somewhere inside the word cloud library.
	            raise ValueError(f"No plot text found for '{key}' at {value}")
	        answers.append((number, key))
	        wc = make_wordcloud(plot)

	        # Convert cloud into NumPy array to use with matplotlib:
	        colors = wc.to_array()

	        # Make the word cloud figure:
	        fig = plt.figure()
	        plt.title(f'Quiz #{number}')
	        plt.imshow(colors, interpolation="bilinear")
	        plt.axis("off")
	        plt.tight_layout()

	        # Add outline with dimensions of the figure:
	        add_outline_to_figure(fig)

	        # Save and show figure:
	        fig.savefig(f'{key}.png', dpi=600)
	        plt.show()

	        # Release the figure so that figures do not pile up across films.
	        plt.close(fig)

	    return answers

	# Generate the figures and answer key:
	answer_key = make_quiz(urls)

	# Save the answers as a text file. The encoding is pinned so that the
	# file's contents do not depend on the platform's default:
	with open('answer_key.txt', 'w', encoding='utf-8') as f:
	    for number, title in answer_key:
	        print(f"Quiz {number}: {title}", file=f)