Skip to content

Instantly share code, notes, and snippets.

@rlvaugh
Last active March 10, 2024 15:38
Show Gist options
  • Save rlvaugh/39707f7d67d684dc42262a0985c187d5 to your computer and use it in GitHub Desktop.
Save rlvaugh/39707f7d67d684dc42262a0985c187d5 to your computer and use it in GitHub Desktop.
A word cloud trivia game.
"""Create a wordcloud quiz game from Wikipedia film pages."""
import matplotlib.pyplot as plt
from matplotlib import patches
import requests
from bs4 import BeautifulSoup
from wordcloud import WordCloud, STOPWORDS
# Map each quiz answer (film title) to the short link of its Wikipedia page:
urls = {
    'avengers infinity war': 'https://w.wiki/3hxu',
    'avengers end game': 'https://w.wiki/3hHY',
    'deathly hallows 1': 'https://w.wiki/9PuP',
    'deathly hallows 2': 'https://w.wiki/8u8Y',
}

# Copy the library's stopwords so extra words can be added without
# touching the shared STOPWORDS set:
stopwords = set(STOPWORDS)
# stopwords.update(['us', 'one'])  # Add additional stopwords if needed.
def extract_plot_text(url):
    """Extract text from 'Plot' section of Wikipedia film page.

    Args:
        url: Address of the film's Wikipedia page.

    Returns:
        The text of every paragraph found between the 'Plot' heading and
        the next <h2> heading, joined by newlines and stripped of
        surrounding whitespace, or None when the page contains no element
        whose id is 'Plot'.

    Raises:
        requests.HTTPError: If the server responds with an error status.
    """
    response = requests.get(url, timeout=10)  # 10 second timeout.
    # Fail loudly on 4xx/5xx rather than parsing an error page as an article.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')
    # Match the id on any tag: depending on the page markup, the 'Plot' id
    # may sit on a <span> inside the heading or on the heading tag itself.
    plot_header = soup.find(id='Plot')
    if plot_header is None:
        return None
    paragraphs = []
    next_element = plot_header.find_next()
    # Walk forward in document order until the next top-level section.
    while next_element and next_element.name != "h2":
        if next_element.name == "p":
            paragraphs.append(next_element.get_text())
        next_element = next_element.find_next()
    return "\n".join(paragraphs).strip()
def make_wordcloud(text):
    """Build a word cloud from the given corpus and return it.

    The cloud is configured for 50 words at most on a white 800x500
    canvas, filters the module-level stopwords, and uses a fixed random
    state.
    """
    cloud_settings = {
        'max_words': 50,
        'width': 800,
        'height': 500,
        'relative_scaling': 0.2,
        'mask': None,
        'background_color': 'white',
        'stopwords': stopwords,
        'margin': 5,
        'random_state': 1,
    }
    cloud = WordCloud(**cloud_settings)
    return cloud.generate(text)
def add_outline_to_figure(fig):
    """Draw a black, unfilled rectangle around the edge of the figure."""
    border_style = {
        'transform': fig.transFigure,  # Coordinates are figure-relative.
        'fill': False,
        'color': 'black',
        'linewidth': 2,
        'zorder': 1000,  # High z-order so the border sits on top.
    }
    # (0, 0) with width 1 and height 1 spans the figure in transFigure units.
    border = patches.Rectangle((0, 0), 1, 1, **border_style)
    fig.add_artist(border)
def make_quiz(url_dict):
    """Generate final figures and return answer key.

    For every entry a word cloud is built from the page's plot text, saved
    as '<title>.png' in the working directory, and displayed.

    Args:
        url_dict: Mapping of film title to the URL of its Wikipedia page.

    Returns:
        A list of (quiz number, film title) tuples, numbered from 1 in the
        iteration order of url_dict.

    Raises:
        ValueError: If no plot text could be extracted for a film.
    """
    answers = []
    for number, (key, value) in enumerate(url_dict.items(), start=1):
        plot = extract_plot_text(value)
        # A missing or empty plot cannot produce a word cloud; report which
        # film failed instead of letting the word cloud library error out.
        if not plot:
            raise ValueError(f"No 'Plot' text found for '{key}' at {value}")
        answers.append((number, key))
        wc = make_wordcloud(plot)
        # Convert cloud into NumPy array to use with matplotlib:
        colors = wc.to_array()
        # Make the word cloud figure:
        fig = plt.figure()
        plt.title(f'Quiz #{number}')
        plt.imshow(colors, interpolation="bilinear")
        plt.axis("off")
        plt.tight_layout()
        # Add outline with dimensions of the figure:
        add_outline_to_figure(fig)
        # Save and show figure:
        fig.savefig(f'{key}.png', dpi=600)
        plt.show()
        # Release the figure so figures do not accumulate across the loop
        # (e.g. when show() does not block).
        plt.close(fig)
    return answers
# Build every quiz figure and collect the (number, title) answer pairs:
answer_key = make_quiz(urls)
# Write one "Quiz N: title" line per answer to a text file:
with open('answer_key.txt', 'w') as f:
    for number, title in answer_key:
        f.write(f"Quiz {number}: {title}\n")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment