Last active
March 10, 2024 15:38
-
-
Save rlvaugh/39707f7d67d684dc42262a0985c187d5 to your computer and use it in GitHub Desktop.
A word cloud trivia game.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Create a wordcloud quiz game from Wikipedia film pages.""" | |
import matplotlib.pyplot as plt | |
from matplotlib import patches | |
import requests | |
from bs4 import BeautifulSoup | |
from wordcloud import WordCloud, STOPWORDS | |
# Map each film title (used as the quiz answer) to its Wikipedia page link:
urls = {
    'avengers infinity war': 'https://w.wiki/3hxu',
    'avengers end game': 'https://w.wiki/3hHY',
    'deathly hallows 1': 'https://w.wiki/9PuP',
    'deathly hallows 2': 'https://w.wiki/8u8Y',
}
# Take a private copy of the wordcloud stopwords for later removal, so the
# set can be extended here without modifying the package's own collection:
stopwords = {*STOPWORDS}
# stopwords.update(['us', 'one'])  # Add additional stopwords if needed.
def extract_plot_text(url):
    """Extract text from the 'Plot' section of a Wikipedia film page.

    Args:
        url: Address of the film's Wikipedia article.

    Returns:
        The text of every <p> element found between the element with
        id 'Plot' and the next <h2>, joined by newlines and stripped of
        surrounding whitespace. Returns None when the page has no element
        with id 'Plot'.

    Raises:
        requests.HTTPError: If the server responds with an error status.
    """
    response = requests.get(url, timeout=10)  # 10 second timeout.
    # Fail loudly on 4xx/5xx responses instead of scraping an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    # Match on the id alone so the lookup does not depend on which tag
    # carries it (a <span> inside the heading, or the heading itself).
    plot_header = soup.find(id='Plot')
    if plot_header is None:
        return None

    # Walk forward in document order, collecting paragraphs until the next
    # top-level section heading (or the end of the document) is reached.
    paragraphs = []
    element = plot_header.find_next()
    while element is not None and element.name != 'h2':
        if element.name == 'p':
            paragraphs.append(element.get_text())
        element = element.find_next()
    return '\n'.join(paragraphs).strip()
def make_wordcloud(text):
    """Generate and return a WordCloud object built from the given text."""
    # Rendering options; random_state is fixed so the same text always
    # receives the same seed.
    settings = {
        'max_words': 50,
        'width': 800,
        'height': 500,
        'relative_scaling': 0.2,
        'mask': None,
        'background_color': 'white',
        'stopwords': stopwords,
        'margin': 5,
        'random_state': 1,
    }
    cloud = WordCloud(**settings)
    return cloud.generate(text)
def add_outline_to_figure(fig):
    """Draw a black, unfilled border around the full extent of a figure."""
    # In figure coordinates (0, 0) is the lower-left corner and the unit
    # width/height span the whole figure; the high zorder keeps the border
    # on top of the other artists.
    border = patches.Rectangle(
        xy=(0, 0),
        width=1,
        height=1,
        transform=fig.transFigure,
        fill=False,
        color='black',
        linewidth=2,
        zorder=1000,
    )
    fig.add_artist(border)
def make_quiz(url_dict):
    """Generate one word cloud figure per film and return the answer key.

    Each figure is titled 'Quiz #<n>', outlined in black, saved in the
    current directory as '<film name>.png' at 600 dpi, and then shown.

    Args:
        url_dict: Mapping of film name to the URL of its Wikipedia page.

    Returns:
        A list of (quiz number, film name) tuples, numbered from 1 in the
        mapping's iteration order.

    Raises:
        ValueError: If no plot text could be extracted for a film.
    """
    answers = []
    for number, (title, url) in enumerate(url_dict.items(), start=1):
        answers.append((number, title))
        plot = extract_plot_text(url)
        if not plot:
            # Stop with a message that names the film rather than letting
            # the word cloud generator fail on missing or empty text.
            raise ValueError(f'No plot text found for {title!r} at {url}')
        wc = make_wordcloud(plot)

        # Convert cloud into NumPy array to use with matplotlib:
        colors = wc.to_array()

        # Make the word cloud figure:
        fig = plt.figure()
        plt.title(f'Quiz #{number}')
        plt.imshow(colors, interpolation="bilinear")
        plt.axis("off")
        plt.tight_layout()

        # Add outline with dimensions of the figure:
        add_outline_to_figure(fig)

        # Save and show figure:
        fig.savefig(f'{title}.png', dpi=600)
        plt.show()

        # Release the figure so figures do not accumulate across films.
        plt.close(fig)
    return answers
# Build every quiz figure and collect the (number, title) answer pairs:
answer_key = make_quiz(urls)

# Write the answer key to a text file, one quiz per line:
with open('answer_key.txt', 'w') as f:
    f.writelines(f"Quiz {number}: {title}\n" for number, title in answer_key)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment