Skip to content

Instantly share code, notes, and snippets.

import csv
import re

import bs4
import requests

from film import Film
# Request the Wikipedia "List of horror films of <year>" page for every year
# from 1960 through 2022 (range() excludes the stop value 2023).
# NOTE(review): nothing in this snippet fills horror_films or reads `content`;
# presumably the parsing step follows elsewhere.
horror_films = []
for year in range(1960, 2023):
    content = requests.get(
        f"https://en.wikipedia.org/wiki/List_of_horror_films_of_{year}",
        # requests waits indefinitely unless a timeout is given; bound the wait
        # so a single stalled connection cannot hang the whole scrape.
        timeout=30,
    )
# Create dictionary of states for scraping Wikipedia.
# Keys are the state names read from the CSV; values are the text substituted
# into Wikipedia URLs for that state.
# newline='' is what the csv module asks for when it is handed a file object.
with open("data/us_states.csv", 'r', newline='') as f:
    reader = csv.reader(f)
    # The state name is the first column; blank lines yield empty rows, which
    # would otherwise raise IndexError on row[0].
    us_states = [row[0] for row in reader if row]

cleaned_us_states = {}
for state in us_states:
    if state == "Georgia":
        # Georgia uses a disambiguated page title.
        new_state = "Georgia_(U.S._state)"
    else:
        # Every other state previously had no value assigned at all. Default
        # to the plain name with spaces written as underscores.
        # NOTE(review): other states may need their own disambiguated title
        # too - verify against the actual Wikipedia category names.
        new_state = state.replace(" ", "_")
    cleaned_us_states[state] = new_state
class Film:
    """A film described by its title, release year and setting.

    Attributes:
        title: The title exactly as passed to the constructor.
        year_released: The year taken from the title's trailing parenthetical
            (as a string) when there is one, otherwise the value passed in.
        setting: The setting exactly as passed to the constructor.
    """

    # A parenthetical at the very end of the title that starts with a
    # four-digit year, e.g. "(1979 film)", "(1978)" or
    # "(1931 English-language film)". Group 1 is the year.
    _YEAR_DISAMBIGUATOR = re.compile(r"\((\d{4})\b[^()]*\)\s*$")

    def __init__(self, title, year_released=None, setting=None):
        """Create a film, reading the release year from the title if present.

        Args:
            title: Film title, optionally ending in a parenthetical such as
                "(1979 film)".
            year_released: Fallback year, used when the title carries none.
            setting: Where the film is set; stored unchanged.
        """
        # Match the year itself rather than taking the second-to-last word:
        # the positional approach returned the wrong token for "Name (1978)"
        # and for parentheticals with more than two words.
        match = self._YEAR_DISAMBIGUATOR.search(title)
        if match:
            year_released = match.group(1)

        self.title = title
        self.year_released = year_released
        self.setting = setting
class Film:
    ...

    def __eq__(self, other):
        """Return True when both films have the same title.

        Objects that are not Film instances are not compared by title;
        NotImplemented is returned so Python falls back to its default
        handling instead of raising AttributeError on a missing `title`.
        """
        if not isinstance(other, Film):
            return NotImplemented
        return self.title == other.title

    def __hash__(self):
        """Hash on the title so that equal films hash equally.

        Defining __eq__ alone sets __hash__ to None, which would make Film
        instances unusable in sets and as dictionary keys.
        """
        return hash(self.title)
from string import ascii_uppercase
# Scrape Wikipedia for which films are set in which US states.
# NOTE(review): nothing in this snippet appends to states_films or reads
# `soup`; presumably the extraction step follows elsewhere.
states_films = []
for state, URL_parameter in cleaned_us_states.items():
    # Request the state's "Films set in ..." category once per letter A-Z,
    # passing the letter as the `from` query parameter.
    for letter in ascii_uppercase:
        content = requests.get(
            f"https://en.wikipedia.org/w/index.php?title=Category:Films_set_in_{URL_parameter}&from={letter}",
            # requests waits indefinitely unless a timeout is given; bound the
            # wait so a single stalled connection cannot hang the scrape.
            timeout=30,
        )
        soup = bs4.BeautifulSoup(content.text, "html.parser")
def format_quote(quote):
"""Format quote into a more readable format."""
quote = quote.strip()
quotation_marks = ['"', "“", "”"]
character_mapping = [
index for index, character in enumerate(quote) if character in quotation_marks
]
# Remove author info from quote.
last_quotation_mark = (
character_mapping[-1] + 1 # Plus 1 to ensure slice of quote doesn't miss last quotation mark.
def format_quote(quote):
"""Format quote into a more readable format."""
quote = quote.strip()
quotation_marks = ['"', "“", "”"]
# Find quotation mark indexes.
character_mapping = [
index for index, character in enumerate(quote) if character in quotation_marks
]
# Remove author info from quote.
last_quotation_mark = (
def format_quote(quote):
"""Format quote into a more readable format."""
quote = quote.strip()
quotation_marks = ['"', "“", "”"]
# Find quotation mark indexes.
character_mapping = [
index for index, character in enumerate(quote) if character in quotation_marks
]
# Remove author info from quote.
last_quotation_mark = (
def format_quote(quote):
"""Format quote into a more readable format."""
quote = quote.strip()
quotation_marks = ['"', "“", "”"]
# Find quotation mark indexes.
character_mapping = [
index for index, character in enumerate(quote) if character in quotation_marks
]
# Remove author info from quote.
last_quotation_mark = (
# Example list of six colour names.
colours = [
    'Blue',
    'Red',
    'Yellow',
    'Green',
    'Purple',
    'Orange',
]
# New list holding the entries at indices 2 and 3 (the stop index 4 is
# exclusive); slicing copies, so `colours` itself is left untouched.
colours_new_list = colours[slice(2, 4)]