This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import bs4 | |
from film import Film | |
# Container for the scraped films; nothing is appended to it in the lines
# visible here.
horror_films = []

# Fetch the "List of horror films of <year>" page for every year in the range.
# range() excludes its stop value, so the last year requested is 2022.
for year in range(1960, 2023):
    content = requests.get(
        f"https://en.wikipedia.org/wiki/List_of_horror_films_of_{year}",
        # Without a timeout a single stalled connection blocks the whole run.
        timeout=30,
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Create dictionary of states for scraping Wikipedia. | |
with open("data/us_states.csv", 'r') as f: | |
reader = csv.reader(f) | |
us_states = [row[0] for row in reader] | |
cleaned_us_states = {} | |
for state in us_states: | |
if state == "Georgia": | |
new_state = "Georgia_(U.S._state)" | |
cleaned_us_states[state] = new_state |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Film: | |
def __init__(self, title, year_released=None, setting=None): | |
if "(" in title: | |
if "(film)" not in title: | |
title_characters = title.split() | |
year_released = title_characters[-2:] | |
year_released = year_released[0] | |
year_released = year_released.replace("(", "") | |
year_released = year_released.replace(")", "") | |
self.year_released = year_released |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Film:
    """A film that compares equal to other films with the same title.

    The ``...`` placeholder below is kept from the original; presumably it
    stands in for the rest of the class body, which is not shown here.
    """

    ...

    # NOTE: a class that defines __eq__ without __hash__ is unhashable
    # (Python sets __hash__ to None), unless __hash__ is defined elsewhere
    # in the class.
    def __eq__(self, other):
        """Return True when *other* is a Film with the same title.

        Returns ``NotImplemented`` for non-Film operands so Python falls back
        to its default comparison (``film == "text"`` evaluates to False)
        instead of raising AttributeError on a missing ``title`` attribute.
        """
        if not isinstance(other, Film):
            return NotImplemented
        return self.title == other.title
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from string import ascii_uppercase | |
# Scrape Wikipedia for which films are set in which US states.
states_films = []
for state, URL_parameter in cleaned_us_states.items():
    # Request the state's "Films set in ..." category page once per capital
    # letter, passing the letter as the "from" query parameter.
    for letter in ascii_uppercase:
        content = requests.get(
            f"https://en.wikipedia.org/w/index.php?title=Category:Films_set_in_{URL_parameter}&from={letter}",
            # Without a timeout a single stalled connection blocks the whole run.
            timeout=30,
        )
        # Parse the returned HTML with the standard-library parser backend.
        soup = bs4.BeautifulSoup(content.text, "html.parser")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def format_quote(quote): | |
"""Format quote into a more readable format.""" | |
quote = quote.strip() | |
quotation_marks = ['"', "“", "”"] | |
character_mapping = [ | |
index for index, character in enumerate(quote) if character in quotation_marks | |
] | |
# Remove author info from quote. | |
last_quotation_mark = ( | |
character_mapping[-1] + 1 # Plus 1 to ensure slice of quote doesn't miss last quotation mark. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def format_quote(quote): | |
"""Format quote into a more readable format.""" | |
quote = quote.strip() | |
quotation_marks = ['"', "“", "”"] | |
# Find quotation mark indexes. | |
character_mapping = [ | |
index for index, character in enumerate(quote) if character in quotation_marks | |
] | |
# Remove author info from quote. | |
last_quotation_mark = ( |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def format_quote(quote): | |
"""Format quote into a more readable format.""" | |
quote = quote.strip() | |
quotation_marks = ['"', "“", "”"] | |
# Find quotation mark indexes. | |
character_mapping = [ | |
index for index, character in enumerate(quote) if character in quotation_marks | |
] | |
# Remove author info from quote. | |
last_quotation_mark = ( |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def format_quote(quote): | |
"""Format quote into a more readable format.""" | |
quote = quote.strip() | |
quotation_marks = ['"', "“", "”"] | |
# Find quotation mark indexes. | |
character_mapping = [ | |
index for index, character in enumerate(quote) if character in quotation_marks | |
] | |
# Remove author info from quote. | |
last_quotation_mark = ( |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example list used to demonstrate slicing.
colours = ['Blue', 'Red', 'Yellow', 'Green', 'Purple', 'Orange']
# A slice's stop index is exclusive, so [2:4] copies the items at indexes
# 2 and 3 into a new list: ['Yellow', 'Green'].
colours_new_list = colours[2:4]