Created
August 4, 2018 13:41
-
-
Save jamiebrynes7/d3bd75e9d5b6cc55f9b87b14018f1926 to your computer and use it in GitHub Desktop.
Mitchell and Webb Script Markov Generator
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import random | |
import markovify | |
import requests | |
from bs4 import BeautifulSoup | |
# Number of seasons to scrape; seasons are numbered starting at 1.
MAX_SEASON = 4
# Number of episodes fetched for each season; episodes are numbered from 1.
EPISODES_PER_SEASON = 6
# Number of "Name: sentence" lines the script attempts to print per run.
SCRIPT_LENGTH = 100
# Transcript URL template: {0} is the season number, {1} the episode number.
# Both are zero-padded to two digits, which yields exactly the same URLs as a
# literal "0" prefix for 1-9 but stays well-formed for values of 10 or more.
BASE_URL = "https://www.springfieldspringfield.co.uk/view_episode_scripts.php?tv-show=that-mitchell-and-webb-look-2006&episode=s{0:02d}e{1:02d}"
# Local file used to cache the scraped text so later runs skip the download.
CACHE_FILE = ".data"
# Build the training corpus: reuse the cached text when the cache file exists,
# otherwise download every episode transcript once and write the cache.
full_data = ""
if os.path.isfile(CACHE_FILE):
    # Explicit UTF-8 so the cache reads identically on every platform.
    # NOTE(review): a cache written by an older run on a platform whose
    # default encoding is not UTF-8 may need to be deleted and regenerated.
    with open(CACHE_FILE, "r", encoding="utf-8") as cached_data:
        full_data = cached_data.read()
else:
    data = []
    for season in range(1, MAX_SEASON + 1):
        for episode in range(1, EPISODES_PER_SEASON + 1):
            url = BASE_URL.format(season, episode)
            # A timeout keeps a stalled connection from hanging the script.
            r = requests.get(url, timeout=30)
            # Fail loudly on HTTP errors instead of caching an error page.
            r.raise_for_status()
            soup = BeautifulSoup(r.text, "html.parser")
            data_div = soup.find("div", class_="scrolling-script-container")
            if data_div is None:
                raise RuntimeError(
                    "No script container found at {0}".format(url)
                )
            # Turn " -" and " #" into sentence breaks before modelling
            # (presumably dialogue/song markers in the transcripts; confirm
            # against a live page).
            data.append(
                data_div.getText().replace(" -", ".").replace(" #", ".")
            )
    # Episodes are joined with "." so text from one episode does not run
    # straight into the next.
    full_data = ".".join(data)
    with open(CACHE_FILE, "w", encoding="utf-8") as cached_data:
        cached_data.write(full_data)
# Cast members as [weight, name] pairs. The weights act as selection
# probabilities and are written to add up to 1.
PEOPLE = [
    [0.25, "David Mitchell"],
    [0.25, "Robert Webb"],
    [0.15, "James Bachman"],
    [0.2, "Olivia Colman"],
    [0.15, "Paterson Joseph"],
]


def select_person():
    """Return the name of a cast member picked at random by weight.

    Draws one uniform number in [0, 1) and walks the cumulative weights of
    ``PEOPLE`` until the running total exceeds the draw.

    Returns:
        The name (second element) of the selected ``PEOPLE`` entry.

    Raises:
        IndexError: If ``PEOPLE`` is empty.
    """
    r = random.random()
    s = 0
    for weight, name in PEOPLE:
        s += weight
        if s > r:
            return name
    # Floating-point rounding (or weights totalling less than 1) can leave
    # the running total at or below the draw. Fall back to the last entry's
    # name; returning its weight here would print a number as the speaker.
    return PEOPLE[-1][1]
# Train a Markov chain on the corpus (each state is two consecutive words)
# and print a mock script of randomly attributed lines.
text_model = markovify.Text(full_data, state_size=2)
for _ in range(SCRIPT_LENGTH):
    # make_sentence returns None when no acceptable sentence is found within
    # the given number of tries; skip those instead of printing "Name: None".
    sentence = text_model.make_sentence(tries=100)
    if sentence is None:
        continue
    print("{0}: {1}".format(select_person(), sentence))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment