eseiver/vod.py

## vod.py
#!/usr/bin/python3
import sys
from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
try:
    TIMEZONE = ZoneInfo("America/Los_Angeles")
except ZoneInfoNotFoundError:
    print("\nTimezone info not found. Please run `pip install tzdata` and try again.\n", file=sys.stderr)
    sys.exit()

"""
Requires Python 3.9+ (imports zoneinfo). Place this file in your scripts folder.
* Windows users: You will need to run `pip install tzdata` one time to install timezone information.
This script is for criticalrole.miraheze.org when a new video posts to Critical Role's YouTube channel.

To test the script run:
>> python pwb.py vod -simulate -all -ep_id:3x25 -page:User:FCGBot/episode

Example command for cut and pasting in the values:
>> python pwb.py vod -all -ep:3x38 -page:"Campaign 3 Episode 38" -yt:U5mkmw46m4U -new_ep_name:"A Dark Balance" -runtime:4:20:43 -episode_summary:"Bells Hells return to safety..." -actors:"Marisha, Sam" -airdate:2022-10-20 -simulate

A number of maintenance activities can be performed together (-all) or independently:

-update_page      Add runtime, thumbnail image & caption, episode summary, & VOD link to episode page

-move             Move the episode page from a placeholder name to the name specified in the video

-ep_list          Add entry to list of episodes page, as determined from EPISODE_DECODER

-ep_array         In Module:Ep/Array, make new episode title valid input & the display value

-yt_switcher      Add the episode + YouTube ID to Module:Ep/YTURLSwitcher

-airdate_order    Add the episode id & airdate to Module:AirdateOrder/Array

-transcript       Create transcript page (auto-skips TRANSCRIPT_EXCLUSIONS)

-transcript_list  Add /transcript page to list of transcripts (auto-skips TRANSCRIPT_EXCLUSIONS)

-upload           Upload and link to the episode thumbnail; ignored if already exists

-main_page        Check to see if the latest episode image on the main page needs updating

-redirects        Make sure episode code redirect(s) exist and link to newest episode name

-navbox           Make sure the episode code is in the navbox, as determined from EPISODE_DECODER

-4SD              For 4-Sided Dive only, add ep_id to the 3xNN episodes since the previous

Use global -simulate option for test purposes. No changes to live wiki will be done.
For every potential change, you will be shown a diff of the edit and asked to accept or reject it.
No changes will be made automatically. Actions are skipped if change is not needed (e.g., an entry for
the episode already exists on the module page).

All other parameters are passed in the format -parameter:value. Use "quotes" around value if it has
spaces (e.g., -actors:"Marisha, Taliesin, Matt"). "!" needs to be escaped, even in quotes, as "\!".
You will be prompted to enter a missing value if needed. No quotation marks needed in this case.

-ep:              REQUIRED. The CxNN code of the episode with newly uploaded VOD (-ep_id also valid)

-page:            REQUIRED. The page to be edited, usually current episode page

-yt:              The 11-character unique identifier or full URL for the YouTube video (-yt_id also valid)

-airdate:         YYYY-MM-DD of the date episode aired. Can be inferred from episode page if filled in.

-airtime:         Time of day the episode aired. Optional, can be inferred from episode page if filled in.

-runtime:         HH:MM:SS length of the episode video

-actors:          L-R of actors in thumbnail. Separate with ','. First names ok (from ACTORS list)

-episode_summary: The 1-2 line summary of the episode from the YouTube video.

-old_ep_name:     If different from -page:, the current name of the episode (mostly for testing)

-new_ep_name:     Where the episode will be moved to, if it has been renamed

-new_page_name:   Only if page name differs from new_ep_name (usually 'A' vs 'A (episode)')

-summary:         A pywikibot command that adds an edit summary message and shouldn't be needed.

-host:            Actor who is the 4SD host or running one-shot (DM, GM also work here)

-game_system:     For one-shots, game system if not Dungeons & Dragons

Other parameters (most of which are automatically calculated values but still can be passed in)
can be found in `update_options` for EpisodeBot (line 804).

Potential future features:
1) Make sure that the episode has been removed from upcoming events
2) Update the episode on the main page
3) Pull YouTube info automatically using the YouTube API

This script is a
:py:obj:`ConfigParserBot <pywikibot.bot.ConfigParserBot>`. All settings can be
made either by giving option with the command line or with a settings file
which is scripts.ini by default.
"""
# Distributed under the terms of the MIT license.

import re
from collections import Counter
from copy import deepcopy
from datetime import datetime
from itertools import groupby
from string import ascii_lowercase
import mwparserfromhell
import pywikibot
from nltk.util import everygrams
from pywikibot import pagegenerators
from pywikibot.bot import (
    AutomaticTWSummaryBot,
    ConfigParserBot,
    ExistingPageBot,
    SingleSiteBot,
    QuitKeyboardInterrupt,
)
from pywikibot.specialbots import UploadRobot
from youtube_transcript_api import YouTubeTranscriptApi, NoTranscriptFound
from youtube_transcript_api.formatters import TextFormatter

# regular expressions for string matching
EP_REGEX = '^(\d+|OS|M|E\d*|U|4SD|LVM\d+|TM(OS|S)?\d*)x\d+(a|b)?$'  # https://regex101.com/r/QXhVhb/4
ARRAY_ENTRY_REGEX = '''\[\"(?P<epcode>.*?)\"\] = \{\s*\[\"title\"\] = \"(?P<title>.*)\",?((\s*\[\"pagename\"\] = \"(?P<pagename>.*)\",)?(\s*\[\"altTitles\"\] = \{(?P<altTitles>.*)\})?)?'''
YT_LINK_REGEX = '(?P<vod>(?:https?:\/\/)?(?:www\.)?(?:youtu\.be\/|youtube\.com\/(?:embed\/|v\/|watch\?v=|watch\?.+&v=))(?P<yt_id>[-\w_]{11})(&t=(?P<timecode>.*))?)'
YT_ID_REGEX = '[-\w_]{11}'

# pagenames
INFOBOX = 'Infobox Episode'
EP_ARRAY = 'Module:Ep/Array'
AIRDATE_ORDER = 'Module:AirdateOrder/Array'
YT_SWITCHER = 'Module:Ep/YTURLSwitcher/URLs'
TRANSCRIPTS_LIST = 'Transcripts'

# date and time
TIMEZONE = ZoneInfo("America/Los_Angeles")  # where Critical Role is based
DATE_REGEX = '\d{4}-\d{1,2}-\d{1,2}'
DATE_FORMAT = '%Y-%m-%d'
DATE_2_REGEX = '\d{1,2}-\d{1,2}-\d{4}'
DATE_2_FORMAT = '%m-%d-%Y'
TIME_REGEX = '\d{1,2}:\d{2}\s*(?P<tz_entry>\w{2,3})?'
TIME_FORMAT = '%H:%M'
DATETIME_REGEX = '\s*'.join([DATE_REGEX, TIME_REGEX])
DATETIME_FORMAT = ' '.join([DATE_FORMAT, TIME_FORMAT])
date_options = ((DATETIME_REGEX, DATETIME_FORMAT),
                (DATE_REGEX, DATE_FORMAT),
                (DATE_2_REGEX, DATE_2_FORMAT),
                (TIME_REGEX, TIME_FORMAT),
               )
ACTORS = [
    # main cast
    'Ashley Johnson',
    'Laura Bailey',
    "Liam O'Brien",
    'Marisha Ray',
    'Matthew Mercer',
    'Sam Riegel',
    'Taliesin Jaffe',
    'Travis Willingham',

    # guest stars
    'Aabria Iyengar',
    'Brennan Lee Mulligan',
    'Dani Carr',
    'Erika Ishii',
    'Robbie Daymond',
]
SPEAKER_TAGS = [
    'ASHLEY', 'LAURA', 'LIAM', 'MARISHA', 'MATT', 'SAM', 'TALIESIN', 'TRAVIS',
    'ALL', 'AABRIA', 'BRENNAN', 'DANI', 'ERIKA', 'ROBBIE',
]
EPISODE_DECODER = {
    '3': ('Campaign 3', 'List of Campaign 3 episodes',
          'Campaign 3 episode thumbnails', 'Template:Nav-C3Arc1'),
    'OS': ('One-shots', 'One-shots', 'One-shot episode thumbnails', 'Template:Nav-OneShots'),
    'M': ('Bits and bobs', 'Bits and bobs',
          'Bits and bobs episode thumbnails',  'Template:Nav-Bitsnbobs'),
    'LVM2': ('The Legend of Vox Machina',
            'List of The Legend of Vox Machina episodes',
            'The Legend of Vox Machina episode thumbnails',
            'Template:Nav-LoVM Season 2',
            ),
    '4SD': ('4-Sided Dive', '4-Sided Dive', 'Episode thumbnails', 'Template:Nav-4SD'),
    # 'Ep_type': ('show page', 'episode list page', 'episode thumbnail category', 'navbox'),
}

# Episode codes where the transcript will not be added (-transcript is auto-skipped)
TRANSCRIPT_EXCLUSIONS = ['4SD', 'LVM2']

# Episode codes that are currently producing new episodes
CURRENT_PREFIXES = ['3', '4SD', 'OS', 'M', 'LVM2']

# This is required for the text that is shown when you run this script
# with the parameter -help.
docuReplacements = {'&params;': pagegenerators.parameterHelp}  # noqa: N816


def join_array_on_and(str_iter):
    '''Turns list into a string where items are separated by "," except the last,
    which uses "and" if it is at least three items. Pair joined by "and" only.'''
    return_val = ''
    if len(str_iter) <= 1:
        return_val = str_iter[0]
    elif len(str_iter) == 2:
        return_val = ' and '.join(str_iter)
    elif len(str_iter) > 2:
        return_val = ', '.join([*str_iter[:-1], f'and {str_iter[-1]}'])
    return return_val


def get_validated_input(regex, arg, value='', attempts=3, req=True,
                        input_msg=None):
    '''For getting pywikibot user input that is validated against regex. Ignores case'''
    counter = 0
    if arg == 'ep' and input_msg is None:
        input_msg = 'Please enter valid episode id (CxNN)'
    elif arg == 'airdate' and input_msg is None:
        input_msg = 'Please enter valid airdate (YYYY-MM-DD)'
    elif input_msg is None:
        input_msg = f'Please enter valid {arg}'
    while counter < attempts and not re.match(regex, value, flags=re.IGNORECASE):
        value = pywikibot.input(input_msg)
        counter += 1
    if not re.match(regex, value):
        print(f'\nInvalid {arg} "{value}". Maximum attempts reached\n', file=sys.stderr)
        if req:
            sys.exit()
    return value


class Ep:
    '''for handling episode ids'''
    def __init__(self, episode_code, padding_limit=2):
        episode_code = episode_code.strip()
        assert re.match(EP_REGEX, episode_code, flags=re.IGNORECASE)
        self._code = episode_code
        self.code = self.standardize_code(episode_code)
        self.padding_limit = padding_limit
        self.max_letter = 'b'

    def __repr__(self):
        return self.code

    def __eq__(self, other):
        if isinstance(other, Ep):
            return self.code == other.code
        return False

    def __hash__(self):
        return hash(self.code)

    def standardize_code(self, code):
        '''Format standardized with single zero padding'''
        prefix, number = code.split('x')
        if number[-1].isdigit():
            number = int(number)
            standardized_code = 'x'.join([prefix, f"{number:02}"])
        else:
            number_1 = int(number[:-1])
            standardized_code = 'x'.join([prefix, f"{number_1:02}"]) + number[-1]
        return standardized_code

    @property
    def ends_in_letter(self):
        if self.code[-1].isdigit():
            return False
        else:
            return True

    @property
    def prefix(self):
        prefix = self.code.split('x')[0]
        return prefix

    @property
    def number(self):
        number = self.code.split('x')[-1]
        if self.ends_in_letter:
            number = int(number[:-1])
        else:
            number = int(number)
        return number

    @property
    def show(self):
        return EPISODE_DECODER[self.prefix][0]

    @property
    def list_page(self):
        return EPISODE_DECODER[self.prefix][1]

    @property
    def thumbnail_page(self):
        return EPISODE_DECODER[self.prefix][2]

    @property
    def navbox_name(self):
        return EPISODE_DECODER[self.prefix][3]

    @property
    def image_filename(self):
        filename = f"{self.code} Episode Thumb.jpg"
        return filename

    @property
    def wiki_code(self):
        wiki = f'{{{{ep|{self.code}}}}}'
        return wiki

    @property
    def wiki_vod(self):
        vod = f"{{{{Ep/YTURLSwitcher|ep={self.code}}}}}"
        return vod

    def generate_equivalent_codes(self):
        '''Get all equivalent valid episode codes up to the padding limit number of digits'''
        if len([x for x in str(self.number) if x.isdigit()]) >= self.padding_limit:
            code_list = [self.code]
        elif not self.ends_in_letter:
            code_list = ['x'.join([self.prefix, str(self.number).zfill(1+n)]) for n in range(self.padding_limit)]
        else:
            code_list = ['x'.join([self.prefix, str(self.number).zfill(1+n)]) + self.code[-1] for n in range(self.padding_limit)]
        return code_list

    def get_previous_episode(self):
        '''Cannot calculate across seasons (e.g., what was before 3x01). Handles letters (valid letters regex-limited).'''
        old_number = self.number - 1
        letter = ''
        if self.ends_in_letter and not self.code.endswith('a'):
            old_number = self.number
            suffix = self.code[-1]
            look_up = dict(zip(ascii_lowercase, ascii_lowercase[1:]+'a'))
            letter = next(k for k, v in look_up.items() if v == suffix)
        if old_number > 0 and (not self.ends_in_letter or self.code.endswith('a')):
            old_id = 'x'.join([self.prefix, f"{old_number:02}"])
            previous_episode = Ep(old_id)
        elif old_number > 0:
            old_id = 'x'.join([self.prefix, f"{old_number:02}"]) + letter
            previous_episode = Ep(old_id)
        else:
            # no previous id, because the first of its kind
            previous_episode = None
        return previous_episode


class Actors:
    def __init__(self, input_names, **kwargs):
        self._input_names = input_names
        self.link = kwargs.get('link', True)
        self.matched_only = kwargs.get('matched_only', True)
        self.link_unmatched = kwargs.get('link_unmatched', True)
        if len(input_names.strip()):
            self.name_list, self.name_string = self.actor_names_to_wiki_list()
        else:
            self.name_list = []
            self.name_string = ''

    def match_actors(self):
        actors = re.split('[^\w\s]+', self._input_names)
        matched_list = []
        for actor in actors:
            actor = actor.strip()
            #skip joining words
            if actor.lower() in ['and', 'also']:
                continue
            candidates = [x for x in ACTORS if actor.lower() in x.lower()]
            if len(candidates) == 1:
                match = candidates[0]
            elif len(candidates) > 1:
                if candidates:
                    pywikibot.output(f"Please clarify '{actor}': {candidates}")
                else:
                    pywikibot.output(f"No match for '{actor}'")
                continue
            elif self.matched_only:
                pywikibot.output(f"'{actor}' did not match an actor. Check spelling and use actor's full name")
                continue
            else:
                match = actor
            matched_list.append(match)
        return matched_list

    def make_actor_list_string(self, actor_list=None):
        if actor_list is None:
            actor_list = self.match_actors()
#         matched_actors = [x for x in actor_list if x in ACTORS]
        unmatched_actors = [x for x in actor_list if x not in ACTORS]
        actor_list = deepcopy(actor_list)

        for i, actor in enumerate(actor_list):
            if self.link or (self.link_unmatched and actor in unmatched_actors):
                actor_list[i] = f"[[{actor.strip()}]]"
            else:
                actor = actor

        actor_string = join_array_on_and(actor_list)

        return actor_string

    def actor_names_to_wiki_list(self, actor_list=None):
        if actor_list is None:
            actor_list = self.match_actors()
        if actor_list:
            actor_string = self.make_actor_list_string(actor_list=actor_list)
        else:
            actor_string = ''
        return actor_list, actor_string


def make_image_caption(ep: Ep, actors: Actors) -> str:
    '''For the caption field in the episode article.'''
    # 4-Sided Dive has separate caption conventions from other episode types.
    if ep.prefix == '4SD':
        caption = f' {{{{art official caption|nointro=true|subject=Thumbnail|screenshot=1|source={ep.wiki_code}}}}}'
    elif len(actors.name_list):
        caption = f' {ep.wiki_code} thumbnail featuring {actors.name_string}.'
    else:
        caption = f' {ep.wiki_code} thumbnail.'
    return caption


def make_image_file_description(ep: Ep, actors: Actors) -> str:
    """The description of the image thumbnail file to be uploaded."""
    actor_list = actors.name_string if actors.name_string else "the ''Critical Role'' cast"

    file_description = f"""== Summary ==
{ep.wiki_code} thumbnail featuring {actor_list}.

== Licensing ==
{{{{Fairuse}}}}

[[Category:{ep.thumbnail_page}]]"""
    return file_description


class YT:
    def __init__(self, yt_string):
        yt_string = yt_string.strip()
        self._entry = yt_string
        if re.search(YT_LINK_REGEX, yt_string):
            self.yt_id = re.search(YT_LINK_REGEX, yt_string)['yt_id']
        elif re.match(YT_ID_REGEX, yt_string):
            self.yt_id = yt_string
        else:
            self.yt_id = None

    @property
    def url(self):
        url = f"https://youtu.be/{self.yt_id}"
        return url

    @property
    def thumbnail_url(self):
        url = f"https://img.youtube.com/vi/{self.yt_id}/maxresdefault.jpg"
        return url


def convert_timezone(string, tz=TIMEZONE):
    '''Convert timezone abbreviation to tzinfo-formatted string.'''
    if string in ['PST', 'PDT', 'PT']:
        timezone = ZoneInfo('America/Los_Angeles')
    elif string in ['EST', 'EDT', 'ET']:
        timezone = ZoneInfo('America/New_York')
    else:
        timezone = tz
    return timezone


def convert_string_to_datetime(date_string, tz=TIMEZONE):
    '''Make a datetime object of the episode's airdate and/or airtime.'''
    date = None
    for regex, date_format in date_options:
        if re.search(regex, date_string):
            date_match = re.search(regex, date_string)
            date_string = date_match.group()
            if date_match.groupdict().get('tz_entry'):
                date_string = date_string.replace(date_match['tz_entry'], '').strip()
                timezone = convert_timezone(date_match['tz_entry'])
            else:
                timezone = tz
            date = datetime.strptime(date_string, date_format).replace(tzinfo=timezone)
            break
    return date


class Airdate:
    def __init__(self, input_date, tz=TIMEZONE):
        self.tz = tz
        if isinstance(input_date, datetime):
            self.datetime = input_date
        elif isinstance(input_date, str):
            self.datetime = convert_string_to_datetime(input_date, tz=tz)
        if self.datetime.tzinfo is None:
            self.datetime = self.datetime.replace(tzinfo=tz)

    @property
    def date(self):
        date_string = datetime.strftime(self.datetime.astimezone(tz=self.tz), '%Y-%m-%d')
        return date_string

    @property
    def time(self):
        time_string = datetime.strftime(self.datetime.astimezone(tz=self.tz), '%H:%M %Z')
        return time_string

    @property
    def date_and_time(self):
        datetime_string = datetime.strftime(self.datetime.astimezone(tz=self.tz), '%Y-%m-%d %H:%M %Z')
        return datetime_string


def remove_comments(wikicode, return_string=True):
    '''For an item of wikicode, strip out comments. Used to determine if an infobox value
    is truly empty.'''
    raw_value = str(wikicode)
    if wikicode.filter_comments():
        for comment in wikicode.filter_comments():
            value = raw_value.replace(str(comment), '')
    else:
        value = wikicode

    if return_string:
        value = str(value)
    return value

def wikify_html_string(html_string):
    '''Replace italics and bold html with equivalent wiki markup.'''
    # italics
    html_string = re.sub('</?i>', "''", html_string)

    # bold
    html_string = re.sub('</?b>', "'''", html_string)

    html_fixes = {
        '&amp;': '&',
        '&nbsp;': ' ',
        '&quot;': '"',
    }

    # escaped characters
    for html, fixed in html_fixes.items():
        html_string = html_string.replace(html, fixed)

    return html_string


def make_ngrams(text, ngram_min=2, ngram_max=4):
    '''for words in a string'''
    return list(everygrams(text.split(), ngram_min, ngram_max))


def sort_ngram_list(ngram_list):
    keyfunc = lambda x:len(x)
    data = sorted(ngram_list, key=keyfunc)
    ngram_dict = {k:list(g) for k, g in groupby(data,keyfunc)}
    return ngram_dict


def text_to_ngram_dict(text, ngram_min=4, ngram_max=None):
    if ngram_max is None:
        ngram_max = len(text.split())
    ngram_list = make_ngrams(text, ngram_min, ngram_max)
    ngram_dict = sort_ngram_list(ngram_list)
    return ngram_dict


class Transcript:
    def __init__(self, ep, yt, ext='txt', write_ts_file=False, **kwargs):
        self.ep = ep
        self.yt = yt
        self.ext = ext
        self.filename = f"{self.ep.code}.{self.ext}"
        self.write_ts_file = write_ts_file

    def download_and_build_transcript(self):
        self._raw_captions = self.captions_download()
        self.transcript = self.process_transcript(captions=self._raw_captions)

    def captions_download(self):
        captions = ''
        transcript_list = YouTubeTranscriptApi.list_transcripts(self.yt.yt_id)
        transcript = None
        try:
            transcript = transcript_list.find_manually_created_transcript(['en'])
            self.manual = True
        except NoTranscriptFound:
            try:
                transcript = transcript_list.find_generated_transcript(['en'])
                self.manual = False
            except NoTranscriptFound:
                pywikibot.output(f'Youtube video for {self.ep.code} does not have any English captions')
        if transcript:
            ts_dict = transcript.fetch()

            formatter = TextFormatter()
            captions = formatter.format_transcript(ts_dict)

        # Now we can write it out to a file.
        if self.write_ts_file:
            with open(f"{self.filename}", "w") as f:
                f.write(captions)

        return captions

    def process_captions(self, captions):
        '''Combine raw captions across line breaks to create transcript.'''
        fixed_lines = ['== Pre-show ==']
        line_in_progress = ''
        active_quote = False
        during_intro = False
        intro_done = False
        during_break = False
        break_taken = False

        for line in captions.splitlines():

            # ignore blank lines
            if not line.strip():
                continue

            # ignore the intro song (with predictable beginning and end), add Part I header
            if "♪ Critical (It's Thursday)" in line and not during_intro and not intro_done:
                during_intro = True
                continue
            elif during_intro and any([x in line.lower() for x in ['(flames', 'welcome back']]):
                during_intro = False
                intro_done = True
                line_in_progress += '\n\n== Part I ==\n\n'
                continue
            elif during_intro:
                continue

            # ignore the content of the break
            if (not break_taken and not during_break and (line.startswith('MATT:') or line_in_progress.startswith('MATT:'))
                and any([x in line.lower() for x in [
                'take our break', "we'll take a break", 'go to break', 'after our break',
                "we're going to break", 'after the break', "we're going to take a break",
                'after we take a break', "take an early break",
            ]])):
                during_break = True
                line += '\n\n<!-- BREAK BEGINS '
            elif (during_break and (line.startswith('MATT:') or line_in_progress.startswith('MATT:'))
                and 'welcome back' in line.lower()):
                during_break = False
                break_taken = True
                # if line_in_progress:
                #     line_in_progress = 'BREAK ENDS -->\n\n== Part II ==\n\n' + line_in_progress
                # else:
                line_in_progress += 'BREAK ENDS -->\n\n== Part II ==\n'
            elif during_break:
                pass

            # if ongoing quote, the first '"' can be ignored
            if active_quote and line.startswith('"'):
                line = line[1:]

            # handle quotation marks
            if not active_quote and line.count('"') % 2 != 0:
                active_quote = True

            # this indicates a person is speaking (and thus a new line begins)
            if re.search('^[A-Z].*?[A-Z]:', line):
                if line_in_progress:
                    fixed_lines.append(line_in_progress)
                line_in_progress = line

            # these are non-dialogue descriptions that get their own lines (if not in middle of quote)
            elif line.startswith('(') and not line_in_progress and not active_quote:
                fixed_lines.append(line_in_progress)
                line_in_progress = ''
                fixed_lines.append(line)

            # this is a continuation of the previous line. If quotation marks are even, the active quote is done.
            elif line_in_progress:
                line_in_progress = ' '.join([line_in_progress.strip(), line.strip()]).strip()
                if line_in_progress.count('"') % 2 == 0:
                    active_quote = False

            else:
                pass
        # add last line
        fixed_lines.append(line_in_progress)

        transcript = '\n\n'.join(fixed_lines)

        # replace curly quotes and apostrophes
        transcript = (transcript
                .replace('“', '"')
                .replace('”','"')
                .replace("‘", "'")
                .replace("’", "'")
                )
        return transcript

    def check_ts_names(self, transcript):
        '''For making sure that there are no typos in speakers' names. Returns error message if not.'''
        error_warning = ''
        transcript_names = ' '.join([x.split(':')[0] for x in transcript.splitlines() if ':' in x])

        # the only lowercase word before the colon should be 'and'
        try:
            assert set(re.findall('[a-z]+', transcript_names)) == {'and'}
        except AssertionError:
            errors = [x for x in set(re.findall('[a-z]+', transcript_names)) if x != 'and']
            error_warning += f"Words besides 'and' in lower case for speaker names: {errors}" + '\n'

        # all uppercase words should be names in CR_UPPER
        try:
            assert set(re.findall('[A-Z]+', transcript_names)).issubset(SPEAKER_TAGS)
        except AssertionError:
            names = [x for x in set(re.findall('[A-Z]+', transcript_names)) if x not in SPEAKER_TAGS]
            error_warning += f"Some speaker names potentially misspelled: {names}" + '\n'

        return error_warning

    def flag_duplicates(self, transcript):
        during_break = False
        for line in transcript.splitlines():
            # don't worry about music
            if '♪' in line:
                continue

            # ignore lines during breaks
            if 'BREAK BEGINS' in line:
                during_break = True
            if 'BREAK ENDS' in line:
                during_break = False
                continue
            elif during_break:
                continue

            ngram_dict = text_to_ngram_dict(line)

            duplicate_ngrams = {k: [x for x in v if Counter(v)[x] > 1] for k,v in reversed(ngram_dict.items())
                                if len(set(v)) != len(v)}
            longest_ngrams = []
            dupe_ngrams = [ngram for v in duplicate_ngrams.values() for ngram in set(v)]
            for ngram in dupe_ngrams:
                if not any([set(ngram).issubset(x) for x in longest_ngrams]):
                    longest_ngrams.append(ngram)

            new_line = line
            for ngram in longest_ngrams:
                repeated_sentence = ' '.join(ngram)
                first_idx = new_line.find(repeated_sentence)
                second_idx = new_line.rfind(repeated_sentence)
                distance_between_lines = second_idx-(first_idx+len(repeated_sentence))
                if (-1 < distance_between_lines < 3 and line.count(repeated_sentence) == 2
                    and (repeated_sentence[0].lower() == repeated_sentence[0] or
                         repeated_sentence[-1] not in ['!', '?', '.'])):
                    new_line = f'{repeated_sentence}<!-- potential duplicate -->'.join(new_line.rsplit(repeated_sentence, 1))
                else:
                    new_line = f'{repeated_sentence}<!-- should not be a duplicate -->'.join(new_line.rsplit(repeated_sentence, 1))
            if new_line != line:
                transcript = transcript.replace(line, new_line)
        return transcript

    def process_errors(self, ts):
        '''Can add more processes later if needed.'''
        errors_comments = ''

        # verify that actor names are correct
        errors_comments += self.check_ts_names(ts)

        # add commented_out error messages to top of transcript
        if errors_comments:
            errors_comments = ''.join(['<!--', errors_comments, '-->\n\n'])
            ts = errors_comments + ts

        return ts

    def process_transcript(self, captions):
        # Step 1: remove and replace html markup
        captions = wikify_html_string(captions)

        # Step 2: Combine lines and remove extraneous quotation marks
        ts = self.process_captions(captions)

        # Step 3: Flag repeated phrases in-line
        ts = self.flag_duplicates(ts)

        # Step 4: add commented_out error messages to top of transcript
        ts = self.process_errors(ts)

        # Step 5: add navigation
        ts = '{{Transcript-Nav}}\n__FORCETOC__\n\n' + ts + '\n{{Transcript-Nav}}'

        if self.write_ts_file:
            with open(f'{self.ep.code}_fixed.{self.ext}', 'w') as f:
                f.write(ts)

        # autogenerated captions require different processing (TBD)

        return ts

def does_value_exist(infobox_obj, param_name):
    '''On a wiki, a parameter's value is blank if it either a) just whitespace or b) a comment.
    Removes whitespace and comments to see whether the value remaining is an empty string.'''
    has_param = infobox_obj.has_param(param_name)
    value = infobox_obj[param_name].value if has_param else ''
    simplified_val = remove_comments(value).strip()
    is_nonempty_val = bool(simplified_val)
    return (has_param and value and is_nonempty_val)


class EpisodeBot(
    # Refer to pywikobot.bot for generic bot classes
    SingleSiteBot,  # A bot only working on one site
    ConfigParserBot,  # A bot which reads options from scripts.ini setting file
    ExistingPageBot,  # CurrentPageBot which only treats existing pages
    AutomaticTWSummaryBot,  # Automatically defines summary; needs summary_key
):
    """
    :ivar summary_key: Edit summary message key. The message that should be
        used is placed on /i18n subdirectory. The file containing these
        messages should have the same name as the caller script (i.e. basic.py
        in this case). Use summary_key to set a default edit summary message.
    :type summary_key: str
    """

    use_redirects = False  # treats non-redirects only
    summary_key = 'basic-changing'

    update_options = {
        'summary': 'Updating newly-released episode page (via pywikibot)',
        'yt': None, # YT object
        'runtime': None,  # how long the episode goes for
        'old_ep_name': None,  # the old placeholder name of the episode
        'new_ep_name': None,  # the new official name of the episode
        'new_page_name': None,  # if different from episode title (usually 'A' vs 'A (episode)')
        'ep': None,  # Ep object
        'image_name': None,  # unless specified, builds automatically from make_image_filename(ep_id)
        'actors': None, # Actors object. list of actors in thumbnail image (optional)
        'host': None,  # the host (4SD) or GM (one-shot, defaults to Matt)
        'game_system': None,  # rules for gameplay (one-shot, defaults to Dungeons & Dragons)
        'airdate': None,  # usually from episode page, used to update list of episodes
        'airtime': None,  # usually from episode page
        'episode_summary': None,  # taken from list of episodes to add to episode page
        'summary_only': None,  # for only adding episode_summary to episode page
        'airdate_dict': None,  # for using airdates to determine 4SD-C3 episodes
        'array_dicts': None, # for converting episode codes into page names and episode titles
        'all': None,  # run: -update_page, -move, -upload, -ep_list, -yt_switcher, -ep_array, -transcript, -redirects, -navbox
        'update_page': None,  # update the contents of the episode page (may still need to access for info)
        'move': None,  # move page (only if new page name exists & is different from old one)
        'upload': None,  # upload the YouTube video thumbnail
        'main_page': None,  # check the main page has the latest thumbnail
        'ep_list': None,  # add to/update list of episodes
        'airdate_order': None,  # add to/update the airdate order
        'yt_switcher': None,  # add to/update the yt url switcher
        'ep_array': None,  # add to/update the ep array
        'transcript': None,  # create episode transcript page (auto-skips TRANSCRIPT_EXCLUSIONS)
        'transcript_list': None,  # add transcript page to list of transcripts (auto-skips TRANSCRIPT_EXCLUSIONS)
        'redirects': None,  # add/update redirects from ep_id to new_page_name
        'navbox': None,  # add ep_id to the appropriate navbox template
        '4SD': None,  # add 4SD param to 3xNN pages (4SD only)
    }

    def get_wikicode(self):
        text = self.current_page.text
        wikicode = mwparserfromhell.parse(text)
        return wikicode

    def get_infobox(self, wikicode=None):
        if wikicode is None:
            wikicode = self.get_wikicode()
        return next(x for x in wikicode.filter_templates() if x.name.matches(INFOBOX))

    def move_page(self) -> None:
        move_summary = 'Moving page to new episode name (via pywikibot)'

        # get the new target title
        if not (self.opt.new_page_name or self.opt.new_ep_name):
            target_title = pywikibot.input("Please enter the new name of the episode")
        elif not self.opt.new_page_name:
            target_title = self.opt.new_ep_name
        else:
            target_title = self.opt.new_page_name

         # make sure doesn't conflict with existing page (handling redirects separately)
        target_page = pywikibot.Page(self.site, target_title)
        if target_page.exists() and not target_page.isRedirectPage():
            add_end = pywikibot.input_yn(f"{self.opt.new_page_name} already exists. Add ' (episode)' to page name?")
            if add_end:
                self.opt.new_page_name = target_title + " (episode)"
            else:
                new_name = pywikibot.input(f"Please enter new page name for {target_title}")
                self.opt.new_page_name = new_name
        elif target_page.exists():
            overwrite = pywikibot.input_yn(f"{self.opt.new_page_name} is a redirect. Overwrite?")
            if overwrite:
                new_name = pywikibot.input(f"Please enter new page name for {target_title}")
                self.opt.new_page_name = new_name
        else:
            self.opt.new_page_name = self.opt.new_ep_name

        move_it = pywikibot.input_yn(f"Move [[{self.current_page.title()}]] to [[{self.opt.new_page_name}]]?")
        if move_it:
            pywikibot.output(f"Moving page from [[{self.current_page.title()}]] to [[{self.opt.new_page_name}]]")
            self.current_page.move(self.opt.new_page_name,
                                   reason=move_summary,
                                   )
            pywikibot.output('Page move complete.')
        else:
            pywikibot.output('Page move skipped.')

    def update_summary(self, wikicode=None):
        '''For adding the episode summary to the intro paragraph of the episode article.
        It can be added after it is retrieved from an existing list of episodes entry parameter,
        or it can be passed into the opening command (presumably, from YouTube description).
        '''
        if self.opt.summary_only:
            self.current_page = pywikibot.Page(self.site, self.opt.new_page_name)

        if wikicode is None:
            wikicode = self.get_wikicode()
        else:
            text = str(wikicode)

        # if there's an episode summary not included in the text, create new article text
        if self.opt.episode_summary and self.opt.episode_summary not in str(wikicode):
            old_intro = str(wikicode.get_sections(include_lead=True, flat=True)[0])
            new_intro = old_intro.rstrip() + ' ' + self.opt.episode_summary + '\n\n'
            new_text = str(wikicode).replace(old_intro, new_intro)
        else:
            new_text = text

        # have editor decide whether to add on the summary or not
        if new_text != text:
            pywikibot.showDiff(text, new_text)
            do_it = pywikibot.input_yn('Continue with summary addition?')
            if do_it:
                wikicode = mwparserfromhell.parse(new_text)
                if self.opt.summary_only:
                    self.put_current(new_text, summary=self.opt.summary)
            else:
                pass

        return wikicode

    def treat_page(self) -> None:
        """Load the episode page, change the relevant fields, save it, move it."""
        ep = self.opt.ep

        if not self.opt.old_ep_name:
            self.opt.old_ep_name = self.current_page.title()

        old_ep_name = self.opt.old_ep_name
        self.current_page = pywikibot.Page(self.site, old_ep_name)
        wikicode = deepcopy(self.get_wikicode())
        infobox = self.get_infobox(wikicode=wikicode)

        infobox['VOD'] = ep.wiki_vod

        if self.opt.runtime and not does_value_exist(infobox, param_name='Runtime'):
            infobox['Runtime'] = ' ' + self.opt.runtime.lstrip()
        # get the airdate & airtime so it can be used later, or prompt if infobox conflicts w/user entry
        if infobox.has_param('Airdate') and self.opt.airdate:
            if Airdate(infobox['Airdate'].value.strip()).date == self.opt.airdate.date:
                pass
            else:
                airdate_1 = Airdate(infobox['Airdate'].value.strip()).date
                airdate_2 = self.opt.airdate.date
                if len(airdate_1) and airdate_1 != airdate_2:
                    new_airdate_string = get_validated_input(arg='airdate', regex=DATE_REGEX, input_msg=f'Infobox airdate {airdate_1} does not match entered airdate {airdate_2}. Enter airdate (YYYY-MM-DD):')
                    new_airdate = Airdate(new_airdate_string)
                    self.opt.airdate.datetime = self.opt.airdate.datetime.replace(**{x: getattr(new_airdate.datetime, x) for x in ['day', 'month', 'year']})
                    infobox['Airdate'] = new_airdate.date
                else:
                    infobox['Airdate'] = self.opt.airdate
        elif infobox.has_param('Airdate') and not self.opt.airdate:
            self.opt.airdate = Airdate(infobox['Airdate'].value.strip())
        else:
            self.opt.airdate = ""
        if infobox.has_param('Airtime') and not self.opt.airtime:
            # add airtime to airdate object
            self.opt.airtime = Airdate(infobox['Airtime'].value.strip())
            if self.opt.airtime:
                self.opt.airdate = Airdate(datetime.combine(
                    self.opt.airdate.datetime.date(),
                    self.opt.airtime.datetime.timetz()))
        else:
            self.opt.airtime = ""

        # if image field is already filled in beyond comments, cancel thumbnail procedure
        if does_value_exist(infobox, param_name='Image'):
            pywikibot.output(f"Value '{(remove_comments(infobox['Image'].value)).strip()}' in image field detected; thumbnail will not be uploaded")
            self.opt.upload = False
        elif self.opt.image_name:
            infobox['Image'] = ' ' + self.opt.image_name.lstrip()
        else:
            infobox['Image'] = ' ' + ep.image_filename

        # only write caption if field not filled in or missing
        if not infobox.has_param('Caption') or not does_value_exist(infobox, param_name='Caption'):
            infobox['Caption'] = make_image_caption(actors=self.opt.actors, ep=ep)

        if not any([x.name.matches(ep.navbox_name.replace('Template:', '')) for x in wikicode.filter_templates()]):
            wikicode.append('\n' + f"{{{{{ep.navbox_name.replace('Template:', '')}}}}}")

        if self.opt.episode_summary:
            wikicode = self.update_summary(wikicode=wikicode)

        text = str(wikicode)

        if self.opt.update_page:
            self.put_current(text, summary=self.opt.summary)

        if (self.opt.move or self.opt.all) and self.opt.new_page_name != self.opt.old_ep_name:
            self.move_page()


class EpArrayBot(EpisodeBot):
    '''Change the display value for ep_array and add page title as value.
    If an entry for the episode does not already exist, it will create one after prev_ep_id.'''

    def get_array_dicts(self):
        if not self.opt.array_dicts:
            self.opt.array_dicts = self.make_array_dicts()
        return self.opt.array_dicts

    def make_array_dicts(self):
        self.current_page = pywikibot.Page(self.site, EP_ARRAY)
        array_dicts = []
        text = self.current_page.text

        for x in re.finditer(ARRAY_ENTRY_REGEX, text):
            y = x.groupdict()
            if not y['pagename']:
                y['pagename'] = ''
            if y['altTitles']:
                y['altTitles'] = re.findall('"(.*?)"', y['altTitles'])
            else:
                y['altTitles'] = []
            array_dicts.append(y)
        return array_dicts

    def dict_to_entry(self, array_dict):
        '''for turning one of these dicts into a string'''
        entry = ''
        for k, v in array_dict.items():
            if not v:
                continue
            elif k == 'epcode':
                entry += f'    ["{v}"] = {{' + '\n'
            elif isinstance(v, str):
                entry += f'        ["{k}"] = "{v}"' + ',\n'
            elif isinstance(v, list):
                list_string = ', '.join([f'"{x}"' for x in v])
                entry += f'        ["{k}"] = {{{list_string}}}' + ',\n'
            else:
                raise
        entry += '    },\n'
        return entry

    def build_full_array_page(self, array_dicts):
        array_string = 'return {\n'
        for array_dict in array_dicts:
            entry = self.dict_to_entry(array_dict)
            array_string += entry
        dict_string += '}'
        return dict_string

    def get_current_dict(self, array_dicts):
        '''Get array dict for current episode'''
        ep = self.opt.ep
        current_entry = next((x for x in array_dicts if x['epcode'] ==
            ep.code), '')
        return current_entry

    # replace self.opt.ep.get_prev_episode().code
    def get_previous_dict(self, array_dicts):
        prev_ep_code = self.opt.ep.get_prev_episode().code
        prev_entry = next((x for x in array_dicts if x['epcode'] ==
            prev_ep_code), '')
        return prev_entry

    def build_new_array_dict(self):
        '''Creating values for the fields that would populate an episode entry.'''
        ep = self.opt.ep
        if ep.prefix == '4SD':
            display_title = "''4-Sided Dive'': " + self.opt.new_ep_name
        else:
            display_title = self.opt.new_ep_name

        if self.opt.old_ep_name not in [self.opt.new_ep_name, self.opt.new_page_name]:
            ep_values = [self.opt.old_ep_name.lower()]
        else:
            ep_values = []

        if self.opt.new_page_name != display_title:
            pagename = self.opt.new_page_name
        else:
            pagename = ''

        array_dict = {
            'epcode': ep.code,
            'title': display_title,
            'pagename': pagename,
            'altTitles': ep_values,
        }
        return array_dict

    def update_new_dict(self, new_dict, current_dict):
        '''Add the existing altTitles together, but assume new_dict is otherwise correct.'''
        new_dict['altTitles'] = list(dict.fromkeys(new_dict['altTitles'] + current_dict.get('altTitles')))
        return new_dict

    def treat_page(self):
        self.current_page = pywikibot.Page(self.site, EP_ARRAY)
        text = self.current_page.text
        ep = self.opt.ep

        current_entry = next((x for x in re.split('\n    \},\n',
                            text) if re.search(f'\["{ep.code}"\]', x)),
                            '')
        if current_entry:
            current_entry += '\n    },\n'
            array_dicts = self.get_array_dicts()
            current_dict = self.get_current_dict(array_dicts=array_dicts)
        else:
            prev_entry = next((x for x in re.split('\n    \},\n',
                            text) if re.search(f'\["{ep.get_previous_episode().code}"\]', x)),
                            '') + '\n    },\n'
            current_dict = {}

        new_dict = self.build_new_array_dict()
        new_dict = self.update_new_dict(new_dict, current_dict)

        # Make sure that for 3xNN episode codes it is also "c3 latest"
        if ep.prefix == '3' and 'c3 latest' not in new_dict['altTitles']:
            text = re.sub('c3 latest(, )?', '', text)
            new_dict['altTitles'].append('c3 latest')

        new_entry = self.dict_to_entry(new_dict)

        if current_entry:
            text = text.replace(current_entry, new_entry)
        else:
            text = text.replace(prev_entry, '\n'.join([prev_entry, new_entry]))

        self.put_current(text, summary=f"Updating {ep.code} entry (via pywikibot)")


class YTSwitcherBot(EpisodeBot):
    '''Add yt_link as value by updating or creating entry'''
    def treat_page(self):
        self.current_page = pywikibot.Page(self.site, YT_SWITCHER)
        page = self.current_page
        text = page.text
        ep = self.opt.ep
        yt = self.opt.yt
        prev_ep = ep.get_previous_episode()

        # if it already exists as an entry, substitute in yt_link
        if ep.code in text:
            text = re.sub(fr'\["{ep.code}"\]\s*=.*', fr'["{ep.code}"] = "{yt.url}",', text)

        # if previous episode is already there, append after it
        elif prev_ep.code in text:
            prev_entry = next(x for x in text.splitlines()
                if any([y in x for y in prev_ep.generate_equivalent_codes()]))
            new_entry = f'    ["{ep.code}"]  = "{yt.url}",'
            text = text.replace(prev_entry,
                                '\n'.join([prev_entry, new_entry])
                                )
        # otherwise, append episode to the end of the list
        else:
            text = text.replace('["default"] = ""',
                                f'["{ep.code}"]  = "{yt.url}",\n    ["default"] = ""')

        self.put_current(text, summary=f"Adding youtube link for {ep.code} (via pywikibot)")


class EpListBot(EpisodeBot):
    '''For updating a list of episodes with a brand-new entry or new values for the current episode.'''

    def build_episode_entry_dict(self):
        ep = self.opt.ep
        '''Creating values for the fields that would populate an episode entry.'''
        if self.opt.host:
            host = self.opt.host.make_actor_list_string()
        else:
            host = ''
        if self.opt.ep.prefix == '4SD':
            color = '6f4889'
        else:
            color = ''
        if self.opt.ep.prefix == 'OS':
            game_system = self.opt.game_system
        else:
            game_system = ''
        entry_dict = {
            'no': str(ep.number),
            'ep': ep.wiki_code,
            'airdate': self.opt.airdate.date,
            'VOD': ep.wiki_vod,
            'runtime': self.opt.runtime,
            'aux1': host,
            'aux2': game_system,
            'summary': self.opt.episode_summary,
            'color': color,
        }
        return entry_dict

    def build_episode_entry(self):
        '''Create the string for a brand new episode entry.'''
        entry_dict = self.build_episode_entry_dict()

        ep_entry = "{{Episode table entry\n"
        for k, v in entry_dict.items():
            if v:
                ep_entry += f'|{k} = {v}' + '\n'
        ep_entry += '}}'

        return ep_entry

    def treat_page(self):
        '''Also has the option of getting an existing episode summary from the page.'''
        ep = self.opt.ep
        prev_ep = ep.get_previous_episode()

        list_page_name = ep.list_page
        if not list_page_name:
            list_page_name = pywikibot.input(f"Please enter name of list of episodes page for {ep.code}")

        self.current_page = pywikibot.Page(self.site, list_page_name)
        wikicode = deepcopy(self.get_wikicode())
        text = str(wikicode)
        # if previous episode isn't there, search episode num - 1 until find one (otherwise none)
        while prev_ep and (prev_ep.code.lower() not in text.lower()):
            prev_ep = prev_ep.get_previous_episode(prev_ep.code)

        # create new table entry from scratch if it doesn't exist yet, inserting after previous episode
        if not re.search(fr'\|\s*ep\s*=\s*{{{{(E|e)p\|{ep.code}}}}}', text):
            ep_entry = self.build_episode_entry()
            previous_entry_wiki = next((x for x in wikicode.filter_templates()
                if x.name.matches('Episode table entry') and prev_ep.code in x['ep']), '')
            if previous_entry_wiki:
                previous_entry = ''.join(['|' + str(x) for x in previous_entry_wiki.params]) + '}}'
                if previous_entry in text:
                    text = text.replace(previous_entry, '\n'.join([previous_entry, ep_entry]))
                else:
                    pywikibot.output(f"Episode table entry for {prev_ep.code} not formatted correctly; cannot insert {ep.code} entry")
            elif '}}<section end="episodes" />' in text:
                text = text.replace('}}<section end="episodes" />',
                                    ep_entry + '\n}}<section end="episodes" />')
            elif '<!-- Place new entries ABOVE this line -->' in text:
                text = text.replace('<!-- Place new entries ABOVE this line -->',
                                    ep_entry + '\n<!-- Place new entries ABOVE this line -->')
            else:
                pywikibot.output("No previous entry or end-of-section marker to append to")
        # if the table entry exists, update any individual params to the new ones in ep_entry_dict
        else:
            ep_entry_dict = self.build_episode_entry_dict()
            existing_entry = next(x for x in wikicode.filter_templates()
                if x.name.matches('Episode table entry') and ep.code in x['ep'])
            for k, v in ep_entry_dict.items():
                if v and not (existing_entry.has_param(k) and existing_entry[k].value.strip() == v):
                    if len(str(v).strip()):
                        existing_entry[k] = v
                    else:
                        existing_entry[k] = ' \n' # adding wiki standard whitespace padding
                else:
                    pass  # any values already in the table & not in the newly-created entry will be kept

            # offer the episode summary if available and if episode page is to be updated
            if not self.opt.episode_summary and self.opt.update_page and len(existing_entry['summary'].value.strip()):
                eplist_summary = existing_entry['summary'].value.strip()
                summ = pywikibot.input_yn(f'\n{eplist_summary}\nUse above existing episode list entry summary on episode page?')
                if summ:
                    self.opt.episode_summary = eplist_summary
                else:
                    pass
            text = str(wikicode)

        self.put_current(text, summary=f"Updating entry for {ep.code} (via pywikibot)")


class TranscriptBot(EpisodeBot):
    '''For creating the transcript page by downloading and processing youtube captions.'''

    def build_transcript(self):
        ts = Transcript(ep=self.opt.ep, yt=self.opt.yt)
        ts.download_and_build_transcript()
        transcript = ts.transcript
        return transcript

    def treat_page(self):
        url = self.opt.new_page_name + '/Transcript'
        self.current_page = pywikibot.Page(self.site, url)
        if self.current_page.exists() and self.current_page.text:
            pywikibot.output(f'Transcript page already exists for {self.opt.new_page_name}; transcript creation skipped')
        else:
            transcript = self.build_transcript()
            self.put_current(transcript, summary=f"Creating {self.opt.ep.code} transcript (via pywikibot)")


class TranscriptListBot(EpisodeBot):
    '''For updating the list of transcripts with the transcript of the newest episode.'''
    def build_transcript_entry(self):
        transcript_entry = f"""* {self.opt.ep.wiki_code} [[{self.opt.new_page_name}/Transcript|Transcript]]"""
        return transcript_entry

    def treat_page(self):
        ep = self.opt.ep
        self.current_page = pywikibot.Page(self.site, TRANSCRIPTS_LIST)
        text = self.current_page.text
        ep_entry = self.build_transcript_entry()

        # create new entry from scratch if it doesn't exist yet, inserting after previous episode
        if ep.code not in text:
            ep_entry = self.build_transcript_entry()
            prev_ep = ep.get_previous_episode()
            # if previous episode isn't there, search episode num - 1 until find one (otherwise none)
            while prev_ep and (prev_ep.code not in text):
                prev_ep = prev_ep.get_previous_episode(prev_ep.code)
            prev_ep_entry = next((x for x in text.splitlines() if prev_ep.code in x), '== Miscellaneous ==')
            text = text.replace(prev_ep_entry,
                                '\n'.join([prev_ep_entry, ep_entry]))

            self.put_current(text, summary=f"Add entry for {ep.code} (via pywikibot)")
        # if it exists, replace entry with current values if needed
        else:
            current_entry = next((x for x in text.splitlines() if ep.code in x), None)
            text = text.replace(current_entry, ep_entry)
            self.put_current(text, summary=f"Updating entry for {ep.code} (via pywikibot)")


class RedirectFixerBot(EpisodeBot):
    '''Insures all viable CxNN redirects exist and point at new_page_name.'''
    use_redirects: True

    def treat_page(self):
        ep = self.opt.ep
        for code in ep.generate_equivalent_codes():
            self.current_page = pywikibot.Page(self.site, code)
            text = f"#REDIRECT [[{self.opt.new_page_name}]]"
            self.put_current(text, summary="Updating/creating episode redirects (via pywikibot)")


class NavboxBot(EpisodeBot):
    '''Makes sure the episode code appears on the accompanying navbox'''

    def treat_page(self):
        ep = self.opt.ep
        prev_ep = ep.get_previous_episode()
        navbox_name = ep.navbox_name
        self.current_page = pywikibot.Page(self.site, navbox_name)
        wikicode = deepcopy(self.get_wikicode())
        if ep.code not in str(wikicode):
            navbox = next(x for x in wikicode.filter_templates() if x.name.matches('Navbox'))
            ep_list = next(p for p in navbox.params if prev_ep.code in p)
            if prev_ep.wiki_code in ep_list:
                ep_list.value.replace(prev_ep.wiki_code, f'{prev_ep.wiki_code} • {ep.wiki_code}')
            elif prev_ep.code in ep_list:
                ep_list.value.replace(prev_ep.code, f'{prev_ep.code} • {ep.code}')
        self.put_current(str(wikicode), summary=f"Adding {ep.code} to navbox (via pywikibot)")


class AirdateBot(EpisodeBot):
    '''For updating the airdate module with the newest episode's airdate.'''

    def build_airdate_entry(self):
        if self.opt.airtime:
            airdate = self.opt.airdate.date_and_time
        else:
            airdate = self.opt.airdate.date
        airdate_entry = f'''    {{epCode = "{self.opt.ep.code}", date = "{airdate}"}},'''
        return airdate_entry

    def parse_airdate_page(self):
        airdate_module_regex = '\{epCode = "(?P<ep_code>.*?)", date = "(?P<airdate_entry>.*)"\}'
        self.current_page = pywikibot.Page(self.site, AIRDATE_ORDER)
        text = self.current_page.text

        airdate_dict = {}
        for ad in re.finditer(airdate_module_regex, text):
            ep_code = ad.group('ep_code')
            airdate = Airdate(ad['airdate_entry'])
            if ep_code == self.opt.ep.code and airdate.datetime != self.opt.airdate.datetime:
                pywikibot.output(f'Airdate on {self.current_page.title()} does not match for \
{self.opt.ep.code}: <<yellow>>{airdate.date_and_time}<<default>> vs <<yellow>>{self.opt.airdate.date_and_time}<<default>>')
                return None
            airdate_dict[ep_code] = airdate
        airdate_dict = {k: v for k, v in sorted(airdate_dict.items(), key=lambda item: item[1].date_and_time)}

        return airdate_dict

    def get_airdate_dict(self):
        if self.opt.airdate_dict is None:
            self.opt.airdate_dict = self.parse_airdate_page()
        airdate_dict = self.opt.airdate_dict

        # add current episode if applicable
        if self.opt.ep.code not in airdate_dict:
            airdate_dict[self.opt.ep.code] = self.opt.airdate

        return airdate_dict

    def get_previously_aired_episode(self):
        '''Sort the episodes by airdate in reverse. Find the most recent episode that is older than current'''
        airdate_dict = self.get_airdate_dict()

        reversed_airdate_dict = dict(sorted(airdate_dict.items(),
                                            key=lambda item: item[1].date_and_time,
                                            reverse=True,
                                            ))
        last_earlier_ep_id = next(iter([k for k, v in reversed_airdate_dict.items()
            if v.datetime < self.opt.airdate.datetime]))
        return last_earlier_ep_id

    def get_latest_episodes_by_type(self):
        '''For every prefix in CURRENT_PREFIXES, get the most recently aired episode'''
        airdate_dict = self.get_airdate_dict()
        aired = {k: v for k, v in airdate_dict.items() if v.datetime <= datetime.now().astimezone()}
        latest_episodes = [next((Ep(k) for k in reversed(aired.keys())
            if Ep(k).prefix == prefix), Ep(f"{prefix}x01")) for prefix in CURRENT_PREFIXES]
        return latest_episodes

    def treat_page(self):
        ep = self.opt.ep
        self.current_page = pywikibot.Page(self.site, AIRDATE_ORDER)
        text = self.current_page.text

        # create new entry from scratch
        new_entry = self.build_airdate_entry()

        # save airdate_dict to options
        airdate_dict = self.get_airdate_dict()
        if not airdate_dict:
            pywikibot.output('Airdate module process canceled due to date mismatch.')
            return None
        self.opt.airdate_dict = airdate_dict

        if ep.code not in text:
            prev_ep = Ep(self.get_previously_aired_episode())
            prev_entry = next(x for x in text.splitlines() if prev_ep.code in x)
            text = text.replace(prev_entry,
                                '\n'.join([prev_entry, new_entry])
                                )
        else:
            current_entry = next(x for x in text.splitlines() if ep.code in x)
            text = text.replace(current_entry, new_entry)

        self.put_current(text, summary=f"Adding airdate for {ep.code} (via pywikibot)")


class Connect4SDBot(AirdateBot, EpArrayBot):
    '''For updating C3 episode pages with the first 4SD episode after their airdate.'''

    def get_connected_episodes(self, restrict_c3=True):
        '''For constructing the list of (C3) episodes connected to the current 4SD episode.'''
        airdate_dict = self.get_airdate_dict()
        if not airdate_dict:
            pywikibot.output('4SD connector process has been canceled due to date mismatch.')
            return None
        array_dicts = self.get_array_dicts()
        ep_4SD = self.opt.ep
        prev_4SD = self.opt.ep.get_previous_episode()
        eps = list(airdate_dict.keys())
        if ep_4SD.code in eps:
            affected_episodes = eps[(eps.index(prev_4SD.code)+1):eps.index(ep_4SD.code)]
        else:
            affected_episodes = eps[(eps.index(prev_4SD.code)+1):]
        if restrict_c3:
            affected_episodes = [x for x in affected_episodes if Ep(x).prefix == '3']
        affected_pages = ([array_dict['pagename'] if array_dict.get('pagename')
                           else array_dict['title'] for array_dict in array_dicts
                           if array_dict['epcode'] in affected_episodes])
        return affected_pages

    def update_episode_page(self):
        '''Procedure for updating a single episode page's 4SD parameter.'''
        ep = self.opt.ep
        wikicode = deepcopy(self.get_wikicode())
        infobox = self.get_infobox(wikicode=wikicode)
        if not infobox.has_param('4SD') or not does_value_exist(infobox, param_name='4SD'):
            infobox.add('4SD', ep.wiki_code, showkey=None,
                         before='Podcast', preserve_spacing=True)
        self.put_current(str(wikicode), summary="Adding 4SD to infobox (via pywikibot)")

    def treat_page(self):
        assert self.opt.ep.prefix == '4SD'
        # self.get_needed_dicts()
        affected_pages = self.get_connected_episodes()
        if not affected_pages:
            return None
        for page in affected_pages:
            self.current_page = pywikibot.Page(self.site, page)
            self.update_episode_page()


class MainPageBot(AirdateBot, EpArrayBot):
    '''For checking that the articles are the latest on the main page
    NOTE: Depends on airdate module to determine latest episode
    '''
    def check_for_latest_episodes(self, latest_episodes, text):
        pass

    def treat_page(self):
        # self.get_needed_dicts()
        # airdate_dict = self.get_airdate_dict()
        array_dicts = self.get_array_dicts()
        latest_episodes = self.get_latest_episodes_by_type()
        self.current_page = pywikibot.Page(self.site, 'Main Page')
        text = self.current_page.text
        all_ok = True
        for ep in latest_episodes:
            valid_ep = next(x for x in array_dicts
                if x['epcode'].lower() == ep.code.lower())
            valid_codes = [valid_ep['title'], valid_ep['epcode']]
            if valid_ep.get('pagename'):
                valid_codes.append(valid_ep['pagename'])
            if valid_ep.get('alt_titles'):
                valid_codes += valid_ep['alt_titles']
            if any([x for x in valid_codes if x.lower() in text.lower()]):
                pass
            else:
                all_ok = False
                pywikibot.output(f"Latest episode of {ep.show} missing from main page: <<yellow>>{ep.code}<<default>>")

        if all_ok:
            pywikibot.output(f'All latest episodes {latest_episodes} already on main page')


def main(*args: str) -> None:
    """
    Process command line arguments and invoke bot.
    If args is an empty list, sys.argv is used.
    :param args: command line arguments
    """
    options = {}
    # Process global arguments to determine desired site
    local_args = pywikibot.handle_args(args)

    # get global page name and set as local options['old_ep_name']
    page = ''
    for arg in local_args:
        arg, _, value = arg.partition(':')
        if arg[1:] == 'page':
            page = value.strip()
            options['old_ep_name'] = page

    if not options.get('old_ep_name'):
        print('''\nNo page given. Please add a pagename with `-page:"PAGENAME" and try again.\n''', file=sys.stderr)
        sys.exit()

    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    gen_factory = pagegenerators.GeneratorFactory()

    # Process pagegenerators arguments
    local_args = gen_factory.handle_args(local_args)

    # Parse script-specific command line arguments
    for arg in local_args:
        arg, _, value = arg.partition(':')
        option = arg[1:]
        if option in ['ep_id', 'ep']:
            value = get_validated_input(arg='ep', value=value, regex=EP_REGEX)
            options['ep'] = Ep(value)
        elif option in ['yt_id', 'yt']:
            value = get_validated_input(arg=option, value=value, regex=YT_ID_REGEX)
            options['yt'] = YT(value)
        elif option in ['actors', 'host']:
            if option == 'actors':
                options[option] = Actors(value)
            elif option == 'host':
                options[option] = Actors(value, link=False)
        elif option == 'airdate':
            if re.search(DATE_2_REGEX, value):
                pass
            else:
                value = get_validated_input(arg=option, value=value, regex=DATE_REGEX)
            options['airdate'] = Airdate(value)
        elif option == 'airtime':
            options['airtime'] = Airdate(value)
        elif option in ('summary', 'actors', 'runtime', 'new_ep_name', 'episode_summary'):
            if not value:
                value = pywikibot.input('Please enter a value for ' + arg)
            options[option] = value
        # take the remaining options as booleans.
        else:
            options[option] = True

    # add airtime to airdate if both were entered by user
    if options.get('airdate') and options.get('airtime'):
        options['airdate'] = Airdate(datetime.combine(options['airdate'].datetime.date(),
                                                      options['airtime'].datetime.timetz()))

    # handle which things to run if all is selected, and set to False any not yet defined
    for task in ['update_page', 'move', 'upload', 'ep_list', 'yt_switcher', 'ep_array',
                 'main_page', 'airdate_order', 'transcript', 'transcript_list', 'redirects',
                 'navbox', '4SD']:
        if options.get('all'):
            options[task] = True
        elif not options.get(task):
            options[task] = False

    # get user input for required values that were not passed in.
    # only required if certain tasks will be conducted
    required_options = ['ep', 'yt', 'new_ep_name', 'runtime', 'actors']
    for req in required_options:
        if req not in options:
            if req == 'yt' and any([options.get(x) for x in ['update_page', 'ep_list', 'yt_list', 'transcript']]):
                value = get_validated_input(arg='yt', regex=YT_ID_REGEX, input_msg="Please enter 11-digit YouTube ID for the video")
                options[req] = YT(value)
            elif req == 'new_ep_name':
                if any([options.get(x) for x in ['update_page', 'move']]):
                    value = pywikibot.input(f"If {options['old_ep_name']} will be moved, enter new page name")
                else:
                    value = ''
                if len(value.strip()):
                    options[req] = value
                else:
                    options[req] = options['old_ep_name']
            elif req == 'actors' and any([options.get(x) for x in ['update_page', 'upload']]):
                value = pywikibot.input(f"Optional: L-R actor order in {options['ep']} thumbnail (first names ok)")
                options[req] = Actors(value)
            elif req == 'runtime' and any([options.get(x) for x in ['update_page', 'ep_list']]):
                value = get_validated_input(arg='runtime', regex='\d{1,2}:\d{1,2}(:\d{1,2})?', input_msg="Please enter video runtime (HH:MM:SS or MM:SS)")
                options['runtime'] = value
            elif req in ['ep']:
                value = get_validated_input(arg=req, value=value, regex=EP_REGEX)

    # default new page name is same as new episode name (and page being parsed)
    if not options.get('new_page_name'):
        options['new_page_name'] = options['new_ep_name']

    # if 4SD, make sure host is provided. If one-shot, default host/DM/GM to Matt.
    if options['ep'].prefix == '4SD' and not options.get('host'):
        host = pywikibot.input(f"4-Sided Dive host for {options['ep'].code} (first name ok)")
        options['host'] = Actors(host, link=False)
    if options['ep'].prefix == 'OS':
        host = next((options[x] for x in ['host', 'DM', 'GM', 'dm', 'gm']
            if options.get(x)), 'Matthew Mercer')
        options['host'] = Actors(host, link=False)

    # if one-shot, default game system is D&D.
    if options['ep'].prefix == 'OS' and not options.get('game_system'):
        options['game_system'] = 'Dungeons & Dragons'

    # The preloading option is responsible for downloading multiple
    # pages from the wiki simultaneously.
    gen = gen_factory.getCombinedGenerator(preload=True)

    # check if further help is needed
    if not pywikibot.bot.suggest_help(missing_generator=not gen):
        # pass generator and private options to the bots
        bot1 = EpisodeBot(generator=gen, **options)
        # if page is a redirect to new_page_name, warn user and cancel procedure
        page = pywikibot.Page(bot1.site, options['old_ep_name'])
        if page.isRedirectPage() and page.getRedirectTarget().title() == options['new_page_name']:
            pywikibot.output('\n' + f'The value after -page, "{options["old_ep_name"]}", is a redirect.')
            color = 'yellow'
            pywikibot.output(f'Please use <<{color}>>-page:"{page.getRedirectTarget().title()}"<<default>> and try again.' + '\n')
            return None
        bot1.run()

        # get the airdate info & new page name from episode page processing & moving
        if not options.get('old_ep_name'):
            options['old_ep_name'] = bot1.opt.old_ep_name
        if options.get('airdate') != bot1.opt.airdate:
            options['airdate'] = bot1.opt.airdate
        if not options.get('airdate'):
            options['airdate'] = bot1.opt.airdate
        if not options.get('airtime'):
            options['airtime'] = bot1.opt.airtime
            if options['airtime']:
                options['airdate'] = Airdate(datetime.combine(
                    options['airdate'].datetime,
                    options['airtime'].datetime.timetz()))
        if options.get('new_page_name') != bot1.opt.new_page_name:
            options['new_page_name'] = bot1.opt.new_page_name
        # if image thumbnail field was filled in, do not upload.
        if bot1.opt.upload is False:
            options['upload'] = False

        if options.get('upload'):
            description = make_image_file_description(ep=options['ep'],
                                                      actors=options.get('actors'),
                                                      )
            summary = f"{options['ep'].code} episode thumbnail (uploaded via pywikibot)"
            filename = options['ep'].image_filename
            thumbnail_bot = UploadRobot(
                generator=gen,
                url=options['yt'].thumbnail_url,
                description=description,
                use_filename=filename,
                summary=summary,
                verify_description=True,
            )
            thumbnail_bot.run()

        if options.get('ep_array'):
            bot2 = EpArrayBot(generator=gen, **options)
            bot2.treat_page()
            options['array_dicts'] = bot2.opt.array_dicts

        if options.get('yt_switcher'):
            bot3 = YTSwitcherBot(generator=gen, **options)
            bot3.treat_page()

        if options.get('ep_list'):
            bot4 = EpListBot(generator=gen, **options)
            bot4.treat_page()
            if bot4.opt.episode_summary and not options.get('episode_summary') and options.get('update_page'):
                options['episode_summary'] = bot4.opt.episode_summary
                tinybot = EpisodeBot(generator=gen,
                                     summary=f"Adding {options['ep'].code} summary (via pywikibot)",
                                     summary_only=True,
                                     **{k: v for k, v in options.items() if k in ['episode_summary',
                                                                                  'new_page_name']})
                tinybot.update_summary()
                # TO DO: run teeny episodebot with one option for summary

        if options.get('transcript'):
            if options['ep'].prefix in TRANSCRIPT_EXCLUSIONS:
                pywikibot.output(f'Skipping transcript page creation for {options["ep"].show} episode')
            else:
                bot5 = TranscriptBot(generator=gen, **options)
                bot5.treat_page()

        if options.get('transcript_list'):
            if options['ep'].prefix in TRANSCRIPT_EXCLUSIONS:
                pywikibot.output(f'Skipping transcript list update for {options["ep"].show} episode')
            else:
                bot6 = TranscriptListBot(generator=gen, **options)
                bot6.treat_page()

        if options.get('redirects'):
            bot7 = RedirectFixerBot(generator=gen, **options)
            bot7.treat_page()

        if options.get('navbox'):
            bot8 = NavboxBot(generator=gen, **options)
            bot8.treat_page()

        if options.get('airdate_order'):
            if not options.get('airdate'):
                airdate_string = pywikibot.input('Please enter episode airdate (YYYY-MM-DD)')
                options['airdate'] = Airdate(airdate_string)
            bot9 = AirdateBot(generator=gen, **options)
            bot9.treat_page()
            options['airdate_dict'] = bot9.opt.airdate_dict

        if options['ep'].prefix == '4SD' and options.get('4SD'):
            bot10 = Connect4SDBot(generator=gen, **options)
            bot10.treat_page()
            if not options.get('array_dicts'):
                options['array_dicts'] = bot10.opt.array_dicts
            if not options.get('airdate_dict'):
                options['airdate_dict'] = bot10.opt.airdate_dict

        if options.get('main_page'):
            bot11 = MainPageBot(generator=gen, **options)
            bot11.treat_page()


if __name__ == '__main__':
    try:
        main()
    except QuitKeyboardInterrupt:
        pywikibot.info('\nUser quit vod bot run.')