Last active
August 17, 2021 16:04
-
-
Save boscacci/383cc125b022fe81d82952afde8aadc2 to your computer and use it in GitHub Desktop.
Extract Scene Headings from Screenplay
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re
def extract_scene_headings(film_script_txtfile_path="scripts/no_country.txt", mode=1):
    """Return the location text of every scene heading in a screenplay file.

    A scene heading is any line containing ``INT. `` or ``EXT. ``. The text
    that follows that marker, up to a format-dependent delimiter, is
    collected (at most one heading per line).

    Args:
        film_script_txtfile_path: Path to the screenplay as a plain-text file.
        mode: Which heading format to expect.
            1 -- heading ends at the last comma on the line
                 (``INT. KITCHEN, NIGHT`` -> ``KITCHEN``).
            2 -- heading ends at the last `` -`` on the line
                 (``EXT. DESERT - DAY`` -> ``DESERT``).

    Returns:
        A list of heading strings in file order, or ``None`` when no line
        matches.

    Raises:
        ValueError: If ``mode`` is not 1 or 2.
    """
    # A couple of options on regex patterns, depending on script format.
    # Might need tweaks per script. The dots are escaped on purpose: an
    # unescaped "." would let ordinary words such as "INTO " pass as "INT. ".
    heading_patterns = {
        1: r"(?<=INT\. |EXT\. ).*(?=,)",
        2: r"(?<=INT\. |EXT\. ).*(?= -)",
    }
    if mode not in heading_patterns:
        raise ValueError(
            "mode must be one of %s, got %r" % (sorted(heading_patterns), mode)
        )
    heading_re = re.compile(heading_patterns[mode])

    places = []
    # Open film script .txt file and scan it line by line.
    with open(film_script_txtfile_path, "r") as f:
        for line in f:
            # The first match on the line is the heading; one search per line
            # replaces running the regex twice.
            match = heading_re.search(line)
            if match is not None:
                places.append(match.group(0))

    # Callers rely on None (not an empty list) when nothing was found.
    return places if places else None
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment