# boscacci/scene_heading_extractor.py

## scene_heading_extractor.py
import re

def extract_scene_headings(film_script_txtfile_path="scripts/no_country.txt", mode=1):
    """Return the location text of the scene headings found in a film script.

    The script is scanned line by line. On each line, the text that follows an
    ``INT. `` or ``EXT. `` prefix is captured up to a delimiter selected by
    ``mode``. At most one heading is taken per line.

    Args:
        film_script_txtfile_path: Path of the plain-text script to read.
        mode: Which heading format to look for.
            1 -- the heading ends at a comma (``INT. KITCHEN, DAY``).
            2 -- the heading ends at " -" (``EXT. DESERT - NIGHT``).

    Returns:
        A list with the captured heading text of every matching line, in file
        order, or ``None`` when no line matches.

    Raises:
        ValueError: If ``mode`` is not one of the supported values.
    """
    # A couple options on regex patterns, depending on script format. Might
    # need tweaks per script. The dots are escaped so that only a literal
    # "INT. " / "EXT. " prefix qualifies (an unescaped dot would also accept
    # text such as "INTO "). The capture is greedy, so it runs up to the LAST
    # delimiter on the line.
    heading_patterns = {
        1: r"(?<=INT\. |EXT\. ).*(?=,)",
        2: r"(?<=INT\. |EXT\. ).*(?= -)",
    }
    if mode not in heading_patterns:
        raise ValueError(
            "mode must be one of {}, got {!r}".format(sorted(heading_patterns), mode)
        )
    heading_re = re.compile(heading_patterns[mode])

    headings = []
    # The file is opened with the platform default encoding, as before.
    with open(film_script_txtfile_path, "r") as script_file:
        for line in script_file:
            found = heading_re.search(line)
            if found:
                headings.append(found.group())

    # Callers may rely on None (rather than an empty list) for "nothing found".
    return headings if headings else None
	import re

	def extract_scene_headings(film_script_txtfile_path="scripts/no_country.txt", mode=1):
	    """Return the location text of the scene headings found in a film script.

	    The script is scanned line by line. On each line, the text that follows
	    an ``INT. `` or ``EXT. `` prefix is captured up to a delimiter selected
	    by ``mode``. At most one heading is taken per line.

	    Args:
	        film_script_txtfile_path: Path of the plain-text script to read.
	        mode: Which heading format to look for.
	            1 -- the heading ends at a comma (``INT. KITCHEN, DAY``).
	            2 -- the heading ends at " -" (``EXT. DESERT - NIGHT``).

	    Returns:
	        A list with the captured heading text of every matching line, in
	        file order, or ``None`` when no line matches.

	    Raises:
	        ValueError: If ``mode`` is not one of the supported values.
	    """
	    # A couple options on regex patterns, depending on script format. Might
	    # need tweaks per script. The "|" must stay an unescaped alternation
	    # (an escaped "\|" would demand a literal pipe in the script), while the
	    # dots are escaped so only a literal "INT. " / "EXT. " prefix qualifies.
	    # The capture is greedy: it runs up to the LAST delimiter on the line.
	    heading_patterns = {
	        1: r"(?<=INT\. |EXT\. ).*(?=,)",
	        2: r"(?<=INT\. |EXT\. ).*(?= -)",
	    }
	    if mode not in heading_patterns:
	        raise ValueError(
	            "mode must be one of {}, got {!r}".format(sorted(heading_patterns), mode)
	        )
	    heading_re = re.compile(heading_patterns[mode])

	    headings = []
	    # The file is opened with the platform default encoding.
	    with open(film_script_txtfile_path, "r") as script_file:
	        for line in script_file:
	            found = heading_re.search(line)
	            if found:
	                headings.append(found.group())

	    # None (rather than an empty list) signals "nothing found".
	    return headings if headings else None