Skip to content

Instantly share code, notes, and snippets.

@turicas
Created May 4, 2023 00:05
Show Gist options
  • Save turicas/7b9b1213bdee98af74356c836f03a445 to your computer and use it in GitHub Desktop.
Save turicas/7b9b1213bdee98af74356c836f03a445 to your computer and use it in GitHub Desktop.
Filter repeated captions on SRT files
# Requires the third-party "srt" package: pip install srt
import argparse
import srt
def merge_repeated_captions(captions):
    """Collapse runs of consecutive captions that share the same text.

    Args:
        captions: Iterable of objects exposing ``start``, ``end`` and
            ``content`` attributes (for example the items produced by
            ``srt.parse``).

    Returns:
        A list of ``(start, end, content)`` tuples, one per run, in input
        order. ``start`` comes from the first caption of the run and ``end``
        from the last one. An empty input yields an empty list.
    """
    merged = []
    run = None  # [start, end, content] of the run currently being extended
    for caption in captions:
        if run is not None and run[2] == caption.content:
            # Same text as the previous caption: only push the end time out.
            run[1] = caption.end
            continue
        if run is not None:
            merged.append(tuple(run))
        run = [caption.start, caption.end, caption.content]
    # Flush the trailing run unconditionally. The previous version compared
    # against a variable that was only bound after the first flush, so inputs
    # with zero or one distinct caption crashed with a NameError here.
    if run is not None:
        merged.append(tuple(run))
    return merged


def main():
    """Read an SRT file, merge repeated consecutive captions, write the result.

    Command-line arguments: ``input_filename`` and ``output_filename``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("input_filename")
    parser.add_argument("output_filename")
    args = parser.parse_args()

    # Explicit encoding so the result does not depend on the platform default;
    # "utf-8-sig" also accepts files that were saved with a byte-order mark.
    with open(args.input_filename, encoding="utf-8-sig") as fobj:
        # srt.parse is consumed inside the helper, while the file is still open.
        merged = merge_repeated_captions(srt.parse(fobj))

    new_subtitles = [
        srt.Subtitle(index=index, start=start, end=end, content=content)
        for index, (start, end, content) in enumerate(merged, start=1)
    ]
    with open(args.output_filename, mode="w", encoding="utf-8") as fobj:
        fobj.write(srt.compose(new_subtitles))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment