Skip to content

Instantly share code, notes, and snippets.

@BryantD
Created February 9, 2023 03:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save BryantD/9eb4f7f2214ddbf7324dcd0ffd4e3cd6 to your computer and use it in GitHub Desktop.
Save BryantD/9eb4f7f2214ddbf7324dcd0ffd4e3cd6 to your computer and use it in GitHub Desktop.
Meet Transcript Processor
#!/usr/bin/env python3
import argparse
from textwrap import wrap
# First tokens of WebVTT header lines that carry no transcript content.
_HEADER_TOKENS = ("WEBVTT", "Kind:", "Language:")

# A turn whose last character is one of these is considered finished;
# anything else is treated as an interruption.
_TERMINAL_PUNCTUATION = (".", ",", "?", "!", "-")


def _format_turn(speaker, text):
    """Return the wrapped output lines for one speaker turn.

    Args:
        speaker: Name of the person speaking (may be empty).
        text: Accumulated text of the turn; surrounding whitespace is ignored.

    Returns:
        A list of output lines followed by one empty separator line, or an
        empty list when the turn contains no text.
    """
    text = text.strip()
    if not text:
        return []
    if text[-1] not in _TERMINAL_PUNCTUATION:
        # Handle interruptions:
        # if a turn doesn't end in punctuation, add a dash.
        text += " --"
    if text[0] == text[0].lower():
        # Handle follow-on statements:
        # if a turn doesn't start with a capital letter, it's probably a
        # follow-on. (Characters without case, such as digits, also match.)
        text = "-- " + text
    return wrap(f"{speaker}: {text}", subsequent_indent=" ") + [""]


def _render_transcript(lines):
    """Yield formatted output lines for an iterable of raw transcript lines.

    Header lines and cue timing lines (containing "-->") are skipped.
    A line of the form "(Name)" sets the current speaker; consecutive cues
    from the same speaker are merged into a single turn.
    """
    speaker = ""
    bulk_text = ""
    for raw_line in lines:
        line = raw_line.strip()
        if not line:
            continue
        if line.split()[0] in _HEADER_TOKENS or "-->" in line:
            continue
        if line[0] == "(" and line[-1] == ")":
            new_speaker = line[1:-1]
            if new_speaker != speaker:
                # Speaker changed: emit what the previous speaker said.
                yield from _format_turn(speaker, bulk_text)
                bulk_text = ""
                speaker = new_speaker
            continue
        if line == "-":
            # A single dash on a line alone is how Meet transcripts
            # indicate an interruption.
            if bulk_text and bulk_text[-1] in (",", "-"):
                bulk_text = bulk_text[:-1] + " --"
        else:
            bulk_text += " " + line
    # Flush the final turn: no further speaker line follows it, so without
    # this the last speaker's text would never be printed.
    yield from _format_turn(speaker, bulk_text)


def main():
    """Convert a Google Meet WebVTT transcript into speaker-labelled text.

    Reads the path given as the single command-line argument and prints one
    wrapped paragraph per speaker turn, each followed by a blank line.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("file", help="File to process")
    args = parser.parse_args()

    # WebVTT files are UTF-8 and may begin with a byte order mark;
    # "utf-8-sig" strips the BOM so the "WEBVTT" header is still recognized.
    with open(args.file, encoding="utf-8-sig") as fp:
        for output_line in _render_transcript(fp):
            print(output_line)
# Script entry point: run the processor only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment