Skip to content

Instantly share code, notes, and snippets.

@yauh
Created January 22, 2023 13:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yauh/deeac87eeee83716b48515c3c224258b to your computer and use it in GitHub Desktop.
Save yauh/deeac87eeee83716b48515c3c224258b to your computer and use it in GitHub Desktop.
This script takes a Whisper-generated VTT file and makes it compatible with the many podcast hosts out there
#!/usr/bin/env python3
import argparse
import re
def correct_timestamp(timestamp: str) -> str:
    """Return *timestamp* in ``HH:MM:SS.sss`` form.

    The timestamp is split on ``":"``. If fewer than three fields are
    present (for example ``MM:SS.sss``), the missing leading fields are
    filled with ``"00"``. Surrounding whitespace is ignored.

    Args:
        timestamp: A colon-separated timestamp such as ``"01:02.345"`` or
            ``"00:01:02.345"``.

    Returns:
        The first three colon-separated fields joined by ``":"``. Fields
        beyond the third are dropped.
    """
    fields = timestamp.strip().split(":")
    # Pad from the left so short forms gain zeroed hour (and minute) fields
    # instead of raising IndexError when fewer than three fields are present.
    while len(fields) < 3:
        fields.insert(0, "00")
    return ":".join(fields[:3])
if __name__ == '__main__':
    # Command-line entry point: read a VTT file, put a running cue number in
    # front of every cue timing line, pad the timestamps on that line to
    # HH:MM:SS.sss and copy every other line through with surrounding
    # whitespace stripped.
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--infile', type=argparse.FileType('r', encoding='UTF-8'),
                        required=True)
    parser.add_argument('-o', '--outfile', type=argparse.FileType('w', encoding='UTF-8'),
                        required=False)
    args = parser.parse_args()

    infile_name = args.infile.name
    print("reading from file: " + infile_name)
    if args.outfile is None:
        # Default output path: the input path with ".out" appended.
        outfile_name = infile_name + ".out"
        print("no outfile given")
    else:
        outfile_name = args.outfile.name
    print("writing to file: " + outfile_name)

    # configuration settings
    # A cue timing line is "<start> --> <end>" optionally followed by cue
    # settings. Requiring digit-shaped timestamps keeps ordinary text lines
    # that merely contain "-->" from being treated as timings.
    timestamp_mask = r"(?:\d+:)?\d+:\d+\.\d+"
    cue_timing = re.compile(
        r"^[ \t]*(" + timestamp_mask + r")[ \t]+-->[ \t]+(" + timestamp_mask + r")([ \t].*)?$"
    )

    # argparse has already opened the files as UTF-8; reuse those handles
    # rather than re-opening by name with the platform default encoding, and
    # let the context managers close them even if an error occurs.
    with args.infile as infile:
        if args.outfile is None:
            target = open(outfile_name, 'w', encoding='UTF-8')
        else:
            target = args.outfile
        with target as outfile:
            cue_counter = 1
            for line in infile:
                match = cue_timing.match(line)
                if match is None:
                    # Not a timing line: pass it through, whitespace-trimmed.
                    outfile.write(line.strip() + "\n")
                    continue

                # add HH padding if missing from timecode
                timing = (correct_timestamp(match.group(1).strip())
                          + " --> "
                          + correct_timestamp(match.group(2).strip()))
                # Keep any cue settings that followed the end timestamp.
                settings = (match.group(3) or "").strip()
                if settings:
                    timing += " " + settings

                outfile.write(str(cue_counter) + "\n")
                cue_counter += 1
                outfile.write(timing + "\n")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment