Skip to content

Instantly share code, notes, and snippets.

@ramhiser
Created September 10, 2015 02:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ramhiser/7d2f378d43b2268d1afe to your computer and use it in GitHub Desktop.
Save ramhiser/7d2f378d43b2268d1afe to your computer and use it in GitHub Desktop.
Convert CMU Sphinx closed-captioning auto alignment to WebVTT format
#!/usr/bin/env python
import argparse
import sys
import time
from itertools import izip, count
def parse_sphinx_line(line):
    '''Parse one line of Sphinx's closed-captioning alignment output.

    Three line shapes are recognised:

    * a blank (or whitespace-only) line: every field of the result is None;
    * a line whose first token is the marker '-' or '+': the second token,
      if present, is taken as the word and no timing is recorded (any
      further tokens are ignored);
    * ``word [begin:end]``: the word plus its integer begin and end times.

    Returns a dict with the keys 'word', 'time_begin' and 'time_end'.

    Raises ValueError when a line of the third shape does not consist of
    exactly two tokens, or when its time range is not two integers
    separated by ':'.
    '''
    tokens = line.split()
    word = time_begin = time_end = None
    if tokens and tokens[0] in ('-', '+'):
        # A marker with nothing after it carries no word; report an empty
        # result rather than failing with IndexError.
        if len(tokens) > 1:
            word = tokens[1]
    elif tokens:
        word, time_bin = tokens
        bounds = time_bin.replace('[', '').replace(']', '').split(':')
        time_begin, time_end = map(int, bounds)
    return {'word': word, 'time_begin': time_begin, 'time_end': time_end}
def millseconds2timestamp(ms):
    '''Format a millisecond count as an ``HH:MM:SS.mmm`` timestamp.

    ``ms`` may be anything ``int()`` accepts and is expected to be
    non-negative. The hour field is at least two digits wide and keeps
    counting past 23 instead of wrapping around to 00, so durations of a
    day or more are still rendered correctly.

    Originally based on http://stackoverflow.com/a/21787689/234233
    '''
    total_seconds, millis = divmod(int(ms), 1000)
    minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return '{:02d}:{:02d}:{:02d}.{:03d}'.format(hours, minutes, seconds,
                                                millis)
def vtt_timestamp(time_begin, time_end):
    '''Build the WebVTT cue timing line for a span given in milliseconds.

    Both bounds are formatted with millseconds2timestamp and joined by the
    WebVTT arrow.

    Example: 00:00:01.800 --> 00:00:03.600'''
    return '{} --> {}'.format(millseconds2timestamp(time_begin),
                              millseconds2timestamp(time_end))
def vtt_chunk(words, begin, end):
    '''Render one WebVTT cue as text.

    Falsy entries in ``words`` (e.g. None) are dropped, and the remaining
    words are joined with single spaces. The result is the timing line for
    ``begin``/``end``, a newline, the sentence, and a blank line that
    terminates the cue.
    '''
    kept = list(filter(None, words))
    timing_line = vtt_timestamp(begin, end)
    return '{}\n{}\n\n'.format(timing_line, ' '.join(kept))
def main():
    '''Command-line entry point: convert a Sphinx alignment file to WebVTT.

    Reads the file given by --sphinx line by line, groups the words into
    fixed-length time windows of --chunks milliseconds, and writes one
    WebVTT cue per non-empty window to --output.

    Window k (counting from 0) is reported as spanning
    ``k * chunks + 1`` to ``(k + 1) * chunks`` milliseconds, so consecutive
    cues never share a timestamp. A timed word is placed in the window
    whose end is the first one greater than the word's begin time; words
    without timing are attached to whichever window is current.
    '''
    descr = 'Convert Sphinx auto-alignment to WebVTT Closed Captioning format'
    parser = argparse.ArgumentParser(description=descr)
    parser.add_argument('--sphinx', '-s',
                        help='Sphinx auto-alignment output',
                        type=str,
                        required=True)
    parser.add_argument('--output', '-o',
                        help='Closed captions (WebVTT). Default: %(default)s',
                        type=str,
                        default='closed-caption.vtt')
    parser.add_argument('--chunks', '-c',
                        help='Chunks of time (ms) . Default: %(default)s',
                        type=int,
                        default=2000)
    args = parser.parse_args(sys.argv[1:])
    if args.chunks <= 0:
        # A non-positive window length can never advance past any word.
        parser.error('--chunks must be a positive number of milliseconds')

    with open(args.sphinx, "r") as sphinx_f, open(args.output, "w") as vtt_f:
        # WebVTT header
        vtt_f.write('WEBVTT\n\n')

        chunk_begin, chunk_end = 1, args.chunks
        chunk_words = []
        for line in sphinx_f:
            parsed_line = parse_sphinx_line(line)
            word = parsed_line['word']
            if word is None:
                # Blank line: nothing to caption.
                continue

            time_begin = parsed_line['time_begin']
            if time_begin is not None and time_begin >= chunk_end:
                # The word starts after the current window: emit what has
                # been collected so far ...
                if chunk_words:
                    vtt_f.write(vtt_chunk(words=chunk_words,
                                          begin=chunk_begin,
                                          end=chunk_end))
                    chunk_words = []
                # ... then jump straight to the window containing the word,
                # skipping any silent windows in between.
                chunk_index = time_begin // args.chunks
                chunk_end = (chunk_index + 1) * args.chunks
                chunk_begin = chunk_end - args.chunks + 1

            # The word always lands in the (possibly new) current window;
            # it must not be lost when it triggers a rollover.
            chunk_words.append(word)

        # Flush the final, partially filled window.
        if chunk_words:
            vtt_f.write(vtt_chunk(words=chunk_words,
                                  begin=chunk_begin,
                                  end=chunk_end))
# Run the converter only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()
@ramhiser
Copy link
Author

The output from CMU Sphinx is described in this blog post.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment