Skip to content

Instantly share code, notes, and snippets.

@mbirth
Forked from adammw/tt2srt.py
Last active May 6, 2019
Embed
What would you like to do?
Timed Text Captions to SRT Subtitles converter script
#!/usr/bin/env python
# Usage: python tt2srt.py source.xml output.srt
# FROM: https://gist.github.com/adammw/915259
from xml.dom.minidom import parse
import sys
def fixTime(time):
    """Convert a TTML clock-time into an SRT timestamp.

    SRT expects ``HH:MM:SS,mmm`` (comma separator, exactly three
    millisecond digits).  A clock-time of the form ``HH:MM:SS`` or
    ``HH:MM:SS.f...`` is padded or truncated to three fractional digits.
    Any other value (offset times such as ``12.5s``, frame-based times)
    only gets its "." replaced by ",", as before.

    :param time: time expression string taken from a begin/end attribute.
    :return: the timestamp string to write into the SRT file.
    """
    # 00:00:00.000
    whole, dot, fraction = time.partition(".")
    # Only normalise plain clock-times: two colons, and either no fraction
    # at all or a purely numeric one.
    isClock = whole.count(":") == 2 and (fraction.isdigit() or not dot)
    if not isClock:
        return time.replace(".", ",")
    # Pad with zeros, then cut to exactly three millisecond digits.
    return whole + "," + (fraction + "000")[:3]
def loadStyles(dom):
    """Collect the colour of every <tt:style> that declares one.

    Looks at the direct children of the first <tt:styling> element and
    returns a dict mapping each style's ``xml:id`` to its ``tts:color``.
    Styles lacking either attribute are skipped; a document without a
    <tt:styling> element yields an empty dict.
    """
    styles = {}
    stylings = dom.getElementsByTagName("tt:styling")
    if not stylings:
        return styles
    for node in stylings[0].childNodes:
        if node.nodeName != 'tt:style':
            continue
        if node.hasAttribute('tts:color') and node.hasAttribute('xml:id'):
            styles[node.getAttribute('xml:id')] = node.getAttribute('tts:color')
    return styles


def cueText(para, styles):
    """Return the SRT text for one <tt:p> element.

    <tt:br> becomes a newline, text nodes are stripped of surrounding
    whitespace, and a <tt:span> whose ``style`` attribute names an entry
    in *styles* is wrapped in ``<font color="...">...</font>``.
    Text placed directly inside the paragraph (outside any span) is kept.
    """
    parts = []
    for child in para.childNodes:
        if child.nodeName == 'tt:span':
            # getAttribute returns "" when the span has no style attribute,
            # so unstyled spans simply get no <font> wrapper.
            color = styles.get(child.getAttribute('style'))
            if color is not None:
                parts.append('<font color="' + color + '">')
            for inner in child.childNodes:
                if inner.nodeName == 'tt:br':
                    parts.append("\n")
                elif inner.nodeName == '#text':
                    text = inner.data.strip()
                    if text:
                        parts.append(text)
            if color is not None:
                parts.append('</font>')
        elif child.nodeName == 'tt:br':
            parts.append("\n")
        elif child.nodeName == '#text':
            text = child.data.strip()
            if text:
                parts.append(text)
    return "".join(parts)


def convert(dom, out):
    """Write every <tt:p> of the first <tt:body> to *out* as an SRT cue.

    *out* must be a binary writable stream; each cue is encoded as UTF-8.
    Cues are numbered from 1 in document order.  A paragraph without a
    ``begin`` or ``end`` attribute raises KeyError.
    """
    styles = loadStyles(dom)
    bodies = dom.getElementsByTagName("tt:body")
    paras = bodies[0].getElementsByTagName("tt:p") if bodies else []
    for index, para in enumerate(paras, 1):
        begin = fixTime(para.attributes['begin'].value)
        end = fixTime(para.attributes['end'].value)
        cue = (str(index) + "\n"
               + begin + ' --> ' + end + "\n"
               + cueText(para, styles)
               + "\n\n")
        # Encoding here (instead of relying on the file object) keeps the
        # script working on both Python 2 and Python 3.
        out.write(cue.encode('utf-8'))


def main(argv):
    """Command-line entry point: ``tt2srt.py source.xml output.srt``."""
    if len(argv) < 3:
        sys.exit("Usage: python tt2srt.py source.xml output.srt")
    dom = parse(argv[1])
    # Binary mode: the cues are already UTF-8 encoded bytes.  The context
    # manager guarantees the file is flushed and closed.
    with open(argv[2], 'wb') as out:
        convert(dom, out)


if __name__ == "__main__":
    main(sys.argv)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment