Skip to content

Instantly share code, notes, and snippets.

@seraphyn
Forked from mbirth/tt2srt.py
Created Feb 22, 2016
Embed
What would you like to do?
Timed Text Captions to SRT Subtitles converter script
#!/usr/bin/env python
# Usage: python tt2srt.py source.xml output.srt
# FROM: https://gist.github.com/adammw/915259
import io
import sys
from xml.dom.minidom import parse
def fixTime(time, fps=25):
    """Convert a Timed Text clock time into an SRT timestamp.

    Accepted input shapes:
      HH:MM:SS:FF        -- trailing frame count
      HH:MM:SS.fraction  -- fractional seconds
      HH:MM:SS           -- whole seconds

    time -- timestamp string, e.g. the value of a begin/end attribute.
    fps  -- frames per second used to turn the frame field into
            milliseconds; the default of 25 gives 40 ms per frame.

    Returns the timestamp as 'HH:MM:SS,mmm'.
    Raises ValueError when the string has none of the shapes above or a
    numeric field cannot be parsed.
    """
    parts = time.split(':')
    if len(parts) == 4:
        frames = int(parts[3])
        # Clamp so a frame number at or above fps can never yield a
        # four-digit millisecond field, which would break the SRT timestamp.
        millisec = min(int(frames * 1000 // fps), 999)
    elif len(parts) == 3:
        seconds, _, fraction = parts[2].partition('.')
        parts[2] = seconds
        # Pad or truncate the fraction to exactly three digits (milliseconds).
        millisec = int((fraction + '000')[:3])
    else:
        raise ValueError('unsupported timestamp: %r' % (time,))
    return '%s,%03d' % (':'.join(parts[0:3]), millisec)
try:
    _text_type = unicode  # Python 2: io.open() only accepts unicode text
except NameError:
    _text_type = str  # Python 3: str is already unicode text

# Only ASCII whitespace is trimmed from caption text, so characters such as
# a non-breaking space at the edge of a caption are kept.
_ASCII_WHITESPACE = " \t\n\r\x0b\x0c"


def _collect_styles(dom):
    """Map style ids to their tts:color, read from the first <styling> element.

    Only direct <style> children that carry a tts:color attribute are used.
    The id is taken from 'id', falling back to 'xml:id'. Returns an empty
    dict when the document has no <styling> element.
    """
    styles = {}
    stylings = dom.getElementsByTagName("styling")
    if not stylings:
        return styles
    for style in stylings[0].childNodes:
        # The nodeName test comes first: text/comment nodes between the
        # <style> elements have no hasAttribute().
        if style.nodeName != 'style' or not style.hasAttribute('tts:color'):
            continue
        style_id = style.getAttribute('id') or style.getAttribute('xml:id')
        if style_id:
            styles[style_id] = style.getAttribute('tts:color')
    return styles


def _write_inline(out, nodes):
    """Write <br> nodes as newlines and text nodes as trimmed text.

    Text that is empty after trimming is skipped; every other node type is
    ignored.
    """
    for node in nodes:
        if node.nodeName == 'br':
            out.write(u"\n")
        elif node.nodeName == '#text':
            raw_text = node.data.strip(_ASCII_WHITESPACE)
            if raw_text:
                out.write(_text_type(raw_text))


def convert(source_path, output_path):
    """Convert the Timed Text XML file at source_path to SRT at output_path.

    Every <p> inside the first <body> becomes one numbered SRT cue whose
    timing comes from its begin/end attributes. A <span> whose style
    attribute names a known colour style is wrapped in <font color="...">.
    The output is written as UTF-8. A document without <body> produces an
    empty output file.
    """
    dom = parse(source_path)
    styles = _collect_styles(dom)
    bodies = dom.getElementsByTagName("body")
    paras = bodies[0].getElementsByTagName("p") if bodies else []
    with io.open(output_path, 'w', encoding='utf-8') as out:
        for index, para in enumerate(paras, 1):
            out.write(u"%d\n" % index)
            begin = fixTime(para.attributes['begin'].value)
            end = fixTime(para.attributes['end'].value)
            out.write(_text_type(begin + ' --> ' + end + "\n"))
            for child in para.childNodes:
                if child.nodeName == 'span':
                    # getAttribute() returns '' when the attribute is
                    # missing, and '' is never stored as a style id.
                    style_id = child.getAttribute('style')
                    font_open = style_id in styles
                    if font_open:
                        out.write(_text_type('<font color="' + styles[style_id] + '">'))
                    _write_inline(out, child.childNodes)
                    if font_open:
                        out.write(u'</font>')
                else:
                    # <br> and caption text placed directly inside <p>.
                    _write_inline(out, [child])
            out.write(u"\n\n")


def main(argv):
    """Command-line entry point: argv[1] is the source XML, argv[2] the SRT."""
    if len(argv) < 3:
        sys.exit("Usage: python tt2srt.py source.xml output.srt")
    convert(argv[1], argv[2])


if __name__ == "__main__":
    main(sys.argv)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment