Skip to content

Instantly share code, notes, and snippets.

@leonardo-fernandes
Last active April 19, 2016 15:29
Show Gist options
  • Save leonardo-fernandes/25253548ae8929ee7664 to your computer and use it in GitHub Desktop.
Save leonardo-fernandes/25253548ae8929ee7664 to your computer and use it in GitHub Desktop.
Convert YouTube timedtext XML format to SRT subtitles
#!/usr/bin/env python
# Usage: python tt2srt.py source.xml output.srt
# Download the .xml file from youtube by using the Network tab
# on the browser Developer Tools, and searching for requests to
# the "timedtext" endpoint
from xml.dom.minidom import parse
import sys
def formatSrtTime(milliseconds):
    """Format a duration in milliseconds as an SRT timestamp.

    The result has the form ``HH:MM:SS,mmm``. The millisecond field is
    always zero-padded to three digits, so 5 ms is rendered as ``,005``
    rather than ``,5``.

    Args:
        milliseconds: Offset from the start of the media, in milliseconds.
            Non-integer values are truncated to whole milliseconds.

    Returns:
        The formatted timestamp string.
    """
    # Truncate up front so a fractional input cannot leak into the
    # millisecond field of the formatted string.
    sec, milli = divmod(int(milliseconds), 1000)
    m, s = divmod(sec, 60)
    h, m = divmod(m, 60)
    return "{:02}:{:02}:{:02},{:03}".format(h, m, s, milli)
def main(argv):
    """Convert a YouTube timedtext XML file into an SRT subtitle file.

    Every ``<p>`` element under the first ``<body>`` element becomes one
    numbered SRT cue. Its ``t`` attribute is read as the start time and its
    ``d`` attribute as the duration, both as integer milliseconds. ``<br>``
    children become line breaks and text nodes are copied through; any
    other child node is ignored.

    Args:
        argv: Command-line argument list: program name, source XML path,
            output SRT path.
    """
    # Function-scope import: io.open accepts an encoding on both Python 2
    # and Python 3, unlike the Python 2 builtin open().
    import io

    if len(argv) != 3:
        sys.exit("Usage: python tt2srt.py source.xml output.srt")

    # `unicode` on Python 2, `str` on Python 3.
    text_type = type(u"")

    dom = parse(argv[1])
    body = dom.getElementsByTagName("body")[0]
    paras = body.getElementsByTagName("p")

    # The context manager guarantees the output is flushed and closed even
    # if a malformed cue raises part-way through the conversion.
    with io.open(argv[2], 'w', encoding='utf-8') as out:
        for index, para in enumerate(paras, 1):
            start = int(para.attributes['t'].value)
            end = start + int(para.attributes['d'].value)
            out.write(u"{}\n".format(index))
            out.write(u"{} --> {}\n".format(formatSrtTime(start),
                                            formatSrtTime(end)))
            for child in para.childNodes:
                if child.nodeName == 'br':
                    out.write(u"\n")
                elif child.nodeName == '#text':
                    out.write(text_type(child.data))
            # A blank line terminates each SRT cue.
            out.write(u"\n\n")


if __name__ == "__main__":
    main(sys.argv)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment