Skip to content

Instantly share code, notes, and snippets.

@augustomen
Created February 29, 2016 02:43
Show Gist options
  • Save augustomen/bea456c182b447b820fb to your computer and use it in GitHub Desktop.
Save augustomen/bea456c182b447b820fb to your computer and use it in GitHub Desktop.
Converts a YouTube closed-captions XML file to SRT
#!/usr/bin/python
import io
import re
import sys
from datetime import datetime, timedelta
# Matches one caption element and captures, in order: the start offset in
# seconds, the duration in seconds, and the caption text.  DOTALL lets the
# caption text span several lines; without it such captions were skipped.
pattern = re.compile(
    r'<text start="([\d\.]+)" dur="([\d\.]+)">(.*?)</text>', re.DOTALL)


def format_timestamp(offset):
    """Return the timedelta *offset* as an SRT timestamp ``HH:MM:SS,mmm``.

    Sub-millisecond digits are truncated, not rounded.  The hour field is
    computed from the total offset, so it keeps counting past 23 instead of
    wrapping back to 00.
    """
    total_ms = ((offset.days * 86400 + offset.seconds) * 1000
                + offset.microseconds // 1000)
    hours, remainder = divmod(total_ms, 3600 * 1000)
    minutes, remainder = divmod(remainder, 60 * 1000)
    seconds, millis = divmod(remainder, 1000)
    return u'{:02d}:{:02d}:{:02d},{:03d}'.format(
        hours, minutes, seconds, millis)


def convert(content):
    """Return the SRT text for the caption XML held in *content*.

    Every element matched by ``pattern`` becomes one numbered SRT entry
    (numbering starts at 1).  Text that matches no caption element is
    ignored; an input without captions yields an empty string.

    Raises:
        ValueError: if a ``start`` or ``dur`` attribute is not a valid
            float (for example ``"1.2.3"``).
    """
    entries = []
    captions = pattern.findall(content)
    for index, (start, duration, text) in enumerate(captions, start=1):
        begin = timedelta(seconds=float(start))
        end = begin + timedelta(seconds=float(duration))
        entries.append(u'{}\n{} --> {}\n{}\n\n'.format(
            index,
            format_timestamp(begin),
            format_timestamp(end),
            text.strip(),
        ))
    return u''.join(entries)


def main(argv=None):
    """Convert the caption file named in ``argv[1]`` to ``<name>.srt``.

    *argv* defaults to ``sys.argv``.  Both files are handled as UTF-8.
    Exits with a usage message when no input path is given.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.exit('usage: {} CAPTIONS_XML'.format(
            argv[0] if argv else 'xml2srt'))
    source = argv[1]
    # io.open decodes/encodes at the file boundary on Python 2 and 3 alike,
    # so the rest of the script only ever handles text.
    with io.open(source, encoding='utf-8') as fin:
        content = fin.read()
    with io.open(source + '.srt', 'w', encoding='utf-8') as fout:
        fout.write(convert(content))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment