Skip to content

Instantly share code, notes, and snippets.



Last active Jan 25, 2018
What would you like to do?
YouTube XML Caption Format to SRT
#!/usr/bin/env python3
# -*- encoding:utf-8 -*-
# Install: pip install lxml
# Usage: python SCRIPT.py INPUT_FILE.xml > OUTPUT_FILE.srt
import re
import sys
from decimal import Decimal
import html
from lxml import html as lxml_html
# Path of the caption XML file, taken from the first command-line argument.
filename = sys.argv[1]
with open(filename, 'rb') as f:
    # NOTE(review): the argument of this call was lost from the listing;
    # passing the file's raw bytes is the presumed original -- confirm.
    # Reading bytes lets lxml handle the document's declared encoding itself.
    root = lxml_html.fromstring(f.read())
def extract_s_ms(value):
    """Split a decimal-seconds string into whole seconds and milliseconds.

    Args:
        value: A timestamp or duration in seconds as text, with an optional
            fractional part, e.g. ``"12"`` or ``"12.34"``.

    Returns:
        A ``(seconds, milliseconds)`` tuple of ``Decimal`` values.  The
        fractional part is scaled to milliseconds, so ``"1.5"`` yields
        ``(Decimal(1), Decimal(500))``; digits beyond the third are dropped.

    Raises:
        ValueError: If ``value`` contains more than one ``'.'``.
        decimal.InvalidOperation: If a part is not a valid number.
    """
    if '.' in value:
        s, fraction = value.split('.')
        # The digits after the point are a fraction of a second, not a
        # millisecond count: pad/truncate to exactly three digits so that
        # "5" means 500 ms and "0567" means 56 ms.
        ms = fraction.ljust(3, '0')[:3]
    else:
        s, ms = (value, 0)
    # `s or 0` tolerates inputs such as ".5" whose integer part is empty.
    return Decimal(s or 0), Decimal(ms)
def convert_to_h_m_s(value):
    """Break a count of seconds into hours, minutes and seconds.

    Args:
        value: Total number of seconds (any type supporting ``divmod``,
            e.g. ``int`` or ``Decimal``).

    Returns:
        A ``(hours, minutes, seconds)`` tuple where minutes and seconds are
        each in the range 0-59 for non-negative input.
    """
    total_minutes, s = divmod(value, 60)
    h, m = divmod(total_minutes, 60)
    return h, m, s
# Emit one SRT cue per child element of the parsed caption document.
# SRT cue numbers are 1-based, hence start=1.
for (i, element) in enumerate(root, start=1):
    start_time_s, start_time_ms = extract_s_ms(element.attrib['start'])
    duration_s, duration_ms = extract_s_ms(element.attrib['dur'])
    # Carry any millisecond overflow into the seconds so the printed
    # millisecond field always stays below 1000.
    carry_s, end_time_ms = divmod(start_time_ms + duration_ms, 1000)
    end_time_s = start_time_s + duration_s + carry_s
    s_h, s_m, s_s = convert_to_h_m_s(start_time_s)
    e_h, e_m, e_s = convert_to_h_m_s(end_time_s)
    # An element without text has .text == None, which html.unescape rejects.
    text = html.unescape(element.text or '')
    # Cue layout: index line, "HH:MM:SS,mmm --> HH:MM:SS,mmm" line, text,
    # then the blank separator line supplied by print's trailing newline.
    print(
        '{i}\n'
        '{s_h:02d}:{s_m:02d}:{s_s:02d},{s_ms:03d} --> '
        '{e_h:02d}:{e_m:02d}:{e_s:02d},{e_ms:03d}\n'
        '{text}\n'.format(
            i=i,
            s_h=int(s_h), s_m=int(s_m), s_s=int(s_s), s_ms=int(start_time_ms),
            e_h=int(e_h), e_m=int(e_m), e_s=int(e_s), e_ms=int(end_time_ms),
            text=text,
        )
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.