Last active
November 12, 2022 11:26
-
-
Save glowinthedark/71226c1d76abb7731bb0ec431f2922bf to your computer and use it in GitHub Desktop.
Convert TTML2 subtitles (https://www.w3.org/TR/2021/CR-ttml2-20210309/) to SRT (https://en.wikipedia.org/wiki/SubRip)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# DEPENDENCIES
#   pip3 install srt lxml
# TTML2 to SRT subtitle converter
# USAGE:
#   ttml2srt.py matrix.ttml2
import sys | |
import xml.etree.ElementTree as ET | |
from pathlib import Path | |
import srt | |
from lxml import etree | |
from srt import srt_timestamp_to_timedelta | |
def to_string(tree):
    """Serialize *tree* via lxml's ``etree.tostring`` with pretty-printing on.

    NOTE(review): lxml's ``tostring`` presumably returns ``bytes`` unless an
    explicit ``encoding`` is requested, despite this helper's name -- confirm
    before relying on a ``str`` result.
    """
    serialized = etree.tostring(tree, pretty_print=True)
    return serialized
def paragraph_text(node):
    """Return the displayable text of a TTML ``<p>`` element.

    Text nested inside child elements (e.g. ``<span>``) is included, every
    ``<br/>`` element starts a new line, runs of whitespace inside a line are
    collapsed to a single space, and blank lines are dropped.

    Args:
        node: an ``xml.etree.ElementTree.Element`` (normally a ``<p>``).

    Returns:
        The text as a ``str``; an empty string when the element has no text.
    """
    # Each inner list gathers the raw text chunks of one output line.
    lines = [[]]

    def collect(element):
        lines[-1].append(element.text or '')
        for child in element:
            # Compare the local name so the check works with or without
            # the '{namespace}' prefix ElementTree puts on tags.
            if child.tag.rpartition('}')[2] == 'br':
                lines.append([])
            else:
                collect(child)
            # The tail is the text that follows the child inside its parent.
            lines[-1].append(child.tail or '')

    collect(node)
    # str.split() with no argument both collapses whitespace runs and trims.
    collapsed = (' '.join(''.join(chunks).split()) for chunks in lines)
    return '\n'.join(line for line in collapsed if line)


if __name__ == '__main__':
    # Convert the TTML2 file named on the command line to an .srt file that
    # is written next to it, echoing the generated subtitles to stdout.
    if len(sys.argv) < 2:
        sys.exit("USAGE: ttml2srt.py <file.ttml2>")

    source_file = sys.argv[1]
    root = ET.parse(source_file)
    # './/' searches all descendants; a path starting with a bare '/' is
    # deprecated on ElementTree objects and emits a FutureWarning.
    nodes = root.findall('.//{http://www.w3.org/ns/ttml}p')

    # NOTE(review): 'begin'/'end' are handed straight to the srt timestamp
    # parser, so presumably only clock-time values such as 00:00:01.000 are
    # supported (not offset times like "1.5s") -- confirm against the inputs.
    subs = [
        srt.Subtitle(
            index=index,
            start=srt_timestamp_to_timedelta(node.attrib['begin']),
            end=srt_timestamp_to_timedelta(node.attrib['end']),
            # node.text alone would miss text in <span> children and after
            # <br/>, and is None when the paragraph starts with a child.
            content=paragraph_text(node),
        )
        for index, node in enumerate(nodes, start=1)
    ]

    all_subs = srt.compose(subs)
    print(all_subs)

    output_file = Path(source_file).with_suffix('.srt')
    with open(output_file, 'w', encoding='utf8') as fo:
        fo.write(all_subs)
    print(f"Wrote file {output_file}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment