Skip to content

Instantly share code, notes, and snippets.

@abdusco
Last active February 1, 2020 19:38
Show Gist options
  • Save abdusco/0bed8e42f0eadb6b954fa28a667e0d51 to your computer and use it in GitHub Desktop.
Save abdusco/0bed8e42f0eadb6b954fa28a667e0d51 to your computer and use it in GitHub Desktop.
Convert TTML to SRT
import re
import sys
from textwrap import dedent
from typing import List
import bs4
# TTML clock-time with an optional fractional-seconds part, e.g. "00:01:02.5".
_CLOCK_TIME_RE = re.compile(r'^(\d{2,}):(\d{2}):(\d{2})(?:\.(\d+))?$')
# Runs of consecutive newlines inside a cue's text.
_BLANK_LINES_RE = re.compile(r'\n{2,}')


def _to_srt_timestamp(value: str) -> str:
    """Rewrite a TTML ``HH:MM:SS[.fraction]`` time as SRT ``HH:MM:SS,mmm``.

    Values in any other form (offset times, frame counts, ...) are returned
    unchanged, because they cannot be converted without further context.
    """
    match = _CLOCK_TIME_RE.match(value.strip())
    if match is None:
        return value
    hours, minutes, seconds, fraction = match.groups()
    # SRT wants exactly three millisecond digits after a comma:
    # pad short fractions, truncate long ones.
    millis = ((fraction or '') + '000')[:3]
    return f'{hours}:{minutes}:{seconds},{millis}'


def extract_subs(xml_path: str) -> List[str]:
    """Read a TTML file and return its cues as SRT-formatted strings.

    Every element carrying a ``begin`` attribute is treated as one cue.
    Cues are numbered from 1 in document order, and each returned string
    already ends with the blank line that separates SRT entries.

    Args:
        xml_path: Path of the TTML (XML) file to read.

    Returns:
        One string per cue: index line, timing line, text, blank line.

    Raises:
        KeyError: If an element has a ``begin`` attribute but no ``end``.
    """
    # Read raw bytes so BeautifulSoup can detect the encoding itself instead
    # of relying on the platform's default text encoding.
    with open(xml_path, 'rb') as fin:
        soup = bs4.BeautifulSoup(fin.read(), 'lxml')

    subs = []
    for index, node in enumerate(soup.select('[begin]'), start=1):
        begin = _to_srt_timestamp(node['begin'])
        end = _to_srt_timestamp(node['end'])
        # A blank line would terminate the cue early in SRT, so collapse
        # consecutive newlines inside the text into a single one.
        text = _BLANK_LINES_RE.sub('\n', node.text.strip())
        subs.append(f'{index}\n{begin} --> {end}\n{text}\n\n')
    return subs
def main():
    """Command-line entry point: convert a TTML file into an SRT file.

    Expects exactly two command-line arguments, the source TTML path and
    the destination SRT path. With any other argument count, a usage
    message is printed and nothing is converted.
    """
    # Require exactly two arguments; otherwise the unpacking below would
    # raise ValueError when extra arguments are supplied.
    if len(sys.argv) != 3:
        print(dedent('''\
            usage: python ttml2srt.py src.xml out.srt
            '''))
        return

    src_path, out_path = sys.argv[1:]
    subs = extract_subs(src_path)
    # Write UTF-8 explicitly so non-ASCII subtitle text does not depend on
    # (or fail under) the platform's default encoding.
    with open(out_path, 'w', encoding='utf-8') as fout:
        fout.writelines(subs)
# Run the converter only when this file is executed as a script, so that
# importing it as a module has no side effects.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment