Created
February 9, 2018 10:55
-
-
Save zottelbeyer/7d7eac2ae6cae9b601f0fb8692cdf278 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# Converts German Mediathek (ARD, ZDF, etc.) XML Subtitles into SRT Subtitle format.
# 2018-02-09
# Version 0.0.1
import os
import re
import sys

from bs4 import BeautifulSoup
# TTML clock time such as "10:00:01.500" or "00:00:02"; the fraction is optional.
CLOCK_TIME = re.compile(r'^(\d+):(\d{2}):(\d{2})(?:[.,](\d+))?$')

# Line-break element inside a cue, tolerating whitespace before the "/>".
LINE_BREAK_TAG = re.compile(r'<tt:br\s*/>')


def to_srt_timestamp(value):
    """Convert a clock time from the XML into the SRT ``HH:MM:SS,mmm`` form.

    SRT separates milliseconds with a comma and always carries exactly three
    fractional digits; the XML uses a dot and may omit the fraction.

    Args:
        value: Timestamp string taken from a ``begin``/``end`` attribute.

    Returns:
        The normalized SRT timestamp. A value that is not a plain clock time
        is returned stripped but otherwise unchanged, so nothing is dropped
        silently.
    """
    text = value.strip()
    match = CLOCK_TIME.match(text)
    if match is None:
        return text
    hours, minutes, seconds, fraction = match.groups()
    # Pad or truncate the fraction to millisecond precision.
    millis = (fraction or '')[:3].ljust(3, '0')
    return "%02d:%s:%s,%s" % (int(hours), minutes, seconds, millis)


def clean_cue_text(text):
    """Return the cue text with indentation and empty lines removed.

    A blank line ends a cue in SRT, so empty lines inside the text (left over
    from pretty-printed XML or consecutive line breaks) must not survive.
    """
    stripped = (line.strip() for line in text.splitlines())
    return '\n'.join(line for line in stripped if line)


def format_cue(index, begin, end, text):
    """Render one SRT cue: counter line, timing line, then the text lines."""
    return "%d\n%s --> %s\n%s\n" % (
        index,
        to_srt_timestamp(begin),
        to_srt_timestamp(end),
        clean_cue_text(text),
    )


def build_srt(cues):
    """Build the complete SRT document.

    Args:
        cues: Iterable of ``(begin, end, text)`` tuples in display order.

    Returns:
        The SRT text. Cues are numbered sequentially from 1 and separated by
        one blank line.
    """
    return '\n'.join(
        format_cue(number, begin, end, text)
        for number, (begin, end, text) in enumerate(cues, start=1)
    )


def srt_path_for(xml_path):
    """Return *xml_path* with its extension (of any length) replaced by ``.srt``."""
    return os.path.splitext(xml_path)[0] + ".srt"


def main(argv=None):
    """Convert the subtitle XML named on the command line into an SRT file.

    Args:
        argv: Argument vector; defaults to ``sys.argv``.

    Returns:
        Process exit status: 2 when the input file argument is missing,
        0 after the ``.srt`` file has been written next to the input.
    """
    args = sys.argv if argv is None else argv
    if len(args) < 2:
        print("Usage: subtitle_Mediathek_SRT.py <Subtitlefile>.xml")
        return 2

    xml_path = args[1]
    # NOTE(review): assumes the subtitle file is UTF-8 (the XML default);
    # "utf-8-sig" additionally tolerates a leading byte-order mark.
    with open(xml_path, encoding="utf-8-sig") as source:
        markup = source.read()

    # Turn explicit line-break elements into newlines before parsing so they
    # end up in the extracted text.
    markup = LINE_BREAK_TAG.sub('\n', markup)
    soup = BeautifulSoup(markup, "lxml")

    # A paragraph without both timing attributes cannot become an SRT cue.
    cues = [
        (p_tag.get('begin'), p_tag.get('end'), p_tag.text)
        for p_tag in soup.find_all('tt:p')
        if p_tag.get('begin') and p_tag.get('end')
    ]

    with open(srt_path_for(xml_path), 'w', encoding="utf-8") as target:
        target.write(build_srt(cues))
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment