Created
August 9, 2019 17:58
-
-
Save hengstchon/a117973861320e1d332afca1e2ea89ad to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import sys

from bs4 import BeautifulSoup
# The file to convert is the first command-line argument; without it there
# is nothing to do, so report the usage error on stderr and exit with 2.
if len(sys.argv) < 2:
    print("Need file name!", file=sys.stderr)
    sys.exit(2)

# Read the whole document into memory; it is parsed in one go further down.
# NOTE(review): the file is decoded with the platform default encoding --
# confirm whether an explicit encoding should be passed here and on output.
with open(sys.argv[1]) as f:
    s = f.read()
def parse_time(time):
    """Return *time* with every '.' replaced by ','.

    Used to turn the fractional-seconds separator of the source timestamps
    into the comma that appears in the generated SRT timing lines.
    """
    return time.replace('.', ',')
def do_num(soup):
    """Return the cue-number line for one subtitle paragraph.

    Args:
        soup: The paragraph element (anything supporting ``soup['xml:id']``).
            Its ``xml:id`` value must be three prefix characters followed by
            an integer -- presumably a zero-based index; confirm against the
            input format.

    Returns:
        The integer plus one, as a string terminated by a newline.

    Raises:
        ValueError: If the part of ``xml:id`` after the prefix is not an
            integer.
    """
    # Use the argument, not the module-level loop variable, so the function
    # works no matter where it is called from.
    index = int(soup['xml:id'][3:])
    return f'{index + 1}\n'
def do_time(soup):
    """Return the timing line for one subtitle paragraph.

    Args:
        soup: The paragraph element (anything supporting ``soup['begin']``
            and ``soup['end']``).

    Returns:
        ``"<begin> --> <end>\\n"`` where both timestamps have been passed
        through ``parse_time``.
    """
    # Use the argument, not the module-level loop variable, so the function
    # works no matter where it is called from.
    begin = parse_time(soup['begin'])
    end = parse_time(soup['end'])
    return f'{begin} --> {end}\n'
def do_text(soup):
    """Return the text lines of one subtitle paragraph.

    Args:
        soup: The paragraph element; every ``tt:span`` found beneath it
            contributes one line.

    Returns:
        The text of each span followed by a newline, concatenated in
        document order. An element without spans yields an empty string.
    """
    # get_text() is used instead of .string because .string is None for a
    # span with more than one child, which would make the concatenation
    # raise TypeError.
    return ''.join(
        span.get_text() + '\n' for span in soup.find_all('tt:span')
    )
# Turn each paragraph of each div into one SRT cue: number line, timing
# line, text lines, then a blank separator line.
soup = BeautifulSoup(s, 'lxml')
cues = []
for div_tag in soup.find_all('tt:div'):
    for p_tag in div_tag.find_all('tt:p'):
        cues.append(do_num(p_tag) + do_time(p_tag) + do_text(p_tag) + '\n')
srt = ''.join(cues)

# Replace the input's extension, whatever its length, with ".srt". Slicing
# off a fixed four characters would mangle names such as "movie.ttml".
out_path = os.path.splitext(sys.argv[1])[0] + ".srt"
with open(out_path, 'w') as f:
    f.write(srt)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment