Skip to content

Instantly share code, notes, and snippets.

@hengstchon
Created August 9, 2019 17:58
Show Gist options
  • Save hengstchon/a117973861320e1d332afca1e2ea89ad to your computer and use it in GitHub Desktop.
Save hengstchon/a117973861320e1d332afca1e2ea89ad to your computer and use it in GitHub Desktop.
import os
import sys

from bs4 import BeautifulSoup
# The subtitle file path is expected as the first command-line argument;
# without it there is nothing to convert, so report and exit with status 2.
if not sys.argv[1:]:
    print("Need file name!")
    sys.exit(2)

# Read the entire input document into `s`; it is parsed further down.
with open(sys.argv[1]) as f:
    s = f.read()
def parse_time(time):
    """Return *time* with every '.' replaced by ','.

    Used to turn a timestamp written with a dot before the fractional part
    into the comma-separated form written to the .srt output.
    """
    return time.replace('.', ',')
def do_num(soup):
    """Return the 1-based cue number line for one subtitle element.

    The element's ``xml:id`` attribute is read, its first three characters
    are dropped, the remainder is parsed as an integer and incremented by
    one. (Presumably ids look like ``sub0``, ``sub1``, ... -- confirm
    against real input.)

    Args:
        soup: The element to read; must support ``soup['xml:id']``.

    Returns:
        The number as a string followed by a newline.

    Raises:
        KeyError: If the element has no ``xml:id`` attribute.
        ValueError: If the id suffix is not an integer.
    """
    # Read from the argument rather than a module-level loop variable so the
    # function works for whatever element the caller passes in.
    zero_based = int(soup['xml:id'][3:])
    return str(zero_based + 1) + '\n'
def do_time(soup):
    """Return the ``begin --> end`` timing line for one subtitle element.

    Both the ``begin`` and ``end`` attributes are passed through
    ``parse_time`` before being formatted.

    Args:
        soup: The element to read; must support ``soup['begin']`` and
            ``soup['end']``.

    Returns:
        A string of the form ``"<begin> --> <end>\\n"``.

    Raises:
        KeyError: If either attribute is missing.
    """
    # Read from the argument rather than a module-level loop variable so the
    # function works for whatever element the caller passes in.
    begin = parse_time(soup['begin'])
    end = parse_time(soup['end'])
    return f'{begin} --> {end}\n'
def do_text(soup):
    """Return the text of every ``tt:span`` under *soup*, one per line.

    Args:
        soup: The element to search; must provide ``find_all``.

    Returns:
        The concatenation of each span's text followed by a newline, or an
        empty string when no span is found.
    """
    lines = []
    for child in soup.find_all('tt:span'):
        content = child.string
        if content is None:
            # Beautiful Soup returns None from ``.string`` when a tag has
            # more than one child node; fall back to the combined text
            # instead of failing on ``None + '\n'``.
            content = child.get_text()
        lines.append(content + '\n')
    return ''.join(lines)
# Parse the document and build one SRT cue (number line, timing line, text
# lines, blank separator) for every tt:p element found inside a tt:div.
soup = BeautifulSoup(s, 'lxml')
cues = []
for div_tag in soup.find_all('tt:div'):
    for p_tag in div_tag.find_all('tt:p'):
        cues.append(do_num(p_tag) + do_time(p_tag) + do_text(p_tag) + '\n')
srt = ''.join(cues)

# Write next to the input file, swapping its extension for ".srt".
# splitext removes only the actual extension, so inputs whose extension is
# not exactly three characters (or is absent) still map to "<stem>.srt".
with open(os.path.splitext(sys.argv[1])[0] + ".srt", 'w') as f:
    f.write(srt)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment