Skip to content

Instantly share code, notes, and snippets.

@augustomen
Created November 19, 2017 02:11
Show Gist options
  • Save augustomen/7309b747ab4e9c03d377ee9a97a25240 to your computer and use it in GitHub Desktop.
Save augustomen/7309b747ab4e9c03d377ee9a97a25240 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
import re
import sys
# Line terminator (CRLF) used to split the input subtitle file into blocks and
# lines, and to terminate every line written to the output.
NEWLINE = '\r\n'
# Matches a cue timing line such as "00:00:01,000 --> 00:00:04,000". The
# fractional separator may be a comma or a dot, followed by 0-3 digits.
# NOTE(review): not referenced anywhere in the visible code.
TIME_STR = re.compile(r'\d{2}\:\d{2}\:\d{2}[\,\.]\d{0,3}\s*--\>\s*\d{2}\:\d{2}\:\d{2}[\,\.]\d{0,3}')
# Matches any "<...>" markup tag (for example "<i>" or "</font>"); used by
# strip_tags() to remove such tags from cue text.
TAGS_PATTERN = re.compile(r'\<[^\>]*\>')
def strip_tags(text):
    """Remove every ``<...>`` tag from ``text`` and trim outer whitespace.

    A falsy ``text`` (such as ``None`` or an empty string) is treated as an
    empty string, so the result is always a string.
    """
    if not text:
        text = u''
    without_tags = TAGS_PATTERN.sub(u'', text)
    return without_tags.strip()
def translate_srt(from_lang, to_lang, input_file, output, batch_size=50):
    """Translate the cue text of an SRT subtitle file and write the result.

    The file is split into blank-line separated blocks. The first two lines
    of each block (expected to be the cue index and the timing line) are
    copied through unchanged; the remaining lines are stripped of markup
    tags, translated with ``mtranslate`` and written back, one output line
    per original text line.

    Cues are sent to the translator in batches: the texts of a batch are
    joined with ``|||`` and the line breaks inside one cue are encoded as
    ``|``, so a single request carries many cues.

    The input is decoded as UTF-8 and processed as text throughout, and text
    (not bytes) is written to ``output``. This is what Python 3 streams such
    as ``sys.stdout`` expect.

    Args:
        from_lang: Source language code passed to ``mtranslate.translate``.
        to_lang: Target language code passed to ``mtranslate.translate``.
        input_file: Path of the SRT file to read. CRLF, LF and CR line
            endings are all accepted.
        output: Writable text stream providing ``write`` and ``flush``.
            Output lines are always terminated with ``NEWLINE``.
        batch_size: Number of cues sent per translation request.

    Raises:
        IOError / OSError: If ``input_file`` cannot be opened or read.
        UnicodeDecodeError: If ``input_file`` is not valid UTF-8.
    """
    import io

    import mtranslate

    # Decode once at the I/O boundary. The default universal-newlines mode
    # turns every line-ending style into '\n', so files that do not use CRLF
    # are split into cues correctly. The ``with`` block closes the handle.
    with io.open(input_file, encoding='utf-8') as source:
        content = source.read()

    headers = []  # untouched "index + timing" part of each pending cue
    texts = []    # tag-free text of each pending cue, lines joined by '|'

    def flush_batch():
        """Translate the pending cues, write them out and reset the batch."""
        if not headers:
            return
        translated = mtranslate.translate(
            ' ||| '.join(texts), to_language=to_lang, from_language=from_lang)
        segments = translated.split('|||')
        # The translator may drop or merge separators. Pad with a placeholder
        # so every cue header is still emitted (zip would otherwise silently
        # drop the trailing cues of the batch).
        segments += ['...'] * (len(headers) - len(segments))
        for header, segment in zip(headers, segments):
            output.write(header)
            output.write(NEWLINE)
            output.write(NEWLINE.join(s.strip() for s in segment.split('|')))
            output.write(NEWLINE)
            output.write(NEWLINE)
        output.flush()
        del headers[:]
        del texts[:]

    for raw_block in content.split('\n\n'):
        # Extra blank lines between cues or at the end of the file would
        # otherwise shift the header lines or produce phantom empty cues.
        block = raw_block.strip('\n')
        if not block:
            continue
        # The musical-note character is replaced with an apostrophe before
        # translation, as the original implementation did.
        lines = block.replace(u'\u266a', u"'").split('\n')
        headers.append(NEWLINE.join(lines[:2]))
        texts.append(strip_tags('|'.join(lines[2:])))
        if len(headers) >= batch_size:
            flush_batch()

    # Emit whatever is left in the final, possibly partial, batch.
    flush_batch()
if __name__ == '__main__':
    # Command-line entry point: expects exactly three positional arguments
    # (source language, target language, input file) after the script name.
    arguments = sys.argv[1:]
    if len(arguments) != 3:
        sys.stderr.write(
            'Usage:\n'
            'python translate_srt.py <from_language> <to_language> input_file\n'
        )
        sys.exit(1)
    source_lang, target_lang, srt_path = arguments
    # The translated subtitles are written to standard output.
    translate_srt(source_lang, target_lang, srt_path, sys.stdout)
    sys.exit(0)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment