Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save glowinthedark/8860363c519b0f86470d63d420a38625 to your computer and use it in GitHub Desktop.
Save glowinthedark/8860363c519b0f86470d63d420a38625 to your computer and use it in GitHub Desktop.
Convert SRT subtitles between Simplified and Traditional Chinese
#!/usr/bin/env python3
import argparse
import sys
from pathlib import Path
import srt
from opencc import OpenCC
# Module-level OpenCC converters, constructed once at import time and used by
# the script body for every subtitle it converts.
# NOTE(review): 's2twp' and 'tw2sp' are OpenCC configuration names; presumably
# Simplified -> Traditional (Taiwan standard, with phrase conversion) and the
# reverse direction -- confirm against the OpenCC configuration list.
simp2trad = OpenCC('s2twp')
trad2simp = OpenCC('tw2sp')
def to_pinyin(chin):
    """Return the first element of each item of ``pinyin(chin)``, space-joined.

    NOTE(review): ``pinyin`` is not imported anywhere in the visible part of
    this file, so calling this function raises ``NameError`` unless the name
    is provided elsewhere -- confirm before relying on it.
    """
    first_entries = (item[0] for item in pinyin(chin))
    return ' '.join(first_entries)
def default_output_path(orig_path: Path, to_traditional: bool) -> Path:
    """Return the output path used when ``--output-file`` is not given.

    The result sits next to the input file and is named
    ``<stem>_TRAD<suffix>`` or ``<stem>_SIMP<suffix>``.

    Args:
        orig_path: Path of the input SRT file.
        to_traditional: True for simplified-to-traditional conversion,
            False for traditional-to-simplified.

    Returns:
        The generated sibling path.
    """
    marker = 'TRAD' if to_traditional else 'SIMP'
    return orig_path.parent / f'{orig_path.stem}_{marker}{orig_path.suffix}'


def build_parser(prog_name: str) -> argparse.ArgumentParser:
    """Build the command-line parser for the converter.

    Args:
        prog_name: Script name substituted into the usage examples.

    Returns:
        A parser with a positional ``srt_file``, a required mutually
        exclusive direction flag (``-t`` / ``-s``), and the optional
        ``--output-file`` and ``--encoding`` settings.
    """
    parser = argparse.ArgumentParser(
        prog='SimpTradConv',
        description='SRT Chinese simplified-traditional converter',
        usage="""
Convert between Chinese simplified and traditional:
\t{0} subs-chinese-simp.srt -t
Custom output file name:
\t{0} subs-chinese-simp.srt -t -o custom-traditional.srt
""".format(prog_name))
    parser.add_argument('srt_file',
                        metavar='srt_file',
                        help='SRT-file')

    # Exactly one conversion direction must be chosen.
    direction = parser.add_mutually_exclusive_group(required=True)
    direction.add_argument('--simp-to-trad', '-t',
                           action='store_true',
                           default=False,
                           help='Convert simplified to traditional')
    direction.add_argument('--trad-to-simp', '-s',
                           action='store_true',
                           default=False,
                           help='Convert traditional to simplified')

    parser.add_argument('--output-file', '-o',
                        default=None,
                        help='Output filename')
    parser.add_argument('--encoding', '-e',
                        default=None,
                        help='Input file encoding')
    return parser


def main(argv=None) -> None:
    """Convert an SRT file between simplified and traditional Chinese.

    Reads the input file, converts the text of every subtitle with the
    module-level OpenCC converter matching the chosen direction, prints the
    converted SRT to stdout and writes it to the output file as UTF-8.

    Args:
        argv: Argument list without the program name. ``None`` makes
            argparse fall back to ``sys.argv[1:]``.
    """
    parser = build_parser(Path(sys.argv[0]).name)
    args = parser.parse_args(argv)

    orig_path = Path(args.srt_file)
    # The direction flags are mutually exclusive and required, so the
    # converter can be chosen once instead of on every subtitle.
    converter = simp2trad if args.simp_to_trad else trad2simp

    converted_subs = []
    # 'utf-8-sig' decodes plain UTF-8 unchanged but also drops a leading BOM,
    # which would otherwise end up glued to the first subtitle index.
    with orig_path.open(encoding=args.encoding or 'utf-8-sig') as fi:
        # Iterate while the file is still open: the parser consumes the
        # handle as subtitles are requested.
        sub: srt.Subtitle
        for sub in srt.parse(fi):
            sub.content = converter.convert(sub.content)
            converted_subs.append(sub)

    # Compose once and reuse the text for both stdout and the output file.
    composed = srt.compose(converted_subs)
    print(composed)

    if args.output_file:
        generated_srt_file = Path(args.output_file)
    else:
        generated_srt_file = default_output_path(orig_path, args.simp_to_trad)

    with generated_srt_file.open(mode='w', encoding='utf-8') as fout:
        fout.write(composed)
    print(f'Generated file: {generated_srt_file.absolute()}')


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment