Last active
February 7, 2023 18:22
-
-
Save glowinthedark/8860363c519b0f86470d63d420a38625 to your computer and use it in GitHub Desktop.
Convert SRT subtitles between Simplified and Traditional Chinese
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
import argparse
import sys
from pathlib import Path

import srt
from opencc import OpenCC
# Module-level OpenCC converters, built once and used by the script body.
# Per OpenCC's configuration list: 's2twp' converts Simplified to Traditional
# (Taiwan standard) with Taiwanese phrasing; 'tw2sp' converts Traditional
# (Taiwan standard) to Simplified with Mainland phrasing.
simp2trad = OpenCC('s2twp')
trad2simp = OpenCC('tw2sp')
def to_pinyin(chin):
    """Join the first item of every segment from ``pinyin(chin)`` with spaces.

    NOTE(review): ``pinyin`` is neither imported nor defined in the visible
    source, so calling this raises ``NameError`` as the file stands.
    Presumably it is ``pypinyin.pinyin`` -- confirm and import it, or drop
    this helper; nothing in the visible script calls it.
    """
    return ' '.join(seg[0] for seg in pinyin(chin))
def build_arg_parser(prog_name: str) -> argparse.ArgumentParser:
    """Build the command-line parser for the converter.

    Args:
        prog_name: Script name substituted into the usage examples.

    Returns:
        A parser with one positional SRT path, a required and mutually
        exclusive direction flag (``-t`` or ``-s``), and the optional
        ``-o`` (output file) and ``-e`` (input encoding) settings.
    """
    parser = argparse.ArgumentParser(
        prog='SimpTradConv',
        description='SRT Chinese simplified-traditional converter',
        usage=f"""
Convert between Chinese simplified and traditional:
\t{prog_name} subs-chinese-simp.srt -t
Custom output file name:
\t{prog_name} subs-chinese-simp.srt -t -o custom-traditional.srt
""")
    parser.add_argument('srt_file',
                        metavar='srt_file',
                        help='SRT-file')
    # Exactly one conversion direction must be given.
    direction = parser.add_mutually_exclusive_group(required=True)
    direction.add_argument('--simp-to-trad', '-t',
                           action='store_true',
                           default=False,
                           help='Convert simplified to traditional')
    direction.add_argument('--trad-to-simp', '-s',
                           action='store_true',
                           default=False,
                           help='Convert traditional to simplified')
    parser.add_argument('--output-file', '-o',
                        default=None,
                        help='Output filename')
    parser.add_argument('--encoding', '-e',
                        default=None,
                        help='Input file encoding')
    return parser


def default_output_path(orig_path: Path, simp_to_trad: bool) -> Path:
    """Return the default output path, placed next to the input file.

    The input stem gets a ``_TRAD`` suffix when converting to traditional
    and ``_SIMP`` otherwise; the original file extension is kept.
    """
    tag = 'TRAD' if simp_to_trad else 'SIMP'
    return orig_path.parent / f'{orig_path.stem}_{tag}{orig_path.suffix}'


def convert_subtitles(subs, convert) -> list:
    """Apply ``convert`` to the ``content`` of every subtitle in ``subs``.

    Each subtitle object is modified in place. The iterable is consumed
    eagerly, so a lazy parser can be passed while its file is still open.

    Returns:
        A list of the same subtitle objects, in their original order.
    """
    converted = []
    for sub in subs:
        sub.content = convert(sub.content)
        converted.append(sub)
    return converted


def main(argv=None) -> None:
    """Convert one SRT file as directed by the command-line arguments.

    Args:
        argv: Argument list without the program name; defaults to
            ``sys.argv[1:]``.
    """
    parser = build_arg_parser(Path(sys.argv[0]).name)
    args = parser.parse_args(sys.argv[1:] if argv is None else argv)
    print(args)

    orig_path = Path(args.srt_file)
    # The flags are mutually exclusive and one is required, so when
    # --simp-to-trad is absent --trad-to-simp must have been given.
    convert = simp2trad.convert if args.simp_to_trad else trad2simp.convert

    # Input is read as UTF-8 unless --encoding says otherwise. Conversion
    # happens inside the ``with`` so parsing finishes before the file closes.
    with orig_path.open(encoding=args.encoding or 'utf-8') as fi:
        converted_subs = convert_subtitles(srt.parse(fi), convert)

    # Compose once and reuse the text for both stdout and the output file.
    composed = srt.compose(converted_subs)
    print(composed)

    if args.output_file:
        generated_srt_file = Path(args.output_file)
    else:
        generated_srt_file = default_output_path(orig_path, args.simp_to_trad)

    # Output is always written as UTF-8, whatever the input encoding was.
    with generated_srt_file.open(mode='w', encoding='utf-8') as fout:
        fout.write(composed)
    print(f'Generated file: {generated_srt_file.absolute()}')


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment