Last active
June 30, 2020 09:08
-
-
Save pansila/c5284eb84fc0e17fe84fae8aa879bd76 to your computer and use it in GitHub Desktop.
格式化并合并多个子句到一个整句。需要安装python3,运行方式:`python format_srt.py your_srt_file.srt`
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import argparse

# Default path of the formatted subtitle file, relative to the current
# working directory.
OUT_FILE = 'output.srt'
def _add_text(cues, timing, text):
    """Attach one line of subtitle text to the cue list.

    When *timing* (a ``(start, end)`` tuple) is given, the text opens a new
    cue; otherwise it is appended to the most recent cue.  Text that shows up
    before any timing line has nothing to attach to and is dropped.
    """
    if timing is not None:
        cues.append([timing[0], timing[1], text])
    elif cues:
        cues[-1][2] += ' ' + text


def _read_cues(lines):
    """Parse raw SRT lines into a list of ``[start, end, text]`` cues.

    * ``{\\an8}`` position tags are removed and a trailing ``."`` is rewritten
      to ``".`` so that quoted sentences still end with a period.
    * Lines containing both ``[`` and ``]`` (sound descriptions) are skipped
      entirely.
    * A timing line that never receives text is replaced by the next one.
    * Multi-line cue text is merged into one space-separated line.
    """
    cues = []
    pending = None       # (start, end) of a timing line still waiting for text
    held_number = None   # digit-only line: cue index or numeric subtitle text?
    for raw in lines:
        line = raw.replace('{\\an8}', '').replace('."', '".').strip()

        if '-->' in line:
            # A number directly in front of a timing line is the cue index.
            held_number = None
            start, _, end = line.partition('-->')
            pending = (start.strip(), end.strip())
            continue

        if held_number is not None:
            # Not followed by a timing line, so it was real subtitle text.
            _add_text(cues, pending, held_number)
            pending = None
            held_number = None

        if not line or ('[' in line and ']' in line):
            continue

        if line.isdigit():
            # Decide once the next line is known.
            held_number = line
            continue

        _add_text(cues, pending, line)
        pending = None

    if held_number is not None:
        _add_text(cues, pending, held_number)
    return cues


def _merge_sentences(cues):
    """Merge consecutive cues into whole sentences.

    Cues are accumulated until one ends with ``.``, ``?`` or ``!``; the merged
    entry spans from the start of the first cue to the end of the last one.
    Returns the output file's lines (index, time range, text, blank line per
    entry).  A trailing sentence without a terminator is still emitted.
    """
    results = []
    clauses = []
    number = 0
    sentence_start = None
    sentence_end = None

    for start, end, text in cues:
        if not clauses:
            sentence_start = start
        sentence_end = end
        clauses.append(text)
        if text.endswith(('.', '?', '!')):
            number += 1
            results.extend([
                str(number),
                sentence_start + ' --> ' + sentence_end,
                ' '.join(clauses),
                '',
            ])
            clauses = []

    if clauses:
        # Do not lose the last sentence just because it lacks punctuation.
        number += 1
        results.extend([
            str(number),
            sentence_start + ' --> ' + sentence_end,
            ' '.join(clauses),
            '',
        ])
    return results


def run(srt_file, out_file=None):
    """Reformat an SRT file so that each entry holds one complete sentence.

    Args:
        srt_file: Path of the UTF-8 encoded SRT file to read (a leading BOM
            is tolerated).
        out_file: Path of the file to write.  Defaults to the module-level
            ``OUT_FILE`` when omitted.

    The result is written to *out_file* and a confirmation message is printed.
    """
    if out_file is None:
        out_file = OUT_FILE

    # 'utf-8-sig' decodes plain UTF-8 as well and strips a BOM if present.
    with open(srt_file, 'r', encoding='utf-8-sig') as srtfile:
        cues = _read_cues(srtfile)

    results = _merge_sentences(cues)

    with open(out_file, 'w', encoding='utf-8') as srtfileout:
        srtfileout.write('\n'.join(results))
    print('Formatted srt file is saved to "{}"'.format(out_file))
if __name__ == '__main__':
    # Command-line entry point: the single positional argument is the path
    # of the SRT file to reformat.
    cli = argparse.ArgumentParser()
    cli.add_argument('srt_file', help='the srt file to format')
    run(cli.parse_args().srt_file)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment