Skip to content

Instantly share code, notes, and snippets.

@oxixes
Last active January 20, 2022 18:10
Show Gist options
  • Save oxixes/ab1018b80a6812cc45fa4870eb208da7 to your computer and use it in GitHub Desktop.
Save oxixes/ab1018b80a6812cc45fa4870eb208da7 to your computer and use it in GitHub Desktop.

This is a script that converts all ASS subtitles in MKV files to SRT.

Usage: python main.py -p PATH_TO_VIDEO_FOLDER

Python 3.6 or above is required (the script uses f-strings), and the script also requires the mkvtoolnix binaries (mkvmerge and mkvextract) to be in PATH (a folder containing them can also be specified with the -mkv or --mkvtoolnix-path flag).

Note: Install requirements.txt before running.

License: MIT

import sys
import argparse
import os
import subprocess
import json
import asstosrt
import shutil
def _exit_with_error(message):
    """Print *message* and terminate the script with a non-zero exit status."""
    print(message)
    # sys.exit is always available (quit() is only injected by the site
    # module) and a status of 1 lets callers detect the failure.
    sys.exit(1)


def _convert_ass_to_srt(ass_path, srt_path):
    """Convert the ASS file at *ass_path* into an SRT file at *srt_path*."""
    # Undecodable bytes are dropped instead of aborting the conversion.
    with open(ass_path, "r", encoding="utf-8", errors="ignore") as ass_file:
        srt_data = asstosrt.convert(ass_file)
    srt_data = patch_duplicated_dialogues(srt_data)
    # newline="" writes the data untouched; the default text mode would
    # expand the "\r\n" separators to "\r\r\n" on Windows.
    with open(srt_path, "w", encoding="utf-8", newline="") as srt_file:
        srt_file.write(srt_data)


def main():
    """Convert the ASS subtitle tracks of MKV files to SRT and remux them.

    Command-line arguments are read with argparse. For every ``.mkv`` file
    found at ``--path`` the script:

    1. lists the tracks with ``mkvmerge -J``;
    2. extracts each ASS track with ``mkvextract`` into ``--subs-path`` and
       converts it to SRT;
    3. runs ``mkvmerge`` to build a new MKV that contains the original
       non-subtitle content, the converted SRT tracks (keeping language,
       default/forced flags and track name) and the remaining subtitle
       tracks untouched.

    After all files have been processed the temporary subtitle folder is
    removed, unless ``--no-delete`` is given; with ``--keep-ass`` only the
    generated ``.srt`` files are removed.

    Exits with status 1 when the input path or the mkvtoolnix path is invalid.
    """
    parser = argparse.ArgumentParser(description="Python Script to convert ASS subtitles to SRT in MKV files.")
    parser.add_argument('-p', '--path', help="Path to the folder where the MKV files are located (or MKV file).", required=True)
    parser.add_argument('-o', '--output', help="Path to the output folder (or output MKV file). (Default: [current directory]/output)", default="output")
    parser.add_argument('-s', '--subs-path', help="Path to the temporary folder where the subtitles are going to be stored. (Default: [current directory]/subs)", default="subs")
    parser.add_argument('--no-delete', help="Keep the temporary subtitle folder after the script is finished.", action='store_true')
    parser.add_argument('--keep-ass', help="Keep only ASS files in the temporary subtitle folder and delete converted ones.", action='store_true')
    parser.add_argument('--keep-fonts', help="The resulting MKV files will contain fonts.", action='store_true')
    parser.add_argument('-mkv', '--mkvtoolnix-path', help="Path to mkvtoolnix binary folder.", default="")
    args = parser.parse_args()

    # A path ending in "mkv" is treated as a single input file; anything
    # else must be a directory whose entries are scanned.
    files = []
    if args.path.endswith('mkv'):
        if not os.path.isfile(args.path):
            _exit_with_error("Error: path is not a valid file")
        files.append(os.path.basename(args.path))
        args.path = os.path.dirname(args.path)
    else:
        if not os.path.isdir(args.path):
            _exit_with_error("Error: path is not a valid directory")
        files += os.listdir(args.path)

    if args.mkvtoolnix_path != "" and not os.path.isdir(args.mkvtoolnix_path):
        _exit_with_error("Error: mkvtoolnix path is not a valid directory")

    # Windows executables carry an ".exe" suffix.
    exe_str = ".exe" if os.name == 'nt' else ""
    mkvmerge = os.path.join(args.mkvtoolnix_path, f"mkvmerge{exe_str}")
    mkvextract = os.path.join(args.mkvtoolnix_path, f"mkvextract{exe_str}")

    # --output names a file only when it ends in ".mkv" and there is at most
    # one input; otherwise it is a folder that receives one MKV per input.
    single_output = args.output.endswith(".mkv") and not len(files) > 1

    if not os.path.exists(args.subs_path):
        os.makedirs(args.subs_path)
    if not single_output and not os.path.exists(args.output):
        os.makedirs(args.output)

    for file in files:
        if not file.endswith(".mkv"):
            continue
        print(f"Processing {file}...")
        source = os.path.join(args.path, file)
        base_name = os.path.splitext(file)[0]

        result = subprocess.run([mkvmerge, "-J", source], shell=False, stdout=subprocess.PIPE)
        try:
            json_info = json.loads(result.stdout.decode('utf-8'))
        except (json.JSONDecodeError, UnicodeDecodeError):
            # One unreadable file should not abort the whole batch.
            print(f"Error: could not read track information from {file}, skipping.")
            continue

        # Each entry: (track id, language, default flag, forced flag, name).
        ass_tracks = []
        other_tracks = []
        for track in json_info.get("tracks", []):
            if track["codec"] == "SubStationAlpha":
                is_ass = True
            elif track["type"] == "subtitles":
                is_ass = False
            else:
                continue

            properties = track["properties"]
            entry = (track["id"], properties["language"], properties["default_track"],
                     properties["forced_track"], properties.get("track_name", ""))
            if not is_ass:
                other_tracks.append(entry)
                continue

            ass_tracks.append(entry)
            subtitle_filename = os.path.join(args.subs_path, base_name + "_" + str(track["id"]))
            subprocess.run(
                [mkvextract, source, "tracks", str(track["id"]) + ":" + subtitle_filename + ".ass"],
                shell=False, stdout=subprocess.DEVNULL)
            _convert_ass_to_srt(subtitle_filename + ".ass", subtitle_filename + ".srt")

        output_file = args.output if single_output else os.path.join(args.output, file)
        if os.path.exists(output_file):
            # Do not overwrite an existing file; write "<name>_p.mkv" instead.
            output_file = os.path.join(os.path.dirname(output_file),
                                       os.path.splitext(os.path.basename(output_file))[0] + "_p.mkv")

        # First input: the source without any subtitle track (-S) and,
        # unless fonts are kept, without attachments (-M).
        command = [mkvmerge, "-o", output_file, "-S"]
        if not args.keep_fonts:
            command.append("-M")
        command.append(source)

        # One extra input per converted SRT file, restoring the metadata of
        # the ASS track it replaces.
        for track in ass_tracks:
            subtitle_filename = os.path.join(args.subs_path, base_name + "_" + str(track[0]) + ".srt")
            command += ["--language", "0:" + track[1]]
            command += ["--default-track", "0:" + str(int(track[2]))]
            command += ["--forced-track", "0:" + str(int(track[3]))]
            if track[4] != "":
                command += ["--track-name", "0:" + track[4]]
            command += [subtitle_filename]

        # The source is added once more per non-ASS subtitle track, with
        # everything except that single subtitle track disabled.
        for track in other_tracks:
            command += ["-A", "-D", "-B", "-M", "--no-chapters", "--no-global-tags"]  # Remove everything that is not a subtitle track
            command += ["-s", str(track[0])]
            command += [source]

        subprocess.run(command, shell=False, stdout=subprocess.DEVNULL)

    if not args.no_delete:
        if not args.keep_ass:
            shutil.rmtree(args.subs_path)
        else:
            for name in os.listdir(args.subs_path):
                if name.endswith(".srt"):
                    os.remove(os.path.join(args.subs_path, name))
    print("Done!")
def patch_duplicated_dialogues(srt_data):
    """Return *srt_data* with duplicated and malformed dialogues removed.

    The input is split into dialogues on blank lines ("\\r\\n\\r\\n") and each
    dialogue into lines on "\\r\\n". A dialogue is dropped when it has fewer
    than three lines, or when its second and third lines match those of an
    earlier dialogue. The first occurrence is kept and the original order is
    preserved. The first line of each dialogue is left as-is, so the
    remaining entries are not renumbered.

    Args:
        srt_data: SRT text using "\\r\\n" line endings.

    Returns:
        The remaining dialogues joined by "\\r\\n\\r\\n", without a trailing
        separator; an empty string when nothing remains.
    """
    separator = "\r\n\r\n"
    seen = set()
    kept = []
    for dialogue in srt_data.split(separator):
        if not dialogue:
            # Empty chunks come from leading/trailing/repeated separators.
            continue
        parts = dialogue.split("\r\n")
        if len(parts) < 3:
            # Drop only this malformed entry; later dialogues must still be
            # checked for duplicates.
            continue
        key = (parts[1], parts[2])
        if key in seen:
            continue
        seen.add(key)
        kept.append(dialogue)
    return separator.join(kept)
if __name__ == "__main__":
    # The script uses f-strings, which need Python 3.6, so that is the real
    # minimum version rather than 3.5.
    if sys.version_info < (3, 6):
        print("Python 3.6 or above is required.")
        # Exit with a non-zero status so callers can detect the failure.
        sys.exit(1)
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment