Created
February 5, 2025 04:53
-
-
Save aarondill/c31cafa75d766d1ee81747d73edcb8e5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
from pathlib import Path | |
import subprocess | |
import json | |
from argparse import ArgumentParser | |
from datetime import datetime | |
def error(text: str):
    """Print ``text`` to stdout as an ``Error:`` line in ANSI bright red.

    Returns whatever ``print`` returns (``None``), so callers can write
    ``return error(...)`` to report and bail out in one statement.
    """
    red = "\033[91m"
    reset = "\033[0m"
    message = "{}Error: {}{}".format(red, text, reset)
    return print(message)
def warn(text: str):
    """Print ``text`` to stdout as a ``Warning:`` line in ANSI bright yellow.

    Returns whatever ``print`` returns (``None``), so callers can write
    ``return warn(...)`` to report and bail out in one statement.
    """
    yellow = "\033[93m"
    reset = "\033[0m"
    message = "{}Warning: {}{}".format(yellow, text, reset)
    return print(message)
def extract_all_subtitles(file: Path) -> None:
    """Flag the preferred English subtitle track of ``file`` as default.

    Despite its name, this function extracts nothing. It reads the track
    list with ``mkvmerge -J``, chooses one subtitle track, and sets that
    track's ``flag-default`` property with ``mkvpropedit``.

    Candidate selection, in order:

    1. Subtitle tracks whose language is ``eng``.
    2. If there are none and the file has exactly one subtitle track, that
       track is used when its language is missing or ``und``; otherwise a
       warning is printed and nothing is changed.
    3. If there are none and the file has several subtitle tracks, the
       tracks whose language is missing or ``und``.

    When more than one candidate remains, a track with
    ``flag_hearing_impaired`` set is preferred, then a track whose name
    contains "sdh" (case-insensitive), then the second candidate.

    Args:
        file: Path of the file handed to ``mkvmerge`` and ``mkvpropedit``.

    Returns:
        None. Problems are reported through ``error``/``warn`` rather than
        raised.
    """

    def language_of(track):
        # A track without a language property is treated like "und" so that
        # every lookup below is safe on tracks that omit the property.
        return track["properties"].get("language", "und")

    result = subprocess.run(["mkvmerge", "-J", file], capture_output=True, text=True)
    if result.returncode != 0:  # Check if the command was successful
        return error(f"cannot process file {file} {result.stderr}")

    # Parse the JSON response; keep only subtitle tracks, sorted by track id
    # (to ensure stable order).
    mkv_info = json.loads(result.stdout)
    subtitle_tracks = sorted(
        (track for track in mkv_info["tracks"] if track["type"] == "subtitles"),
        key=lambda track: track["id"],
    )
    if not subtitle_tracks:
        return warn(f"no subtitles found in {file}")

    candidates = [track for track in subtitle_tracks if language_of(track) == "eng"]
    if not candidates:
        if len(subtitle_tracks) == 1:
            only_track = subtitle_tracks[0]
            if language_of(only_track) != "und":
                # The only track has a language and it's not English or undefined
                return warn(f"no English track found in {file}")
            # No English track and only one track: use that track
            candidates = [only_track]
        else:
            candidates = [
                track for track in subtitle_tracks if language_of(track) == "und"
            ]

    # Must be checked before indexing: several tracks, none English or
    # undefined, leaves the candidate list empty.
    if not candidates:
        return warn(f"no English track found in {file}")

    track = candidates[0]
    # prefer flag_hearing_impaired if there's more than one candidate track
    if len(candidates) > 1:
        hearing_impaired = [
            candidate
            for candidate in candidates
            if candidate["properties"].get("flag_hearing_impaired") is True
        ] or [
            candidate
            for candidate in candidates
            if "sdh" in candidate["properties"].get("track_name", "").casefold()
        ]
        if hearing_impaired:
            track = hearing_impaired[0]
            if len(hearing_impaired) > 1:
                warn(
                    f"multiple hearing impaired tracks found. Using first track ({track['id']})"
                )
        else:
            # If there's multiple English tracks, use the second track.
            # Usually this is the hearing impaired track.
            track = candidates[1]
            warn(f"multiple English tracks found. Using second track ({track['id']})")

    track_codec = track["codec"]
    track_language = language_of(track)
    print(
        f"Setting default: track {track['id']} codec: {track_codec} language: {track_language}"
    )
    # "track:=<uid>" addresses the track by its UID rather than by position.
    subprocess.run(
        [
            "mkvpropedit",
            "--edit",
            f"track:={track['properties']['uid']}",
            "--set",
            "flag-default=1",
            file,
        ]
    )
if __name__ == "__main__":
    # Script entry point: take one or more file paths on the command line
    # and process them one after another, printing a progress header first.
    cli = ArgumentParser()
    cli.add_argument("filename", nargs="+")
    filenames = cli.parse_args().filename
    total = len(filenames)
    for position, name in enumerate(filenames, start=1):
        file = Path(name)
        print(f"\nfile {position}/{total}: {file}")
        extract_all_subtitles(file)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment