Skip to content

Instantly share code, notes, and snippets.

@aarondill
Created February 5, 2025 04:53
Show Gist options
  • Save aarondill/c31cafa75d766d1ee81747d73edcb8e5 to your computer and use it in GitHub Desktop.
Save aarondill/c31cafa75d766d1ee81747d73edcb8e5 to your computer and use it in GitHub Desktop.
import argparse
from pathlib import Path
import subprocess
import json
from argparse import ArgumentParser
from datetime import datetime
def error(text: str):
    """Print *text* to stdout as a red ``Error:`` line.

    The message is wrapped in the ANSI bright-red escape sequence and reset
    afterwards. Always returns ``None`` so callers can ``return error(...)``.
    """
    message = f"\033[91mError: {text}\033[0m"
    print(message)
    return None
def warn(text: str):
    """Print *text* to stdout as a yellow ``Warning:`` line.

    The message is wrapped in the ANSI bright-yellow escape sequence and reset
    afterwards. Always returns ``None`` so callers can ``return warn(...)``.
    """
    message = f"\033[93mWarning: {text}\033[0m"
    print(message)
    return None
def _english_candidates(tracks):
    """Return the subtitle tracks that may be English, in the given order.

    Tracks tagged ``eng`` win. If there are none, fall back to tracks whose
    language is missing or ``und`` (undetermined). May return an empty list.
    """
    english = [t for t in tracks if t["properties"].get("language") == "eng"]
    if english:
        return english
    return [t for t in tracks if t["properties"].get("language") in (None, "und")]


def _hearing_impaired_tracks(tracks):
    """Return tracks flagged hearing-impaired, else those named like "SDH"."""
    flagged = [t for t in tracks if t["properties"].get("flag_hearing_impaired")]
    if flagged:
        return flagged
    return [
        t
        for t in tracks
        # "track_name" may be absent; treat that the same as an empty name.
        if "sdh" in (t["properties"].get("track_name") or "").casefold()
    ]


def extract_all_subtitles(file: Path):
    """Mark the preferred English subtitle track of *file* as default.

    Despite its (historical) name, this function extracts nothing. It reads
    the track list with ``mkvmerge -J``, picks one subtitle track and sets
    ``flag-default=1`` on it with ``mkvpropedit``.

    Selection rules:
      * tracks tagged ``eng`` are preferred; otherwise tracks with no
        language or language ``und`` are used;
      * with several candidates, a hearing-impaired track (by flag, else by
        "sdh" in the track name) is preferred; failing that, the second
        candidate is used.

    Problems are reported through ``error``/``warn``; the function always
    returns ``None``.
    """
    result = subprocess.run(["mkvmerge", "-J", file], capture_output=True, text=True)
    if result.returncode != 0:  # Check if the command was successful
        return error(f"cannot process file {file} {result.stderr}")

    # Parse the JSON response
    mkv_info = json.loads(result.stdout)
    # Sort tracks by track id (to ensure stable order)
    subtitle_tracks = sorted(
        (t for t in mkv_info.get("tracks", []) if t["type"] == "subtitles"),
        key=lambda t: t["id"],
    )
    if not subtitle_tracks:
        return warn(f"no subtitles found in {file}")

    candidates = _english_candidates(subtitle_tracks)
    # Check for "no candidate" before indexing: previously a file with several
    # non-English tracks crashed with IndexError instead of warning.
    if not candidates:
        return warn(f"no English track found in {file}")

    track = candidates[0]
    if len(candidates) > 1:
        hearing_impaired = _hearing_impaired_tracks(candidates)
        if hearing_impaired:
            track = hearing_impaired[0]
            if len(hearing_impaired) > 1:
                warn(
                    f"multiple hearing impaired tracks found. Using first track ({track['id']})"
                )
        else:
            # If there's multiple English tracks, use the second track.
            # Usually this is the hearing impaired track.
            track = candidates[1]
            warn(f"multiple English tracks found. Using second track ({track['id']})")

    track_codec = track["codec"]
    # The chosen track may carry no language at all; report it as undetermined.
    track_language = track["properties"].get("language", "und")
    print(
        f"Setting default: track {track['id']} codec: {track_codec} language: {track_language}"
    )
    edit = subprocess.run(
        [
            "mkvpropedit",
            "--edit",
            f"track:={track['properties']['uid']}",
            "--set",
            "flag-default=1",
            file,
        ]
    )
    # mkvpropedit exits with 1 when it only emitted warnings (the edit was
    # still applied) and with another non-zero code when it failed.
    if edit.returncode == 1:
        warn(f"mkvpropedit reported warnings for {file}")
    elif edit.returncode != 0:
        error(f"mkvpropedit failed for {file} (exit code {edit.returncode})")
if __name__ == "__main__":
    # Command line: one or more MKV files, each processed in the order given.
    cli = ArgumentParser()
    cli.add_argument("filename", nargs="+")
    paths = [Path(name) for name in cli.parse_args().filename]
    total = len(paths)
    for position, path in enumerate(paths, start=1):
        # Progress header so the output of each file is easy to tell apart.
        print(f"\nfile {position}/{total}: {path}")
        extract_all_subtitles(path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment