aarondill/subtitle.py

## subtitle.py
import argparse
from pathlib import Path
import subprocess
import json
from argparse import ArgumentParser
from datetime import datetime


def error(text: str):
    """Print ``text`` on stdout as a red ``Error: ...`` line.

    The message is wrapped in the ANSI escape ``\\033[91m`` and reset with
    ``\\033[0m``. The function returns ``None`` (the result of ``print``), so
    callers can write ``return error(...)`` to report and bail out at once.
    """
    message = f"\033[91mError: {text}\033[0m"
    print(message)
    return None


def warn(text: str):
    """Print ``text`` on stdout as a yellow ``Warning: ...`` line.

    The message is wrapped in the ANSI escape ``\\033[93m`` and reset with
    ``\\033[0m``. The function returns ``None`` (the result of ``print``), so
    callers can write ``return warn(...)`` to report and bail out at once.
    """
    message = f"\033[93mWarning: {text}\033[0m"
    print(message)
    return None


def extract_all_subtitles(file: Path):
    """Flag the preferred English subtitle track of ``file`` as default.

    Despite its name, this function does not extract anything: it reads the
    track list with ``mkvmerge -J``, picks one subtitle track and sets
    ``flag-default=1`` on it with ``mkvpropedit``.

    Candidate selection:

    1. Subtitle tracks whose language is ``"eng"``.
    2. If there are none and the file has exactly one subtitle track, that
       track, unless it declares a language other than ``"eng"``/``"und"``.
    3. If there are none and the file has several subtitle tracks, those
       with no language property or with language ``"und"``.

    When several candidates remain, a track with ``flag_hearing_impaired``
    set is preferred, then a track whose name contains "sdh" (case
    insensitive), and otherwise the second candidate is used.

    Args:
        file: Path of the file handed to ``mkvmerge`` and ``mkvpropedit``.

    Returns:
        Always ``None``. Problems are reported through ``error``/``warn``
        instead of being raised.
    """
    result = subprocess.run(["mkvmerge", "-J", file], capture_output=True, text=True)
    if result.returncode != 0:  # Check if the command was successful
        return error(f"cannot process file {file} {result.stderr}")

    def language_of(track):
        # A track without a "language" property is treated like "und" so
        # that every lookup below is safe against the missing key.
        return track["properties"].get("language", "und")

    # Parse the JSON response, keeping only the subtitle tracks sorted by
    # track id (to ensure stable order).
    mkv_info = json.loads(result.stdout)
    subtitle_tracks = sorted(
        (track for track in mkv_info["tracks"] if track["type"] == "subtitles"),
        key=lambda x: x["id"],
    )
    if not subtitle_tracks:
        return warn(f"no subtitles found in {file}")

    candidates = [track for track in subtitle_tracks if language_of(track) == "eng"]
    if not candidates:
        if len(subtitle_tracks) == 1:
            # The only track is used unless it explicitly declares a
            # language that is neither English nor undefined.
            if language_of(subtitle_tracks[0]) not in ("eng", "und"):
                return warn(f"no English track found in {file}")
            candidates = [subtitle_tracks[0]]
        else:
            # Fall back to tracks whose language is missing or undefined.
            candidates = [
                track for track in subtitle_tracks if language_of(track) == "und"
            ]

    # Guard before indexing: the fallback above can leave no candidates.
    if not candidates:
        return warn(f"no English track found in {file}")

    track = candidates[0]
    # prefer flag_hearing_impaired if there's more than one candidate track
    if len(candidates) > 1:
        hearing_impaired = [
            candidate
            for candidate in candidates
            if candidate["properties"].get("flag_hearing_impaired") is True
        ] or [
            candidate
            for candidate in candidates
            if "sdh" in candidate["properties"].get("track_name", "").casefold()
        ]
        if hearing_impaired:
            track = hearing_impaired[0]
            if len(hearing_impaired) > 1:
                warn(
                    f"multiple hearing impaired tracks found. Using first track ({track['id']})"
                )
        else:  # If there's multiple English tracks, use the second track. Usually this is the hearing impaired track.
            track = candidates[1]
            warn(f"multiple English tracks found. Using second track ({track['id']})")

    track_codec = track["codec"]
    track_language = language_of(track)
    print(
        f"Setting default: track {track['id']} codec: {track_codec} language: {track_language}"
    )
    edit = subprocess.run(
        [
            "mkvpropedit",
            "--edit",
            f"track:={track['properties']['uid']}",
            "--set",
            "flag-default=1",
            file,
        ]
    )
    # Surface a non-zero exit status instead of silently ignoring it.
    if edit.returncode != 0:
        warn(f"mkvpropedit exited with code {edit.returncode} for {file}")


# Script entry point: process every file named on the command line, in order.
if __name__ == "__main__":
    cli = ArgumentParser()
    cli.add_argument("filename", nargs="+")
    filenames = cli.parse_args().filename
    total = len(filenames)
    for position, name in enumerate(filenames, start=1):
        file = Path(name)
        # Progress banner, e.g. "file 2/5: movie.mkv", preceded by a blank line.
        print(f"\nfile {position}/{total}: {file}")
        extract_all_subtitles(file)
	import argparse
	from pathlib import Path
	import subprocess
	import json
	from argparse import ArgumentParser
	from datetime import datetime


	def error(text: str):
	    """Print ``text`` on stdout as a red ``Error: ...`` line.

	    The message is wrapped in the ANSI escape ``\\033[91m`` and reset with
	    ``\\033[0m``. Returns ``None`` (the result of ``print``), so callers
	    can write ``return error(...)``.
	    """
	    return print(f"\033[91mError: {text}\033[0m")


	def warn(text: str):
	    """Print ``text`` on stdout as a yellow ``Warning: ...`` line.

	    The message is wrapped in the ANSI escape ``\\033[93m`` and reset with
	    ``\\033[0m``. Returns ``None`` (the result of ``print``), so callers
	    can write ``return warn(...)``.
	    """
	    return print(f"\033[93mWarning: {text}\033[0m")


	def extract_all_subtitles(file: Path):
	    """Flag the preferred English subtitle track of ``file`` as default.

	    Despite its name, this function does not extract anything: it reads the
	    track list with ``mkvmerge -J``, picks one subtitle track and sets
	    ``flag-default=1`` on it with ``mkvpropedit``.

	    Candidate selection:

	    1. Subtitle tracks whose language is ``"eng"``.
	    2. If there are none and the file has exactly one subtitle track, that
	       track, unless it declares a language other than ``"eng"``/``"und"``.
	    3. If there are none and the file has several subtitle tracks, those
	       with no language property or with language ``"und"``.

	    When several candidates remain, a track with ``flag_hearing_impaired``
	    set is preferred, then a track whose name contains "sdh" (case
	    insensitive), and otherwise the second candidate is used.

	    Args:
	        file: Path of the file handed to ``mkvmerge`` and ``mkvpropedit``.

	    Returns:
	        Always ``None``. Problems are reported through ``error``/``warn``
	        instead of being raised.
	    """
	    result = subprocess.run(["mkvmerge", "-J", file], capture_output=True, text=True)
	    if result.returncode != 0:  # Check if the command was successful
	        return error(f"cannot process file {file} {result.stderr}")

	    def language_of(track):
	        # A track without a "language" property is treated like "und" so
	        # that every lookup below is safe against the missing key.
	        return track["properties"].get("language", "und")

	    # Parse the JSON response, keeping only the subtitle tracks sorted by
	    # track id (to ensure stable order).
	    mkv_info = json.loads(result.stdout)
	    subtitle_tracks = sorted(
	        (track for track in mkv_info["tracks"] if track["type"] == "subtitles"),
	        key=lambda x: x["id"],
	    )
	    if not subtitle_tracks:
	        return warn(f"no subtitles found in {file}")

	    candidates = [track for track in subtitle_tracks if language_of(track) == "eng"]
	    if not candidates:
	        if len(subtitle_tracks) == 1:
	            # The only track is used unless it explicitly declares a
	            # language that is neither English nor undefined.
	            if language_of(subtitle_tracks[0]) not in ("eng", "und"):
	                return warn(f"no English track found in {file}")
	            candidates = [subtitle_tracks[0]]
	        else:
	            # Fall back to tracks whose language is missing or undefined.
	            candidates = [
	                track for track in subtitle_tracks if language_of(track) == "und"
	            ]

	    # Guard before indexing: the fallback above can leave no candidates.
	    if not candidates:
	        return warn(f"no English track found in {file}")

	    track = candidates[0]
	    # prefer flag_hearing_impaired if there's more than one candidate track
	    if len(candidates) > 1:
	        hearing_impaired = [
	            candidate
	            for candidate in candidates
	            if candidate["properties"].get("flag_hearing_impaired") is True
	        ] or [
	            candidate
	            for candidate in candidates
	            if "sdh" in candidate["properties"].get("track_name", "").casefold()
	        ]
	        if hearing_impaired:
	            track = hearing_impaired[0]
	            if len(hearing_impaired) > 1:
	                warn(
	                    f"multiple hearing impaired tracks found. Using first track ({track['id']})"
	                )
	        else:  # If there's multiple English tracks, use the second track. Usually this is the hearing impaired track.
	            track = candidates[1]
	            warn(f"multiple English tracks found. Using second track ({track['id']})")

	    track_codec = track["codec"]
	    track_language = language_of(track)
	    print(
	        f"Setting default: track {track['id']} codec: {track_codec} language: {track_language}"
	    )
	    edit = subprocess.run(
	        [
	            "mkvpropedit",
	            "--edit",
	            f"track:={track['properties']['uid']}",
	            "--set",
	            "flag-default=1",
	            file,
	        ]
	    )
	    # Surface a non-zero exit status instead of silently ignoring it.
	    if edit.returncode != 0:
	        warn(f"mkvpropedit exited with code {edit.returncode} for {file}")


	# Script entry point: process every file named on the command line, in order.
	if __name__ == "__main__":
	    parser = ArgumentParser()
	    parser.add_argument("filename", nargs="+")
	    args = parser.parse_args()
	    for i, file in enumerate(map(Path, args.filename)):
	        # Progress banner, e.g. "file 2/5: movie.mkv", preceded by a blank line.
	        print(f"\nfile {i+1}/{len(args.filename)}: {file}")
	        extract_all_subtitles(file)