Skip to content

Instantly share code, notes, and snippets.

@Zuescho
Forked from majora2007/removeNonEnglish.py
Last active November 5, 2022 21:29
Show Gist options
  • Save Zuescho/27494569e1508c891f861e3a5e50c570 to your computer and use it in GitHub Desktop.
Save Zuescho/27494569e1508c891f861e3a5e50c570 to your computer and use it in GitHub Desktop.
Remove foreign language audio and subtitle tracks from mkv files in bulk
#!/usr/bin/python3
# Removes non-LANG audio tracks and subtitles from mkv files in a directory.
# Original script by greenbender at https://forum.videohelp.com/threads/343271-BULK-remove-non-English-tracks-from-MKV-container
# Modified by Joseph Milazzo for updated MkvMerge commands. at https://gist.github.com/majora2007/724354d081627cfd96c24b8eefef4ec3
# 12/3/2021: Updated to Python 3.9 and made multilanguage by vidarak at https://gist.github.com/majora2007/724354d081627cfd96c24b8eefef4ec3?permalink_comment_id=4015518#gistcomment-4015518
# 2022-11-05: Added an option to remove English audio tracks. The folder path is hardcoded because passing it as input did not work reliably.
import os
import re
import sys
import subprocess
import json
def print_to_stderr(*a):
    """Print the given values to standard error.

    Accepts any number of positional values and forwards them to ``print``
    unchanged, so they are separated by single spaces and followed by a
    newline, exactly as ``print`` would write them to standard output.
    """
    print(*a, file=sys.stderr)
# Change LANG to the subtitle languages you want to keep.
# Change LANG2 to the audio languages you want to keep.
LANG = ["eng", "jpn"]
LANG2 = ["jpn"]
# Path to the mkvmerge executable. A raw string keeps the Windows backslashes
# literal instead of relying on unrecognised escape sequences (\P, \M, \m).
MKVMERGE = r"C:\Program Files\MKVToolNix\mkvmerge.exe"
# Patterns for one line of textual track information. They are not referenced
# by the JSON-based processing below; presumably they target mkvmerge's older
# verbose identification output (TODO confirm before relying on them).
# The "[" that opens the property list is escaped so that it matches a literal
# bracket rather than starting a character class.
# Groups: (1) track ID, (2) three-letter language code.
AUDIO_RE = re.compile(
    r"Track ID (\d+): audio \([A-Z0-9_/]+\) \[number:\d+ uid:\d+ codec_id:[A-Z0-9_/]+ codec_private_length:\d+ language:([a-z]{3})")
# Groups: (1) track ID, (2) three-letter language code, (3) forced flag (0/1).
SUBTITLE_RE = re.compile(
    r"Track ID (\d+): subtitles \([A-Z0-9_/]+\) \[number:\d+ uid:\d+ codec_id:[A-Z0-9_/]+ codec_private_length:\d+ language:([a-z]{3})(?: track_name:\w*)? default_track:[01]{1} forced_track:([01]{1})")
# Folder containing the files to process. Hardcoded because the input variable
# would not work; set this to the folder you want to walk.
in_dir = "C:\\Users\\Zuescho\\Documents\\Publii\\TestPy"
def _probe_tracks(path):
    """Return the track dicts mkvmerge reports for *path*, or None on failure.

    Runs ``MKVMERGE -J path`` and parses the JSON it prints. None is returned
    when mkvmerge exits with a non-zero status.
    """
    probe = subprocess.run([MKVMERGE, "-J", path], capture_output=True)
    if probe.returncode != 0:
        return None
    return json.loads(probe.stdout).get("tracks", [])


def _language_of(track):
    """Return the track's language code, or None if the property is absent."""
    return track.get("properties", {}).get("language")


def _build_remux_command(path, temp_path, audio_keep, subtitle_keep):
    """Build the mkvmerge command that writes the filtered copy to *temp_path*.

    The first kept audio track is flagged as default and every other kept
    audio or subtitle track has its default flag cleared. An empty selection
    adds no filter option for that track type.
    """
    cmd = [MKVMERGE, "-o", temp_path]
    if audio_keep:
        cmd += ["--audio-tracks", ",".join(str(t["id"]) for t in audio_keep)]
        for position, track in enumerate(audio_keep):
            flag = "0" if position else "1"
            cmd += ["--default-track", f"{track['id']}:{flag}"]
    if subtitle_keep:
        cmd += ["--subtitle-tracks", ",".join(str(t["id"]) for t in subtitle_keep)]
        for track in subtitle_keep:
            cmd += ["--default-track", f"{track['id']}:0"]
    cmd.append(path)
    return cmd


def _process_file(path):
    """Strip unwanted tracks from one mkv file; return True if it was rewritten."""
    tracks = _probe_tracks(path)
    if tracks is None:
        print_to_stderr("mkvmerge failed to identify " + path)
        return False

    # find audio and subtitle tracks
    audio = [t for t in tracks if t.get("type") == "audio"]
    subtitle = [t for t in tracks if t.get("type") == "subtitles"]

    # with at most one track of each kind there is nothing to choose between
    if len(audio) < 2 and len(subtitle) < 2:
        print_to_stderr("nothing to do for " + path)
        return False

    # keep only the tracks in the wanted languages
    audio_lang = [t for t in audio if _language_of(t) in LANG2]
    subtitle_lang = [t for t in subtitle if _language_of(t) in LANG]

    if not audio_lang and not subtitle_lang:
        print_to_stderr("no tracks with that language in " + path)
        return False

    # An empty selection puts no filter on the command line for that track
    # type, so compare what would actually be requested against what the file
    # already has. This avoids remuxing a file on every run when no track
    # would be dropped.
    if (audio_lang or audio) == audio and (subtitle_lang or subtitle) == subtitle:
        print_to_stderr("nothing to do for " + path)
        return False

    temp_path = path + ".temp"
    cmd = _build_remux_command(path, temp_path, audio_lang, subtitle_lang)

    print_to_stderr("Processing " + path + "...")
    remux = subprocess.run(cmd, capture_output=True)
    if remux.returncode != 0:
        # Any non-zero status is treated as a failure, as before.
        print_to_stderr("Failed")
        print_to_stderr(remux.stdout.decode("utf-8", errors="replace"))
        # Best-effort cleanup so a partial output file is not left behind.
        try:
            os.remove(temp_path)
        except OSError:
            pass
        return False
    print_to_stderr("Succeeded")

    # Swap the new file in with a single call instead of delete-then-rename,
    # so the original is not removed before the replacement is in place.
    os.replace(temp_path, path)
    return True


def main(directory=None):
    """Walk *directory* and strip unwanted tracks from every mkv file found.

    Args:
        directory: Folder to walk recursively. Defaults to the module-level
            ``in_dir`` when omitted.

    Returns:
        The number of files that were rewritten.
    """
    root_dir = in_dir if directory is None else directory
    rewritten = 0
    for root, _dirs, files in os.walk(root_dir):
        for name in files:
            # filter out non mkv files (extension compared case-insensitively)
            if not name.lower().endswith(".mkv"):
                continue
            if _process_file(os.path.join(root, name)):
                rewritten += 1
    return rewritten


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment