Skip to content

Instantly share code, notes, and snippets.

@jpaulickcz
Forked from majora2007/removeNonEnglish.py
Last active February 10, 2021 10:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jpaulickcz/55e07c8d654feaf8618137716fc60bb5 to your computer and use it in GitHub Desktop.
Save jpaulickcz/55e07c8d654feaf8618137716fc60bb5 to your computer and use it in GitHub Desktop.
Removes foreign language audio and subtitle tracks from mkv files in bulk. Edited for disposable use on Linux.
#!/usr/bin/python
# Removes non-LANG audio tracks and subtitles from mkv files in a directory.
# Original script by greenbender at https://forum.videohelp.com/threads/343271-BULK-remove-non-English-tracks-from-MKV-container
# Modified by Joseph Milazzo for updated MkvMerge commands.
# Download
# wget https://gist.githubusercontent.com/jpaulickcz/55e07c8d654feaf8618137716fc60bb5/raw/removeNonEnglish.py && chmod +x removeNonEnglish.py && apt install mkvtoolnix -y
# Use
# ./removeNonEnglish.py $DIR
# Cleanup
# rm removeNonEnglish.py
import os
import re
import sys
import StringIO
import subprocess
import json
# Only tracks tagged with this language code will be kept.
LANG = "eng"
# Path to the mkvmerge binary (apt install mkvtoolnix -y).
MKVMERGE = "/usr/bin/mkvmerge"
# Patterns for one line of mkvmerge's textual, bracketed track listing
# (presumably the legacy verbose identification output -- the visible script
# reads the JSON output of "mkvmerge -J" instead and does not reference them).
# The "[" that opens the property list must be escaped: unescaped, it starts a
# character class that runs up to the "]" of "codec_id:[A-Z0-9_/]" and makes
# the pattern accept arbitrary runs of those characters.
# Groups: (track id, 3-letter language code).
AUDIO_RE = re.compile(r"Track ID (\d+): audio \([A-Z0-9_/]+\) \[number:\d+ uid:\d+ codec_id:[A-Z0-9_/]+ codec_private_length:\d+ language:([a-z]{3})")
# Groups: (track id, 3-letter language code, forced-track flag "0"/"1").
SUBTITLE_RE = re.compile(r"Track ID (\d+): subtitles \([A-Z0-9_/]+\) \[number:\d+ uid:\d+ codec_id:[A-Z0-9_/]+ codec_private_length:\d+ language:([a-z]{3})(?: track_name:\w*)? default_track:[01]{1} forced_track:([01]{1})")
def _as_text(data):
    """Return subprocess output as native text (decodes bytes on Python 3)."""
    if isinstance(data, str):
        return data
    return data.decode("utf-8", "replace")


def run_mkvmerge(cmd):
    """Run *cmd* (an argument list) and return ``(returncode, stdout_text)``.

    stderr is captured and discarded so it does not interleave with the
    progress messages this script writes.
    """
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, _ = proc.communicate()
    return proc.returncode, _as_text(stdout)


def split_tracks(tracks):
    """Split mkvmerge JSON track dicts into ``(audio, subtitle)`` lists."""
    audio = [t for t in tracks if t.get('type') == 'audio']
    subtitle = [t for t in tracks if t.get('type') == 'subtitles']
    return audio, subtitle


def tracks_in_language(tracks, lang):
    """Return the tracks whose ``properties.language`` equals *lang*.

    Tracks without a language property are treated as non-matching instead
    of raising ``KeyError``.
    """
    return [t for t in tracks
            if (t.get('properties') or {}).get('language') == lang]


def build_remux_command(mkvmerge, path, out_path, audio_lang, subtitle_lang):
    """Build the mkvmerge argument list that keeps only the given tracks.

    The first kept audio track is flagged as default, every other kept audio
    track and every kept subtitle track is flagged as non-default.  When one
    of the lists is empty no selection option is emitted for that track type,
    so mkvmerge keeps all tracks of that type.
    """
    cmd = [mkvmerge, "-o", out_path]
    if audio_lang:
        cmd += ["--audio-tracks", ",".join(str(a['id']) for a in audio_lang)]
        for index, track in enumerate(audio_lang):
            flag = "0" if index else "1"
            cmd += ["--default-track", "%s:%s" % (track['id'], flag)]
    if subtitle_lang:
        cmd += ["--subtitle-tracks", ",".join(str(s['id']) for s in subtitle_lang)]
        for track in subtitle_lang:
            cmd += ["--default-track", "%s:0" % track['id']]
    cmd.append(path)
    return cmd


def replace_file(src, dst):
    """Move *src* over *dst* without deleting *dst* first.

    Removing the original before renaming leaves a window in which a failed
    rename loses the file; a direct overwrite-rename avoids that.
    """
    replace = getattr(os, "replace", None)  # Python 3.3+
    if replace is not None:
        replace(src, dst)
        return
    try:
        os.rename(src, dst)  # overwrites on POSIX
    except OSError:
        # Platforms where rename refuses to overwrite an existing file.
        os.remove(dst)
        os.rename(src, dst)


def _remove_quietly(path):
    """Delete *path* if it exists, ignoring errors (best-effort cleanup)."""
    try:
        os.remove(path)
    except OSError:
        pass


def process_file(path, lang, mkvmerge):
    """Strip non-*lang* audio/subtitle tracks from one mkv file in place.

    Returns True when the file was rewritten, False when it was skipped or
    mkvmerge failed.  Progress and diagnostics go to stderr.
    """
    # get mkv info
    returncode, output = run_mkvmerge([mkvmerge, "-J", path])
    tracks = None
    if returncode == 0:
        try:
            tracks = json.loads(output).get('tracks', [])
        except (ValueError, AttributeError):
            tracks = None
    if tracks is None:
        sys.stderr.write("mkvmerge failed to identify %s\n" % path)
        return False

    # find audio and subtitle tracks
    audio, subtitle = split_tracks(tracks)

    # filter out files that don't need processing
    if len(audio) < 2 and len(subtitle) < 2:
        sys.stderr.write("nothing to do for %s\n" % path)
        return False

    # filter out tracks that don't match the language
    audio_lang = tracks_in_language(audio, lang)
    subtitle_lang = tracks_in_language(subtitle, lang)
    if not audio_lang and not subtitle_lang:
        sys.stderr.write("no tracks with that language in %s\n" % path)
        return False

    temp_path = path + ".temp"
    cmd = build_remux_command(mkvmerge, path, temp_path, audio_lang, subtitle_lang)

    # process file
    sys.stderr.write("Processing %s ... " % path)
    returncode, output = run_mkvmerge(cmd)
    if returncode != 0:
        # Any non-zero status (including "finished with warnings") is treated
        # as a failure: the original is kept and the partial output discarded.
        sys.stderr.write("Failed\n")
        sys.stdout.write(output + "\n")
        _remove_quietly(temp_path)
        return False
    sys.stderr.write("Succeeded\n")

    # overwrite file
    replace_file(temp_path, path)
    return True


def main(argv=None):
    """Walk the directory given as the first argument and process every mkv.

    Exits with status 1 and a message on stderr when no directory is given.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        sys.exit("Please supply an input directory")
    in_dir = argv[1]
    for root, _dirs, files in os.walk(in_dir):
        for name in files:
            # filter out non mkv files
            if not name.lower().endswith(".mkv"):
                continue
            process_file(os.path.join(root, name), LANG, MKVMERGE)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment