Skip to content

Instantly share code, notes, and snippets.

@bbbradsmith
Last active June 10, 2023 18:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bbbradsmith/6814196ea48ff38cfe4ff17b64f4d82c to your computer and use it in GitHub Desktop.
Save bbbradsmith/6814196ea48ff38cfe4ff17b64f4d82c to your computer and use it in GitHub Desktop.
Plex subtitle file fixer and cleanup
#!/usr/bin/env python3
#
# This script attempts to find suitable subtitles in video collections,
# and will copy the best candidate into the same folder as the video,
# with the same filename as the video with the subtitle's extension.
#
# If a video already has a subtitle file in this place, it will not overwrite it.
#
# It can also delete directories and files as automatic cleanup.
#
# Subtitle candidates are currently ranked by these criteria, in order of priority:
# 1. Contains the video's base filename in its path or filename.
# 2. Contains a substring that may indicate a preferred language (e.g. "en").
# 3. Contains the most data. (Prefer more descriptive text.)
#
# This will operate on the current folder.
#
import os
import shutil
# ---- Behaviour switches ----------------------------------------------------
# When True, no file is copied or deleted.
PREVIEW = False
# When True, every ranked subtitle candidate is printed, not just the winner.
CANDIDATES = False
# When True and a folder holds more than one video, only subtitles whose path
# contains the video's base name are considered for that video.
STRICT_SIBLINGS = True
# Whether unneeded files should be removed after fixing subtitles.
CLEANUP = True

# ---- Matching tables (every entry must be lowercase) -----------------------
# File name suffixes that identify a video.
VIDEO_EXT = (
    ".mp4",
    ".mkv",
    ".avi",
    ".flv",
    ".mpeg",
    ".mov",
    ".m4v",
)
# File name suffixes that identify a subtitle.
SUB_EXT = (
    ".srt",
    ".smi",
    ".ssa",
    ".ass",
    ".vtt",
)
# Substrings hinting at the preferred language; a later entry outranks an
# earlier one when both occur in a subtitle's path.
LANG_RANK = ["en", "eng", "english"]
# Files whose lowercased name ends with one of these suffixes are removed by
# the cleanup pass. The entries are lowercased here so they can be written in
# their original spelling.
CLEAN_EXT = tuple(map(str.lower, [
    ".exe",
    ".nfo",
    "rarbg.txt",
    "source.txt",
    "www.YTS.MX.jpg",
    "www.YTS.LT.jpg",
    "www.YTS.RE.jpg",
    "www.YTS.TO.jpg",
    "YTSProxies.com.txt",
    "YIFYStatus.com.txt",
    "WWW.YIFY-TORRENTS.COM.jpg",
    "[TGx]Downloaded from torrentgalaxy.to .txt",
    "NEW upcoming releases by Xclusive.txt",
]))
# Directories whose lowercased name equals one of these entries are removed by
# the cleanup pass. Empty by default.
# Setting this to ("subs",) would remove "Subs" folders, but it seems better to
# keep them just in case. Note the trailing comma: ("subs") is a plain string,
# which would turn the membership test into a substring test.
CLEAN_DIRS = ()
def subfix(path,strict_siblings=False,preview=False,candidates=False):
    """Copy the best subtitle candidate next to every video found under *path*.

    Every file below *path* whose lowercased name ends with one of VIDEO_EXT is
    treated as a video. For each video, all files in the video's folder or
    below whose lowercased name ends with one of SUB_EXT are candidates.
    Candidates are ranked by the tuple (base, lang, size):

    * base -- 1 if the subtitle path contains the video's base name, else 0.
    * lang -- 1-based position of the last LANG_RANK entry found in the
      subtitle path, or 0 if none is found.
    * size -- the subtitle file size in bytes (0 if it cannot be read).

    The highest ranked candidate is copied to the video's folder under the
    video's base name plus the subtitle's extension. An existing file at that
    location is never overwritten.

    Parameters:
        path: root folder to scan recursively.
        strict_siblings: if True and the video's folder contains more than one
            video, only subtitles whose path contains the video's base name
            are considered.
        preview: if True, nothing is copied; the report still shows what would
            be copied ("---->") or skipped ("SKIP>").
        candidates: if True, every candidate and its rank is printed.

    Returns None. Progress is reported with print().
    """
    # gather video list
    videos = []
    for (root, dirs, files) in os.walk(path):
        for f in files:
            if f.lower().endswith(VIDEO_EXT):
                videos.append(os.path.join(root, f))
    # Number of video-named entries per folder, filled lazily so each folder
    # is listed only once even when it holds many videos.
    video_counts = {}
    # for each video select best subtitle
    for v in videos:
        print("Video: %s" % v)
        (vroot, vfile) = os.path.split(v)
        (vbase, vext) = os.path.splitext(vfile)
        vbasel = vbase.lower()
        sibling = False
        if strict_siblings:  # check for siblings
            if vroot not in video_counts:
                video_counts[vroot] = sum(
                    1 for f in os.listdir(vroot) if f.lower().endswith(VIDEO_EXT)
                )
            sibling = video_counts[vroot] > 1
        # gather all subs in the video folder or below
        subs = []
        for (sroot, sdirs, sfiles) in os.walk(vroot):
            for f in sfiles:
                if not f.lower().endswith(SUB_EXT):
                    continue
                p = os.path.join(sroot, f)
                # siblings require the video's base name in the subtitle path
                if (not sibling) or (vbasel in p.lower()):
                    subs.append(p)
        # rank the subs
        subranks = []
        for s in subs:
            sl = s.lower()
            # first rank by containing the base name
            rank_base = 1 if vbasel in sl else 0
            # next rank by containing language string (later entry wins)
            rank_lang = 0
            for (position, lang) in enumerate(LANG_RANK, start=1):
                if lang in sl:
                    rank_lang = position
            # next rank by filesize
            try:
                rank_fs = os.path.getsize(s)
            except OSError:
                # Unreadable candidate (e.g. broken symlink): rank it as empty
                # instead of aborting the whole run.
                rank_fs = 0
            subranks.append((rank_base, rank_lang, rank_fs))
        # choose the best ranked; max() keeps the first of equally ranked subs
        best_sub_index = 0
        if subs:
            best_sub_index = max(range(len(subs)), key=subranks.__getitem__)
        # list candidates
        if candidates:
            for (i, (s, r)) in enumerate(zip(subs, subranks)):
                descrip = " best" if (i == best_sub_index) else ""
                print(" ??? > %s (%d,%d,%d)%s" % tuple([s]+list(r)+[descrip]))
        # apply if found
        if not subs:
            print(" NONE.")
            continue
        s = subs[best_sub_index]
        descrip = "---->"
        (sbase, sext) = os.path.splitext(s)
        of = os.path.join(vroot, vbase + sext)
        if os.path.exists(of):
            # Never overwrite; checked in preview mode too so the preview
            # matches what a real run would do.
            descrip = "SKIP>"
        elif not preview:
            try:
                shutil.copyfile(s, of)
            except OSError:
                # Covers shutil.Error/SameFileError as well; anything else
                # (including Ctrl-C) is allowed to propagate.
                descrip = "ERR!>"
        print(" %s %s" % (descrip, s))
def cleanup(path,preview=False):
    """Remove unwanted directories and files found under *path*.

    A directory is selected when its lowercased name is in CLEAN_DIRS; a file
    is selected when its lowercased name ends with one of CLEAN_EXT. Selected
    directories are removed with their entire contents, so they are not
    scanned any further and their files are not listed separately.

    Parameters:
        path: root folder to scan recursively.
        preview: if True, the selected paths are only printed, not removed.

    Returns None. Every selected path is printed; a removal that fails with
    OSError is reported as " >> ERROR!" and the run continues.
    """
    clean_files = []
    clean_dirs = []
    for (root, dirs, files) in os.walk(path):
        kept = []
        for d in dirs:
            if d.lower() in CLEAN_DIRS:
                clean_dirs.append(os.path.join(root, d))
            else:
                kept.append(d)
        # Prune in place so os.walk does not descend into folders that will be
        # removed wholesale; otherwise their files would be queued as well and
        # fail with a spurious error once the folder is already gone.
        dirs[:] = kept
        for f in files:
            if f.lower().endswith(CLEAN_EXT):
                clean_files.append(os.path.join(root, f))
    if clean_dirs:
        print("Cleanup directories:")
        for d in clean_dirs:
            print(" > %s" % d)
            if not preview:
                try:
                    shutil.rmtree(d)
                except OSError:
                    print(" >> ERROR!")
    if clean_files:
        print("Cleanup files:")
        for f in clean_files:
            print(" > %s" % f)
            if not preview:
                try:
                    os.remove(f)
                except OSError:
                    print(" >> ERROR!")
    if not clean_dirs and not clean_files:
        print("Cleanup found nothing to remove.")
# Fix subtitles for the current folder.
subfix(".",STRICT_SIBLINGS,PREVIEW,CANDIDATES)
# The cleanup pass only runs when the CLEANUP switch is enabled.
if CLEANUP:
    cleanup(".",PREVIEW)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment