Skip to content

Instantly share code, notes, and snippets.

@vofik
Created November 1, 2011 02:06
Show Gist options
  • Save vofik/1329651 to your computer and use it in GitHub Desktop.
Save vofik/1329651 to your computer and use it in GitHub Desktop.
Plex scanner/stacker improvements
#
# Copyright (c) 2010 Plex Development Team. All rights reserved.
#
import re, os, os.path
import Media, VideoFiles, Stack, Utils
SeriesScanner = __import__('Plex Series Scanner')
# Matches names shaped like "Title (2010)" or "Title [2010]": group 1 is the
# title text, group 2 a four-digit year whose first digit is 1 or 2.
nice_match = '(.+) [\(\[]([1-2][0-9]{3})[\)\]]'
# Patterns tested against each subdirectory path in Scan(); a subdirectory that
# matches is removed from `subdirs`. The single pattern looks for names shaped
# like "Show (2) - 1x02 - Episode title", optionally with a "-1x03" range.
standalone_tv_regexs = [ '(.*?)( \(([0-9]+)\))? - ([0-9])+x([0-9]+)(-[0-9]+[Xx]([0-9]+))? - (.*)' ]
# Scans through files, and add to the media list.
def Scan(path, files, mediaList, subdirs, language=None, **kwargs):
    """Turn the video files found in one directory into movie entries.

    Handles three layouts: a DVD rip (a ``video_ts.ifo``/``.bup`` file is
    present), a Blu-ray rip (``.../BDMV/STREAM``), or plain movie files. Plain
    files are additionally stacked, named after the folder when there is only
    one result, and merged across CD/DVD/part/disc subfolders.

    Args:
        path: Directory being scanned; split with ``Utils.SplitPath``.
        files: Paths of the files in that directory.
        mediaList: Output list; movie objects are appended to it in place.
        subdirs: Subdirectory paths. Entries are removed in place so that they
            are not visited again (consumed disc folders, non-DVD siblings of
            a ``VIDEO_TS`` folder, folders that look like TV shows).
        language: Unused here; kept for interface compatibility.
        **kwargs: Ignored.
    """
    # Scan for video files.
    VideoFiles.Scan(path, files, mediaList, subdirs)

    # Check for DVD rips.
    paths = Utils.SplitPath(path)
    video_ts = ContainsFile(files, 'video_ts.ifo')
    if video_ts is None:
        video_ts = ContainsFile(files, 'video_ts.bup')

    if len(paths) >= 1 and len(paths[0]) > 0 and video_ts is not None:
        print("Found a DVD")
        name = year = None

        # Now find the name.
        if paths[-1].lower() == 'video_ts' and len(paths) >= 2:
            # Easiest case: the folder that holds VIDEO_TS names the movie.
            (name, year) = VideoFiles.CleanName(paths[-2])
        else:
            # Work up from the deepest folder until one looks like
            # "Title (Year)". reversed() leaves `paths` itself untouched.
            for p in reversed(paths):
                if re.match(nice_match, p):
                    (name, year) = VideoFiles.CleanName(p)
                    break

            if name is None:
                # No candidate: fall back to the deepest folder. The previous
                # code reversed `paths` in place and then read paths[0], which
                # is this same element.
                (name, year) = VideoFiles.CleanName(paths[-1])

        movie = Media.Movie(name, year)

        # Add the video_ts file first.
        movie.parts.append(video_ts)

        biggestFile = None
        biggestSize = 0
        for i in files:
            if os.path.splitext(i)[1].lower() == '.vob':
                size = os.path.getsize(i)
                if size > biggestSize:
                    biggestSize = size
                    biggestFile = i

        # Add the biggest part so that we can get thumbnail/art/analysis from it.
        if biggestFile is not None:
            movie.parts.append(biggestFile)

        if len(movie.parts) > 0:
            movie.guid = checkNfoFile(movie.parts[0], 1)
            mediaList.append(movie)

    # Check for Bluray rips.
    elif len(paths) >= 3 and paths[-1].lower() == 'stream' and paths[-2].lower() == 'bdmv':
        (name, year) = VideoFiles.CleanName(paths[-3])
        movie = Media.Movie(name, year)
        for i in files:
            movie.parts.append(i)
        mediaList.append(movie)

    else:
        # NOTE(review): the nesting of everything in this branch was
        # reconstructed from statement order - confirm against the upstream
        # scanner.

        # Make movies!
        for i in files:
            fileName = os.path.basename(i)
            (name, year) = VideoFiles.CleanName(os.path.splitext(fileName)[0])

            # If it matches a TV show, don't scan it as a movie.
            tv = False
            for rx in SeriesScanner.episode_regexps[0:-1]:
                if re.match(rx, name):
                    print("The file %s looked like a TV show so we're skipping it ( %s )" % (fileName, rx))
                    tv = True
                    break  # one match is enough; avoids repeating the message

            if not tv:
                # OK, it's a movie
                movie = Media.Movie(name, year)
                movie.source = VideoFiles.RetrieveSource(fileName)
                movie.parts.append(i)
                mediaList.append(movie)

        # Stack the results.
        Stack.Scan(path, files, mediaList, subdirs)

        # Clean the folder name and try a match on the folder.
        if len(path) > 0:
            # NOTE(review): the first replace() looks like a no-op as the text
            # reached us (possibly a double or non-breaking space originally);
            # it is reproduced as found.
            folderName = os.path.basename(path).replace(' ', ' ').replace(' ', '.')
            (cleanName, year) = VideoFiles.CleanName(folderName)
            if len(mediaList) == 1 and re.match(nice_match, cleanName):
                res = re.findall(nice_match, cleanName)
                mediaList[0].name = res[0][0]
                mediaList[0].year = res[0][1]
            elif len(mediaList) == 1 and (len(cleanName) > 1 or year is not None):
                mediaList[0].name = cleanName
                mediaList[0].year = year

        # Check for a folder with multiple 'CD' subfolders and massage.
        # Keys are (outer number, inner number) integer tuples so that disc 10
        # sorts after disc 2; flat disc folders use 0 as the outer number.
        cdPattern = r'(?:cd|dvd|part|pt|disk|disc)[ \\.-]*([0-9]+)'
        foundCDsubdirs = {}
        for s in subdirs:
            m = re.search(cdPattern, os.path.basename(s).lower())
            if not m:
                continue
            outerNumber = int(m.group(1))
            foundSubSubDirs = False
            for subsubdir in os.listdir(s):
                nestedPath = os.path.join(s, subsubdir)
                if os.path.isdir(nestedPath):
                    subm = re.search(cdPattern, subsubdir.lower())
                    if subm:
                        foundSubSubDirs = True
                        foundCDsubdirs[(outerNumber, int(subm.group(1)))] = nestedPath
            if not foundSubSubDirs:
                foundCDsubdirs[(0, outerNumber)] = s

        # More than one cd subdir, let's stack and whack subdirs.
        if len(foundCDsubdirs) > 1:
            orderedKeys = sorted(foundCDsubdirs)
            name, year = VideoFiles.CleanName(os.path.basename(path))
            movie = Media.Movie(name, year)
            movie.guid = checkNfoFile(os.path.dirname(foundCDsubdirs[orderedKeys[0]]), 1)

            for key in orderedKeys:
                d = foundCDsubdirs[key]
                subFiles = [os.path.join(d, f) for f in os.listdir(d)]
                VideoFiles.Scan(d, subFiles, mediaList, [])
                # Decide per entry which directory to drop: a flat disc folder
                # is itself in `subdirs`, a nested one is reached through its
                # parent. Guarded so a parent shared by several nested folders
                # is only removed once.
                if d in subdirs:
                    subdirs.remove(d)
                else:
                    parent = os.path.dirname(d)
                    if parent in subdirs:
                        subdirs.remove(parent)
                movie.parts += subFiles

            if len(movie.parts) > 0:
                mediaList.append(movie)

        # See if we can find a GUID.
        for mediaItem in mediaList:
            if mediaItem.guid is None:
                mediaItem.guid = checkNfoFile(mediaItem.parts[0], len(mediaList))

        if len(mediaList) == 1:
            if mediaList[0].source is None:
                mediaList[0].source = VideoFiles.RetrieveSource(path)

    # If the subdirectories indicate that we're inside a DVD, then whack things
    # other than audio and video.
    whack = []
    if 'video_ts' in [Utils.SplitPath(s)[-1].lower() for s in subdirs]:
        for subdir in subdirs:
            if os.path.basename(subdir).lower() not in ['video_ts', 'audio_ts']:
                whack.append(subdir)

    # Finally, if any of the subdirectories match a TV show, don't enter!
    for subdir in subdirs:
        for rx in standalone_tv_regexs:
            if re.findall(rx, subdir) and subdir not in whack:
                whack.append(subdir)

    # A directory may have qualified twice above; removing it a second time
    # would raise ValueError, so check membership first.
    for w in whack:
        if w in subdirs:
            subdirs.remove(w)
def ContainsFile(files, file):
    """Return the first path in `files` whose base name equals `file`.

    The comparison ignores case. Returns None when nothing matches.
    """
    # Lower-case the wanted name once instead of on every iteration.
    wanted = file.lower()
    for candidate in files:
        if os.path.basename(candidate).lower() == wanted:
            return candidate
    return None
def checkNfoFile(file, fileCount):
    """Look for an IMDb-style id ("tt" followed by digits) in an NFO file.

    Args:
        file: Path of a media file; the NFO is searched for next to it.
        fileCount: Number of media files being considered. With exactly one,
            the first ``*.nfo`` file listed in the same directory is used;
            otherwise only the sidecar ``<file without extension>.nfo``.

    Returns:
        The first ``tt<digits>`` token found in the NFO text, or None when
        there is no NFO, no id in it, or the file system could not be read.
    """
    try:
        path = None

        # Depending on how many media files we have, check differently.
        if fileCount == 1:
            # Look for any NFO file.
            folder = os.path.dirname(file)
            for f in os.listdir(folder):
                if f[-4:].lower() == '.nfo':
                    path = os.path.join(folder, f)
                    break
        else:
            # Look for a sidecar NFO file.
            path = os.path.splitext(file)[0] + '.nfo'

        if path is not None and os.path.exists(path):
            # try/finally (rather than `with`) closes the handle without
            # needing a newer interpreter than the rest of this file does.
            nfo = open(path)
            try:
                nfoText = nfo.read()
            finally:
                nfo.close()
            m = re.search('(tt[0-9]+)', nfoText)
            if m:
                return m.group(1)
    except (IOError, OSError, UnicodeError):
        # Best effort: an unreadable directory or file must not abort the scan.
        print("Warning, couldn't read NFO file.")

    return None
import Media, VideoFiles
import os.path, difflib
import re
def compareFilenames(elem):
    """Sort key for media items: the lower-cased path of the first part."""
    first_part = elem.parts[0]
    return first_part.lower()
# Characters that may differ between the file names of two parts of one stack.
_STACK_DIFFS = r'[\da-n]'
# Trailing part markers ("cd", "disc2", "part.1", ...) stripped from a stack root.
_STACK_SUFFIXES = r'(?:cd|dvd|part|pt|disk|disc|scene)\.?(?:\d+)?$'
# "scene<any char><digits>" marker identifying scene-split files.
_SCENE_PREFIXES = r'(?:^scene.\d+|scene.\d+$)'


def _is_stacked(media):
    """Tell whether an earlier pass already put `media` into a stack."""
    return bool(getattr(media, 'stacked', False))


def _stem(media):
    """Return the first part's file name without directory or extension."""
    return os.path.basename(os.path.splitext(media.parts[0])[0])


def _is_next_number(f1, f2):
    """True if both names start, or both end, with numbers n and n + 1."""
    for pattern in (r'(^\d+)', r'(\d+)$'):
        a = re.search(pattern, f1)
        b = re.search(pattern, f2)
        if a and b and int(a.group(0)) == int(b.group(0)) - 1:
            return True
    return False


def _is_next_letter(f1, f2, position):
    """True if both names start, or both end, with consecutive letters,
    the first being the letter expected at `position` counting from "a"."""
    for pattern in (r'(^[a-y])', r'([a-y])$'):
        a = re.search(pattern, f1)
        b = re.search(pattern, f2)
        if (a and b and ord(a.group(0)) == ord('a') + position
                and ord(a.group(0)) == ord(b.group(0)) - 1):
            return True
    return False


def _stack_monotonic(dir, mediaList, stack_dict):
    """Stack files whose names count upwards (1, 2, 3 ... or a, b, c ...)."""
    last_pair = len(mediaList) - 2
    monotonic = False
    for index, m1 in enumerate(mediaList[:-1]):
        # if it didn't start as a monotonic series, it's not going to become one
        if index > 0 and not monotonic:
            break
        # if items were already stacked by other method, skip this attempt
        if _is_stacked(m1):
            continue

        m2 = mediaList[index + 1]
        # NOTE(review): once the first pair qualifies, later pairs are not
        # re-validated, so everything after it joins the stack. Kept as found.
        if not monotonic:
            f1 = _stem(m1).strip().lower()
            f2 = _stem(m2).strip().lower()
            monotonic = _is_next_number(f1, f2) or _is_next_letter(f1, f2, index)

        if monotonic:
            m1.name = dir
            m1.stacked = True
            stack_dict.setdefault('_monotonic', [m1]).append(m2)
            # The second item is only flagged on the final pair; flagging it
            # earlier would make the next iteration skip it as `m1`.
            if index == last_pair:
                m2.stacked = True


def _stack_scenes(mediaList, stack_dict):
    """Group scene-based movie splits ("scene 1 ...") into one stack."""
    for media in mediaList:
        # if items were already stacked by other method, skip this attempt
        if _is_stacked(media):
            continue
        f1 = _stem(media).lower()
        if re.match(_SCENE_PREFIXES, f1):
            (name, year) = VideoFiles.CleanName(re.sub(_SCENE_PREFIXES, '', f1))
            media.name = name
            media.stacked = True
            stack_dict.setdefault('_scene', []).append(media)


def _stack_prefixes(mediaList, stack_dict):
    """Stack neighbours whose names differ only in a part marker
    ("Movie CD1" / "Movie CD2", "Movie 1of2" / "Movie 2of2")."""
    last_pair = len(mediaList) - 2
    # enumerate() keeps the index in step with the loop even when an item is
    # skipped; a hand-maintained counter fell behind on every `continue`.
    for index, m1 in enumerate(mediaList[:-1]):
        # if items were already stacked by other method, skip this attempt
        if _is_stacked(m1):
            continue

        m2 = mediaList[index + 1]
        f1 = os.path.basename(m1.parts[0])
        f2 = os.path.basename(m2.parts[0])

        opcodes = difflib.SequenceMatcher(None, f1, f2).get_opcodes()
        if len(opcodes) != 3:  # We only want a single transform
            continue
        (tag, i1, i2, j1, j2) = opcodes[1]
        # The transform must be a replace of at most two characters ...
        if tag != 'replace' or i2 - i1 > 2 or j2 - j1 > 2:
            continue
        # ... and the replaced text must contain a digit or a letter a-n.
        if not re.search(_STACK_DIFFS, f1[i1:i2].lower()):
            continue

        root = f1[:i1].strip(' _-')
        # check to see if this an x of y style stack, if so flag it
        xOfy = f1[i1 + 1:].lower().strip().startswith('of')

        # Special case for folders with multiple volumes of a series (not a
        # stacked movie) [e.g, Kill Bill Vol 1 / 2].
        lowered = root.lower().strip()
        if lowered.endswith('vol') or lowered.endswith('volume'):
            continue

        # Strip any suffixes like CD, DVD.
        suffixMatch = re.search(_STACK_SUFFIXES, lowered)
        if suffixMatch:
            root = root[0:-len(suffixMatch.group(0))].strip(' _-')

        if suffixMatch or xOfy:
            # Replace the name, which probably had the suffix.
            (name, year) = VideoFiles.CleanName(root)
            m1.name = name
            m1.stacked = True
            stack_dict.setdefault(root, [m1]).append(m2)
            if index == last_pair:
                m2.stacked = True


def _merge_similar_stacks(stack_dict):
    """Merge stacks whose roots differ by one stackable character."""
    roots = sorted(stack_dict)  # sorted so adjacent comparisons are deterministic
    merged_into = {}            # consumed root -> root that now holds its items
    for s1, s2 in zip(roots, roots[1:]):
        opcodes = difflib.SequenceMatcher(None, s1, s2).get_opcodes()
        if len(opcodes) != 2:  # exactly: equal prefix + one transform
            continue
        (tag, i1, i2, j1, j2) = opcodes[1]
        if tag != 'replace' or i2 - i1 != 1 or j2 - j1 != 1:
            continue
        # Test the differing character itself, as the prefix pass does.
        if not re.search(_STACK_DIFFS, s1[i1:i2].lower()):
            continue
        if s2 not in stack_dict:
            continue

        target = merged_into.get(s1)
        if target is not None and target in stack_dict:
            # s1 was merged earlier in this pass: extend that merged stack.
            name = stack_dict[target][0].name
            members = []
        elif s1 in stack_dict:
            target = s1.lower().strip()
            suffixMatch = re.search(_STACK_SUFFIXES, target)
            if suffixMatch:
                target = target[0:-len(suffixMatch.group(0))].strip(' -')
            (name, year) = VideoFiles.CleanName(target)
            members = stack_dict.pop(s1)
        else:
            continue

        # Both old stacks are popped before the target is written, so a target
        # equal to an old key cannot alias the list being extended.
        members = members + stack_dict.pop(s2)
        for media in members:
            media.name = name
        stack_dict.setdefault(target, []).extend(members)
        merged_into[s2] = target


def _collapse_stacks(mediaList, stack_dict):
    """Fold each stack into its first item and drop the other items."""
    for root in sorted(stack_dict):
        members = stack_dict[root]
        head = members[0]
        for media in members[1:]:
            head.parts.append(media.parts[0])
            if media in mediaList:
                mediaList.remove(media)


def Scan(dir, files, mediaList, subdirs):
    """Combine multi-part movies in `mediaList` into single stacked items.

    `mediaList` is sorted by the lower-cased path of each item's first part
    and modified in place: for every detected stack, the first item receives
    the first part of every other member, and those members are removed.

    Args:
        dir: Directory being scanned; used as the name of a monotonic stack.
        files: Not used by this function.
        mediaList: Media items, each exposing `parts` and `name`.
        subdirs: Not used by this function.
    """
    stack_dict = {}

    # Sort the mediaList by filename, so we can do our compares properly.
    mediaList[:] = sorted(mediaList, key=lambda media: media.parts[0].lower())

    _stack_monotonic(dir, mediaList, stack_dict)
    _stack_scenes(mediaList, stack_dict)
    _stack_prefixes(mediaList, stack_dict)
    _merge_similar_stacks(stack_dict)
    _collapse_stacks(mediaList, stack_dict)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment