Skip to content

Instantly share code, notes, and snippets.

@Reboare
Created December 29, 2012 18:59
Show Gist options
  • Save Reboare/4408690 to your computer and use it in GitHub Desktop.
Save Reboare/4408690 to your computer and use it in GitHub Desktop.
Infer a movie's attributes
'''
Infer a movie's attributes by extracting data from the movie filename
@author: Josiah
'''
import re, datetime
from sys import argv, stdout
import logging
import os
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Codec tags looked up by _codec().
VIDEO_CODECS = (
    "xvid",
    "x264",
    "h264",
    "divx",
)

# Resolution tags looked up by _resolution().
RESOLUTIONS = (
    "720p",
    "1080p",
    "576p",
)

# Media source tags looked up by _source(). Order matters: the first entry
# contained in a filename wins, so "DVDRip" must stay ahead of "DVD".
MEDIAS = (
    "DVDRip",
    "BRRip",
    "BluRay",
    "HDDvd",
    "HDTv",
    "BDRip",
    "VHS",
    "Screener",
    "R5",
    "DVD",
)

# Release group names looked up by _group().
GROUPS = (
    "ESiR",
    "REFiNED",
    "SEPTiC",
    "HDChina",
    "HiDt",
    "WiKi",
    "HaB",
    "FraMeSToR",
    "LooKMaNe",
    "AMIABLE",
    "XTSF",
    "NODLABS",
    "iNFAMOUS",
    "BLiND",
    "CHD",
    "DIMENSION",
    "CiNEFiLE",
    "EbP",
    "SiNNERS",
    "SPARKS",
)

# Miscellaneous release attributes. Not referenced by any function visible in
# this file.
# NOTE(review): the empty-string entry looks unintentional -- "" is a
# substring of every filename; confirm before using this tuple for matching.
ATTRIBUTES = (
    "EC",
    "LIMITED",
    "",
    "NTSC",
    "Anniversary",
    "Edition",
    "Extended",
)
def _search(filename, list_search):
    """Search a filename for occurrences of items contained in a given list.

    Matching is a case-insensitive substring test. Candidates are tried in
    the order they appear in list_search, so the result is the first match
    in *list* order, not the match that appears earliest in the filename.

    Arguments:
    filename -- the movie filename
    list_search -- the candidate strings to look for, in priority order

    Return Values:
    string -- the first matching candidate (with the casing used in
              list_search), or None when nothing matches
    """
    # Lower-case the filename once instead of once per candidate.
    haystack = filename.lower()
    # Stop at the first hit. Empty candidates are skipped because "" is a
    # substring of every filename and would always "match".
    found = next((value for value in list_search
                  if value and value.lower() in haystack), None)
    # Pass arguments separately so the message is only built when DEBUG
    # logging is actually enabled.
    logger.debug("Attribute from %s matched %s", filename,
                 found if found is not None else "nothing")
    return found
def _source(filename):
    """Look up the media source of a filename by searching it for MEDIAS."""
    return _search(filename, MEDIAS)
def _resolution(filename):
    """Look up the resolution of a filename by searching it for RESOLUTIONS."""
    return _search(filename, RESOLUTIONS)
def _codec(filename):
    """Look up the video codec of a filename by searching it for VIDEO_CODECS."""
    return _search(filename, VIDEO_CODECS)
def _group(filename):
    """Look up the release group of a filename by searching it for GROUPS."""
    return _search(filename, GROUPS)
def _year(filename):
    """Return any date information in a movie filename.

    Only standalone four-digit numbers are considered; a four-digit tail of
    a longer digit run (e.g. the "1229" in "20121229") is not a year. When
    several candidates are present the last one in the filename wins, so
    "2012.2009" is read as the 2009 release of a film called "2012".

    Arguments:
    filename -- the movie filename

    Return Values:
    int -- year in the range 1900 to current year + 1, or None when no
           suitable year is found
    """
    max_year = datetime.datetime.now().year + 1
    # The lookbehind/lookahead pair keeps the match from starting or ending
    # in the middle of a longer number.
    candidates = [match
                  for match in re.finditer(r"(?<![0-9])[0-9]{4}(?![0-9])",
                                           filename)
                  if 1900 <= int(match.group(0)) <= max_year]
    if not candidates:
        return None
    last = candidates[-1]
    # A year at the very start of the name is rejected -- presumably because
    # a leading number is the title itself (e.g. "2001 A Space Odyssey").
    # The real match position is used rather than a string rfind().
    if last.start() == 0:
        return None
    result = int(last.group(0))
    logger.debug("Identified %s as being released in %i", filename, result)
    return result
def _multi_part(filename):
    """Parse a given filename and extract the part number or disc number
    where appropriate.

    Arguments:
    filename -- the movie filename

    Return Values:
    tuple -- (disc, part); each is the matched text (e.g. "D1", "CD 2")
             or None when the filename carries no such marker
    """
    # These still need serious improvement.
    # Part marker: "cd", optional spaces, one digit -- e.g. "CD1", "cd 2".
    part_pattern = r"[cC]+[dD]+[ ]*[0-9]"
    # Disc marker: a standalone "d", optional spaces, exactly one digit --
    # e.g. "D1", "d 2".
    #  * The original character class "[]*[0-9]" was a typo for "[ ]*[0-9]";
    #    it parsed as one class containing "]", "*", "[" and the digits, so
    #    "Departed[2006]" produced the disc "d[".
    #  * The lookbehind rejects a "d" that is part of a word or number, so
    #    "cd1" (a part marker), "Wanted 2008", "DD5.1" and "BD25" are not
    #    reported as discs.
    #  * The lookahead stops "D 2010" from reading a year as a disc number.
    disc_pattern = r"(?<![A-Za-z0-9])[dD][ ]*[0-9](?![0-9])"

    disc_match = re.search(disc_pattern, filename)
    part_match = re.search(part_pattern, filename)
    disc_num = disc_match.group(0) if disc_match is not None else None
    part_num = part_match.group(0) if part_match is not None else None
    logger.debug("Identified %s as having disc number %s and part number %s",
                 filename, disc_num, part_num)
    return disc_num, part_num
def infer(filename):
    """Break a given movie file name into parts.

    The title is whatever precedes the detected attributes: the filename is
    cut at every occurrence of a detected value and the first piece that
    still contains text after separator clean-up becomes the title.

    Arguments:
    filename -- the movie title

    Return Values:
    dictionary -- title : the movie title ("" when nothing is left over)
                  codec : the codec the file is encoded with
                  resolution : the given resolution e.g. 720p, 1080p
                  source : the movie source e.g. bluray, dvd
                  year : the movie release date
                  group : the encoder of the file
                  disc : the disc number for a multi-disc movie
                  part : the part number for a multi-part movie
                  Attributes that could not be detected are None.
    """
    values = {
        "title": None,
        "codec": _codec(filename),
        "resolution": _resolution(filename),
        "source": _source(filename),
        "year": _year(filename),
        "group": _group(filename),
        "disc": None,
        "part": None,
    }
    values["disc"], values["part"] = _multi_part(filename)

    # values() works on both Python 2 and 3 (itervalues() is Python 2 only).
    detected = [str(val) for val in values.values()
                if val is not None and str(val)]
    # Longest first so a value that is a prefix of another cannot shadow it
    # in the alternation below.
    detected.sort(key=len, reverse=True)

    if detected:
        # Cutting on all values at once makes the result independent of the
        # dictionary's iteration order.
        # NOTE(review): the cut is deliberately exact-case, as before. The
        # lookups are case-insensitive substring tests, so "The.Blind.Side"
        # reports group "BLiND"; cutting case-insensitively would then chop
        # the title itself.
        cutter = re.compile("|".join(re.escape(item) for item in detected))
        pieces = cutter.split(filename)
    else:
        pieces = [filename]

    title = ""
    for piece in pieces:
        # Turn the usual separators into spaces and trim leftover
        # punctuation.
        candidate = re.sub(r"[.\- _]", " ", piece).strip("() ./")
        if candidate:
            title = candidate
            break

    values["title"] = title
    logger.info("Movie title identified as %s", title)
    return values
def inferf(name, formatting=("%s (%d)", "title, year")):
    """Return a nicely formatted "<title> (<year>)" string for a movie name.

    This is a shortcut around infer().

    Arguments:
    name -- the movie filename to analyse
    formatting -- reserved for a future, proper format argument; it is
                  accepted for compatibility but currently ignored

    Return Values:
    string -- "<title> (<year>)"; the year is rendered as "None" when
              infer() found none
    """
    # The previous body copied the inferred values into locals() and built a
    # tuple that was never used. Writing to locals() is implementation-
    # dependent and the read-back raises KeyError on interpreters where
    # locals() returns a snapshot, so that dead code has been removed.
    inferred = infer(name)
    return "{0} ({1})".format(inferred["title"], inferred["year"])
def splittriple(filepath):
    """Split a file path into its directory, base name and extension.

    Return Values:
    tuple -- (path, name, ext); ext keeps its leading dot
    """
    directory = os.path.dirname(filepath)
    stem, suffix = os.path.splitext(os.path.basename(filepath))
    return directory, stem, suffix
def rename_file(filepath):
    """Rename a movie file in place to its inferred "<title> (<year>)" name.

    The directory and the extension are kept; only the base name changes.

    Arguments:
    filepath -- path of the file to rename

    Raises:
    OSError -- propagated from os.rename when the rename fails
    """
    # splittriple() returns an immutable tuple, so unpack it rather than
    # assigning into it.
    path, name, ext = splittriple(filepath)
    # inferf() is this module's formatter; the previous call targeted a
    # format_infer() that is not defined anywhere in this file.
    new_name = inferf(name) + ext
    # os.path.join restores the separator that a plain "".join() dropped.
    destination = os.path.join(path, new_name)
    os.rename(filepath, destination)
def main():
    """Print every attribute inferred from the filename given as argv[1].

    Attributes that could not be detected are omitted from the output.

    Raises:
    SystemExit -- with a usage message when no filename argument is given
    """
    if len(argv) < 2:
        raise SystemExit("usage: %s <movie filename>"
                         % (argv[0] if argv else "infer"))
    inferred = infer(argv[1])
    for attribute in ("title", "year", "codec", "resolution",
                      "source", "disc", "part", "group"):
        if inferred[attribute] is not None:
            # A single parenthesised argument prints identically on
            # Python 2 and Python 3.
            print("%s : %s" % (attribute, inferred[attribute]))
if __name__ == "__main__":
    import glob

    # Script entry point: print the formatted name of every .mkv file in a
    # hard-coded folder. Raw string: the backslashes are Windows path
    # separators, not escape sequences (the value is unchanged).
    for movie in glob.glob(r"G:\Videos\Movies\*.mkv"):
        path, name, ext = splittriple(movie)
        # A single parenthesised argument prints identically on Python 2
        # and Python 3.
        print(inferf(name))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment