Skip to content

Instantly share code, notes, and snippets.

@elundmark
Last active February 13, 2020 04:33
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save elundmark/fa3746b8b090289d6f4449b4541999c4 to your computer and use it in GitHub Desktop.
Save elundmark/fa3746b8b090289d6f4449b4541999c4 to your computer and use it in GitHub Desktop.
Magic Media Sorter - Sort your media correctly! (Tested in: Python v3.8+ on Linux)
#!/usr/bin/env python3.8
"""Magic Media Sorter - Sort your media correctly!"""
import argparse
import operator
import sys
import re
# Regular expressions are compiled once at import time and shared by the helpers.
_PATTERNS = {
    "numbers": re.compile(r"\d+"),
    # punctuation and control characters that are collapsed into a single space
    "special_chars": re.compile(r"[ /|\t\r\a\f\v\^\[\](){}<>?!&#@£$§\\\"'`:;,._%*+=~\-]+"),
    # one or more plain space characters (tabs etc. are handled by special_chars)
    "multiple_spaces": re.compile(r"[ ]+"),
    # these tokens contain digits that carry no ordering information
    "common_number_skips":
        re.compile(r"((?<=[ ])|^)(10bit|240p|360p|480p|720p|1080p|1280p|1440p|2160p|[hx][. ]?26[45])([ ]|$)"),
    # Standard S02E02 format, older format 1x04, (part) 7 of 10 | 5of5
    "ep_format_1": re.compile(r"((?<=[ ])|^)s?(\d+)(?:[ex]| ?of ?)(\d+)([ ]|$)", flags=re.IGNORECASE),
    # Long format Season 1 Episode 4
    "ep_format_2": re.compile(r"((?<=[ ])|^)season ?(\d+) ?episode ?(\d+)([ ]|$)", flags=re.IGNORECASE),
    # ignore "The X" and sort as "X"
    "the_slug": re.compile(r"(^|[ ])the ", flags=re.IGNORECASE),
    # Season / page / track tokens (A1 B1 for LPs, p1 for images, S09, ep3):
    # the letter part is kept and separated from the number
    "paged_num_1": re.compile(r"((?<=[ ])|^)(ep|[a-zA-Z])(\d+)([ ]|$)", flags=re.IGNORECASE),
    # number followed by a single letter (10a): the letter is kept
    "paged_num_2": re.compile(r"((?<=[ ])|^)(\d+)([a-zA-Z])([ ]|$)", flags=re.IGNORECASE),
    # ordinal suffixes (1st, 2nd, 3rd, 4th) are trimmed
    "paged_num_3": re.compile(r"((?<=[ ])|^)(\d+)(?:st|nd|rd|th)([ ]|$)", flags=re.IGNORECASE),
    # stand-alone numbers, these get padded with zeroes
    "num_ranges": re.compile(r"((?<=[ ])|^)(\d+)([ ]|$)"),
}

# Substitutions applied, in this exact order, to turn a line into its sort key.
_REWRITES = (
    ("special_chars", " "),
    ("common_number_skips", r""),
    ("ep_format_1", r"\2 \3\4"),
    ("ep_format_2", r"\2 \3\4"),
    ("the_slug", r"\1"),
    ("paged_num_1", r"\2 \3\4"),
    ("paged_num_2", r"\2 \3\4"),
    ("paged_num_3", r"\2\3"),
)

# Number words that are added to the current group.
_SMALL_NUMBERS = {
    "zero": 0,
    "one": 1,
    "two": 2,
    "three": 3,
    "four": 4,
    "five": 5,
    "six": 6,
    "seven": 7,
    "eight": 8,
    "nine": 9,
    "ten": 10,
    "eleven": 11,
    "twelve": 12,
    "thirteen": 13,
    "fourteen": 14,
    "fifteen": 15,
    "sixteen": 16,
    "seventeen": 17,
    "eighteen": 18,
    "nineteen": 19,
    "twenty": 20,
    "thirty": 30,
    "forty": 40,
    "fifty": 50,
    "sixty": 60,
    "seventy": 70,
    "eighty": 80,
    "ninety": 90,
}

# Number words that multiply the current group and close it.
_MAGNITUDES = {
    "thousand": 1000,
    "million": 1000000,
    "billion": 1000000000,
    "trillion": 1000000000000,
    "quadrillion": 1000000000000000,
    "quintillion": 1000000000000000000,
    "sextillion": 1000000000000000000000,
}


def _build_parser():
    """Create the command line parser (options plus optional input files)."""
    app = {
        "name": "Magic Media Sorter",
        "desc": "".join([
            "Sorts lines of media or other strings by episode/track number - ",
            "Reads stdin or files, files is sorted seperately while stdin is sorted together. ",
            "You can also import this script and use it like this: ",
            "`mms.main(['--import'], ['part10', 'part1'])`"
        ]),
        "version": "0.6.3",
        "updated": "2020-02-12",
        "author": "elundmark@posteo.se",
        "license": "MIT; https://www.opensource.org/licenses/MIT",
    }
    argparser = argparse.ArgumentParser(
        prog=app["name"],
        description=" ".join([
            app["name"],
            app["version"],
            "(" + app["updated"],
            "<" + app["author"] + ">)",
            app["desc"],
        ]),
        allow_abbrev=False
    )
    argparser.add_argument("-V", "--version", action="version", version="%(prog)s " + app["version"])
    argparser.add_argument(
        "-R", "--raw", action="store_true",
        help="Output calculated strings for debugging (Default: %(default)s)"
    )
    argparser.add_argument(
        "-S", "--no-sorting", action="store_true",
        help="Skip sorting, useful when importing from scripts and you want raw strings (Default: %(default)s)"
    )
    argparser.add_argument(
        "-i", "--import", action="store_true",
        help="Use when you import from other python scripts (Default: %(default)s)"
    )
    argparser.add_argument(
        "-I", "--case", action="store_true",
        help="Case sensitive (Default: %(default)s)"
    )
    argparser.add_argument(
        "-b", "--basename", action="store_true",
        help="Only sort by / apply to basename (Default: %(default)s)"
    )
    argparser.add_argument(
        "-r", "--reverse", action="store_true",
        help="Reverse the results (Default: %(default)s)"
    )
    argparser.add_argument(
        "-H", "--with-filename", action="store_true",
        help="Print the file name for each match to stderr (Default: %(default)s)"
    )
    # treat rest arguments as files to be read
    argparser.add_argument("files", nargs="*", type=argparse.FileType("r"))
    return argparser


def _text2num(s):
    """Replace runs of English number words in *s* with their decimal value.

    Words are separated by spaces; everything that is not part of a number
    phrase is kept as is ("part twenty one end" -> "part 21 end").  The word
    "hundred" only counts as a number when it continues a phrase, a lone
    "hundred" stays text.  A phrase is also collapsed when it is the last
    thing in the string or when it ends in "hundred".
    """
    # Adapted from https://stackoverflow.com/questions/11980087, changed to
    # replace inline and leave all other words untouched.
    # Splitting with the regex keeps empty tokens for leading/trailing spaces,
    # so those spaces survive the final join.
    out = []
    total = 0       # sum of the groups already closed by a magnitude word
    group = 0       # value of the group currently being read
    in_run = False  # True while consecutive number words are being consumed
    for word in _PATTERNS["multiple_spaces"].split(str(s)):
        if word in _SMALL_NUMBERS:
            group += _SMALL_NUMBERS[word]
            in_run = True
        elif word in _MAGNITUDES:
            total += group * _MAGNITUDES[word]
            group = 0
            in_run = True
        elif word == "hundred" and in_run:
            group *= 100
        else:
            if in_run:
                # the phrase ended on the previous word: emit its value once
                out.append(str(total + group))
                total = group = 0
                in_run = False
            out.append(word)
    if in_run:
        # the string ended while still inside a number phrase
        out.append(str(total + group))
    return " ".join(out)


def _widest_number(s):
    """Return the length of the longest digit run in *s* (0 if there is none)."""
    return max((len(digits) for digits in _PATTERNS["numbers"].findall(s)), default=0)


def _sort_key_text(line, opts):
    """Turn one input line into the normalised text it is compared by."""
    parsed = line.split("/")[-1] if opts.basename else line
    if not opts.case:
        parsed = parsed.lower()
    for name, replacement in _REWRITES:
        parsed = _PATTERNS[name].sub(replacement, parsed)
    return _text2num(parsed)


def _sort_lines(lines, opts):
    """Return a new list with *lines* ordered according to *opts*.

    The input is not modified.  With ``opts.raw`` the normalised keys are
    returned instead of the original lines, with ``opts.no_sorting`` the input
    order is kept.
    """
    originals = list(lines)
    keys = [_sort_key_text(line, opts) for line in originals]
    # To make a plain string sort work, every stand-alone number on every
    # line is left-padded with zeroes to the width of the widest number seen.
    width = max((_widest_number(key) for key in keys), default=0)

    def pad(match):
        return match.group(2).zfill(width) + (match.group(3) or "")

    pairs = [(line, _PATTERNS["num_ranges"].sub(pad, key)) for line, key in zip(originals, keys)]
    if not opts.no_sorting:
        pairs.sort(key=operator.itemgetter(1), reverse=bool(opts.reverse))
    wanted = 1 if opts.raw else 0
    return [pair[wanted] for pair in pairs]


def main(imported=None, importedList=None):
    """Sort media names by episode/track number.

    Command line use: call without arguments.  Options and files are taken
    from ``sys.argv``; every file is sorted and printed on its own, and when
    no file is given stdin is read and sorted as one list.

    Library use: ``main(['--import'], ['part 10', 'part 1'])``.  *imported*
    is a non-empty list of command line options and *importedList* holds the
    strings to sort.  In this mode nothing is printed, ``sys.argv`` is never
    consulted, *importedList* is left unmodified and a new sorted list is
    returned (an empty list when *importedList* is empty or None).

    Returns the sorted list in library mode, otherwise None.
    """
    argparser = _build_parser()
    if imported:
        opts = argparser.parse_args(list(imported))
        return _sort_lines([] if importedList is None else importedList, opts)
    opts = argparser.parse_args()
    if not opts.files:
        # no file arguments: everything on stdin is sorted as one list
        lines = [line.rstrip("\n") for line in sys.stdin]
        print("\n".join(_sort_lines(lines, opts)))
        return None
    # argparse has already opened the files; each one is sorted on its own
    for f in opts.files:
        if opts.with_filename:
            # mimic grep and echo a filename (to stderr)
            print("\n" + f.name + "\n", file=sys.stderr)
        with f:
            # strip only the newline, other trailing whitespace is kept
            lines = [line.rstrip("\n") for line in f]
        print("\n".join(_sort_lines(lines, opts)))
    return None
# Run the command line interface only when executed as a script, so that
# importing this module has no side effects.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment