Skip to content

Instantly share code, notes, and snippets.

@kevinpostal
Last active April 10, 2018 21:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kevinpostal/bb2968f3100669091ae397126af78189 to your computer and use it in GitHub Desktop.
Save kevinpostal/bb2968f3100669091ae397126af78189 to your computer and use it in GitHub Desktop.
Attempt
import re
import itertools
import sys
from collections import Counter
from os import listdir
from os.path import isfile, join
# Module-level accumulators, kept for backward compatibility: every LSS run
# appends its per-group records to ``filelist`` and its ``(count, name)``
# output pairs to ``fileset``.
filelist = []
fileset = set()


def parse_number(num):
    """Return the zero-padded printf integer spec that is as wide as *num*.

    >>> parse_number("0040")
    '%04d'
    """
    return "%%0%sd" % len(num)


class LSS:
    """Print a counted listing of the regular files in a directory.

    File names that differ only in their numeric fields form a group.  A
    group holding a single file is printed under its own name with a count
    of 1.  A group holding several files is printed as one line per distinct
    value of the first numeric field: that field is kept literally, every
    later numeric field is shown as a ``%0Nd`` spec (width taken from the
    first file of the group), and the count is the number of files sharing
    that first value.

    The listing is printed as a side effect of construction, sorted by name.
    """

    def __init__(self, location="."):
        """List *location* (default: the current directory) immediately."""
        self.location = location
        self.__process_filenames()

    def __normalize_filename(self, filename):
        """Return ``{"name", "group"}`` for *filename*.

        ``group`` is the name with every run of digits replaced by the
        literal text ``(\\d+)``; it is used only as a grouping key.
        """
        # A callable replacement is inserted verbatim.  A replacement
        # *string* containing ``\d`` is rejected by Python 3.7+ with
        # "bad escape \d".
        return {
            "name": filename,
            "group": re.sub(r'\d+', lambda match: r'(\d+)', filename),
        }

    def __process_filenames(self):
        """Collect the regular files of ``self.location`` and group them."""
        file_list = [
            self.__normalize_filename(f) for f in listdir(self.location)
            if isfile(join(self.location, f))
        ]
        # itertools.groupby only merges *adjacent* items, so the list has to
        # be ordered by the grouping key first (name breaks ties so the
        # first member of each group is deterministic).
        file_list.sort(key=lambda x: (x['group'], x['name']))
        groups = itertools.groupby(file_list, lambda x: x['group'])
        self.__process_groups(groups)

    def __process_groups(self, groups):
        """Turn ``(key, members)`` pairs from groupby into record dicts.

        Single-member groups become ``{"file", "count"}``.  Larger groups
        additionally carry ``group``, ``tuple`` (the numeric fields of every
        member), ``digits`` (numeric fields of the first member) and
        ``tuple_count`` (occurrences of each first-field value).
        """
        records = []
        for _, files in groups:
            members = list(files)
            first = members[0]
            if len(members) == 1:
                records.append({"file": first["name"], "count": 1})
                continue
            # Every member is counted, including the first one.  The numbers
            # are read with findall instead of matching the group pattern,
            # so regex metacharacters in a file name cannot break anything.
            file_numbers = [
                tuple(re.findall(r'\d+', member["name"]))
                for member in members
            ]
            records.append({
                "file": first["name"],
                "group": first["group"],
                "tuple": file_numbers,
                "digits": re.findall(r'\d+', first["name"]),
                "tuple_count": Counter(item[0] for item in file_numbers),
                "count": len(file_numbers),
            })
        filelist.extend(records)
        self.__process_occurrences(records)

    def __process_occurrences(self, filelist):
        """Print one ``"<count> <name>"`` line per output entry.

        *filelist* is the list of records built by ``__process_groups``.
        Only the lines derived from these records are printed; they are also
        added to the module-level ``fileset``.
        """
        lines = set()
        for entry in filelist:
            if not entry.get("group"):
                lines.add((entry.get("count"), entry.get("file")))
                continue
            # Literal text around the numeric fields; always one item longer
            # than the list of numeric fields.
            literals = re.split(r'\d+', entry["file"])
            paddings = [parse_number(d) for d in entry["digits"][1:]]
            for numbervalue, occurrences in entry["tuple_count"].items():
                pieces = [literals[0]]
                fields = [numbervalue] + paddings
                for field, literal in zip(fields, literals[1:]):
                    pieces.append(field)
                    pieces.append(literal)
                lines.add((int(occurrences), "".join(pieces)))
        fileset.update(lines)
        for count, filename in sorted(lines, key=lambda tup: (tup[1], tup[0])):
            # Parenthesised single-argument form runs on Python 2 and 3.
            print("%d %s" % (count, filename))
if __name__ == '__main__':
    # Pass the first command-line argument, if present, as the directory to
    # list.  With no argument the slice is empty and LSS is called with no
    # arguments, so it falls back to its own default location.
    LSS(*sys.argv[1:2])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment