Renames files in a given directory using a list of new names.
#!/usr/bin/env python3 | |
# coding=utf-8 | |
import difflib | |
import argparse | |
import io | |
import glob | |
import os | |
import itertools | |
import logging | |
import functools | |
# File extensions (without the leading dot) that are treated as video files.
VIDEO_EXTENSIONS = ('mpg', 'mpeg', 'mkv', 'avi')
# (old, new) substitutions applied to a file's stem before it is compared
# with the candidate names.
FNAME_REPLACED_CHARS = (('_', ' '), ('-', ' '))
# Lowest similarity ratio at which a candidate name counts as a match.
MIN_SIMILARITY_SCORE = .7

logging.basicConfig(level=logging.INFO)
def string_similarity(s1, s2):
    """Return how similar two strings are as a float between 0.0 and 1.0.

    The score is ``difflib.SequenceMatcher(None, s1, s2).ratio()``: 1.0 for
    identical strings, 0.0 when the strings have nothing in common.
    """
    return difflib.SequenceMatcher(None, s1, s2).ratio()
def vid_files_in_dir(path, exts=None):
    """Return an iterator over the base names of video files directly in *path*.

    :param path: directory to search (not searched recursively)
    :param exts: iterable of extensions without the dot; falls back to
        ``VIDEO_EXTENSIONS`` when empty or ``None``
    :return: lazy iterator of file names (no directory part), grouped by
        extension in the order the extensions were given
    """
    exts = exts or VIDEO_EXTENSIONS
    # Escape the directory part so glob metacharacters in it ('[', '*', '?')
    # are matched literally; only the '*.<ext>' suffix acts as a pattern.
    base = glob.escape(path)
    matches = itertools.chain.from_iterable(
        glob.glob(os.path.join(base, '*.' + ext)) for ext in exts
    )
    return map(os.path.basename, matches)
def replace_multi(s, pairs):
    """Return *s* after applying every replacement in *pairs*, in order.

    :param s: string to transform
    :param pairs: iterable of argument tuples for ``str.replace``, normally
        ``(old, new)``; each replacement operates on the result of the
        previous one
    :return: the transformed string (``s`` unchanged when *pairs* is empty)
    """
    for replacement in pairs:
        s = s.replace(*replacement)
    return s
def _best_match(stem, candidates):
    """Return the candidate most similar to *stem*, or None.

    Only candidates scoring at least ``MIN_SIMILARITY_SCORE`` are considered;
    on a tie the candidate listed first wins.
    """
    scored = {name: string_similarity(stem, name) for name in candidates}
    ok_scored = {name: score for name, score in scored.items()
                 if score >= MIN_SIMILARITY_SCORE}
    return max(ok_scored, key=ok_scored.get) if ok_scored else None


def main():
    """Interactively rename the video files in a directory.

    Command-line arguments: ``path`` (directory to scan) and ``names`` (UTF-8
    text file with one new file name per line, without extension). For each
    video file the closest name is proposed and the file is renamed only
    after the user answers with something starting with "y".
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('path', help='path to look for files')
    parser.add_argument('names', help='path to file with new file names')
    args = parser.parse_args()

    path = os.path.abspath(args.path)
    # Close the names file deterministically instead of leaking the handle.
    with io.open(args.names, encoding='utf8') as names_file:
        new_names = names_file.read().splitlines()

    # @TODO strip prefixes? (SxxEyy, ...)?
    for f in vid_files_in_dir(path):
        stem, ext = os.path.splitext(f)
        # @TODO strip group, resolution and similar stuff?
        stem = replace_multi(stem, FNAME_REPLACED_CHARS)

        best = _best_match(stem, new_names)
        if not best:
            logging.info('[-] No similar name found for "%s". Skipping.', f)
            continue

        new_name = best + ext
        answer = input('[*] Rename %s -> %s [y,n] ' % (f, new_name))
        # startswith() treats an empty answer (plain Enter) as "no" instead
        # of raising IndexError the way indexing [0] would.
        if not answer.strip().lower().startswith('y'):
            continue

        old_namep = os.path.join(path, f)
        new_namep = os.path.join(path, new_name)
        # os.rename silently replaces an existing file on POSIX; refuse to
        # clobber a different file. samefile() still allows case-only renames
        # on case-insensitive filesystems.
        if os.path.exists(new_namep) and not os.path.samefile(old_namep, new_namep):
            logging.warning('[!] "%s" already exists. Skipping.', new_name)
            continue
        os.rename(old_namep, new_namep)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This comment has been minimized.
starenka % ./sweepy.py /tmp/pokus /tmp/pokus/columbo.txt
INFO:root:[-] No similar name found for "krize_identity.avi". Skipping.
INFO:root:[-] No similar name found for "Plany_poplach.avi". Skipping.
INFO:root:[-] No similar name found for "vrazda_na_videu.avi". Skipping.
[*] Rename dzungle_ve_sklenku.avi -> s02e02 Džungle ve skleníku.avi [y,n]