Last active
October 1, 2018 21:19
-
-
Save raspi/4cca6f5d039ad41835eccc0594ea22b3 to your computer and use it in GitHub Desktop.
Move files based on directory match list to matching directories
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# -*- encoding: utf8 -*- | |
# | |
# Move files based on directory match list | |
# | |
# Example: | |
# The /sorted directory has the following directories:
# - foo | |
# - bar | |
# | |
# /tmp has the following files and/or directories:
# - foo.mp4 | |
# - bar | |
# - quux.txt | |
# - foo bar.dat | |
# | |
# When using /sorted as the match directory and /tmp as the source, the following happens:
# - /tmp/foo.mp4 is moved to /sorted/foo/foo.mp4 | |
# - /tmp/bar is moved to /sorted/bar/bar | |
# - nothing happens with /tmp/quux.txt | |
# - /tmp/foo bar.dat is not moved because it has multiple matches (foo & bar) | |
# | |
# (c) Pekka Järvinen 2018- | |
import logging | |
import os | |
import sys | |
import re | |
import argparse | |
import datetime | |
import shutil | |
from pprint import pprint | |
# Program metadata; these values feed the argparse description, epilog
# and usage text.
__VERSION__ = "0.0.1"
__AUTHOR__ = "Pekka Järvinen"
__YEAR__ = 2018
__DESCRIPTION__ = (
    "Move files and directories which matches --match-directory names "
    "from --directory directory to --match-directory"
)
# "%(prog)s" is left in place for argparse to substitute the program name.
__EPILOG__ = "%(prog)s v{0} (c) {1} {2}-".format(
    __VERSION__,
    __AUTHOR__,
    __YEAR__,
)
# Usage lines: a blank line, a 60-character rule, two example invocations
# and a closing rule.
__EXAMPLES__ = [
    "",
    60 * "-",
    "Test what would happen:",
    " %(prog)s --dry-run --match-directory /home/user/sorted --directory /tmp/new",
    "Run:",
    " %(prog)s --verbose --match-directory /home/user/sorted --directory /tmp/new",
    60 * "-",
]
class FullPaths(argparse.Action):
    """argparse action that stores the option value as an absolute path."""

    def __call__(self, parser, namespace, values, option_string=None):
        # Expand a leading "~" first, then make the result absolute.
        expanded = os.path.expanduser(values)
        setattr(namespace, self.dest, os.path.abspath(expanded))

    # Takes no ``self``: it is handed to argparse as ``FullPaths.validate``
    # (the ``type=`` callable), i.e. looked up on the class, not an instance.
    def validate(dirname: str) -> str:
        """Return *dirname* unchanged if it names an existing directory.

        Raises:
            argparse.ArgumentTypeError: if *dirname* is not a directory.
        """
        if os.path.isdir(dirname):
            return dirname
        raise argparse.ArgumentTypeError("'{}' is not a directory".format(dirname))
class MultiplePaths(FullPaths):
    """argparse action that collects one or more paths as absolute paths.

    Intended for options declared with ``nargs='+'`` (or similar), where
    argparse hands the action a list of values.  Repeating the option on
    the command line appends to the paths collected so far.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """Store the absolute form of every path in *values* on *namespace*.

        Raises:
            argparse.ArgumentTypeError: if *values* is not a list.
        """
        if not isinstance(values, list):
            raise argparse.ArgumentTypeError("invalid: {} {}".format(type(values), values))

        # Start from whatever an earlier occurrence of the option stored on
        # this namespace.  The state lives on the namespace rather than in a
        # class-level list, so separate parsers / parse_args() calls no
        # longer share (and keep growing) the same list object.
        collected = list(getattr(namespace, self.dest, None) or [])
        collected.extend(os.path.abspath(os.path.expanduser(item)) for item in values)
        setattr(namespace, self.dest, collected)
class RegEx(argparse.Action):
    """argparse action that stores the option value as a compiled pattern."""

    def __call__(self, parser, namespace, values, option_string=None):
        compiled = re.compile(values)
        setattr(namespace, self.dest, compiled)

    # Takes no ``self``: it is handed to argparse as ``RegEx.validate``
    # (the ``type=`` callable), i.e. looked up on the class, not an instance.
    def validate(s: str) -> str:
        """Return *s* unchanged if it compiles as a regular expression.

        Raises:
            argparse.ArgumentTypeError: if ``re.compile`` rejects *s*.
        """
        try:
            re.compile(s)
        except re.error:
            raise argparse.ArgumentTypeError("invalid regex: {}".format(s))
        else:
            return s
def fixCamel(s: str) -> str:
    """Insert spaces at character-class boundaries to split camel case.

    Each character is classified as digit, lower-case letter, upper-case
    letter, space (tab or " ") or "unknown".  A single space is inserted
    before a character whose class differs from the previous character's
    class, unless the previous character was upper-case or a space, or the
    current character is itself a space.

    Example: ``FooBar`` -> ``Foo Bar``
    """
    UNKNOWN, SPACE, DIGIT, LOWER, UPPER = range(5)

    def classify(ch):
        # Digits are tested before letters, mirroring the order in which
        # the classes take precedence.
        if ch.isdigit():
            return DIGIT
        if ch.isalpha():
            if ch.islower():
                return LOWER
            if ch.isupper():
                return UPPER
            # Alphabetic but caseless characters.
            return UNKNOWN
        if ch in ("\t", " "):
            return SPACE
        return UNKNOWN

    pieces = []
    previous = None  # no previous character yet, so no boundary at index 0
    for ch in s:
        kind = classify(ch)
        boundary = previous is not None and kind != previous
        if boundary and previous not in (UPPER, SPACE) and kind != SPACE:
            pieces.append(" ")
        previous = kind
        pieces.append(ch)
    return "".join(pieces)
if __name__ == "__main__":
    # Script entry point:
    #   1. parse the command line,
    #   2. build one regular expression per sub-directory of every
    #      --match-directory,
    #   3. test every entry of --directory against those expressions,
    #   4. move entries with exactly one match, report the ambiguous ones.
    logging.basicConfig(
        stream=sys.stdout,
        level=logging.INFO,
        format='%(asctime)s %(levelname)s: %(message)s',
        datefmt="%H:%M:%S",
    )
    log = logging.getLogger(__name__)

    parser = argparse.ArgumentParser(
        description=__DESCRIPTION__,
        epilog=__EPILOG__,
        usage=os.linesep.join(__EXAMPLES__),
    )

    # More information
    parser.add_argument('--verbose', '-v', action='count', required=False, default=0, dest='verbose',
                        help="Be verbose. -vvv..v Be more verbose.")

    # Match directory
    parser.add_argument('--match-directory', '-m', action=MultiplePaths, type=MultiplePaths.validate, dest='matchdirectory', required=True, nargs='+',
                        help='Use directory as matches')

    # Directory to match
    parser.add_argument('--directory', '-d', action=FullPaths, type=FullPaths.validate, dest='directory', required=True,
                        help='Directory where to find matches')

    # Test run
    parser.add_argument('--dry-run', action='store_true', default=False, dest='dry', required=False,
                        help='Dry run. Test what would happen.')

    parser.add_argument('--space', '-s', action=RegEx, type=RegEx.validate, dest='space', required=False,
                        default=re.compile(r"\s*"), help=r"Space (' ') match regular expression in match directory names. For example: \s*")

    parser.add_argument('--prefix', '-p', action=RegEx, type=RegEx.validate, dest='prefix', required=False,
                        default=re.compile(r"\b"), help=r"Prefix match regular expression in match directory names. For example: \b")

    args = parser.parse_args()

    if int(args.verbose) > 0:
        logging.getLogger().setLevel(logging.DEBUG)
        log.info("Being verbose")

    if args.dry:
        log.info("Dry run")

    log.info("regex match: prefix:'{}' space:'{}'".format(args.prefix.pattern, args.space.pattern))
    log.debug("match directories: {}".format(args.matchdirectory))

    match_directories = []

    # get match directories list
    for mdir in args.matchdirectory:
        for f in os.listdir(mdir):
            path = os.path.join(mdir, f)

            if not os.path.isdir(path):
                continue

            # create match: <prefix><escaped directory name>\b, with every
            # escaped space swapped for the --space pattern
            _match = r"{}{}\b".format(args.prefix.pattern, re.escape(f))
            _match = _match.replace(r"\ ", args.space.pattern)
            match = re.compile(_match, re.IGNORECASE | re.UNICODE)

            match_directories.append({
                "path": path,
                "match": match,
            })

            log.debug("match '{}' added".format(match.pattern))

    # Resolved locations of all match directories.  A source entry that is
    # one of them (e.g. --directory is the same as a --match-directory, or
    # a symlink to it) must never be treated as something to sort: it
    # matches its own pattern and would be moved into itself or into a
    # sibling match directory.
    match_paths = {os.path.realpath(m["path"]) for m in match_directories}

    matched = {}  # path used as key

    for f in os.listdir(args.directory):
        path = os.path.join(args.directory, f)

        if os.path.realpath(path) in match_paths:
            log.debug("Skipping '{}': it is a match directory".format(path))
            continue

        # Normalise the name before matching.
        matcher = f
        matcher = re.sub(r"\W", " ", matcher)  # all non-alphanumericals to spaces
        matcher = matcher.replace("_", " ")  # "_" counts as a word character, so \W leaves it
        matcher = fixCamel(matcher)
        matcher = re.sub(r"\s\s+", " ", matcher)  # extra spaces to one space
        matcher = matcher.strip()  # remove white space

        for m in match_directories:
            if m["match"].search(matcher):
                log.debug("Match: '{}' --> '{}' (orig: '{}')".format(m["match"].pattern, matcher, f))

                if path not in matched:
                    matched[path] = {
                        "name": f,
                        "matches": [],
                    }

                matched[path]["matches"].append(m["path"])

    # no matches found
    if not matched:
        log.info("No matches found!")
        sys.exit(0)

    log.info("Moving files..")

    # move files
    for src, info in matched.items():
        # Ambiguous entries are only reported (below), never moved.
        if len(info["matches"]) != 1:
            continue

        target_dir = info["matches"][0]
        name = info["name"]
        dest = os.path.join(target_dir, name)

        # Destination already taken: append a "YYYYMMDD HHMMSS" timestamp to
        # the name (before the extension for files).  The loop re-checks the
        # new name and retries with a fresh timestamp if that is taken too.
        while os.path.exists(dest):
            now = datetime.datetime.now()
            add = "{0}{1:02d}{2:02d} {3:02d}{4:02d}{5:02d}".format(now.year, now.month, now.day, now.hour, now.minute, now.second)

            if os.path.isfile(dest):
                log.warning("err: file '{}' exists! renaming..".format(dest))
                a = list(os.path.splitext(name))
                a[0] = "{} {}".format(a[0], add)
                dest = os.path.join(target_dir, "".join(a))
            elif os.path.isdir(dest):
                log.warning("err: directory '{}' exists! renaming..".format(dest))
                dest = os.path.join(target_dir, "{} {}".format(name, add))
            else:
                # Exists but is neither a regular file nor a directory.
                log.critical("??")
                sys.exit(1)

        log.info("Moving '{}' -> '{}'".format(src, dest))

        if not args.dry:
            # Actually move file/directory to destination match directory
            shutil.move(src, dest)

    # tell about multiple matches
    for src, info in matched.items():
        if len(info["matches"]) != 1:
            log.warning("err: file/dir '{}' has multiple matches! move it manually or rename it!".format(src))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment