Skip to content

Instantly share code, notes, and snippets.

@raspi
Last active October 1, 2018 21:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save raspi/4cca6f5d039ad41835eccc0594ea22b3 to your computer and use it in GitHub Desktop.
Save raspi/4cca6f5d039ad41835eccc0594ea22b3 to your computer and use it in GitHub Desktop.
Move files based on directory match list to matching directories
#!/usr/bin/env python3
# -*- encoding: utf8 -*-
#
# Move files based on directory match list
#
# Example:
# /sorted directory has following directories:
# - foo
# - bar
#
# /tmp has following files and/or directories:
# - foo.mp4
# - bar
# - quux.txt
# - foo bar.dat
#
# When using /sorted as a match directory and /tmp as source, following happens:
# - /tmp/foo.mp4 is moved to /sorted/foo/foo.mp4
# - /tmp/bar is moved to /sorted/bar/bar
# - nothing happens with /tmp/quux.txt
# - /tmp/foo bar.dat is not moved because it has multiple matches (foo & bar)
#
# (c) Pekka Järvinen 2018-
import logging
import os
import sys
import re
import argparse
import datetime
import shutil
from pprint import pprint
# Script metadata; these values are combined into the argparse help texts.
__VERSION__ = "0.0.1"
__AUTHOR__ = "Pekka Järvinen"
__YEAR__ = 2018

# Shown as the parser description.
__DESCRIPTION__ = "Move files and directories which matches --match-directory names from --directory directory to --match-directory"

# Shown as the parser epilog; "%(prog)s" is left for argparse to substitute.
__EPILOG__ = "%(prog)s v{version} (c) {author} {year}-".format(
    version=__VERSION__,
    author=__AUTHOR__,
    year=__YEAR__,
)

# Example invocations, one output line per list item, framed by two rulers.
__EXAMPLES__ = [
    "",
    "-" * 60,
    "Test what would happen:",
    " %(prog)s --dry-run --match-directory /home/user/sorted --directory /tmp/new",
    "Run:",
    " %(prog)s --verbose --match-directory /home/user/sorted --directory /tmp/new",
    "-" * 60,
]
class FullPaths(argparse.Action):
    """argparse action that stores the argument as an absolute path.

    A leading ``~`` is expanded with ``os.path.expanduser`` before the path
    is made absolute with ``os.path.abspath``.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """Store the expanded, absolute form of *values* on *namespace*."""
        setattr(namespace, self.dest, os.path.abspath(os.path.expanduser(values)))

    @staticmethod
    def validate(dirname: str) -> str:
        """Check that *dirname* names an existing directory (for ``type=``).

        The check is done on the ``~``-expanded path so that it agrees with
        what ``__call__`` stores; otherwise a literal ``~/dir`` would be
        rejected here although the action itself would accept it.

        Returns:
            *dirname* unchanged (expansion is left to ``__call__``).

        Raises:
            argparse.ArgumentTypeError: the path is not a directory.
        """
        if not os.path.isdir(os.path.expanduser(dirname)):
            raise argparse.ArgumentTypeError("'{}' is not a directory".format(dirname))
        return dirname
class MultiplePaths(FullPaths):
    """Multiple paths as argument.

    Each given path is ``~``-expanded and made absolute. Repeating the option
    on one command line accumulates the paths in order.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """Append the normalised *values* to the list stored on *namespace*.

        Raises:
            argparse.ArgumentTypeError: *values* is not a list (i.e. the
                option was not declared with a list-producing ``nargs``).
        """
        if not isinstance(values, list):
            raise argparse.ArgumentTypeError("invalid: {} {}".format(type(values), values))
        # Build the result from the namespace rather than from a class-level
        # list: a shared class attribute would leak paths between different
        # options using this action and between separate parse_args() calls.
        dirs = list(getattr(namespace, self.dest, None) or [])
        dirs.extend(os.path.abspath(os.path.expanduser(i)) for i in values)
        setattr(namespace, self.dest, dirs)
class RegEx(argparse.Action):
    """Regular expression as an argument"""

    def __call__(self, parser, namespace, values, option_string=None):
        """Store *values* on *namespace* as a compiled pattern."""
        setattr(namespace, self.dest, re.compile(values))

    @staticmethod
    def validate(s: str) -> str:
        """Check that *s* compiles as a regular expression (for ``type=``).

        Declared static so it works both as ``RegEx.validate`` and when
        looked up on an action instance.

        Returns:
            *s* unchanged; compilation for storage happens in ``__call__``.

        Raises:
            argparse.ArgumentTypeError: *s* is not a valid pattern.
        """
        try:
            re.compile(s)
        except re.error as err:
            # Chain the cause so the original regex error stays in tracebacks.
            raise argparse.ArgumentTypeError("invalid regex: {}".format(s)) from err
        return s
def fixCamel(s:str) -> str:
    """
    Insert a space wherever the character class changes inside a word.

    Characters are classed as digit, lowercase letter, uppercase letter,
    space (tab or blank) or "other". A single space is inserted in front of a
    character whose class differs from the previous one, except when the
    previous character was uppercase or a space, or the new character is
    itself a space.

    Example: FooBar -> Foo Bar, foo123 -> foo 123 (ABCdef stays ABCdef)
    """
    SPACE, DIGIT, LOWER, UPPER, OTHER = "space", "digit", "lower", "upper", "other"

    def classify(ch):
        # Digits are tested first, then letters by case, then blanks.
        if ch.isdigit():
            return DIGIT
        if ch.isalpha():
            if ch.islower():
                return LOWER
            if ch.isupper():
                return UPPER
            return OTHER  # caseless letters
        if ch in ("\t", " "):
            return SPACE
        return OTHER

    pieces = []
    previous = None  # class of the preceding character; None before the first
    for ch in s:
        kind = classify(ch)
        boundary = previous is not None and kind != previous
        if boundary and previous not in (UPPER, SPACE) and kind != SPACE:
            pieces.append(" ")
        previous = kind
        pieces.append(ch)
    return "".join(pieces)
log = logging.getLogger(__name__)


def _build_parser():
    """Create the command line parser with all options of this script."""
    parser = argparse.ArgumentParser(
        description=__DESCRIPTION__,
        epilog=__EPILOG__,
        usage=os.linesep.join(__EXAMPLES__),
    )
    # More information
    parser.add_argument('--verbose', '-v', action='count', required=False, default=0, dest='verbose',
                        help="Be verbose. -vvv..v Be more verbose.")
    # Match directory
    parser.add_argument('--match-directory', '-m', action=MultiplePaths, type=MultiplePaths.validate, dest='matchdirectory', required=True, nargs='+',
                        help='Use directory as matches')
    # Directory to match
    parser.add_argument('--directory', '-d', action=FullPaths, type=FullPaths.validate, dest='directory', required=True,
                        help='Directory where to find matches')
    # Test run
    parser.add_argument('--dry-run', action='store_true', default=False, dest='dry', required=False,
                        help='Dry run. Test what would happen.')
    parser.add_argument('--space', '-s', action=RegEx, type=RegEx.validate, dest='space', required=False,
                        default=re.compile(r"\s*"), help=r"Space (' ') match regular expression in match directory names. For example: \s*")
    parser.add_argument('--prefix', '-p', action=RegEx, type=RegEx.validate, dest='prefix', required=False,
                        default=re.compile(r"\b"), help=r"Prefix match regular expression in match directory names. For example: \b")
    return parser


def _collect_match_directories(roots, prefix, space):
    """Return a {"path", "match"} dict for every sub-directory of each root.

    The pattern is ``<prefix><escaped directory name>\\b`` where each escaped
    space in the name is replaced by the *space* pattern; it is compiled
    case-insensitively.
    """
    entries = []
    for root in roots:
        for name in os.listdir(root):
            path = os.path.join(root, name)
            if not os.path.isdir(path):
                continue
            pattern = r"{}{}\b".format(prefix.pattern, re.escape(name))
            pattern = pattern.replace(r"\ ", space.pattern)
            compiled = re.compile(pattern, re.IGNORECASE | re.UNICODE)
            entries.append({
                "path": path,
                "match": compiled,
            })
            log.debug("match '{}' added".format(compiled.pattern))
    return entries


def _normalize_name(name):
    """Turn a file name into space separated words for matching."""
    text = re.sub(r"\W", " ", name)  # all non-alphanumericals to spaces
    text = text.replace("_", " ")
    text = fixCamel(text)
    text = re.sub(r"\s\s+", " ", text)  # extra spaces to one space
    return text.strip()


def _is_inside(candidate, source):
    """Return True when *candidate* is *source* itself or a path below it.

    Plain string comparison of the given paths; symlinks are not resolved.
    """
    return candidate == source or candidate.startswith(source + os.sep)


def _find_matches(directory, match_directories):
    """Map each entry of *directory* to the match directories it matches.

    Returns a dict keyed by the entry's full path with values
    ``{"name": <entry name>, "matches": [<match directory path>, ...]}``.
    Entries without any match are absent.
    """
    matched = {}  # path used as key
    for name in os.listdir(directory):
        path = os.path.join(directory, name)
        matcher = _normalize_name(name)
        for m in match_directories:
            if not m["match"].search(matcher):
                continue
            if _is_inside(m["path"], path):
                # Happens when the source and match directories overlap: the
                # entry IS the match directory (or contains it). Moving it
                # would make shutil.move fail with "Cannot move a directory
                # into itself", so it is not a candidate.
                log.debug("Skipping '{}': it is or contains match directory '{}'".format(path, m["path"]))
                continue
            log.debug("Match: '{}' --> '{}' (orig: '{}')".format(m["match"].pattern, matcher, name))
            info = matched.setdefault(path, {
                "name": name,
                "matches": [],
            })
            info["matches"].append(m["path"])
    return matched


def _unique_destination(target_dir, name):
    """Return a path for *name* inside *target_dir* that does not exist yet.

    On a collision a "YYYYMMDD HHMMSS" timestamp is appended (before the
    extension when the existing entry is a file). If that name is taken as
    well, a running number is added after the timestamp instead of retrying
    the same name until the clock advances.

    Exits the process with status 1 when the existing entry is neither a
    regular file nor a directory.
    """
    dest = os.path.join(target_dir, name)
    attempt = 0
    while os.path.exists(dest):
        now = datetime.datetime.now()
        add = "{0}{1:02d}{2:02d} {3:02d}{4:02d}{5:02d}".format(now.year, now.month, now.day, now.hour, now.minute, now.second)
        if attempt:
            add = "{} {}".format(add, attempt)
        if os.path.isfile(dest):
            log.warning("err: file '{}' exists! renaming..".format(dest))
            stem, ext = os.path.splitext(name)
            dest = os.path.join(target_dir, "{} {}{}".format(stem, add, ext))
        elif os.path.isdir(dest):
            log.warning("err: directory '{}' exists! renaming..".format(dest))
            dest = os.path.join(target_dir, "{} {}".format(name, add))
        else:
            log.critical("??")
            sys.exit(1)
        attempt += 1
    return dest


def main():
    """Parse the command line and move every uniquely matched entry."""
    logging.basicConfig(
        stream=sys.stdout,
        level=logging.INFO,
        format='%(asctime)s %(levelname)s: %(message)s',
        datefmt="%H:%M:%S",
    )
    args = _build_parser().parse_args()

    if int(args.verbose) > 0:
        logging.getLogger().setLevel(logging.DEBUG)
        log.info("Being verbose")
    if args.dry:
        log.info("Dry run")
    log.info("regex match: prefix:'{}' space:'{}'".format(args.prefix.pattern, args.space.pattern))
    log.debug("match directories: {}".format(args.matchdirectory))

    # get match directories list
    match_directories = _collect_match_directories(args.matchdirectory, args.prefix, args.space)
    matched = _find_matches(args.directory, match_directories)

    # no matches found
    if not matched:
        log.info("No matches found!")
        sys.exit(0)

    log.info("Moving files..")
    # move files; entries with several candidate directories are left alone
    for source, info in matched.items():
        if len(info["matches"]) != 1:
            continue
        dest = _unique_destination(info["matches"][0], info["name"])
        log.info("Moving '{}' -> '{}'".format(source, dest))
        if not args.dry:
            # Actually move file/directory to destination match directory
            shutil.move(source, dest)

    # tell about multiple matches
    for source, info in matched.items():
        if len(info["matches"]) != 1:
            log.warning("err: file/dir '{}' has multiple matches! move it manually or rename it!".format(source))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment