Last active
December 13, 2015 22:39
-
-
Save nanaze/4985938 to your computer and use it in GitHub Desktop.
Copies files into a directory by SHA hash. This allows flat archival of files without worrying about filename clashes.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
"""Utility to archive files by hash.

Gets the shasum of each file and copies the file to the specified dest_dir
with the hash inserted into the name. For example, IMG_011.JPG would become
IMG_011.ab98feb.JPG, etc. (The hash is abbreviated in this example; the
complete hash string is what gets inserted into the real file name.) This is
useful for image files that might share the same filename but have differing
contents (such as after editing).

Usage: pass the destination with --dest_dir followed by one or more source
paths. A file whose hashed name already exists in dest_dir is skipped.
"""

__author__ = '(Nathan Naze)'
import argparse
import hashlib
import logging
import os
import shutil
import subprocess
import sys
def _GetArgsParser():
    """Build the command-line parser for this tool.

    Returns:
        An argparse.ArgumentParser that accepts an optional --dest_dir value
        and one or more positional source paths (stored as `paths`).
    """
    arg_parser = argparse.ArgumentParser(
        description='Copy into directory by path.')
    arg_parser.add_argument('--dest_dir')

    # nargs='+' means at least one source path must be supplied.
    positional_options = {
        'metavar': 'path',
        'type': str,
        'nargs': '+',
        'help': 'Source paths to be copied.',
    }
    arg_parser.add_argument('paths', **positional_options)
    return arg_parser
def _GetShaHash(path):
    """Return the SHA-1 digest of a file as a lowercase hex string.

    This is the same digest the `shasum` command prints by default, but it is
    computed in-process so that no external binary is required, a path that
    begins with '-' cannot be mistaken for a command-line option, and the
    result is a native str on both Python 2 and Python 3 (subprocess output
    is bytes on Python 3, which would corrupt the formatted file name).

    Args:
        path: Path of the file to hash.

    Returns:
        The 40-character hexadecimal SHA-1 digest of the file's contents.

    Raises:
        IOError or OSError: If the file cannot be opened or read.
    """
    digest = hashlib.sha1()
    with open(path, 'rb') as source:
        # Read fixed-size chunks so large files are never fully in memory.
        for chunk in iter(lambda: source.read(1 << 20), b''):
            digest.update(chunk)
    return digest.hexdigest()
def _CopyToHashFile(src_path, dir):
    """Copy src_path into dir under a name that embeds the file's hash.

    The destination name is '<root>.<hash><ext>', where root and ext come
    from splitting the source file's base name. When a file with that name
    already exists in dir, nothing is copied.

    Args:
        src_path: Path of the file to archive.
        dir: Directory that receives the copy.
    """
    digest = _GetShaHash(src_path)
    logging.info('File %s shasum: %s', src_path, digest)

    stem, suffix = os.path.splitext(os.path.basename(src_path))
    target = os.path.join(dir, '%s.%s%s' % (stem, digest, suffix))

    if not os.path.exists(target):
        logging.info('Copying file. %s to %s', src_path, target)
        # copy2 also attempts to preserve the source file's metadata.
        shutil.copy2(src_path, target)
    else:
        logging.info('File exists, skipping. %s', target)
def main():
    """Parse the command line and archive every source path by hash.

    Prints usage and exits with status 1 when --dest_dir is missing or does
    not name an existing directory. Otherwise each source path (with a
    leading '~' expanded) is copied into the destination directory via
    _CopyToHashFile.
    """
    # Escape '%' so a program path containing it cannot break the %-style
    # logging format string; '%%' is rendered back as a single '%'.
    program = sys.argv[0].replace('%', '%%')
    logging.basicConfig(format=(program + ': %(message)s'),
                        level=logging.INFO)

    parser = _GetArgsParser()
    args = parser.parse_args()

    dest_dir = args.dest_dir
    if not dest_dir:
        parser.print_help()
        sys.exit(1)  # Raises SystemExit, so nothing below runs.

    dest_dir = os.path.expanduser(dest_dir)
    if not os.path.isdir(dest_dir):
        logging.error('Did not find destination directory %s', dest_dir)
        parser.print_help()
        sys.exit(1)

    for path in args.paths:
        _CopyToHashFile(os.path.expanduser(path), dest_dir)
# Run the archiver only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment