Skip to content

Instantly share code, notes, and snippets.

@nanaze
Last active December 13, 2015 22:39
Show Gist options
  • Save nanaze/4985938 to your computer and use it in GitHub Desktop.
Save nanaze/4985938 to your computer and use it in GitHub Desktop.
Copies files into a directory by SHA hash. This allows flat archival of files without worrying about filename clashes.
#!/usr/bin/env python
"""Utility to archive files by hash.
Gets the shasum of each file and copies the file to the specified dest_dir
with the hash inserted into the name. For example, IMG_011.JPG would become
IMG_011.ab98feb.JPG, etc. This is useful for image files that might share the
same filename but have differing contents (such as after editing).
"""
__author__ = '(Nathan Naze)'
import argparse
import hashlib
import logging
import os
import shutil
import subprocess
import sys
def _GetArgsParser():
"""Get the options parser."""
parser = argparse.ArgumentParser(description='Copy into directory by path.')
parser.add_argument('--dest_dir')
parser.add_argument('paths', metavar='path', type=str, nargs='+',
help='Source paths to be copied.')
return parser
def _GetShaHash(path):
output = subprocess.check_output(['shasum', path])
return output.split()[0]
def _CopyToHashFile(src_path, dir):
    """Copy ``src_path`` into ``dir`` with its hash inserted before the extension.

    The destination name is ``<root>.<hash><ext>``. If a file with that name is
    already present in ``dir``, nothing is copied.

    Args:
      src_path: Path of the file to archive.
      dir: Directory that receives the copy.
    """
    digest = _GetShaHash(src_path)
    logging.info('File %s shasum: %s', src_path, digest)

    stem, suffix = os.path.splitext(os.path.basename(src_path))
    dest = os.path.join(dir, '%s.%s%s' % (stem, digest, suffix))

    if not os.path.exists(dest):
        logging.info('Copying file. %s to %s', src_path, dest)
        # copy2 carries file metadata along with the contents.
        shutil.copy2(src_path, dest)
    else:
        logging.info('File exists, skipping. %s', dest)
def main():
    """Parse the command line and archive every source path into --dest_dir.

    Prints usage and exits with status 1 when --dest_dir is missing or does not
    name an existing directory. A leading '~' in the destination and in each
    source path is expanded before use.
    """
    # The program name is spliced into a %-style format string, so any literal
    # '%' in the script path must be doubled or log formatting would break.
    prog = sys.argv[0].replace('%', '%%')
    logging.basicConfig(format=(prog + ': %(message)s'),
                        level=logging.INFO)

    parser = _GetArgsParser()
    args = parser.parse_args()

    if not args.dest_dir:
        parser.print_help()
        sys.exit(1)  # sys.exit raises SystemExit; nothing after it runs.

    dest_dir = os.path.expanduser(args.dest_dir)
    if not os.path.isdir(dest_dir):
        logging.error('Did not find destination directory %s', dest_dir)
        parser.print_help()
        sys.exit(1)

    for path in args.paths:
        _CopyToHashFile(os.path.expanduser(path), dest_dir)
# Run the archiver only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment