Skip to content

Instantly share code, notes, and snippets.

@jwineinger
Created September 1, 2013 05:49
Show Gist options
  • Save jwineinger/6402594 to your computer and use it in GitHub Desktop.
Save jwineinger/6402594 to your computer and use it in GitHub Desktop.
A script to move media files from one directory (recursively by default) into a different directory with a date-based hierarchy. For JPG images, the EXIF data is read to determine the proper directory. If the date cannot be found via EXIF, it falls back to the file's last modification time. It also uses the file's mtime for all other media files.
import os, argparse, hashlib
from datetime import datetime
from PIL import Image, ExifTags
from shutil import move
def modification_date(filename):
    """Return the last-modification time of *filename* as a ``datetime``.

    The timestamp reported by ``os.path.getmtime`` is converted with
    ``datetime.fromtimestamp``, so the result is a naive local-time value.
    """
    return datetime.fromtimestamp(os.path.getmtime(filename))
def md5_for_file(f, block_size=2**20):
    """Return the raw MD5 digest of everything readable from file object *f*.

    The stream is consumed in pieces of at most *block_size* so the whole
    file never has to be held in memory at once. Reading starts at the
    stream's current position and stops at the first empty read.
    """
    digest = hashlib.md5()
    chunk = f.read(block_size)
    while chunk:
        digest.update(chunk)
        chunk = f.read(block_size)
    return digest.digest()
# Reverse lookup table: EXIF tag name -> numeric tag id (ExifTags.TAGS maps
# id -> name). ``items()`` is used instead of ``iteritems()`` because the
# latter only exists on Python 2 dictionaries; ``items()`` works on both.
EXIF_TAGS = {name: tag_id for tag_id, name in ExifTags.TAGS.items()}

# Numeric id of the 'DateTimeOriginal' tag, used as the key into an image's
# EXIF dictionary when looking up its date.
EXIF_DATE = EXIF_TAGS['DateTimeOriginal']
class ImageError(Exception):
    """Signals that an image file could not be opened for inspection."""
class MediaMover(object):
    """Move media files from ``src_dir`` into ``dest_dir/YYYY/MM/DD``.

    Image files are dated from their EXIF 'DateTimeOriginal' tag when it is
    present and parseable, otherwise from the file's modification time.
    Movie files are always dated from the modification time. Files whose
    extension is not in ``ALLOWED_TYPES`` are skipped.
    """

    # Lower-cased extensions that are inspected for EXIF data.
    IMAGE_TYPES = frozenset({'.jpg', '.png'})
    # Lower-cased extensions that are dated by file mtime only.
    MOVIE_TYPES = frozenset({'.mov', '.avi', '.mpg'})
    # Every extension this mover is willing to touch.
    ALLOWED_TYPES = frozenset(IMAGE_TYPES | MOVIE_TYPES)

    def __init__(self, src_dir, dest_dir, no_recurse, skip_rm_prompt):
        """Store the run configuration.

        Args:
            src_dir: directory to scan for media files.
            dest_dir: root directory under which the date hierarchy is built.
            no_recurse: when true, only the top level of ``src_dir`` is
                scanned; otherwise the whole tree is walked.
            skip_rm_prompt: when true, duplicate source files are removed
                without asking for confirmation.
        """
        self.src_dir = src_dir
        self.dest_dir = dest_dir
        self.no_recurse = no_recurse
        self.skip_rm_prompt = skip_rm_prompt

    @staticmethod
    def _date_parts(dt):
        """Return ``(year, month, day)`` strings for datetime *dt*."""
        return dt.strftime("%Y"), dt.strftime("%m"), dt.strftime("%d")

    def get_dest_by_exif(self, path):
        """Return ``(year, month, day)`` from the image's EXIF date, or None.

        None is returned when the image exposes no EXIF reader, carries no
        EXIF data, lacks the date tag, or the tag value does not match the
        ``YYYY:MM:DD HH:MM:SS`` layout.

        Raises:
            ImageError: if the file cannot be opened as an image.
        """
        try:
            img = Image.open(path)
        except IOError:
            raise ImageError(path)

        # Close the image once the EXIF data has been read; otherwise a
        # file handle stays open for every image visited during a run.
        with img:
            try:
                exif_data = img._getexif()
            except AttributeError:
                # This image type offers no EXIF accessor.
                return None

        if not exif_data:
            return None
        orig_str = exif_data.get(EXIF_DATE)
        if not orig_str:
            return None
        try:
            dt = datetime.strptime(orig_str, "%Y:%m:%d %H:%M:%S")
        except ValueError:
            # Unparseable date string: let the caller fall back to mtime.
            return None
        return self._date_parts(dt)

    def get_dest_by_file_mtime(self, path):
        """Return ``(year, month, day)`` from the file's modification time."""
        return self._date_parts(modification_date(path))

    def get_filepaths(self):
        """Yield the path of every candidate file under ``src_dir``.

        With ``no_recurse`` set, only regular files directly inside
        ``src_dir`` are yielded and other entries are reported as skipped.
        Otherwise every file found by ``os.walk`` is yielded.
        """
        if self.no_recurse:
            for name in os.listdir(self.src_dir):
                path = os.path.join(self.src_dir, name)
                if os.path.isfile(path):
                    yield path
                else:
                    print("skipping non-file %s" % name)
        else:
            for root, _dirnames, filenames in os.walk(self.src_dir):
                for name in filenames:
                    yield os.path.join(root, name)

    def _handle_existing_dest(self, filepath, dest):
        """Deal with *dest* already existing; the file is never moved.

        If both files have the same MD5 digest, *filepath* is treated as a
        duplicate and removed (after confirmation unless ``skip_rm_prompt``
        is set). Nothing is removed when both paths refer to the same file.

        Raises:
            Exception: if the two files differ in content.
        """
        # Context managers make sure both handles are closed right after
        # hashing instead of being left for the garbage collector.
        with open(dest, 'rb') as blocker:
            blocker_md5 = md5_for_file(blocker)
        with open(filepath, 'rb') as mover:
            mover_md5 = md5_for_file(mover)

        if blocker_md5 != mover_md5:
            raise Exception("%s already exists" % dest)

        if os.path.samefile(dest, filepath):
            # Source already is the destination: nothing to remove.
            return

        if self.skip_rm_prompt:
            ans = 'y'
            print("%s is a duplicate of %s" % (filepath, dest))
        else:
            try:
                ask = raw_input  # Python 2
            except NameError:
                ask = input  # Python 3
            ans = ask("%s is a duplicate of %s, remove it?" % (filepath, dest))

        if ans == 'y':
            import sys
            # Written without a newline so "done" lands on the same line.
            sys.stdout.write("removing... ")
            os.unlink(filepath)
            print("done")
        else:
            print("not removing")

    def move_media(self):
        """Move every supported media file into its dated directory.

        For each file: pick the date parts (EXIF first for images, mtime
        otherwise), create ``dest_dir/YYYY/MM/DD`` if needed, then move the
        file there. Unreadable images and unsupported extensions are
        skipped. An already-existing destination file is handled by
        ``_handle_existing_dest``.

        Raises:
            Exception: if the dated directory path exists but is not a
                directory, or if a different file already occupies the
                destination name.
        """
        for filepath in self.get_filepaths():
            filename = os.path.basename(filepath)
            ext = os.path.splitext(filepath)[1].lower()

            if ext not in self.ALLOWED_TYPES:
                print("skipping %s" % ext)
                continue

            if ext in self.IMAGE_TYPES:
                try:
                    dest_parts = self.get_dest_by_exif(filepath)
                except ImageError:
                    print("%s may be corrupted, skipping" % filepath)
                    continue
                if not dest_parts:
                    dest_parts = self.get_dest_by_file_mtime(filepath)
            elif ext in self.MOVIE_TYPES:
                dest_parts = self.get_dest_by_file_mtime(filepath)
            else:
                # Only reachable if ALLOWED_TYPES gains an extension that is
                # in neither IMAGE_TYPES nor MOVIE_TYPES.
                raise Exception("huh? %s" % ext)

            if not dest_parts:
                print("%s couldn't get date" % filepath)
                continue

            dest_path = os.path.join(self.dest_dir, *dest_parts)
            if not os.path.isdir(dest_path):
                if os.path.exists(dest_path):
                    raise Exception("Houston, we have a problem with %s" % dest_path)
                # 0o755 is the octal spelling accepted by Python 2.6+ and 3.
                os.makedirs(dest_path, 0o755)

            dest = os.path.join(dest_path, filename)
            if os.path.exists(dest):
                self._handle_existing_dest(filepath, dest)
                continue

            print("moving %s -> %s" % (filepath, dest))
            move(filepath, dest)
if __name__ == '__main__':
    # Command-line entry point: parse the two directories and the two
    # optional switches, then run a single move pass.
    cli = argparse.ArgumentParser()
    cli.add_argument("src_dir")
    cli.add_argument("dest_dir")
    cli.add_argument(
        "--no-recurse",
        action="store_true",
        help="Don't recurse into subdirectories of src_dir",
    )
    cli.add_argument(
        "--skip-rm-prompt",
        action="store_true",
        help="Don't prompt before removing duplicates",
    )
    opts = cli.parse_args()

    mover = MediaMover(
        src_dir=opts.src_dir,
        dest_dir=opts.dest_dir,
        no_recurse=opts.no_recurse,
        skip_rm_prompt=opts.skip_rm_prompt,
    )
    mover.move_media()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment