Created
September 1, 2013 05:49
-
-
Save jwineinger/6402594 to your computer and use it in GitHub Desktop.
Just a script to move media files from one directory (recursively by default) into a different directory with a date-based hierarchy. For JPG images, the EXIF data is read to determine the proper directory. If it cannot find the date via EXIF, then it falls back to the file's last modification time. It also uses the file's mtime for all other media types.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse
import hashlib
import os
import sys
from datetime import datetime
from shutil import move

from PIL import Image, ExifTags
def modification_date(filename):
    """Return the last-modification time of *filename* as a naive local datetime."""
    mtime_seconds = os.stat(filename).st_mtime
    return datetime.fromtimestamp(mtime_seconds)
def md5_for_file(f, block_size=2**20):
    """Return the raw MD5 digest of everything left to read from *f*.

    *f* is an already-open file-like object; it is consumed in pieces of at
    most *block_size* bytes so the whole file is never held in memory. The
    caller remains responsible for closing *f*.
    """
    hasher = hashlib.md5()
    chunk = f.read(block_size)
    # An empty read signals end of file.
    while chunk:
        hasher.update(chunk)
        chunk = f.read(block_size)
    return hasher.digest()
# Reverse lookup built by inverting ExifTags.TAGS, so a tag can be found by
# its name. ``items()`` is used instead of ``iteritems()`` because the latter
# does not exist on Python 3 dictionaries; ``items()`` works on both.
EXIF_TAGS = {name: tag_id for tag_id, name in ExifTags.TAGS.items()}
# Key under which the 'DateTimeOriginal' value is looked up in the dict
# returned by an image's ``_getexif()``.
EXIF_DATE = EXIF_TAGS['DateTimeOriginal']
class ImageError(Exception):
    """Raised when an image file cannot be opened for EXIF inspection."""
class MediaMover(object):
    """Sort media files into a ``dest_dir/YYYY/MM/DD`` directory tree.

    Images (``IMAGE_TYPES``) are dated from their EXIF ``DateTimeOriginal``
    value when it can be read and parsed, falling back to the file's
    modification time. Movies (``MOVIE_TYPES``) are always dated by
    modification time. Files with any other extension are skipped.
    """

    IMAGE_TYPES = frozenset({'.jpg', '.png'})
    MOVIE_TYPES = frozenset({'.mov', '.avi', '.mpg'})
    ALLOWED_TYPES = frozenset(IMAGE_TYPES | MOVIE_TYPES)

    def __init__(self, src_dir, dest_dir, no_recurse, skip_rm_prompt):
        """Store the run configuration.

        Args:
            src_dir: Directory to scan for media files.
            dest_dir: Root directory under which the date hierarchy is created.
            no_recurse: If true, only the direct children of ``src_dir`` are
                considered; otherwise the whole tree is walked.
            skip_rm_prompt: If true, duplicate source files are removed
                without asking for confirmation.
        """
        self.src_dir = src_dir
        self.dest_dir = dest_dir
        self.no_recurse = no_recurse
        self.skip_rm_prompt = skip_rm_prompt

    @staticmethod
    def _date_parts(dt):
        """Return ``(year, month, day)`` of *dt* as zero-padded strings."""
        return dt.strftime("%Y"), dt.strftime("%m"), dt.strftime("%d")

    def get_dest_by_exif(self, path):
        """Return ``(year, month, day)`` from the image's EXIF original date.

        Returns ``None`` when the image exposes no EXIF reader, has no EXIF
        data, lacks the date tag, or the tag value cannot be parsed.

        Raises:
            ImageError: If the file cannot be opened as an image.
        """
        try:
            img = Image.open(path)
        except IOError:
            raise ImageError(path)

        try:
            try:
                exif_data = img._getexif()
            except AttributeError:
                # Image classes without a ``_getexif`` method land here.
                return None
        finally:
            # Release the underlying file handle; the original never closed it.
            img.close()

        if not exif_data:
            return None

        orig_str = exif_data.get(EXIF_DATE)
        if not orig_str:
            return None
        try:
            return self._date_parts(
                datetime.strptime(orig_str, "%Y:%m:%d %H:%M:%S"))
        except (TypeError, ValueError):
            # Malformed or non-string date value: let the caller fall back.
            return None

    def get_dest_by_file_mtime(self, path):
        """Return ``(year, month, day)`` from the file's modification time."""
        return self._date_parts(modification_date(path))

    def get_filepaths(self):
        """Yield the paths of candidate files under ``src_dir``.

        With ``no_recurse`` set, only regular files directly inside
        ``src_dir`` are yielded and other entries are reported and skipped.
        Otherwise every file found by walking the tree is yielded.
        """
        if self.no_recurse:
            for name in os.listdir(self.src_dir):
                path = os.path.join(self.src_dir, name)
                if os.path.isfile(path):
                    yield path
                else:
                    print("skipping non-file %s" % name)
        else:
            for root, _dirnames, filenames in os.walk(self.src_dir):
                for name in filenames:
                    yield os.path.join(root, name)

    def _handle_existing(self, filepath, dest):
        """Resolve a name collision between *filepath* and existing *dest*.

        If both paths refer to the same file nothing is done. If the contents
        are identical the source is treated as a duplicate and removed,
        after confirmation unless ``skip_rm_prompt`` is set.

        Raises:
            Exception: If *dest* exists with different content.
        """
        # Cheap identity check first: avoids hashing a file against itself.
        if os.path.samefile(dest, filepath):
            return

        # Context managers make sure both handles are closed after hashing.
        with open(dest, 'rb') as blocker, open(filepath, 'rb') as mover:
            same_content = md5_for_file(blocker) == md5_for_file(mover)
        if not same_content:
            raise Exception("%s already exists" % dest)

        if self.skip_rm_prompt:
            ans = 'y'
            print("%s is a duplicate of %s" % (filepath, dest))
        else:
            try:
                ask = raw_input  # Python 2
            except NameError:
                ask = input  # Python 3
            ans = ask("%s is a duplicate of %s, remove it?" % (filepath, dest))

        if ans == 'y':
            # Progress text is written before the unlink so a failure is
            # visibly attributed to the removal step.
            sys.stdout.write("removing... ")
            sys.stdout.flush()
            os.unlink(filepath)
            print("done")
        else:
            print("not removing")

    def move_media(self):
        """Move every supported media file into its dated destination folder.

        Unsupported extensions and unreadable images are reported and
        skipped. Destination directories are created on demand.

        Raises:
            Exception: If a destination directory path exists but is not a
                directory, or a different file with the same name already
                exists at the destination.
        """
        for filepath in self.get_filepaths():
            ext = os.path.splitext(filepath)[1].lower()
            if ext not in self.ALLOWED_TYPES:
                print("skipping %s" % ext)
                continue

            if ext in self.IMAGE_TYPES:
                try:
                    dest_parts = self.get_dest_by_exif(filepath)
                except ImageError:
                    print("%s may be corrupted, skipping" % filepath)
                    continue
                if not dest_parts:
                    dest_parts = self.get_dest_by_file_mtime(filepath)
            elif ext in self.MOVIE_TYPES:
                dest_parts = self.get_dest_by_file_mtime(filepath)
            else:
                # Defensive: ALLOWED_TYPES gained a member that neither
                # IMAGE_TYPES nor MOVIE_TYPES covers.
                raise Exception("huh? %s" % ext)

            if not dest_parts:
                print("%s couldn't get date" % filepath)
                continue

            dest_path = os.path.join(self.dest_dir, *dest_parts)
            if not os.path.isdir(dest_path):
                if os.path.exists(dest_path):
                    raise Exception(
                        "Houston, we have a problem with %s" % dest_path)
                # 0o755 is the octal spelling accepted by Python 2.6+ and 3.
                os.makedirs(dest_path, 0o755)

            dest = os.path.join(dest_path, os.path.basename(filepath))
            if os.path.exists(dest):
                self._handle_existing(filepath, dest)
                continue

            print("moving %s -> %s" % (filepath, dest))
            move(filepath, dest)
if __name__ == '__main__':
    # Command-line entry point: parse the arguments and run one move pass.
    cli = argparse.ArgumentParser()
    cli.add_argument("src_dir")
    cli.add_argument("dest_dir")
    cli.add_argument(
        "--no-recurse",
        action="store_true",
        help="Don't recurse into subdirectories of src_dir",
    )
    cli.add_argument(
        "--skip-rm-prompt",
        action="store_true",
        help="Don't prompt before removing duplicates",
    )
    options = cli.parse_args()
    mover = MediaMover(
        src_dir=options.src_dir,
        dest_dir=options.dest_dir,
        no_recurse=options.no_recurse,
        skip_rm_prompt=options.skip_rm_prompt,
    )
    mover.move_media()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment