Skip to content

Instantly share code, notes, and snippets.

@dirkjonker
Created January 11, 2018 07:53
Show Gist options
  • Save dirkjonker/546fc67eb06e76142482d77569d21ebe to your computer and use it in GitHub Desktop.
Save dirkjonker/546fc67eb06e76142482d77569d21ebe to your computer and use it in GitHub Desktop.
deduplicate and organise photos
import datetime
import hashlib
import pathlib
import sys
import piexif
# Root of the organised photo library: ``Pictures`` inside the home directory.
PIC_ROOT = pathlib.Path.home().joinpath('Pictures')

# Planning state filled in by process() and reported by dry_run().
create_dirs = set()   # target directories that do not exist yet
mv_files = set()      # (source, target) pairs for files that would be moved
new_files = set()     # target paths already claimed by a planned move
del_files = set()     # source files that duplicate an existing target
error_files = set()   # (source, target, reason) tuples for name clashes
def gethash(p):
    """Return the hexadecimal MD5 digest of the contents of file ``p``.

    ``p`` is a path object providing ``open('rb')``. The file is read in
    fixed-size chunks inside a ``with`` block so that the handle is always
    closed and large photos are not loaded into memory in one piece.
    """
    h = hashlib.new('md5')
    with p.open('rb') as f:
        # iter() with a sentinel stops at the empty bytes object read at EOF.
        for chunk in iter(lambda: f.read(1024 * 1024), b''):
            h.update(chunk)
    return h.hexdigest()
def get_mtime(p):
    """Return the last-modification time of ``p`` as a naive local datetime.

    The timestamp comes from ``p.stat().st_mtime``.
    """
    modified_ts = p.stat().st_mtime
    return datetime.datetime.fromtimestamp(modified_ts)
def get_exiftime(p):
    """Return the capture time stored in the EXIF data of image ``p``.

    The image is loaded with ``piexif`` and the value under tag 36867 of
    the ``'Exif'`` section is decoded and parsed as
    ``YYYY:MM:DD HH:MM:SS`` into a naive datetime.

    Any failure (unreadable image, missing tag, malformed value) propagates
    as an exception to the caller.
    """
    exif_data = piexif.load(p.as_posix())
    # 36867 (0x9003) is the EXIF "DateTimeOriginal" tag.
    raw_stamp = exif_data['Exif'][36867]
    stamp_text = raw_stamp.decode()
    return datetime.datetime.strptime(stamp_text, '%Y:%m:%d %H:%M:%S')
def get_file_date(p):
    """Return the best available date for image ``p``.

    The EXIF capture time is preferred. If it cannot be obtained for any
    ordinary reason, the file's modification time is used instead.
    """
    try:
        return get_exiftime(p)
    except Exception:
        # Best-effort fallback. ``Exception`` rather than a bare ``except``
        # so that KeyboardInterrupt and SystemExit still propagate.
        return get_mtime(p)
def process(p):
    """Plan what should happen to image ``p`` and record it in module state.

    The target location is ``PIC_ROOT/YYYY/MM/DD/<file name>``, with the
    date taken from ``get_file_date``. Depending on what already occupies
    the target, exactly one of the following is recorded:

    * nothing, when ``p`` already is the file at the target path;
    * a move in ``mv_files`` / ``new_files``, when the target is free;
    * a deletion in ``del_files``, when the target holds identical content;
    * an entry in ``error_files``, when the target holds different content.

    A missing target directory is added to ``create_dirs``.
    """
    d = get_file_date(p)
    new_dir = PIC_ROOT / d.strftime('%Y/%m/%d')
    if not new_dir.is_dir():
        create_dirs.add(new_dir)
    new_file = new_dir / p.name

    if new_file.is_file():
        if new_file.samefile(p):
            # Already in place: comparing the file with itself would flag
            # it as a duplicate and queue the only copy for deletion.
            return
        existing = new_file
    elif new_file in new_files:
        # The target is only *planned* and does not exist on disk yet, so
        # hash the source of the pending move rather than the target path.
        existing = next(
            (src for src, dst in mv_files if dst == new_file), new_file)
    else:
        mv_files.add((p, new_file))
        new_files.add(new_file)
        return

    if gethash(existing) == gethash(p):
        del_files.add(p)
    else:
        error_files.add((p, new_file, 'identical name, different hash'))
def dry_run():
    """Print the planned actions without touching the file system.

    Output order: directories to create, files to move, duplicates to
    remove, name clashes, then the move and delete totals. Each group is
    printed in sorted order.
    """
    for directory in sorted(create_dirs):
        print('Would create directory {}'.format(directory))

    for move in sorted(mv_files):
        # Each entry is a (source, target) pair.
        print('Would move {} to {}'.format(*move))

    for duplicate in sorted(del_files):
        print('Would remove duplicate file {}'.format(duplicate))

    for clash in sorted(error_files):
        source, target, reason = clash
        print("error: {}: source {} target: {}".format(reason, source, target))

    move_count = len(mv_files)
    delete_count = len(del_files)
    print('Total number of files to move:', move_count)
    print('Total number of files to delete:', delete_count)
def main():
    """Scan the directory named on the command line and print a dry run.

    Expects exactly one command-line argument: the directory to search
    recursively for JPEG images. Prints a message and exits with status 1
    when the argument is missing or is not a directory.
    """
    if len(sys.argv) != 2:
        print('Please provide directory to scan for images')
        # sys.exit rather than the site-provided exit(), which is meant for
        # the interactive shell and is absent when ``site`` is not loaded.
        sys.exit(1)
    search_root = pathlib.Path(sys.argv[1])
    if not search_root.is_dir():
        print('{} is not a directory'.format(search_root))
        sys.exit(1)
    for pic in search_root.rglob('*'):
        # Match the extension case-insensitively so that .jpg and .jpeg
        # files are picked up as well as .JPG.
        if pic.name.lower().endswith(('.jpg', '.jpeg')):
            process(pic)
    dry_run()
# Run the scan only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment