Skip to content

Instantly share code, notes, and snippets.

@CarstVaartjes
Last active August 12, 2017 12:30
Show Gist options
  • Save CarstVaartjes/bbeede5b4212aba72457e3dc32b86905 to your computer and use it in GitHub Desktop.
Save CarstVaartjes/bbeede5b4212aba72457e3dc32b86905 to your computer and use it in GitHub Desktop.
Deduplicate photo directories based on the basename + EXIF date + file size
# pip install exifread
import os
import shutil
import exifread
def get_date(filename):
    """Return the EXIF ``Image DateTime`` date of *filename* as ``YYYYMMDD``.

    The file is opened in binary mode and parsed with ``exifread``. The
    first ten characters of the tag value (``YYYY:MM:DD``) are kept and the
    colons are stripped.

    Args:
        filename: Path of the file to inspect.

    Returns:
        The date as an eight-character string, or ``'unknown'`` when the
        ``Image DateTime`` tag is not present (``KeyError``).
    """
    try:
        with open(filename, 'rb') as fh:
            # exifread compares ``stop_tag`` with the bare tag name (no
            # "Image " IFD prefix), so "DateTime" is what actually stops the
            # parse early. ``details=False`` skips maker notes and thumbnail
            # extraction, neither of which is needed for the date.
            tags = exifread.process_file(fh, stop_tag="DateTime", details=False)
        date_taken = tags["Image DateTime"]
        return date_taken.values[:10].replace(':', '')
    except KeyError:
        # No usable EXIF date in this file.
        return 'unknown'
# Upper-cased file extensions (including the leading dot) that are treated as
# media files. The extension is compared as a whole, so '.RAW' matches too;
# a four-character slice of the name could never equal a bare 'RAW'.
MEDIA_EXTENSIONS = ('.JPG', '.GIF', '.PNG', '.MOV', '.AVI', '.RAW')

# Maps "<basename>_<datestamp>_<size>" to every path that produced that key.
existing_size = {}
for dirpath, dirnames, filenames in os.walk("."):
    for filename in filenames:
        # Skip hidden files.
        if filename.startswith('.'):
            continue
        if os.path.splitext(filename)[1].upper() not in MEDIA_EXTENSIONS:
            continue
        full_name = os.path.join(dirpath, filename)
        print(full_name)
        size = os.path.getsize(full_name)
        datestamp = get_date(full_name)
        try:
            # Only the part of the name before the first space and first dot
            # is used, so e.g. "IMG_1 (2).jpg" and "IMG_1.jpg" share a key.
            basename = filename.split(' ')[0].split('.')[0] + '_' + datestamp + '_' + str(size)
        except UnicodeDecodeError:
            # The name could not be combined into a key; skip this file.
            continue
        existing_size.setdefault(basename, []).append(full_name)

# Keep only the keys that were produced by more than one file.
existing_size = {k: v for k, v in existing_size.items() if len(v) > 1}

# Dry run by default: duplicates are only printed. Set to True to delete.
remove = False
for file_list in existing_size.values():
    # The last path found for each key is kept; all earlier ones are
    # candidates for removal.
    rem_list = file_list[:-1]
    print(rem_list, file_list[-1])
    if remove:
        for filename in rem_list:
            os.remove(filename)
# To delete the duplicates for real, set `remove = True` above and run again.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment