Skip to content

Instantly share code, notes, and snippets.

@jacoor
Last active April 19, 2018 12:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jacoor/08c354bd01fa8d5f347c5cc16026409f to your computer and use it in GitHub Desktop.
Save jacoor/08c354bd01fa8d5f347c5cc16026409f to your computer and use it in GitHub Desktop.
Duplicate photo cleaner: finds JPG files that have a RAW file of the same name in the same directory. Uncomment the `os.remove` line to actually delete those JPGs; otherwise the script only lists them.
# remove duplicate files: JPGs when RAW is present
from pathlib import Path
import os
class ProcessFiles(object):
    """
    Walks over a directory and its subdirectories searching for JPG files
    that have a RAW (PEF) file of the same name in the same directory.
    Prints out a list of those duplicate JPGs, how many there are, and the
    total disk space they occupy.

    All of the work happens in the constructor; the results stay available
    on the instance afterwards:

    raws -- dict mapping each duplicate JPG path to the path of its RAW file
    du   -- total size, in bytes, of the duplicate JPG files
    """

    # Extensions are written without the leading dot. The JPG extension is
    # matched case-insensitively; the RAW file name is built with the
    # extension exactly as spelled here.
    RAW_EXTENSION = "PEF"
    JPG_EXTENSION = "jpg"

    def __init__(self, root=None):
        """
        Scan the tree and print the report.

        root -- directory to scan; defaults to the current working directory.
        """
        self.raws = {}
        self.du = 0
        # Include the dot so that a name which merely ends with the letters
        # "jpg" (e.g. "notajpg") is not mistaken for a JPG file.
        jpg_suffix = "." + self.JPG_EXTENSION.lower()
        top = Path() if root is None else Path(root)
        for path, _subdirs, files in os.walk(top):
            for name in files:
                if not name.lower().endswith(jpg_suffix):
                    continue
                file_path = os.path.join(path, name)
                # Swap the extension: keep everything up to and including
                # the dot, then append the RAW extension.
                raw_file = file_path[:-len(self.JPG_EXTENSION)] + self.RAW_EXTENSION
                # Only a regular file counts as the RAW original; a directory
                # that happens to carry the same name does not.
                if not Path(raw_file).is_file():
                    continue
                self.raws[file_path] = raw_file
                self.du += Path(file_path).stat().st_size
        for jpg in self.raws:
            # Deletion is deliberately disabled; uncomment the next line to
            # actually remove the duplicate JPGs instead of only listing them.
            # os.remove(jpg)
            print(jpg)
        print(len(self.raws))
        print(self.humansize(self.du))

    def humansize(self, nbytes):
        """
        Return nbytes as a human-readable string such as "1.5 KB".

        The value is divided by 1024 until it drops below 1024 or the largest
        suffix (PB) is reached, then formatted with at most two decimals;
        trailing zeros and a dangling decimal point are stripped.
        """
        suffixes = ['B', 'KB', 'MB', 'GB', 'TB', 'PB']
        i = 0
        while nbytes >= 1024 and i < len(suffixes) - 1:
            nbytes /= 1024.
            i += 1
        f = ('%.2f' % nbytes).rstrip('0').rstrip('.')
        return '%s %s' % (f, suffixes[i])
# Run the scan only when executed as a script, not when imported.
if "__main__" == __name__:
    ProcessFiles()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment