Skip to content

Instantly share code, notes, and snippets.

@blacktaxi
Created July 9, 2017 17:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save blacktaxi/2aacf407207341273f8d287497c51750 to your computer and use it in GitHub Desktop.
Save blacktaxi/2aacf407207341273f8d287497c51750 to your computer and use it in GitHub Desktop.
Cleaning up duplicates in Dropbox's Camera Uploads
#!/usr/bin/env python
import sys, os, re
from PIL import Image, ImageChops
def find(root, pattern):
    """Recursively yield ``(directory, filename)`` pairs for matching files.

    Walks the tree below *root* with ``os.walk`` and tests every file name
    (not the full path) against *pattern* using ``match`` semantics, i.e. the
    pattern is anchored at the start of the name.

    Args:
        root: Directory where the walk starts.
        pattern: Regular expression, as a string or an already compiled
            pattern object.

    Yields:
        Tuples ``(dirpath, filename)`` where ``dirpath`` is the directory
        containing the file, as reported by ``os.walk``.
    """
    # Resolve the pattern once instead of once per file name.
    matches = re.compile(pattern).match
    # Distinct loop names: the walk must not rebind the ``root`` argument.
    for dirpath, _dirnames, filenames in os.walk(root):
        for filename in filenames:
            if matches(filename):
                yield (dirpath, filename)
def imgequal(im1, im2):
    """Return True when two PIL images have identical pixel content.

    Args:
        im1: First image (a ``PIL.Image.Image``).
        im2: Second image (a ``PIL.Image.Image``).

    Returns:
        ``True`` if both images share the same size and mode and their
        per-pixel difference is entirely zero, ``False`` otherwise.
    """
    # Guard first: images of different dimensions or modes are never equal.
    # NOTE(review): without this, ImageChops.difference appears to compare
    # only the region both images share and to raise on mismatched band
    # layouts -- confirm against the Pillow version in use.
    if im1.size != im2.size or im1.mode != im2.mode:
        return False
    # getbbox() is None when the difference image has no non-zero region.
    return ImageChops.difference(im1, im2).getbbox() is None
def finddupes():
    """Yield paths of ``*-1.jpg`` files that duplicate their ``*.jpg`` sibling.

    Searches the current working directory recursively. For every file named
    ``<name>-1.jpg`` it looks for ``<name>.jpg`` in the same directory; when
    that file exists and ``imgequal`` reports identical pixel content, the
    path of the ``-1`` copy is yielded.

    Pairs that cannot be opened or read are reported on stderr and skipped,
    so they are never listed as duplicates and one bad file does not abort
    the scan.

    Yields:
        Path (relative to the current directory) of each duplicate file.
    """
    pattern = r'^(?P<name>.*?)-1\.jpg$'
    dupe_re = re.compile(pattern)
    for dirpath, filename in find('.', pattern):
        dupe_path = os.path.join(dirpath, filename)
        base_name = dupe_re.match(filename).group('name')
        orig_path = os.path.join(dirpath, base_name + '.jpg')
        if not os.path.isfile(orig_path):
            continue
        try:
            # Context managers close both files promptly; a large tree would
            # otherwise accumulate open file handles.
            with Image.open(dupe_path) as dupe_img, \
                    Image.open(orig_path) as orig_img:
                identical = imgequal(dupe_img, orig_img)
        except (IOError, OSError) as err:
            sys.stderr.write('skipping %s: %s\n' % (dupe_path, err))
            continue
        if identical:
            yield dupe_path
# Script entry point: with no arguments, list duplicates one per line;
# with any argument, show the usage text instead.
if __name__ == '__main__':
    if len(sys.argv) == 1:
        for f in finddupes():
            # Plain '\n' rather than os.linesep: stdout is a text-mode stream
            # and already translates newlines for the platform.
            sys.stdout.write(f + '\n')
            # Flush per path so a downstream consumer (e.g. xargs) receives
            # results while the scan is still running.
            sys.stdout.flush()
    else:
        # Parenthesised single-string print behaves the same on Python 2
        # and Python 3.
        print('''USAGE:
This tool finds duplicate Camera Uploads in your Dropbox. These are characterised by:
1) Having a name that looks like 2020.12.34 56.78.12-1.jpg (see the '-1' at the end)
2) Having exactly the same pixel content
This script searches from the current dir and calculates image difference (with PIL),
then outputs duplicate paths (the '-1' names) for which there is no difference with
the original (the file without '-1', IF it exists).
To delete duplicate images you can use this script like so:
$ ./dbdupes.py | xargs -t -I_ rm _
(I know, xargs is a piece of shit)
''')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment