Skip to content

Instantly share code, notes, and snippets.

@flagranterror
Last active August 29, 2015 14:26
Show Gist options
  • Save flagranterror/8f9a9be24d958c98b79a to your computer and use it in GitHub Desktop.
Save flagranterror/8f9a9be24d958c98b79a to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
from __future__ import print_function
import os
import hashlib
import sys
# I use this to sort through images and movies. Files are hashed in fixed-size blocks to keep RAM utilization low.
# http://stackoverflow.com/questions/3431825/generating-a-md5-checksum-of-a-file
def hashfile(afile, hasher, blocksize=65536):
    """Feed the contents of *afile* to *hasher* in chunks and return the hex digest.

    The file is consumed ``blocksize`` units at a time, so at most one chunk
    is held in memory regardless of the file's size.

    Args:
        afile: An object with a ``read(size)`` method. It should be opened in
            binary mode: hashlib hashers only accept bytes on Python 3.
        hasher: An object exposing ``update(data)`` and ``hexdigest()``,
            e.g. ``hashlib.md5()``. It is updated in place.
        blocksize: Size passed to each ``afile.read`` call.

    Returns:
        The value of ``hasher.hexdigest()`` after all chunks have been fed in.
    """
    while True:
        chunk = afile.read(blocksize)
        # An empty read signals end of file.
        if len(chunk) == 0:
            break
        hasher.update(chunk)
    return hasher.hexdigest()
# Walk the directory tree named by the first command-line argument, group
# files by the MD5 of their contents, then print every path that duplicates
# another file (all but the shortest path in each group) to stdout.
if len(sys.argv) < 2:
    # sys.exit with a string prints it to stderr and exits with status 1.
    sys.exit('usage: ' + sys.argv[0] + ' DIRECTORY')

# Maps hex digest -> list of paths whose contents produced that digest.
results = dict()
for root, dirs, files in os.walk(sys.argv[1]):
    for f in files:
        path = os.path.join(root, f)
        # Hide with 2> /dev/null
        print('Considering ' + f, file=sys.stderr)
        try:
            # Binary mode: hashers need bytes on Python 3, and text mode can
            # alter the data being hashed. The with-block closes the handle
            # even if reading fails.
            with open(path, 'rb') as handle:
                filehash = hashfile(handle, hashlib.md5())
        except (IOError, OSError) as err:
            # One unreadable file (permissions, dangling symlink, ...) should
            # not abort the whole scan.
            print('Skipping ' + path + ': ' + str(err), file=sys.stderr)
            continue
        results.setdefault(filehash, []).append(path)

for d in results:
    if len(results[d]) > 1:
        # The shortest path is treated as the original and is not reported;
        # sorted() is stable, so ties keep the order in which they were found.
        duplicates = sorted(results[d], key=len)[1:]
        # hide with 2> /dev/null
        print(d + " -", file=sys.stderr)
        for f in duplicates:
            print(f)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment