Skip to content

Instantly share code, notes, and snippets.

@carlos-aguayo
Last active August 29, 2015 14:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save carlos-aguayo/7b532d2f679694d1572f to your computer and use it in GitHub Desktop.
Save carlos-aguayo/7b532d2f679694d1572f to your computer and use it in GitHub Desktop.
Python - Remove duplicated files
import os, hashlib
from os import listdir
from os.path import isfile, join
# Directory scanned for duplicates when the script is run directly.
mypath = "."


def hashfile(path, blocksize=65536):
    """Return the hexadecimal MD5 digest of the file at *path*.

    The file is opened in binary mode and read *blocksize* bytes at a time,
    so the whole file is never held in memory at once.
    """
    hasher = hashlib.md5()
    # ``with`` guarantees the handle is closed even if a read raises.
    with open(path, 'rb') as afile:
        # read() returns an empty bytes object at end of file, which is the
        # sentinel that stops the iterator.
        for buf in iter(lambda: afile.read(blocksize), b''):
            hasher.update(buf)
    return hasher.hexdigest()


def remove_duplicates(directory=mypath):
    """Delete files in *directory* whose content duplicates an earlier file.

    Only regular files directly inside *directory* are considered
    (sub-directories are not descended into). Names are processed in sorted
    order, so of several identical files the first one by name is kept and
    the rest are removed. Two files count as duplicates when their MD5
    digests are equal.

    Returns the list of file names that were removed, in processing order.
    """
    removed = []
    hashes = set()
    for f in sorted(listdir(directory)):
        # Build the full path so hashing/removal work for any directory,
        # not just the current working directory.
        full_path = join(directory, f)
        if not isfile(full_path):
            continue
        # Hash in chunks via hashfile() rather than reading the whole file.
        h = hashfile(full_path)
        if h in hashes:
            # Parenthesised single-argument form prints identically on
            # Python 2 and Python 3.
            print('File ' + f + ' is duplicated')
            os.remove(full_path)
            removed.append(f)
        else:
            hashes.add(h)
    return removed


# hashfile() must be defined before this point: the scan calls it.
# WARNING: running this script permanently deletes duplicate files.
if __name__ == "__main__":
    remove_duplicates(mypath)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment