Skip to content

Instantly share code, notes, and snippets.

@sandeepraju
Created April 27, 2013 13:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sandeepraju/5473139 to your computer and use it in GitHub Desktop.
Save sandeepraju/5473139 to your computer and use it in GitHub Desktop.
checksum
#!/usr/bin/env python
import os
import sys
import hashlib
import pprint
def getSHA1(path, chunk_size=65536):
    """Return the hexadecimal SHA-1 digest of the file at *path*.

    The file is opened in binary mode and fed to the hash in pieces of
    ``chunk_size`` bytes, so the whole file is never held in memory at once.

    Args:
        path: Filesystem path of the file to hash.
        chunk_size: Number of bytes to read per iteration. Must be >= 1.

    Returns:
        The SHA-1 digest as a lowercase hexadecimal string.

    Raises:
        ValueError: If ``chunk_size`` is less than 1.
        IOError/OSError: If the file cannot be opened or read.
    """
    # read(0) returns b"" immediately and read(-1) slurps the whole file, so
    # a non-positive size would either yield a wrong digest or defeat chunking.
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    sha1 = hashlib.sha1()
    # The with-statement closes the file; no explicit close() is needed.
    with open(path, "rb") as f:
        # Two-argument iter() keeps calling read() until it returns b"" (EOF).
        for chunk in iter(lambda: f.read(chunk_size), b""):
            sha1.update(chunk)
    return sha1.hexdigest()
def findDuplicates(path):
    """Find audio files with identical content under *path* and delete the extras.

    Walks the directory tree rooted at *path* with ``os.walk``, hashes every
    file whose extension is .mp3, .wav, .wma or .ogg (case-insensitive) with
    ``getSHA1``, and groups the files by digest. For every digest shared by
    more than one file, the group is printed, the first file encountered is
    kept and all the others are removed with ``os.remove``.

    WARNING: this function deletes files from disk.

    Args:
        path: Root directory to scan.

    Returns:
        None. Progress and results are printed to stdout; files that cannot
        be read or removed are reported on stderr and skipped.
    """
    audio_extensions = ('.mp3', '.wav', '.wma', '.ogg')

    # Maps SHA-1 hex digest -> list of full paths with that content, in the
    # order os.walk yielded them.
    fileHash = {}
    for current, dirs, files in os.walk(path):
        for f in files:
            if os.path.splitext(f)[1].lower() not in audio_extensions:
                continue
            full_path = os.path.join(current, f)
            print("Processing: %s" % full_path)
            try:
                sha = getSHA1(full_path)
            except EnvironmentError as err:
                # EnvironmentError covers IOError and OSError on Python 2 and
                # is an alias of OSError on Python 3. One unreadable file
                # should not abort the whole scan.
                sys.stderr.write("Skipping unreadable file %s: %s\n" % (full_path, err))
                continue
            fileHash.setdefault(sha, []).append(full_path)

    for sha1, paths in fileHash.items():
        if len(paths) < 2:
            continue
        print("-" * 80)
        print("[%s - %s - %s]" % (sha1, os.path.basename(paths[0]), len(paths)))
        for f in paths:
            print("\t%s" % f)
        print("")
        # Keep the first copy; every later path in the group is a duplicate.
        for f in paths[1:]:
            try:
                os.remove(f)
            except EnvironmentError as err:
                sys.stderr.write("\tCould not remove %s: %s\n" % (f, err))
                continue
            print("\tRemoved: %s" % f)
def main():
    """Run findDuplicates on every directory given on the command line.

    Reads the paths from ``sys.argv[1:]`` and processes them in order. When
    no path is supplied, an error and a usage line are written to stderr.

    Returns:
        0 after all paths have been processed, 2 when no path was given.
    """
    paths = sys.argv[1:]
    if not paths:
        # Diagnostics go to stderr so they do not mix with the normal report.
        sys.stderr.write("Error: Too few arguments!\n")
        sys.stderr.write("Usage: $python %s path/to/directory\n" % sys.argv[0])
        return 2

    for path in paths:
        findDuplicates(path)
    return 0
if __name__ == "__main__":
    # Hand main()'s return value to the shell as the exit status.
    # sys.exit(None) exits with status 0, so a main() that returns nothing
    # still reports success.
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment