Skip to content

Instantly share code, notes, and snippets.

@kylecronin
Created May 13, 2011 18:08
Show Gist options
  • Save kylecronin/971014 to your computer and use it in GitHub Desktop.
Save kylecronin/971014 to your computer and use it in GitHub Desktop.
Small one-off script I wrote to delete duplicate voice memos
import os
import hashlib
import re
# Names that start with "YYYYMMDD HHMMSS" (8 digits, a space, 6 digits) are
# the ones worth keeping when several files have identical content.
desiredname = re.compile(r"\d{8} \d{6}")


def _md5_of_file(path, chunk_size=1 << 20):
    """Return the hex MD5 digest of the file at *path*.

    The file is opened in binary mode and read in chunks so that binary
    recordings are hashed byte-for-byte and large files are not loaded into
    memory all at once. The handle is closed even if reading fails.
    """
    digest = hashlib.md5()
    with open(path, 'rb') as handle:
        while True:
            chunk = handle.read(chunk_size)
            if not chunk:
                break
            digest.update(chunk)
    return digest.hexdigest()


def _group_by_hash(directory):
    """Map each content hash to the list of file names in *directory* having it.

    Prints one "<hash> <filename>" line per hashed file. Entries that are not
    regular files (e.g. sub-directories) are skipped because they cannot be
    opened for reading.
    """
    files = {}
    # Sorted so that the outcome does not depend on the OS listing order.
    for filename in sorted(os.listdir(directory)):
        path = os.path.join(directory, filename)
        if not os.path.isfile(path):
            continue
        fhash = _md5_of_file(path)
        files.setdefault(fhash, []).append(filename)
        print("%(hash)s %(filename)s" % {'hash': fhash, 'filename': filename})
    return files


def _remove_duplicates(files, directory):
    """Delete redundant copies for every hash that has more than one file.

    Within a group of identical files, the last name matching ``desiredname``
    is kept and every other file in the group is removed. If no name in the
    group matches, nothing is deleted and the group is only reported.
    """
    for fhash in files:
        names = files[fhash]
        if len(names) < 2:
            continue
        print("%s:" % fhash)
        newname = ""
        for f in names:
            if desiredname.match(f):
                newname = f
        if newname == "":
            # Without a well-named copy we cannot tell which one to keep.
            print("\tno acceptable name was found")
            for f in names:
                print("\t%s" % f)
        else:
            for f in names:
                if newname != f:
                    os.remove(os.path.join(directory, f))


def main(directory="."):
    """Find files with identical content in *directory* and delete the extras.

    Args:
        directory: Folder to scan (not recursive). Defaults to the current
            working directory.

    Returns:
        The dict mapping MD5 hex digest to the list of file names that had
        that digest when the scan ran (including names deleted afterwards).

    Raises:
        OSError: If the directory cannot be listed, or a file cannot be read
            or removed.
    """
    files = _group_by_hash(directory)
    print("")
    _remove_duplicates(files, directory)
    return files


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment