public
Created

Small one-off script I wrote to delete duplicate voice memos

  • Download Gist
dedupe.py
Python
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37
import os
import hashlib
import re
 
# A file name is "acceptable" to keep when it starts with eight digits, a
# space and six digits (presumably a date/time stamp -- confirm against the
# recorder's naming scheme).  re.match anchors at the start of the name only.
desiredname = re.compile(r"\d{8} \d{6}")


def _hash_file(path, chunk_size=65536):
    """Return the hexadecimal MD5 digest of the file at *path*.

    The file is opened in binary mode so the digest covers the exact bytes on
    disk (text mode may translate newlines or try to decode the audio data),
    and it is read in chunks so large recordings are never held in memory
    whole.  MD5 is used purely as a content fingerprint, not for security.
    """
    digest = hashlib.md5()
    with open(path, "rb") as handle:
        for chunk in iter(lambda: handle.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


def _regular_files(directory):
    """Return the sorted names of the regular files directly in *directory*.

    Sub-directories are skipped because they cannot be hashed; sorting makes
    the run (and the choice of which duplicate survives) deterministic
    instead of depending on the order os.listdir happens to return.
    """
    return [
        name
        for name in sorted(os.listdir(directory))
        if os.path.isfile(os.path.join(directory, name))
    ]


def _pick_keeper(names):
    """Return the last name in *names* matching ``desiredname``, or ""."""
    keeper = ""
    for name in names:
        if desiredname.match(name):
            keeper = name
    return keeper


def dedupe(directory="."):
    """Delete byte-identical duplicate files in *directory*.

    Every regular file is fingerprinted and a "<hash> <filename>" line is
    printed for it.  For each group of files sharing a fingerprint:

    * if at least one name matches ``desiredname``, the last such name is
      kept and every other file in the group is removed;
    * otherwise nothing is deleted and the group is listed under a
      "no acceptable name was found" notice.

    Args:
        directory: Directory to scan (not recursive).  Defaults to the
            current working directory.

    Returns:
        The list of file names (relative to *directory*) that were removed.

    Raises:
        OSError: If the directory cannot be listed, or a file cannot be read
            or removed.
    """
    files = {}
    for filename in _regular_files(directory):
        fhash = _hash_file(os.path.join(directory, filename))
        files.setdefault(fhash, []).append(filename)
        # Single-argument print(...) yields the same output on Python 2 and 3.
        print("%(hash)s %(filename)s" % {'hash': fhash, 'filename': filename})
    print("")

    removed = []
    for fhash in files:
        names = files[fhash]
        if len(names) < 2:
            continue  # unique content, nothing to do
        print("%s:" % fhash)
        newname = _pick_keeper(names)
        if newname == "":
            # Without a well-named copy we cannot tell which one to keep, so
            # just report the group and leave every file in place.
            print("\tno acceptable name was found")
            for name in names:
                print("\t%s" % name)
            continue
        for name in names:
            if name != newname:
                os.remove(os.path.join(directory, name))
                removed.append(name)
    return removed


if __name__ == "__main__":
    dedupe(".")

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.