Skip to content

Instantly share code, notes, and snippets.

@iolloyd
Created June 25, 2012 06:36
Show Gist options
  • Save iolloyd/2987007 to your computer and use it in GitHub Desktop.
Save iolloyd/2987007 to your computer and use it in GitHub Desktop.
Find duplicate mp3 files in a folder
import os
import re
import hashlib
def tagAll(dirname):
    """Split the files directly inside *dirname* into originals and duplicates.

    Every non-directory entry of *dirname* is fingerprinted with ``tag``.
    The first file seen with a given fingerprint is an original; every
    later file with the same fingerprint is reported as a duplicate of it.
    Sub-directories are skipped (the scan is not recursive).

    Returns a dict with two keys:
      'originals' -- list of paths, one per distinct fingerprint
      'dupes'     -- list of {'file': path, 'matches': original_path} dicts

    The two lists are disjoint, so their lengths add up to the number of
    files scanned.
    """
    tagged = {}      # fingerprint -> path of the first file that produced it
    originals = []
    dupes = []
    # Sorted so that which copy counts as the "original" does not depend on
    # the arbitrary order os.listdir() happens to return.
    for name in sorted(os.listdir(dirname)):
        path = dirname + '/' + name
        if os.path.isdir(path):
            continue
        code = tag(path)
        if code in tagged:
            dupes.append({'file': path, 'matches': tagged[code]})
        else:
            tagged[code] = path
            originals.append(path)
    return {'originals': originals, 'dupes': dupes}
def tag(filename, offset=16 * 8, length=16 ** 2):
    """Return a fingerprint for *filename* based on a small sample of its bytes.

    Skips the first *offset* bytes (128 by default), reads the next
    *length* bytes (256 by default), base64-encodes that sample and returns
    the SHA-224 hex digest of the encoded text.

    Only the sampled window is compared, so two files that differ solely
    outside it get the same fingerprint, and files shorter than *offset*
    all hash the empty sample.
    """
    # Local import keeps this function self-contained; codecs.encode with
    # the 'base64' codec yields the same bytes as Python 2's
    # str.encode('base64') and also works on Python 3.
    import codecs

    # Binary mode: the file is raw bytes, not text, so no newline
    # translation or decoding may be applied to the sample.
    with open(filename, 'rb') as f:
        f.seek(offset)
        raw = f.read(length)
    return hashlib.sha224(codecs.encode(raw, 'base64')).hexdigest()
"""
Change the following for your mp3 directory
"""
# Directory scanned for duplicate mp3 files; edit this path before running.
mp3s = '/Volumes/MyMp3s'
processed = tagAll(mp3s)
a, b = len(processed['originals']), len(processed['dupes'])
# Single-argument print(...) produces identical output on Python 2 and 3,
# whereas the bare print statement is a syntax error on Python 3.
print('originals -> %i' % a)
print('duplicates -> %i' % b)
print('total files -> %i' % (a + b))
# One line per duplicate: the duplicate's path and the file it matches.
for dup in processed['dupes']:
    print(dup)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment