Created
June 25, 2012 06:36
-
-
Save iolloyd/2987007 to your computer and use it in GitHub Desktop.
Find duplicate mp3 files in a folder
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import codecs
import hashlib
import os
import re
def tagAll(dirname):
    """Split the files directly inside *dirname* into originals and duplicates.

    Every entry of *dirname* that is not a directory is fingerprinted with
    ``tag``. The first file seen with a given fingerprint is recorded as an
    original; every later file with the same fingerprint is recorded as a
    duplicate of that first file. Sub-directories are not descended into.

    Args:
        dirname: Path of the directory to scan.

    Returns:
        A dict with two keys:

        * ``'originals'`` -- list of paths, one per distinct fingerprint.
        * ``'dupes'`` -- list of ``{'file': path, 'matches': original_path}``
          dicts, one per duplicate file.

        Each scanned file lands in exactly one of the two lists, so
        ``len(originals) + len(dupes)`` equals the number of files scanned.
    """
    tagged = {}  # fingerprint -> path of the first file that produced it
    dupes = []
    originals = []
    # Sorted so that which copy counts as the "original" does not depend on
    # the arbitrary order in which os.listdir() returns entries.
    for name in sorted(os.listdir(dirname)):
        path = os.path.join(dirname, name)
        if os.path.isdir(path):
            continue
        code = tag(path)
        if code in tagged:
            dupes.append({'file': path, 'matches': tagged[code]})
        else:
            tagged[code] = path
            # Only a first-seen file is an original; appending duplicates
            # here as well would make originals + dupes over-count the files.
            originals.append(path)
    return {'originals': originals, 'dupes': dupes}
def tag(filename):
    """Return a fingerprint for *filename* based on a small slice of its bytes.

    The first 128 bytes (``16 * 8``) are skipped, the following 256 bytes
    (``16 ** 2``) are read, base64-encoded, and hashed with SHA-224. Two files
    whose bytes 128..383 are identical therefore get the same fingerprint,
    whatever the rest of their content is.
    NOTE(review): the skipped prefix is presumably meant to step over leading
    metadata in the mp3 -- nothing in this code confirms that.

    Args:
        filename: Path of the file to fingerprint.

    Returns:
        The SHA-224 hex digest (a 56-character string). A file of 128 bytes
        or fewer yields the digest of empty input.

    Raises:
        IOError / OSError: if the file cannot be opened or read.
    """
    # Binary mode: the content is raw audio data, so no newline translation
    # or text decoding may be applied. ``with`` closes the handle on errors.
    with open(filename, 'rb') as handle:
        handle.seek(16 * 8)
        chunk = handle.read(16 ** 2)
    # codecs.encode(..., 'base64') yields the same bytes as the Python 2 only
    # ``str.encode('base64')``, so the digest is unchanged by this rewrite.
    return hashlib.sha224(codecs.encode(chunk, 'base64')).hexdigest()
""" | |
Change the following path to point at your own mp3 directory
""" | |
mp3s = '/Volumes/MyMp3s'

# Scan the directory once, then print the counts followed by one line per
# duplicate entry.
processed = tagAll(mp3s)
a, b = len(processed['originals']), len(processed['dupes'])
# Parenthesised single-argument print produces the same output whether it is
# parsed as the Python 2 statement or called as the Python 3 function.
print('originals -> %i' % a)
print('duplicates -> %i' % b)
print('total files -> %i' % (a + b))
for dup in processed['dupes']:
    print(dup)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment