Skip to content

Instantly share code, notes, and snippets.

@hletrd
Created December 23, 2018 19:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hletrd/4ce2aac566e7c0b0cadd2650c0ca2aa0 to your computer and use it in GitHub Desktop.
Save hletrd/4ce2aac566e7c0b0cadd2650c0ca2aa0 to your computer and use it in GitHub Desktop.
iTunes music library duplicate removal & cleanup
# Folder whose direct entries are listed and examined as music files.
path = './MUSIC'
# Root folder under which the 'failed', 'flac' and 'selected' result folders are created.
path_out = './OUTPUT'
import os
from mutagen import easyid3, mp4, flac, mp3
from shutil import copy2
# Snapshot the names of everything directly inside the source folder.
files = os.listdir(path)

# Create the three result folders:
#   failed   - files whose tags could not be read
#   flac     - every FLAC file
#   selected - the de-duplicated MP3/M4A files
# makedirs(exist_ok=True) also creates path_out itself when it is missing and
# tolerates re-runs, while real failures (e.g. permissions) surface here
# instead of being silently swallowed and resurfacing later as a copy error.
for bucket in ('failed', 'flac', 'selected'):
    os.makedirs(os.path.join(path_out, bucket), exist_ok=True)
# Imported here so this section brings the exception type it needs into scope.
from mutagen import MutagenError

# Build one metadata record (title, artist, filename, length in seconds) per
# MP3/M4A file. FLAC files are copied to the 'flac' folder and are not added
# to the records. Files whose tags cannot be read go to the 'failed' folder.
result = []
cnt = 0
for i in files:
    extension = i.split('.')[-1].lower()
    source_file = os.path.join(path, i)
    single = {}
    try:
        if extension == 'mp3':
            music = easyid3.EasyID3(source_file)
            music2 = mp3.MP3(source_file)
            single['title'] = music['title'][0]
            single['artist'] = music['artist'][0]
            single['filename'] = i
            single['length'] = music2.info.length
            result.append(single)
        elif extension == 'm4a':
            music = mp4.MP4(source_file)
            # '.....' is the placeholder stored when an M4A tag is absent.
            single['title'] = music['©nam'][0] if '©nam' in music else '.....'
            single['artist'] = music['©ART'][0] if '©ART' in music else '.....'
            single['filename'] = i
            single['length'] = music.info.length
            result.append(single)
        elif extension == 'flac':
            music = flac.FLAC(source_file)
            copy2(source_file, os.path.join(path_out, 'flac', i))
            # The record is not kept for FLAC; these lookups only serve to
            # raise KeyError when the title or artist tag is missing.
            single['title'] = music['title'][0]
            single['artist'] = music['artist'][0]
    except (KeyError, MutagenError):
        # KeyError: a required tag is missing. MutagenError: mutagen could
        # not load or parse the file (e.g. an MP3 without an ID3 header).
        # Either way, set the file aside instead of aborting the whole run.
        copy2(source_file, os.path.join(path_out, 'failed', i))
    cnt += 1
    # Progress indicator: report every 100 processed entries.
    if cnt % 100 == 0:
        print(cnt)
# flag[k] stays 1 while result[k] is a keeper and becomes 0 once another
# record of the same track has been preferred over it.
flag = [1] * len(result)
# Tag values that mean "no real tag": the empty string and the '.....'
# placeholder stored for M4A files lacking a title/artist. Records carrying
# them must never match each other, otherwise unrelated untagged files of
# similar length would be treated as duplicates and dropped.
untagged = ('', '.....')
for i in range(len(result) - 1):
    for j in range(i + 1, len(result)):
        try:
            first = result[i]
            second = result[j]
            # Same track: identical title and artist, and durations that
            # differ by less than 1.5 seconds.
            same_track = (
                first['title'] == second['title']
                and first['artist'] == second['artist']
                and first['title'] not in untagged
                and first['artist'] not in untagged
                and abs(first['length'] - second['length']) < 1.5
            )
            if same_track:
                size_i = os.stat(os.path.join(path, first['filename'])).st_size
                size_j = os.stat(os.path.join(path, second['filename'])).st_size
                print(first['title'], 'Duplicated!', first['length'], second['length'])
                # Keep the larger file; on a tie the later one wins.
                if size_i > size_j:
                    flag[j] = 0
                else:
                    flag[i] = 0
        except Exception:
            # Best effort: report the offending pair and carry on. Catching
            # Exception (not a bare except) keeps Ctrl-C working during this
            # quadratic comparison.
            print(result[i], result[j])
# Copy every record still flagged as a keeper into the 'selected' folder
# under path_out, then announce completion.
selected_dir = os.path.join(path_out, 'selected')
for keep, record in zip(flag, result):
    if not keep:
        continue
    name = record['filename']
    copy2(os.path.join(path, name), os.path.join(selected_dir, name))
print('complete')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment