Skip to content

Instantly share code, notes, and snippets.

@curiousercreative
Created May 8, 2016 17:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save curiousercreative/6645963d0ef5dda8494425faaaae5c05 to your computer and use it in GitHub Desktop.
Save curiousercreative/6645963d0ef5dda8494425faaaae5c05 to your computer and use it in GitHub Desktop.
Script that loads an iTunes Media Library.xml, finds duplicate tracks (same name and artist, different track ID), removes the copy with the larger file size, and makes the remaining track inherit the older (lower) track ID.
import json
import os
import re
from xml.etree import ElementTree
def get_track_attr(track, attr_name):
    """Return the text of the value stored under ``attr_name`` in a track.

    ``track`` is an element whose children are expected to come in
    ``<key>`` name / value pairs (plist ``<dict>`` layout). Only ``<key>``
    children are compared with ``attr_name``, so a *value* whose text happens
    to equal an attribute name (e.g. a song titled "Size") is never mistaken
    for a key.

    Returns the ``.text`` of the element that follows the first matching key,
    or ``None`` when the key is absent or is the last child and therefore has
    no value element.
    """
    children = list(track)
    # Pair each child with its successor; a trailing key simply has no pair,
    # which avoids indexing past the end of the list.
    for key_node, value_node in zip(children, children[1:]):
        if key_node.tag == 'key' and key_node.text == attr_name:
            return value_node.text
    return None
def set_track_attr(track, attr_name, value):
    """Set the text of the value stored under ``attr_name`` in a track.

    ``track`` is an element whose children are expected to come in
    ``<key>`` name / value pairs (plist ``<dict>`` layout). For every
    ``<key>`` child whose text equals ``attr_name``, the text of the element
    that follows it is replaced with ``value``.

    Only ``<key>`` children are considered, so a value whose text equals
    ``attr_name`` cannot cause the following ``<key>`` to be overwritten.
    A matching key that is the last child (no value element) is ignored.
    Nothing is returned; ``track`` is modified in place.
    """
    children = list(track)
    # Pair each child with its successor so a trailing key never indexes
    # past the end of the list.
    for key_node, value_node in zip(children, children[1:]):
        if key_node.tag == 'key' and key_node.text == attr_name:
            value_node.text = value
def are_tracks_duplicate(track, track2):
    """Tell whether two track summaries describe duplicate tracks.

    Both arguments are mappings read through ``.get`` for the keys
    ``'track_id'``, ``'name'`` and ``'artist'``. Two tracks count as
    duplicates when their ids differ while name and artist are both equal.
    """
    # Same id means it is the very same library entry, not a duplicate.
    if track.get('track_id') == track2.get('track_id'):
        return False
    if track.get('name') != track2.get('name'):
        return False
    return track.get('artist') == track2.get('artist')
# Parse the library XML file into an ElementTree object.
# TODO: make this a real path to your iTunes Media Library.xml
# The file is opened read-only in binary mode: 'rw' is not a valid open()
# mode (Python 3 rejects it with ValueError), and handing the parser raw
# bytes lets it honour the encoding declared in the XML itself.
with open('somepath/itunes_library.xml', 'rb') as f:
    tree = ElementTree.parse(f)
root = tree.getroot()
# The first <dict> inside the top-level <dict> is used as the track table;
# the rest of the script treats its children as <key>/<dict> entries.
tracks_container = root.find('dict').find('dict')
# Work on a plain list copy so the analysis can drop entries without
# touching the XML tree.
tracks = list(tracks_container)
# init our lists so that we can separate modifying our tree from analyzing
nodes_to_remove = []  # track ids (strings) of the tracks to delete
nodes_to_update = []  # {'track_id': id of kept track, 'new_track_id': id to inherit}


def _track_summary(node):
    """Extract the fields used for duplicate detection from a track <dict>."""
    return {
        'track_id': get_track_attr(node, 'Track ID'),
        'filesize': get_track_attr(node, 'Size'),
        'artist': get_track_attr(node, 'Artist'),
        'name': get_track_attr(node, 'Name'),
    }


def _to_int(text):
    """Return ``int(text)``, or None when text is missing or not numeric."""
    try:
        return int(text)
    except (TypeError, ValueError):
        return None


# Summaries are extracted once and kept index-aligned with `tracks`
# (None for entries that are not track dictionaries), instead of re-reading
# every track's attributes on each pass of the inner loop.
summaries = [
    _track_summary(node) if node.tag == 'dict' else None for node in tracks
]

# Iterate over the track list in reverse because entries are removed as we go.
# At most one entry is removed per outer pass, so `t` always stays in range.
for t in reversed(range(len(tracks))):
    track = summaries[t]
    # only look at the dictionaries
    if track is None:
        continue
    print(track.get('track_id'))
    # make sure this track hasn't already been flagged for removal
    if track.get('track_id') in nodes_to_remove:
        continue
    print('this track has not already been flagged for removal')
    # look for another entry with a different id but the same name and artist
    for t2, track2 in enumerate(summaries):
        # Each candidate is compared using its OWN artist; reading the artist
        # of `track` for both sides would make the artist check always pass.
        if track2 is None or not are_tracks_duplicate(track, track2):
            continue
        size, size2 = _to_int(track.get('filesize')), _to_int(track2.get('filesize'))
        id1, id2 = _to_int(track.get('track_id')), _to_int(track2.get('track_id'))
        if None in (size, size2, id1, id2):
            # Without usable sizes/ids we cannot decide which copy to keep,
            # and deleting is destructive, so leave this pair alone.
            continue
        print('found duplicate!')
        if size <= size2:
            # `track` is not larger: keep it and drop `track2`
            nodes_to_remove.append(track2.get('track_id'))
            # track id inheritance: the kept track takes over the lower id
            if id1 > id2:
                nodes_to_update.append({'track_id': track.get('track_id'), 'new_track_id': track2.get('track_id')})
            doomed = t2
        else:
            # `track` is larger: drop it and keep `track2`
            nodes_to_remove.append(track.get('track_id'))
            # track id inheritance: the kept track takes over the lower id
            if id1 < id2:
                nodes_to_update.append({'track_id': track2.get('track_id'), 'new_track_id': track.get('track_id')})
            doomed = t
        # Remove from both lists so they stay aligned. Mutating `summaries`
        # here is safe only because the inner loop stops right after.
        del tracks[doomed]
        del summaries[doomed]
        print(len(nodes_to_remove))
        break
# Optional checkpoint (disabled): uncomment the two "save" blocks to write the
# analysis results to JSON, or the "load" block to reuse previously saved
# results instead of the lists computed above.
#
# # save our list of track ids to delete
# with open('somepath/tracks_to_delete.json', 'wb') as outfile:
# json.dump(nodes_to_remove, outfile)
#
# # save our list of track dictionaries for swapping
# with open('somepath/tracks_to_update.json', 'wb') as outfile:
# json.dump(nodes_to_update, outfile)
# # load our track ids
# with open('somepath/tracks_to_delete.json', 'r') as outfile:
# nodes_to_remove = json.load(outfile)
#
# with open('somepath/tracks_to_update.json', 'r') as outfile:
# nodes_to_update = json.load(outfile)
# delete the old tracks
# `tracks` was modified during the analysis, so start fresh from the XML tree
tracks = list(tracks_container)

# Imported here so this section is self-contained on both interpreter lines.
try:  # Python 3
    from urllib.parse import unquote, urlsplit
except ImportError:  # Python 2
    from urllib import unquote
    from urlparse import urlsplit

# Set membership is constant-time; the list would be rescanned for every node.
ids_to_remove = set(nodes_to_remove)

# Walk backwards so deleting by index never shifts an entry still to be visited.
for t in reversed(range(len(tracks))):
    track = tracks[t]
    if track.tag == 'key':
        # remove the track id key object
        if track.text in ids_to_remove:
            print('removing track key')
            print(track.text)
            # remove from xml
            del tracks[t]
    elif track.tag == 'dict':
        # remove the dict
        if get_track_attr(track, 'Track ID') not in ids_to_remove:
            continue
        print('removing from xml tree')
        # remove from xml
        del tracks[t]
        location = get_track_attr(track, 'Location')
        if not location:
            # Entry carries no Location value, so there is no file to delete.
            print('track has no location, no file to delete')
            continue
        parts = urlsplit(location)
        if parts.scheme != 'file':
            # Never map a non-file URL (e.g. http) onto a local path.
            print('location is not a file URL, skipping file deletion:')
            print(location)
            continue
        # Take only the URL path (dropping any host such as "localhost") and
        # decode every percent-escape, not just %20.
        filepath = unquote(parts.path)
        print('deleting file:')
        print(filepath)
        try:
            os.remove(filepath)
        except (OSError, IOError) as e:
            # Best effort: keep going, but report what could not be deleted.
            print('could not delete file: %s' % e)
# update the nodes
# Each entry asks for the track currently identified by 'track_id' to be
# renumbered to 'new_track_id'. Entries are applied in order.
for n in nodes_to_update:
    print('updating nodes')
    track_id = n.get('track_id')
    new_track_id = n.get('new_track_id')
    for track in tracks:
        if track.tag == 'key' and track.text == track_id:
            # the <key> holding the id, listed alongside the track <dict>
            print('found key match')
            track.text = new_track_id
        elif track.tag == 'dict' and get_track_attr(track, 'Track ID') == track_id:
            print('found dict match')
            set_track_attr(track, 'Track ID', new_track_id)
            # the track itself has been renumbered; stop scanning for this entry
            break
# update our xml tree with the list
# Slice assignment replaces the children only. Element.clear() would also
# reset the container's own text, tail and attributes.
tracks_container[:] = tracks
# save the changes we've made
# TODO: fill in a file path for the modified library file
# NOTE(review): ElementTree does not appear to carry the input's <!DOCTYPE ...>
# line through to the output; confirm the written file is still accepted
# before replacing the real library with it.
# tree.write('somepath/itunes_library-modified.xml')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment