Skip to content

Instantly share code, notes, and snippets.

@Pretz
Created May 5, 2011 02:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Pretz/956434 to your computer and use it in GitHub Desktop.
Save Pretz/956434 to your computer and use it in GitHub Desktop.
Find all tracks in an iTunes library XML file whose files no longer exist on disk. Run from your iTunes folder.
import re, htmlentitydefs, os.path, urllib
# Python 2/3 compatibility shims, resolved once at import time so that
# unescape() does no per-call lookups or imports.
try:
    from htmlentitydefs import name2codepoint as _name2codepoint  # Python 2
except ImportError:
    from html.entities import name2codepoint as _name2codepoint  # Python 3

try:
    _unichr = unichr  # Python 2: builds a unicode character
except NameError:
    _unichr = chr  # Python 3: chr() already returns text

# Matches numeric references (&#65; / &#x41;) and named entities (&amp;).
_ENTITY_RE = re.compile(r"&#?\w+;")


##
# Removes HTML or XML character references and entities from a text string.
# Thank you Fredrik Lundh
# http://effbot.org/zone/re-sub.htm#unescape-html
#
# @param text The HTML (or XML) source text.
# @return The plain text, as a Unicode string, if necessary.
def unescape(text):
    """Replace character references and named entities in *text*.

    Decimal (``&#65;``) and lowercase-hex (``&#x41;``) character references
    and named entities (``&amp;``) are replaced by the character they stand
    for.  Anything that cannot be resolved -- an unknown entity name, digits
    that do not parse, or a code point outside the valid range -- is left in
    the output exactly as written.
    """
    def fixup(match):
        token = match.group(0)
        if token.startswith("&#"):
            # Numeric character reference: "&#x..;" is hex, "&#..;" decimal.
            if token.startswith("&#x"):
                digits, base = token[3:-1], 16
            else:
                digits, base = token[2:-1], 10
            try:
                return _unichr(int(digits, base))
            except (ValueError, OverflowError):
                # ValueError: unparsable digits or code point out of range.
                # OverflowError: number too large for the character
                # constructor to even accept.
                return token  # leave as is
        # Named entity: look up the name between "&" and ";".
        try:
            return _unichr(_name2codepoint[token[1:-1]])
        except KeyError:
            return token  # leave as is

    return _ENTITY_RE.sub(fixup, text)
# Print the path of every track in the library whose file is missing on disk.
#
# Each track's location is stored on a single line as a file:// URL; the
# capture group is the path portion that follows "file://localhost".
pattern = re.compile(r"<key>Location</key><string>file://localhost(.*)</string>")

# The library is opened by relative name, so the script has to be started
# from the folder that contains it.  The "with" block guarantees the file is
# closed even if decoding or printing fails part-way through.
with open("iTunes Music Library.xml", "r") as library_file:
    for line in library_file:
        match = pattern.search(line)
        if not match:
            continue
        # Undo the percent-encoding, decode the resulting bytes as UTF-8,
        # then resolve XML entities such as "&#38;".
        # (unicode and urllib.unquote are Python 2 APIs.)
        path = unescape(unicode(urllib.unquote(match.group(1)), 'utf-8'))
        if not os.path.exists(path):
            print(path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment