cyanidium/music_tag_corrector.py

## music_tag_corrector.py
#!/usr/bin/env python

"""
Corrects the tags of all music files in a given directory or listed in a given
file by fingerprinting the song and querying last.fm for the best match to that
fingerprint. If a directory and a file are given, uses the file. Doesn't correct
any tags to do with albums.
"""

#Requires apt-get install python-mutagen liblastfm-fingerprint1
#Requires pylast from http://code.google.com/p/pylast/
#Requires pylastfp from http://pypi.python.org/pypi/pylastfp/

#Last updated 16 Mar 2013
#cyanidium

import os
import os.path
import lastfp
import pylast
import mutagen
import time
from optparse import OptionParser
#For error handling
import httplib

#Default values
#last.fm API credentials (placeholder values)
__API_KEY__ = "goes here"
__API_SECRET__ = "goes here"
#Rank ratio between neighbouring matches beyond which the rest are dropped
__RELATIVE_CERTAINTY__ = 5
#Number of attempts for each network request
__RETRIES__ = 3
#Seconds to wait after a failed network request
__SLEEP__ = 5
#Program used to play a file during manual selection
__MEDIA_PLAYER__ = "/usr/bin/totem"

#Command line options; the module docstring is reused as the description
parser = OptionParser(description=__doc__.strip())
parser.add_option(
    "-w", "--write-changes",
    dest="writable",
    action="store_true",
    default=False,
    help="write changes to music files",
)
parser.add_option(
    "-d", "--directory",
    dest="music_dir",
    metavar="DIR",
    action="store",
    type="string",
    default=False,
    help="directory with music to correct",
)
parser.add_option(
    "-f", "--file",
    dest="files_list",
    metavar="FILE",
    action="store",
    type="string",
    default=False,
    help="file containing list of music files each on a new line",
)
#Parsed once when the module is loaded; the result is read as a global
(options, args) = parser.parse_args()

def main():
    """
    Validate the command line input and start the correction run.

    A file list (-f) takes precedence over a directory (-d), exactly as in
    find_all_music_files(), so the input that will actually be used is the
    one that gets checked. If that input is missing or unusable, the usage
    help is printed followed by an error message and nothing is processed.
    """

    if options.files_list:
        usable = os.access(options.files_list, os.R_OK)
        problem = "Error: cannot read file list %s" % (options.files_list)
    elif options.music_dir:
        usable = os.path.isdir(options.music_dir)
        problem = "Error: %s is not a directory" % (options.music_dir)
    else:
        usable = False
        problem = "Error: no music directory given"

    #Start the correcting
    if usable:
        run_script()
    else:
        parser.print_help()
        print(problem)

def run_script():
    """
    Run the automatic and manual tag selectors in turn and provide output to
    monitor the progress.

    Every file is first passed to tag_corrector(); files it could not decide
    on are then presented, in path order, to manually_choose_tags(). Problems
    reported by either step are printed in sorted order at the end, followed
    by the time the automatic phase took (the manual phase is not timed).
    """

    file_paths = sorted(find_all_music_files())
    manual_fix_list = dict()
    errors = []

    #Automatic tagging
    start_time = time.time()
    total_files = len(file_paths)
    for completed, file_path in enumerate(file_paths, 1):
        problem = tag_corrector(manual_fix_list, file_path)
        if problem:
            errors.append(problem)
        #Estimate the completion time based on how long it has taken so far
        fraction_done = float(completed) / total_files
        estimated_end = start_time + (time.time() - start_time) / fraction_done
        print("ETC=%s (%05d/%05d) %s" % (
            time.strftime("%H:%M:%S", time.localtime(estimated_end)),
            completed,
            total_files,
            file_path))
    end_time = time.time()

    #Manual tagging, in path order so the prompts follow the progress output
    remaining = len(manual_fix_list)
    for file_path in sorted(manual_fix_list):
        print("Remaining:  %d" % remaining)
        problem = manually_choose_tags(file_path, manual_fix_list[file_path])
        if problem:
            errors.append(problem)
        remaining -= 1

    #Warn of any problems encountered
    for error in sorted(errors):
        print(error)

    #Nice information on runtime
    elapsed = int(end_time - start_time)
    chours, leftover = divmod(elapsed, 3600)
    cmins, csecs = divmod(leftover, 60)
    print("Completed in %d hours, %d minutes and %d seconds" % (chours, cmins, csecs))

def find_all_music_files():
    """
    Creates a list of all music files either from a file or from a directory.
    File format is with each music file path on a new line, ideal for copying
    from the output of this script if there are errors.

    The file list (options.files_list) wins when both inputs are given.
    Entries of the list that are not readable (including blank lines) are
    dropped; for a directory every file found by os.walk() is returned.

    Returns a tuple of path strings, empty when neither input is set.
    """

    #Collected in a list: growing a tuple with += copies it on every step
    file_paths = []

    #Either get a list of files from the file given, or use the directory given
    if options.files_list:
        with open(options.files_list, 'r') as f:
            #Each file on a new line, strip the new line character
            for line in f:
                file_path = line.strip()
                if os.access(file_path, os.R_OK):
                    file_paths.append(file_path)
    elif options.music_dir:
        for root, _dirs, files in os.walk(options.music_dir):
            file_paths.extend(os.path.join(root, music_file)
                              for music_file in files)

    return tuple(file_paths)

def tag_corrector(manual_fix_list, file_path):
    """
    Main function.

    Fingerprints one file, asks last.fm for matches and sets the artist and
    title from the match when there is exactly one credible candidate. The
    tags already on the file are completely ignored.

    manual_fix_list -- dict updated in place; when several candidates remain
                       they are stored under file_path for manual selection
    file_path       -- path of the file to process

    Returns False when there is nothing to report (tags set, file deferred
    for manual selection, or file not recognised by mutagen), otherwise a
    "<path>: <problem>" string describing why the file was not handled.
    """

    #Handle all music types. easy is needed to nicely handle id3 tags in mp3s
    orig_tags = mutagen.File(file_path, easy=True)

    #Ignore non-music files, but still proceed with untagged music files
    if orig_tags is None:
        return False #Don't really care about these files

    #####Get the matches from the file#####
    #Retry in case of network issues
    for attempt in range(__RETRIES__):
        try:
            xml = lastfp.match_file(__API_KEY__, file_path)
            fingerprint_matches = lastfp.parse_metadata(xml)
            break
        #File may be corrupt or just too short to fingerprint
        except lastfp.ExtractionError:
            return "%s: no fingerprint" % (file_path)
        except lastfp.NotFoundError:
            return "%s: no results found" % (file_path)
        except (lastfp.CommunicationError,
                httplib.BadStatusLine,
                ):
            print("Connection lost on attempt %d, retrying" % (attempt + 1))
            time.sleep(__SLEEP__)
    else:
        return "%s: connection problem" % (file_path)
    #An empty result is not always signalled by NotFoundError
    if not fingerprint_matches:
        return "%s: no results found" % (file_path)

    #####Get the tags from the matches#####
    #Sort by rank, best first (in place; sorted() would discard the result)
    fingerprint_matches.sort(key=lambda match: match['rank'], reverse=True)
    #Certain matches
    if len(fingerprint_matches) == 1 or fingerprint_matches[0]['rank'] == 1:
        lastfm_tags = fingerprint_matches[0]
    #Uncertain matches
    else:
        #Trim the insignificant matches from the end
        for i in range(1, len(fingerprint_matches)):
            better = fingerprint_matches[i-1]['rank']
            worse = fingerprint_matches[i]['rank']
            #A zero rank cannot be divided by and is insignificant anyway
            if worse == 0 or float(better) / worse > __RELATIVE_CERTAINTY__:
                del fingerprint_matches[i:]
                break
        #Trim the non-existent matches too. The survivors go into a new list:
        #removing from the list being iterated would skip the next element
        existing_matches = []
        for match in fingerprint_matches:
            #Retry in case of network issues
            for attempt in range(__RETRIES__):
                try:
                    #Setup last.fm connection
                    network = pylast.get_lastfm_network(api_key=__API_KEY__,
                                                        api_secret=__API_SECRET__)
                    result_count = network.search_for_track(
                        match['artist'],
                        match['title']).get_total_result_count()
                    break
                except httplib.BadStatusLine:
                    print("Connection lost on attempt %d, retrying shortly" % (attempt + 1))
                    time.sleep(__SLEEP__)
            else:
                return "%s: connection problem" % (file_path)
            #NOTE(review): compares against the integer 0 as before; confirm
            #that get_total_result_count() returns a number and not a string
            if result_count != 0:
                existing_matches.append(match)
        fingerprint_matches = existing_matches

        #Nothing left to choose from, not even manually
        if not fingerprint_matches:
            return "%s: no results found" % (file_path)
        #Can't do it automatically, so defer for later human input
        if len(fingerprint_matches) > 1:
            manual_fix_list[file_path] = fingerprint_matches
            return False
        #Narrowed the field to only one choice
        lastfm_tags = fingerprint_matches[0]

    #####Write tags to files#####
    orig_tags['artist'] = [lastfm_tags['artist']]
    orig_tags['title'] = [lastfm_tags['title']]
    if options.writable:
        orig_tags.save()

    return False

def manually_choose_tags(file_path, fingerprint_matches):
    """
    Provide UI for manual selection of ambiguous music.

    Lists every candidate with its index and reads a choice: an index selects
    that match, "p" plays the file with the configured media player and asks
    again, "s" skips the file and any other input asks again.

    file_path           -- path of the music file being tagged
    fingerprint_matches -- list of candidate dicts providing 'rank',
                           'track_mbid', 'artist' and 'title'

    Returns "<path>: skipped" when the user skips the file, otherwise False
    once the chosen artist and title have been set (and saved when
    --write-changes was given).
    """

    #Imported here so the function does not rely on a module-level import
    import subprocess

    lastfm_tags = None
    while lastfm_tags is None:
        #Print choices
        print("Choose which is the best match for the file:")
        print(file_path)
        for i, match in enumerate(fingerprint_matches):
            print("\t%s \tRank= %s \tTrackMBID= %s \t%s - %s" % (
                i,
                match['rank'],
                match['track_mbid'],
                match['artist'],
                match['title']))
        print("\tp \tPlay the file with Totem")
        print("\ts \tSkip the file")
        choice = raw_input("Your choice? ")

        #Parse choice
        if choice.isdigit() and int(choice) < len(fingerprint_matches):
            lastfm_tags = fingerprint_matches[int(choice)]
        elif choice in ("p", "P"):
            #Pass the path as its own argument instead of building a shell
            #command, so quotes or other shell characters in the file name
            #cannot break or alter the command
            try:
                subprocess.call([__MEDIA_PLAYER__, file_path])
            except OSError as error:
                print("Could not start %s: %s" % (__MEDIA_PLAYER__, error))
        elif choice in ("s", "S"):
            return "%s: skipped" % (file_path)

    #####Write tags to files#####
    orig_tags = mutagen.File(file_path, easy=True)
    orig_tags['artist'] = [lastfm_tags['artist']]
    orig_tags['title'] = [lastfm_tags['title']]
    if options.writable:
        orig_tags.save()

    #No errors
    return False

#Only call main() when this file is executed as a script
if __name__ == '__main__':
    main()
	#!/usr/bin/env python

	"""
	Corrects the tags of all music files in a given directory or listed in a given
	file by fingerprinting the song and querying last.fm for the best match to that
	fingerprint. If a directory and a file are given, uses the file. Doesn't correct
	any tags to do with albums.
	"""

	#Requires apt-get install python-mutagen liblastfm-fingerprint1
	#Requires pylast from http://code.google.com/p/pylast/
	#Requires pylastfp from http://pypi.python.org/pypi/pylastfp/

	#Last updated 16 Mar 2013
	#cyanidium

	import os
	import os.path
	import lastfp
	import pylast
	import mutagen
	import time
	from optparse import OptionParser
	#For error handling
	import httplib

	#Default values
	#last.fm API credentials (placeholder values)
	__API_KEY__ = "goes here"
	__API_SECRET__ = "goes here"
	#Rank ratio between neighbouring matches beyond which the rest are dropped
	__RELATIVE_CERTAINTY__ = 5
	#Number of attempts for each network request
	__RETRIES__ = 3
	#Seconds to wait after a failed network request
	__SLEEP__ = 5
	#Program used to play a file during manual selection
	__MEDIA_PLAYER__ = "/usr/bin/totem"

	#Command line options; the module docstring is reused as the description
	parser = OptionParser(description=__doc__.strip())
	parser.add_option(
	    "-w", "--write-changes",
	    dest="writable",
	    action="store_true",
	    default=False,
	    help="write changes to music files",
	)
	parser.add_option(
	    "-d", "--directory",
	    dest="music_dir",
	    metavar="DIR",
	    action="store",
	    type="string",
	    default=False,
	    help="directory with music to correct",
	)
	parser.add_option(
	    "-f", "--file",
	    dest="files_list",
	    metavar="FILE",
	    action="store",
	    type="string",
	    default=False,
	    help="file containing list of music files each on a new line",
	)
	#Parsed once when the module is loaded; the result is read as a global
	(options, args) = parser.parse_args()

	def main():
	    """
	    Validate the command line input and start the correction run.

	    A file list (-f) takes precedence over a directory (-d), exactly as in
	    find_all_music_files(), so the input that will actually be used is the
	    one that gets checked. If that input is missing or unusable, the usage
	    help is printed followed by an error message and nothing is processed.
	    """

	    if options.files_list:
	        usable = os.access(options.files_list, os.R_OK)
	        problem = "Error: cannot read file list %s" % (options.files_list)
	    elif options.music_dir:
	        usable = os.path.isdir(options.music_dir)
	        problem = "Error: %s is not a directory" % (options.music_dir)
	    else:
	        usable = False
	        problem = "Error: no music directory given"

	    #Start the correcting
	    if usable:
	        run_script()
	    else:
	        parser.print_help()
	        print(problem)

	def run_script():
	    """
	    Run the automatic and manual tag selectors in turn and provide output to
	    monitor the progress.

	    Every file is first passed to tag_corrector(); files it could not decide
	    on are then presented, in path order, to manually_choose_tags(). Problems
	    reported by either step are printed in sorted order at the end, followed
	    by the time the automatic phase took (the manual phase is not timed).
	    """

	    file_paths = sorted(find_all_music_files())
	    manual_fix_list = dict()
	    errors = []

	    #Automatic tagging
	    start_time = time.time()
	    total_files = len(file_paths)
	    for completed, file_path in enumerate(file_paths, 1):
	        problem = tag_corrector(manual_fix_list, file_path)
	        if problem:
	            errors.append(problem)
	        #Estimate the completion time based on how long it has taken so far
	        fraction_done = float(completed) / total_files
	        estimated_end = start_time + (time.time() - start_time) / fraction_done
	        print("ETC=%s (%05d/%05d) %s" % (
	            time.strftime("%H:%M:%S", time.localtime(estimated_end)),
	            completed,
	            total_files,
	            file_path))
	    end_time = time.time()

	    #Manual tagging, in path order so the prompts follow the progress output
	    remaining = len(manual_fix_list)
	    for file_path in sorted(manual_fix_list):
	        print("Remaining:  %d" % remaining)
	        problem = manually_choose_tags(file_path, manual_fix_list[file_path])
	        if problem:
	            errors.append(problem)
	        remaining -= 1

	    #Warn of any problems encountered
	    for error in sorted(errors):
	        print(error)

	    #Nice information on runtime
	    elapsed = int(end_time - start_time)
	    chours, leftover = divmod(elapsed, 3600)
	    cmins, csecs = divmod(leftover, 60)
	    print("Completed in %d hours, %d minutes and %d seconds" % (chours, cmins, csecs))

	def find_all_music_files():
	    """
	    Creates a list of all music files either from a file or from a directory.
	    File format is with each music file path on a new line, ideal for copying
	    from the output of this script if there are errors.

	    The file list (options.files_list) wins when both inputs are given.
	    Entries of the list that are not readable (including blank lines) are
	    dropped; for a directory every file found by os.walk() is returned.

	    Returns a tuple of path strings, empty when neither input is set.
	    """

	    #Collected in a list: growing a tuple with += copies it on every step
	    file_paths = []

	    #Either get a list of files from the file given, or use the directory given
	    if options.files_list:
	        with open(options.files_list, 'r') as f:
	            #Each file on a new line, strip the new line character
	            for line in f:
	                file_path = line.strip()
	                if os.access(file_path, os.R_OK):
	                    file_paths.append(file_path)
	    elif options.music_dir:
	        for root, _dirs, files in os.walk(options.music_dir):
	            file_paths.extend(os.path.join(root, music_file)
	                              for music_file in files)

	    return tuple(file_paths)

	def tag_corrector(manual_fix_list, file_path):
	    """
	    Main function.

	    Fingerprints one file, asks last.fm for matches and sets the artist and
	    title from the match when there is exactly one credible candidate. The
	    tags already on the file are completely ignored.

	    manual_fix_list -- dict updated in place; when several candidates remain
	                       they are stored under file_path for manual selection
	    file_path       -- path of the file to process

	    Returns False when there is nothing to report (tags set, file deferred
	    for manual selection, or file not recognised by mutagen), otherwise a
	    "<path>: <problem>" string describing why the file was not handled.
	    """

	    #Handle all music types. easy is needed to nicely handle id3 tags in mp3s
	    orig_tags = mutagen.File(file_path, easy=True)

	    #Ignore non-music files, but still proceed with untagged music files
	    if orig_tags is None:
	        return False #Don't really care about these files

	    #####Get the matches from the file#####
	    #Retry in case of network issues
	    for attempt in range(__RETRIES__):
	        try:
	            xml = lastfp.match_file(__API_KEY__, file_path)
	            fingerprint_matches = lastfp.parse_metadata(xml)
	            break
	        #File may be corrupt or just too short to fingerprint
	        except lastfp.ExtractionError:
	            return "%s: no fingerprint" % (file_path)
	        except lastfp.NotFoundError:
	            return "%s: no results found" % (file_path)
	        except (lastfp.CommunicationError,
	                httplib.BadStatusLine,
	                ):
	            print("Connection lost on attempt %d, retrying" % (attempt + 1))
	            time.sleep(__SLEEP__)
	    else:
	        return "%s: connection problem" % (file_path)
	    #An empty result is not always signalled by NotFoundError
	    if not fingerprint_matches:
	        return "%s: no results found" % (file_path)

	    #####Get the tags from the matches#####
	    #Sort by rank, best first (in place; sorted() would discard the result)
	    fingerprint_matches.sort(key=lambda match: match['rank'], reverse=True)
	    #Certain matches
	    if len(fingerprint_matches) == 1 or fingerprint_matches[0]['rank'] == 1:
	        lastfm_tags = fingerprint_matches[0]
	    #Uncertain matches
	    else:
	        #Trim the insignificant matches from the end
	        for i in range(1, len(fingerprint_matches)):
	            better = fingerprint_matches[i-1]['rank']
	            worse = fingerprint_matches[i]['rank']
	            #A zero rank cannot be divided by and is insignificant anyway
	            if worse == 0 or float(better) / worse > __RELATIVE_CERTAINTY__:
	                del fingerprint_matches[i:]
	                break
	        #Trim the non-existent matches too. The survivors go into a new list:
	        #removing from the list being iterated would skip the next element
	        existing_matches = []
	        for match in fingerprint_matches:
	            #Retry in case of network issues
	            for attempt in range(__RETRIES__):
	                try:
	                    #Setup last.fm connection
	                    network = pylast.get_lastfm_network(api_key=__API_KEY__,
	                                                        api_secret=__API_SECRET__)
	                    result_count = network.search_for_track(
	                        match['artist'],
	                        match['title']).get_total_result_count()
	                    break
	                except httplib.BadStatusLine:
	                    print("Connection lost on attempt %d, retrying shortly" % (attempt + 1))
	                    time.sleep(__SLEEP__)
	            else:
	                return "%s: connection problem" % (file_path)
	            #NOTE(review): compares against the integer 0 as before; confirm
	            #that get_total_result_count() returns a number and not a string
	            if result_count != 0:
	                existing_matches.append(match)
	        fingerprint_matches = existing_matches

	        #Nothing left to choose from, not even manually
	        if not fingerprint_matches:
	            return "%s: no results found" % (file_path)
	        #Can't do it automatically, so defer for later human input
	        if len(fingerprint_matches) > 1:
	            manual_fix_list[file_path] = fingerprint_matches
	            return False
	        #Narrowed the field to only one choice
	        lastfm_tags = fingerprint_matches[0]

	    #####Write tags to files#####
	    orig_tags['artist'] = [lastfm_tags['artist']]
	    orig_tags['title'] = [lastfm_tags['title']]
	    if options.writable:
	        orig_tags.save()

	    return False

	def manually_choose_tags(file_path, fingerprint_matches):
	    """
	    Provide UI for manual selection of ambiguous music.

	    Lists every candidate with its index and reads a choice: an index selects
	    that match, "p" plays the file with the configured media player and asks
	    again, "s" skips the file and any other input asks again.

	    file_path           -- path of the music file being tagged
	    fingerprint_matches -- list of candidate dicts providing 'rank',
	                           'track_mbid', 'artist' and 'title'

	    Returns "<path>: skipped" when the user skips the file, otherwise False
	    once the chosen artist and title have been set (and saved when
	    --write-changes was given).
	    """

	    #Imported here so the function does not rely on a module-level import
	    import subprocess

	    lastfm_tags = None
	    while lastfm_tags is None:
	        #Print choices
	        print("Choose which is the best match for the file:")
	        print(file_path)
	        for i, match in enumerate(fingerprint_matches):
	            print("\t%s \tRank= %s \tTrackMBID= %s \t%s - %s" % (
	                i,
	                match['rank'],
	                match['track_mbid'],
	                match['artist'],
	                match['title']))
	        print("\tp \tPlay the file with Totem")
	        print("\ts \tSkip the file")
	        choice = raw_input("Your choice? ")

	        #Parse choice
	        if choice.isdigit() and int(choice) < len(fingerprint_matches):
	            lastfm_tags = fingerprint_matches[int(choice)]
	        elif choice in ("p", "P"):
	            #Pass the path as its own argument instead of building a shell
	            #command, so quotes or other shell characters in the file name
	            #cannot break or alter the command
	            try:
	                subprocess.call([__MEDIA_PLAYER__, file_path])
	            except OSError as error:
	                print("Could not start %s: %s" % (__MEDIA_PLAYER__, error))
	        elif choice in ("s", "S"):
	            return "%s: skipped" % (file_path)

	    #####Write tags to files#####
	    orig_tags = mutagen.File(file_path, easy=True)
	    orig_tags['artist'] = [lastfm_tags['artist']]
	    orig_tags['title'] = [lastfm_tags['title']]
	    if options.writable:
	        orig_tags.save()

	    #No errors
	    return False

	#Only call main() when this file is executed as a script
	if __name__ == '__main__':
	    main()