Skip to content

Instantly share code, notes, and snippets.

@cyanidium
Created October 19, 2014 17:44
Show Gist options
  • Save cyanidium/40aa46fd4fd028ae057a to your computer and use it in GitHub Desktop.
Save cyanidium/40aa46fd4fd028ae057a to your computer and use it in GitHub Desktop.
Uses fingerprinting from last.fm to identify a music file and provide tags regardless of file name or current tags.
#!/usr/bin/env python
"""
Corrects the tags of all music files in a given directory or listed in a given
file by fingerprinting the song and querying last.fm for the best match to that
fingerprint. If a directory and a file are given, uses the file. Doesn't correct
any tags to do with albums.
"""
#Requires apt-get install python-mutagen liblastfm-fingerprint1
#Requires pylast from http://code.google.com/p/pylast/
#Requires pylastfp from http://pypi.python.org/pypi/pylastfp/
#Last updated 16 Mar 2013
#cyanidium
import os
import os.path
import lastfp
import pylast
import mutagen
import time
from optparse import OptionParser
#For error handling
import httplib
#Default values

#last.fm credentials; both strings are placeholders to be replaced
__API_SECRET__ = "goes here"
__API_KEY__ = "goes here"

#Network retry policy: number of attempts, and the pause (passed to
#time.sleep) between attempts
__SLEEP__ = 5
__RETRIES__ = 3

#When the rank of one match exceeds the rank of the next match by more than
#this factor, the lower-ranked matches are treated as insignificant
__RELATIVE_CERTAINTY__ = 5

#Program launched when the user asks to hear a file during manual tagging
__MEDIA_PLAYER__ = "/usr/bin/totem"
#Parse options
#The module docstring doubles as the description shown by --help
parser = OptionParser(description=__doc__.strip())

#-w: without this flag no file is ever saved
parser.add_option(
    "-w", "--write-changes",
    dest="writable",
    action="store_true",
    default=False,
    help="write changes to music files",
)

#-d: root of a directory tree to scan for music
parser.add_option(
    "-d", "--directory",
    dest="music_dir",
    action="store",
    type="string",
    default=False,
    metavar="DIR",
    help="directory with music to correct",
)

#-f: text file naming the music files to process
parser.add_option(
    "-f", "--file",
    dest="files_list",
    action="store",
    type="string",
    default=False,
    metavar="FILE",
    help="file containing list of music files each on a new line",
)

#Parsed once, at import time; `options` and `parser` are module globals
options, args = parser.parse_args()
def main():
    """
    Function loader.

    Validates the command-line options and starts the run. A file list, when
    given, takes precedence over a directory and must be readable; otherwise a
    valid directory is required. On invalid input the usage text is printed
    followed by a one-line error.
    """
    if options.files_list:
        #The file list wins over the directory, so an unreadable list is an
        #error even if a valid directory was also supplied (previously this
        #slipped through and failed later when the list was opened)
        if os.access(options.files_list, os.R_OK):
            run_script()
            return
        problem = "Error: cannot read file list %s" % (options.files_list)
    elif options.music_dir and os.path.isdir(options.music_dir):
        run_script()
        return
    else:
        problem = "Error: no music directory given"
    parser.print_help()
    print(problem)
def run_script():
    """
    Run the automatic and manual tag selectors in turn and provide output to
    monitor the progress.

    Every file found by find_all_music_files() is passed to tag_corrector().
    Files that could not be resolved automatically are then offered to the
    user one at a time, in path order, through manually_choose_tags(). All
    problems reported by either phase are printed in sorted order at the end,
    followed by the time taken by the automatic phase.
    """
    file_paths = sorted(find_all_music_files())
    manual_fix_list = dict()
    errors = []

    #Automatic tagging
    start_time = time.time()
    total_files = len(file_paths)
    for completed, file_path in enumerate(file_paths, 1):
        errors.append(tag_corrector(manual_fix_list, file_path))
        #Estimate the completion time based on how long it has taken so far:
        #scale the elapsed time by the fraction of files already done
        elapsed = time.time() - start_time
        estimated_end = start_time + elapsed * total_files / completed
        print("ETC=%s (%05d/%05d) %s" % (
            time.strftime("%H:%M:%S", time.localtime(estimated_end)),
            completed,
            total_files,
            file_path))
    end_time = time.time()

    #Manual tagging, in path order (iterate the sorted keys; sorting the dict
    #and discarding the result has no effect)
    remaining = len(manual_fix_list)
    for file_path in sorted(manual_fix_list):
        print("Remaining:  %d" % (remaining))
        errors.append(manually_choose_tags(file_path,
                                           manual_fix_list[file_path]))
        remaining -= 1

    #Warn of any problems encountered; successful files reported False and
    #are dropped before sorting so only message strings are compared
    for error in sorted(error for error in errors if error):
        print(error)

    #Nice information on runtime (automatic phase only)
    chours, leftover = divmod(int(end_time - start_time), 3600)
    cmins, csecs = divmod(leftover, 60)
    print("Completed in %d hours, %d minutes and %d seconds" % (chours, cmins, csecs))
def find_all_music_files():
    """
    Creates a list of all music files either from a file or from a directory.
    File format is with each music file path on a new line, ideal for copying
    from the output of this script if there are errors.

    If options.files_list is set it is used and options.music_dir is ignored;
    entries that are not readable are silently dropped. Otherwise every file
    below options.music_dir is returned, whatever its type.

    Returns a tuple of paths, in discovery order.
    """
    #Accumulate in a list: extending a tuple copies it on every addition
    file_paths = []
    #Either get a list of files from the file given, or use the directory given
    if options.files_list:
        with open(options.files_list, 'r') as list_file:
            #Each file on a new line, strip the surrounding whitespace
            for line in list_file:
                file_path = line.strip()
                if os.access(file_path, os.R_OK):
                    file_paths.append(file_path)
    elif options.music_dir:
        for root, _dirs, files in os.walk(options.music_dir):
            file_paths.extend(os.path.join(root, music_file)
                              for music_file in files)
    return tuple(file_paths)
def tag_corrector(manual_fix_list, file_path):
    """
    Main function.
    Generates last.fm fingerprints and fixes any incorrect tags.
    Completely ignores current tags.

    manual_fix_list -- dict updated in place: when no single match can be
                       picked automatically, file_path is mapped to the list
                       of remaining candidate matches for later human input.
    file_path       -- path of the file to fingerprint and retag.

    Returns False when there is nothing to report (file retagged, deferred for
    manual tagging, or not recognised by mutagen), otherwise a string of the
    form "<file_path>: <problem>".
    """
    #Handle all music types. easy is needed to nicely handle id3 tags in mp3s
    orig_tags = mutagen.File(file_path, easy=True)
    #Ignore non-music files, but still proceed with untagged music files
    if orig_tags is None:
        return False #Don't really care about these files

    #####Get the matches from the file#####
    #Retry in case of network issues
    for attempt in range(__RETRIES__):
        try:
            xml = lastfp.match_file(__API_KEY__, file_path)
            fingerprint_matches = lastfp.parse_metadata(xml)
            break
        #File may be corrupt or just too short to fingerprint
        except lastfp.ExtractionError:
            return "%s: no fingerprint" % (file_path)
        except lastfp.NotFoundError:
            return "%s: no results found" % (file_path)
        except (lastfp.CommunicationError,
                httplib.BadStatusLine,
                ):
            print("Connection lost on attempt %d, retrying" % (attempt + 1))
            time.sleep(__SLEEP__)
    else:
        #Every attempt failed
        return "%s: connection problem" % (file_path)
    #An empty result does not always arrive as NotFoundError
    if not fingerprint_matches:
        return "%s: no results found" % (file_path)

    #####Get the tags from the matches#####
    #Sort by rank, best first. The result has to be kept: sorted() returns a
    #new list and leaves its argument untouched
    fingerprint_matches = sorted(fingerprint_matches,
                                 key=lambda match: match['rank'],
                                 reverse=True)
    #Certain matches
    if len(fingerprint_matches) == 1 or fingerprint_matches[0]['rank'] == 1:
        lastfm_tags = fingerprint_matches[0]
    #Uncertain matches
    else:
        #Trim the insignificant matches from the end
        for i in range(1, len(fingerprint_matches)):
            better = fingerprint_matches[i - 1]['rank']
            worse = fingerprint_matches[i]['rank']
            #Same test as better / worse > threshold, written as a product so
            #a rank of zero cannot divide by zero (assumes ranks are never
            #negative -- TODO confirm against lastfp)
            if better > worse * __RELATIVE_CERTAINTY__:
                del fingerprint_matches[i:]
                break
        #Trim the non-existent matches too. Survivors are collected in a new
        #list: removing from a list while looping over it skips entries
        confirmed_matches = []
        for match in fingerprint_matches:
            #Retry in case of network issues
            for attempt in range(__RETRIES__):
                try:
                    #Setup last.fm connection
                    network = pylast.get_lastfm_network(api_key=__API_KEY__,
                                                        api_secret=__API_SECRET__)
                    tracksearch = network.search_for_track(
                        match['artist'],
                        match['title']).get_total_result_count()
                    break
                except httplib.BadStatusLine:
                    print("Connection lost on attempt %d, retrying shortly" % (attempt + 1))
                    time.sleep(__SLEEP__)
            else:
                return "%s: connection problem" % (file_path)
            #NOTE(review): pylast appears to hand the count back as text in
            #some versions, which never equals 0 -- confirm. Coerce it, and
            #keep the match when the count cannot be interpreted
            try:
                tracksearch = int(tracksearch)
            except (TypeError, ValueError):
                tracksearch = None
            if tracksearch != 0:
                confirmed_matches.append(match)
        fingerprint_matches = confirmed_matches
        #Nothing survived, so there is nothing to offer for manual choice
        if not fingerprint_matches:
            return "%s: no results found" % (file_path)
        #Check if we narrowed the field to only one choice
        if len(fingerprint_matches) == 1:
            lastfm_tags = fingerprint_matches[0]
        else:
            #Can't do it automatically, so defer for later human input
            manual_fix_list[file_path] = fingerprint_matches
            return False

    #####Write tags to files#####
    orig_tags['artist'] = [lastfm_tags['artist']]
    orig_tags['title'] = [lastfm_tags['title']]
    #Without --write-changes the new tags are only set in memory
    if options.writable:
        orig_tags.save()
    return False
def manually_choose_tags(file_path, fingerprint_matches):
    """
    Provide UI for manual selection of ambiguous music.

    file_path           -- path of the file being tagged.
    fingerprint_matches -- list of candidate matches; each is indexed by
                           'rank', 'track_mbid', 'artist' and 'title'.

    Keeps prompting until the user picks a listed match or skips. Returns
    False once tags have been applied, or "<file_path>: skipped".
    """
    #Local import: only this function needs it
    import subprocess

    lastfm_tags = False
    while not lastfm_tags:
        #Print choices
        print("Choose which is the best match for the file:")
        print(file_path)
        for i, match in enumerate(fingerprint_matches):
            print("\t%s \tRank= %s \tTrackMBID= %s \t%s - %s" % (
                i,
                match['rank'],
                match['track_mbid'],
                match['artist'],
                match['title']))
        print("\tp \tPlay the file with Totem")
        print("\ts \tSkip the file")
        choice = raw_input("Your choice? ").strip()
        #Parse choice
        if choice.isdigit() and int(choice) < len(fingerprint_matches):
            lastfm_tags = fingerprint_matches[int(choice)]
        elif choice in ("p", "P"):
            #Pass the path as its own argument instead of building a shell
            #command, so quotes, $ or backticks in a file name are harmless
            try:
                subprocess.call([__MEDIA_PLAYER__, file_path])
            except OSError as error:
                #Player missing or not executable: report and ask again
                print("Could not start %s: %s" % (__MEDIA_PLAYER__, error))
        elif choice in ("s", "S"):
            return "%s: skipped" % (file_path)
        #Anything else falls through and the menu is shown again

    #####Write tags to files#####
    orig_tags = mutagen.File(file_path, easy=True)
    orig_tags['artist'] = [lastfm_tags['artist']]
    orig_tags['title'] = [lastfm_tags['title']]
    #Without --write-changes the new tags are only set in memory
    if options.writable:
        orig_tags.save()
    #No errors
    return False
#Run the tagger only when executed as a script, not when imported
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment