Created
October 19, 2014 17:44
-
-
Save cyanidium/40aa46fd4fd028ae057a to your computer and use it in GitHub Desktop.
Uses fingerprinting from last.fm to identify a music file and provide tags regardless of file name or current tags.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
"""
Corrects the tags of all music files in a given directory or listed in a given
file by fingerprinting the song and querying last.fm for the best match to that
fingerprint. If a directory and a file are given, uses the file. Doesn't correct
any tags to do with albums.
"""
#Requires apt-get install python-mutagen liblastfm-fingerprint1
#Requires pylast from http://code.google.com/p/pylast/
#Requires pylastfp from http://pypi.python.org/pypi/pylastfp/
#Last updated 16 Mar 2013
#cyanidium
#httplib is needed for error handling
import httplib
import os
import os.path
import subprocess
import time
from optparse import OptionParser

import lastfp
import mutagen
import pylast
#Default values
#last.fm credentials: the key is passed to lastfp and pylast, the secret to pylast
__API_KEY__ = "goes here"
__API_SECRET__ = "goes here"
#Rank ratio between neighbouring matches above which the tail is discarded
__RELATIVE_CERTAINTY__ = 5
#Number of attempts for each network request
__RETRIES__ = 3
#Seconds to wait between attempts
__SLEEP__ = 5
#Program launched to play a file during manual selection
__MEDIA_PLAYER__ = "/usr/bin/totem"

#Parse options (happens at module load; the results are read as globals below)
parser = OptionParser(description=__doc__.strip())
parser.add_option(
    "-w", "--write-changes",
    dest="writable",
    action="store_true",
    default=False,
    help="write changes to music files",
)
parser.add_option(
    "-d", "--directory",
    dest="music_dir",
    action="store",
    type="string",
    default=False,
    metavar="DIR",
    help="directory with music to correct",
)
parser.add_option(
    "-f", "--file",
    dest="files_list",
    action="store",
    type="string",
    default=False,
    metavar="FILE",
    help="file containing list of music files each on a new line",
)
options, args = parser.parse_args()
def main():
    """
    Function loader.

    Starts the correction run when --directory names an existing directory or
    --file names a readable file; otherwise prints the option help followed by
    an error line.
    """
    have_directory = options.music_dir and os.path.isdir(options.music_dir)
    have_files_list = (options.files_list and
                       os.access(options.files_list, os.R_OK))
    if not (have_directory or have_files_list):
        parser.print_help()
        print("Error: no music directory given")
        return
    #Start the correcting
    run_script()
def run_script():
    """
    Run the automatic and manual tag selectors in turn and provide output to
    monitor the progress.

    Every file is first passed to tag_corrector, which either tags it, reports
    an error string, or defers it into manual_fix_list. The deferred files are
    then offered to the user one by one, in sorted path order. Finally all
    collected error strings are printed (sorted) along with the time the
    automatic pass took; time spent in manual selection is not included.
    """
    file_paths = sorted(find_all_music_files())
    manual_fix_list = dict()
    errors = []
    #Automatic tagging
    start_time = time.time()
    total_files = len(file_paths)
    for completed, file_path in enumerate(file_paths, 1):
        errors.append(tag_corrector(manual_fix_list, file_path))
        #Estimate the completion time based on how long it has taken so far
        fraction_done = float(completed) / total_files
        estimated_end = start_time + (time.time() - start_time) / fraction_done
        print("ETC=%s (%05d/%05d) %s" % (
            time.strftime("%H:%M:%S", time.localtime(estimated_end)),
            completed,
            total_files,
            file_path))
    end_time = time.time()
    #Manual tagging, in sorted path order (a bare sorted() call on the dict
    #would discard its result and leave the iteration order arbitrary)
    remaining = len(manual_fix_list)
    for file_path in sorted(manual_fix_list):
        #Two spaces reproduce the output of: print "Remaining: ", remaining
        print("Remaining:  %s" % remaining)
        errors.append(manually_choose_tags(file_path,
                                           manual_fix_list[file_path]))
        remaining -= 1
    #Warn of any problems encountered; False entries mean "no error"
    for error in sorted(error for error in errors if error):
        print(error)
    #Nice information on runtime
    chours, leftover = divmod(int(end_time - start_time), 3600)
    cmins, csecs = divmod(leftover, 60)
    print("Completed in %d hours, %d minutes and %d seconds" % (chours, cmins, csecs))
def find_all_music_files():
    """
    Creates a tuple of all music file paths either from a file or from a
    directory. If both options are given, the file wins.

    File format is with each music file path on a new line, ideal for copying
    from the output of this script if there are errors. Lines are stripped of
    surrounding whitespace, and entries that are not readable (including blank
    lines) are silently skipped. In directory mode every file below the
    directory is returned; non-music files are not filtered out here.

    Returns an empty tuple when neither option is set.
    """
    #Collect into a list: growing a tuple with += copies it on every addition
    file_paths = []
    #Either get a list of files from the file given, or use the directory given
    if options.files_list:
        with open(options.files_list, 'r') as f:
            #Each file on a new line, strip the new line character
            for line in f:
                file_path = line.strip()
                if os.access(file_path, os.R_OK):
                    file_paths.append(file_path)
    elif options.music_dir:
        for root, dirs, files in os.walk(options.music_dir):
            file_paths.extend(os.path.join(root, music_file)
                              for music_file in files)
    #Callers have always received a tuple, so keep that type
    return tuple(file_paths)
def _lastfm_track_count(match):
    """
    Ask last.fm how many search results exist for the match's artist and title.

    Retries up to __RETRIES__ times on httplib.BadStatusLine, sleeping
    __SLEEP__ seconds between attempts. Returns the count reported by pylast,
    or None if every attempt failed.
    """
    for attempt in range(__RETRIES__):
        try:
            #Setup last.fm connection
            network = pylast.get_lastfm_network(api_key=__API_KEY__,
                                                api_secret=__API_SECRET__)
            search = network.search_for_track(match['artist'], match['title'])
            return search.get_total_result_count()
        except httplib.BadStatusLine:
            print("Connection lost on attempt %d, retrying shortly" % (attempt + 1))
            time.sleep(__SLEEP__)
    return None


def tag_corrector(manual_fix_list, file_path):
    """
    Main function.
    Generates last.fm fingerprints and fixes any incorrect tags.
    Completely ignores current tags.

    manual_fix_list -- dict that is updated in place: when several plausible
                       matches remain, file_path is mapped to that list of
                       matches so a human can choose later.
    file_path       -- path of the file to fingerprint and tag.

    Returns False when there is nothing to report (file tagged, file is not
    music, or file deferred to manual selection), otherwise a string of the
    form "<path>: <problem>". Tags are only saved to disk when the
    --write-changes option is set.
    """
    #Handle all music types. easy is needed to nicely handle id3 tags in mp3s
    orig_tags = mutagen.File(file_path, easy=True)
    #Ignore non-music files, but still proceed with untagged music files
    if orig_tags is None:
        return False #Don't really care about these files

    #####Get the matches from the file#####
    #Retry in case of network issues
    for i in range(__RETRIES__):
        try:
            xml = lastfp.match_file(__API_KEY__, file_path)
            fingerprint_matches = lastfp.parse_metadata(xml)
            break
        #File may be corrupt or just too short to fingerprint
        except lastfp.ExtractionError:
            return "%s: no fingerprint" % (file_path)
        except lastfp.NotFoundError:
            return "%s: no results found" % (file_path)
        except (lastfp.CommunicationError, httplib.BadStatusLine):
            print("Connection lost on attempt %d, retrying" % (i + 1))
            time.sleep(__SLEEP__)
    else:
        #Every attempt failed
        return "%s: connection problem" % (file_path)
    #For some reason, this is needed and is not caught by the try/except clauses above
    if not fingerprint_matches:
        return "%s: no results found" % (file_path)

    #####Get the tags from the matches#####
    #Sort by rank, best first. The result must be kept: sorted() returns a new
    #list and leaves its argument untouched.
    fingerprint_matches = sorted(fingerprint_matches,
                                 key=lambda m: m['rank'],
                                 reverse=True)
    #Certain matches
    if len(fingerprint_matches) == 1 or fingerprint_matches[0]['rank'] == 1:
        lastfm_tags = fingerprint_matches[0]
    #Uncertain matches
    else:
        #Trim the insignificant matches from the end: cut at the first place
        #where a match outranks its successor by more than the threshold.
        #Multiplying instead of dividing avoids ZeroDivisionError on a zero
        #rank and integer truncation.
        #NOTE(review): assumes 'rank' values are non-negative numbers - confirm
        #against lastfp.parse_metadata.
        for i in range(1, len(fingerprint_matches)):
            better = fingerprint_matches[i - 1]['rank']
            worse = fingerprint_matches[i]['rank']
            if better > __RELATIVE_CERTAINTY__ * worse:
                del fingerprint_matches[i:]
                break
        #Trim the non-existent matches too. Build a new list rather than
        #removing from the list being iterated, which would skip entries.
        confirmed_matches = []
        for match in fingerprint_matches:
            tracksearch = _lastfm_track_count(match)
            if tracksearch is None:
                return "%s: connection problem" % (file_path)
            #NOTE(review): the count may arrive as a string in some pylast
            #versions (unconfirmed); comparing the string form treats both
            #0 and "0" as "no such track".
            if str(tracksearch) != "0":
                confirmed_matches.append(match)
        fingerprint_matches = confirmed_matches
        #Check if we narrowed the field to only one choice
        if len(fingerprint_matches) == 1:
            lastfm_tags = fingerprint_matches[0]
        elif not fingerprint_matches:
            #Nothing survived; a manual prompt with no choices would be useless
            return "%s: no results found" % (file_path)
        else:
            #Can't do it automatically, so defer for later human input
            manual_fix_list[file_path] = fingerprint_matches
            return False

    #####Write tags to files#####
    orig_tags['artist'] = [lastfm_tags['artist']]
    orig_tags['title'] = [lastfm_tags['title']]
    if options.writable:
        orig_tags.save()
    return False
def manually_choose_tags(file_path, fingerprint_matches):
    """
    Provide UI for manual selection of ambiguous music.

    file_path           -- path of the file being tagged.
    fingerprint_matches -- list of candidate matches; each is indexed by
                           'rank', 'track_mbid', 'artist' and 'title'.

    Repeats the prompt until the user picks a listed number or skips. "p"
    plays the file with __MEDIA_PLAYER__ and prompts again. Returns
    "<path>: skipped" if the user skips, otherwise False once the chosen
    artist and title have been applied (and saved when --write-changes is
    set).
    """
    lastfm_tags = False
    while not lastfm_tags:
        #Print choices. The spacing in these format strings reproduces what
        #the Python 2 print statement emitted for the comma-separated items.
        print("Choose which is the best match for the file:")
        print(file_path)
        for i, match in enumerate(fingerprint_matches):
            print("\t%s \tRank= %s \tTrackMBID= %s \t%s - %s" % (
                i,
                match['rank'],
                match['track_mbid'],
                match['artist'],
                match['title']))
        print("\tp \tPlay the file with Totem")
        print("\ts \tSkip the file")
        choice = raw_input("Your choice? ").strip()
        #Parse choice
        if choice.isdigit() and int(choice) < len(fingerprint_matches):
            lastfm_tags = fingerprint_matches[int(choice)]
        elif choice in ("p", "P"):
            #Pass the path as a separate argument instead of through a shell,
            #so quotes, "$" or backticks in file names cannot break or inject
            #into the command line.
            try:
                subprocess.call([__MEDIA_PLAYER__, file_path])
            except OSError as err:
                print("Could not start %s: %s" % (__MEDIA_PLAYER__, err))
        elif choice in ("s", "S"):
            return "%s: skipped" % (file_path)

    #####Write tags to files#####
    orig_tags = mutagen.File(file_path, easy=True)
    orig_tags['artist'] = [lastfm_tags['artist']]
    orig_tags['title'] = [lastfm_tags['title']]
    if options.writable:
        orig_tags.save()
    #No errors
    return False
#Run the corrector only when executed as a script, not when imported
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment