Created
October 19, 2014 17:59
-
-
Save cyanidium/24466102ef29f51005c5 to your computer and use it in GitHub Desktop.
Tries to use last.fm music fingerprints to rename a music video file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
"""
Renames music video files in a given directory or listed in a given file by
fingerprinting the song and querying last.fm for the best match to that
fingerprint. If a directory and a file are given, uses the file.
"""
#Requires apt-get install liblastfm-fingerprint0
#Requires pylast from http://code.google.com/p/pylast/
#Requires pylastfp from http://pypi.python.org/pypi/pylastfp/
#Last updated 23 Sep 2012
#cyanidium
import os
import os.path
import re
import subprocess
import time
from optparse import OptionParser
#For error handling
import httplib

import lastfp
import pylast
#Default values
#last.fm credentials passed to lastfp and pylast; replace the placeholders
__API_KEY__ = "goes here"
__API_SECRET__ = "goes here"
#Rank ratio between neighbouring matches above which the lower-ranked
#matches are treated as insignificant
__RELATIVE_CERTAINTY__ = 5
#Number of attempts made for each network request
__RETRIES__ = 3
#Seconds to wait between attempts
__SLEEP__ = 5
#Program launched when the user asks to play a file during manual selection
__MEDIA_PLAYER__ = "/usr/bin/totem"
#Parse options
#The module docstring doubles as the description shown by --help
parser = OptionParser(description=__doc__.strip())
#NOTE(review): options.writable is not read anywhere in the visible code, so
#files are renamed whether or not -w is given -- confirm the intended behaviour
parser.add_option("-w", "--write-changes",
                  dest="writable",
                  action="store_true",
                  default=False,
                  help="write changes to music files")
#"store" and "string" are optparse's default action and type, so the two
#path options only need a destination, a default and their help text
parser.add_option("-d", "--directory",
                  dest="music_dir",
                  default=False,
                  metavar="DIR",
                  help="directory with music to correct")
parser.add_option("-f", "--file",
                  dest="files_list",
                  default=False,
                  metavar="FILE",
                  help="file containing list of music files each on a new line")
parser.add_option("-m", "--mad",
                  dest="use_mad",
                  action="store_true",
                  default=False,
                  help="use MAD for decoding rather than the default GST (only works for MP3)")
#Parsed once at module level; the functions below read the result as a global
options, args = parser.parse_args()
def main():
    """
    Validate the command-line input and start the run.

    The script runs when -d names an existing directory or -f names a
    readable file; otherwise the usage text and an error line are printed.
    """
    has_directory = options.music_dir and os.path.isdir(options.music_dir)
    has_file_list = (options.files_list and
                     os.access(options.files_list, os.R_OK))
    #Nothing usable was given: explain and stop
    if not (has_directory or has_file_list):
        parser.print_help()
        print("Error: no music directory given")
        return
    #Start the correcting
    run_script()
def run_script():
    """
    Run the automatic and manual tag selectors in turn and provide output to
    monitor the progress.

    Every file is first passed to tag_corrector(); files it could not
    settle are collected in a dict and then offered to the user, in sorted
    path order, through manually_choose_tags(). The error strings returned
    by both steps are printed, sorted, at the end, followed by the time the
    automatic pass took (the manual pass is not included in that figure).
    """
    file_paths = sorted(find_all_music_files())
    manual_fix_list = dict()
    errors = []
    #Automatic tagging
    start_time = time.time()
    total_files = len(file_paths)
    for completed, file_path in enumerate(file_paths, 1):
        errors.append(tag_corrector(manual_fix_list, file_path))
        #Estimate the completion time based on how long it has taken so far
        fraction_done = float(completed) / total_files
        finish_estimate = (start_time +
                           (time.time() - start_time) / fraction_done)
        print("ETC=%s (%05d/%05d) %s" % (
            time.strftime("%H:%M:%S", time.localtime(finish_estimate)),
            completed,
            total_files,
            file_path))
    end_time = time.time()
    #Manual tagging
    #Iterate over the sorted keys: a bare sorted(manual_fix_list) call only
    #builds a throwaway list and leaves the dict's iteration order untouched
    remaining = len(manual_fix_list)
    for file_path in sorted(manual_fix_list):
        #Two spaces on purpose: this is the exact output of the former
        #statement  print "Remaining: ", remaining
        print("Remaining:  %s" % remaining)
        errors.append(manually_choose_tags(file_path,
                                           manual_fix_list[file_path]))
        remaining -= 1
    #Warn of any problems encountered; successful files reported False
    for error in sorted(error for error in errors if error):
        print(error)
    #Nice information on runtime
    elapsed = int(end_time - start_time)
    chours, leftover = divmod(elapsed, 3600)
    cmins, csecs = divmod(leftover, 60)
    print("Completed in %d hours, %d minutes and %d seconds" % (chours, cmins, csecs))
def find_all_music_files():
    """
    Collect the paths of the files to process, from a list file or a directory.

    When options.files_list is set, each line of that file is taken as one
    path (surrounding whitespace stripped) and kept only if it is readable;
    this format is ideal for copying from the output of this script if there
    are errors. Otherwise every file found below options.music_dir is used.

    Returns a tuple of paths, empty when neither option is set.
    """
    #Collected in a list; growing a tuple with += copies it on every addition
    file_paths = []
    #Either get a list of files from the file given, or use the directory given
    if options.files_list:
        with open(options.files_list, 'r') as listing:
            #Each file on a new line, strip the new line character
            for line in listing:
                candidate = line.strip()
                if os.access(candidate, os.R_OK):
                    file_paths.append(candidate)
    elif options.music_dir:
        for root, _dirs, files in os.walk(options.music_dir):
            file_paths.extend(os.path.join(root, music_file)
                              for music_file in files)
    #Callers have always received a tuple, so keep handing one back
    return tuple(file_paths)
def rename_file(file_path, artist, title):
    """
    Rename file_path to "<artist> - <title><ext>" inside its own directory.

    The original extension is kept but lowercased. Characters that are
    awkward in file names are substituted: ":", "?", "!" and "$" become
    "_", "&" becomes "and", and path separators become "_" so that a name
    such as "AC/DC" cannot point into a subdirectory that does not exist.

    Returns False when the file was renamed or already carries the target
    name, otherwise a string describing why the rename did not happen.
    """
    folder = os.path.dirname(file_path)
    extension = os.path.splitext(file_path)[1].lower()
    new_filename = "%s - %s%s" % (artist, title, extension)
    #Python 2 unicode text is turned into UTF-8 bytes as before; a plain str
    #is left alone, because encoding one that holds non-ASCII bytes raises
    #UnicodeDecodeError
    if not isinstance(new_filename, str):
        new_filename = new_filename.encode('utf-8')
    substitutions = [(":", "_"), ("?", "_"), ("&", "and"), ("!", "_"),
                     ("$", "_"), ("/", "_"), (os.sep, "_")]
    for unwanted, substitute in substitutions:
        new_filename = new_filename.replace(unwanted, substitute)
    new_file_path = os.path.join(folder, new_filename)
    #Check if the filename is the same and skip if so
    if file_path == new_file_path:
        return False
    #Never overwrite another file that already has the target name
    if os.path.exists(new_file_path):
        return "%s: tried to move to %s but it already exists" % (file_path, new_file_path)
    try:
        os.rename(file_path, new_file_path)
    except OSError:
        return "%s: tried to move to %s but there was an error" % (file_path, new_file_path)
    return False
def tag_corrector(manual_fix_list, file_path):
    """
    Fingerprint one file, pick the matching last.fm track and rename the file.

    The current file name is ignored completely. A single match, or a best
    match with rank 1, is used directly. Otherwise clearly weaker matches
    and matches that a last.fm track search cannot find are dropped; if
    exactly one match is left it is used, and if several are left they are
    stored in manual_fix_list[file_path] for the user to choose from later.

    Returns False when the file was renamed, already had the right name or
    was deferred for manual selection; otherwise a string describing the
    problem (no fingerprint, no results, connection problem, or whatever
    rename_file() reported).
    """
    no_results = "%s: no results found, try switching between MAD and GST" % (file_path)
    #####Get the matches from the file#####
    #Retry in case of network issues
    for attempt in range(__RETRIES__):
        try:
            if options.use_mad:
                xml = lastfp.mad_match(__API_KEY__, file_path)
            else:
                xml = lastfp.gst_match(__API_KEY__, file_path)
            fingerprint_matches = lastfp.parse_metadata(xml)
            break
        #File may be corrupt or just too short to fingerprint
        except lastfp.ExtractionError:
            return "%s: no fingerprint" % (file_path)
        except lastfp.NotFoundError:
            return no_results
        except (lastfp.CommunicationError,
                httplib.BadStatusLine):
            print("Connection lost on attempt %d, retrying" % (attempt + 1))
            time.sleep(__SLEEP__)
    else:
        #Every attempt failed with a connection error
        return "%s: connection problem" % (file_path)
    #An empty result can come back without NotFoundError being raised
    if not fingerprint_matches:
        return no_results
    #####Get the tags from the matches#####
    #Sort by rank, best first. The result has to be kept: sorted() returns a
    #new list and leaves its argument in the original order
    fingerprint_matches = sorted(fingerprint_matches,
                                 key=lambda match: match['rank'],
                                 reverse=True)
    #Certain matches
    if len(fingerprint_matches) == 1 or fingerprint_matches[0]['rank'] == 1:
        lastfm_tags = fingerprint_matches[0]
    #Uncertain matches
    else:
        #Trim the insignificant matches from the end. Written as a product so
        #that a rank of 0 cannot cause a division by zero
        #NOTE(review): assumes ranks are non-negative floats -- confirm
        for index in range(1, len(fingerprint_matches)):
            stronger = fingerprint_matches[index - 1]['rank']
            weaker = fingerprint_matches[index]['rank']
            if stronger > __RELATIVE_CERTAINTY__ * weaker:
                del fingerprint_matches[index:]
                break
        #Trim the non-existent matches too. Survivors go into a new list:
        #removing items from the list being looped over skips its neighbours
        existing_matches = []
        for match in fingerprint_matches:
            #Retry in case of network issues
            for attempt in range(__RETRIES__):
                try:
                    #Setup last.fm connection
                    network = pylast.get_lastfm_network(api_key=__API_KEY__,
                                                        api_secret=__API_SECRET__)
                    tracksearch = network.search_for_track(match['artist'],
                                                           match['title']).get_total_result_count()
                    break
                except httplib.BadStatusLine:
                    print("Connection lost on attempt %d, retrying shortly" % (attempt + 1))
                    time.sleep(__SLEEP__)
            else:
                return "%s: connection problem" % (file_path)
            #NOTE(review): the count may arrive as text rather than a number
            #depending on the pylast release -- confirm; int() covers both
            try:
                result_count = int(tracksearch)
            except (TypeError, ValueError):
                #Unknown count: keep the match rather than guess
                result_count = None
            if result_count != 0:
                existing_matches.append(match)
        fingerprint_matches = existing_matches
        #Nothing survived, so there is nothing to offer for manual selection
        if not fingerprint_matches:
            return no_results
        #Check if we narrowed the field to only one choice
        if len(fingerprint_matches) == 1:
            lastfm_tags = fingerprint_matches[0]
        else:
            #Can't do it automatically, so defer for later human input
            manual_fix_list[file_path] = fingerprint_matches
            return False
    #####Rename file#####
    return rename_file(file_path, lastfm_tags['artist'], lastfm_tags['title'])
def manually_choose_tags(file_path, fingerprint_matches): | |
""" | |
Provide UI for manual selection of ambiguous music. | |
""" | |
lastfm_tags = False | |
while not lastfm_tags: | |
#Print choices | |
print "Choose which is the best match for the file:" | |
print file_path | |
for i, match in enumerate(fingerprint_matches): | |
print "\t", i, \ | |
"\tRank=", match['rank'], \ | |
"\tTrackMBID=", match['track_mbid'], \ | |
"\t", match['artist'], '-', match['title'] | |
print "\t", "p", "\t", "Play the file with Totem" | |
print "\t", "s", "\t", "Skip the file" | |
choice = raw_input("Your choice? ") | |
#Parse choice | |
if (choice.isdigit() and | |
int(choice) in range(0,len(fingerprint_matches))): | |
lastfm_tags = fingerprint_matches[int(choice)] | |
elif choice == "p" or choice == "P": | |
os.system("%s \"%s\"" % (__MEDIA_PLAYER__, file_path)) | |
elif choice == "s" or choice == "S": | |
return "%s: skipped" % (file_path) | |
#####Rename file##### | |
outcome = rename_file(file_path, lastfm_tags['artist'], lastfm_tags['title']) | |
return outcome | |
#Call main() only when the file is executed as a script
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment