Skip to content

Instantly share code, notes, and snippets.

@cyanidium
Created October 19, 2014 17:59
Show Gist options
  • Save cyanidium/24466102ef29f51005c5 to your computer and use it in GitHub Desktop.
Save cyanidium/24466102ef29f51005c5 to your computer and use it in GitHub Desktop.
Tries to use last.fm music fingerprints to rename a music video file.
#!/usr/bin/env python
"""
Renames music video files in a given directory or listed in a given file by
fingerprinting the song and querying last.fm for the best match to that
fingerprint. If a directory and a file are given, uses the file.
"""
#Requires apt-get install liblastfm-fingerprint0
#Requires pylast from http://code.google.com/p/pylast/
#Requires pylastfp from http://pypi.python.org/pypi/pylastfp/
#Last updated 23 Sep 2012
#cyanidium
import os
import os.path
import lastfp
import pylast
import time
import re
from optparse import OptionParser
#For error handling
import httplib
#Default values
# last.fm API credentials handed to lastfp and pylast; the values below are
# placeholders.
__API_KEY__ = "goes here"
__API_SECRET__ = "goes here"
# Ratio between the ranks of two neighbouring matches above which the
# lower-ranked match, and everything after it, is dropped as insignificant.
__RELATIVE_CERTAINTY__ = 5
# Number of attempts made for each last.fm request before giving up.
__RETRIES__ = 3
# Seconds to sleep between two attempts.
__SLEEP__ = 5
# Program started when the user asks to play a file during manual selection.
__MEDIA_PLAYER__ = "/usr/bin/totem"
#Command-line interface: the module docstring doubles as the description
parser = OptionParser(description=__doc__.strip())
#NOTE(review): nothing in the visible code reads options.writable - confirm
#whether renaming was meant to be conditional on it.
parser.add_option(
    "-w", "--write-changes",
    dest="writable",
    action="store_true",
    default=False,
    help="write changes to music files",
)
parser.add_option(
    "-d", "--directory",
    dest="music_dir",
    metavar="DIR",
    action="store",
    type="string",
    default=False,
    help="directory with music to correct",
)
parser.add_option(
    "-f", "--file",
    dest="files_list",
    metavar="FILE",
    action="store",
    type="string",
    default=False,
    help="file containing list of music files each on a new line",
)
parser.add_option(
    "-m", "--mad",
    dest="use_mad",
    action="store_true",
    default=False,
    help=("use MAD for decoding rather than the default GST "
          "(only works for MP3)"),
)
#Parsed once at module level; the functions below read the global `options`
options, args = parser.parse_args()
def main():
    """
    Check the command-line input and start the run.

    Processing starts when the given music directory exists or, failing
    that, when the given list file is readable. Otherwise the usage text and
    an error message are printed.
    """
    if options.music_dir and os.path.isdir(options.music_dir):
        usable_input = True
    else:
        usable_input = bool(options.files_list and
                            os.access(options.files_list, os.R_OK))
    if not usable_input:
        parser.print_help()
        print("Error: no music directory given")
        return
    #Start the correcting
    run_script()
def run_script():
    """
    Run the automatic and manual tag selectors in turn and provide output to
    monitor the progress.

    Every file found by find_all_music_files() goes through tag_corrector();
    files it could not decide on are collected and then presented one by one,
    in path order, to manually_choose_tags(). All error messages returned
    along the way are printed, sorted, at the end. The reported runtime
    covers the automatic phase only.
    """
    file_paths = sorted(find_all_music_files())
    manual_fix_list = {}
    errors = []

    #Automatic tagging
    start_time = time.time()
    total_files = len(file_paths)
    for completed, file_path in enumerate(file_paths, 1):
        errors.append(tag_corrector(manual_fix_list, file_path))
        #Estimate the completion time based on how long it has taken so far
        fraction_done = float(completed) / total_files
        elapsed = time.time() - start_time
        estimated_end = start_time + elapsed / fraction_done
        print("ETC=%s (%05d/%05d) %s" % (
            time.strftime("%H:%M:%S", time.localtime(estimated_end)),
            completed,
            total_files,
            file_path))
    end_time = time.time()

    #Manual tagging; iterate over the sorted keys so the order is stable
    #(a bare sorted() call on the dict would throw its result away)
    remaining = len(manual_fix_list)
    for file_path in sorted(manual_fix_list):
        #Two spaces keep the output identical to the two-argument print
        #statement this replaces
        print("Remaining:  %d" % remaining)
        errors.append(manually_choose_tags(file_path,
                                           manual_fix_list[file_path]))
        remaining -= 1

    #Warn of any problems encountered. Successful files contribute False,
    #which is dropped before sorting so only strings are compared.
    for error in sorted(error for error in errors if error):
        print(error)

    #Nice information on runtime
    chours, leftover = divmod(int(end_time - start_time), 3600)
    cmins, csecs = divmod(leftover, 60)
    print("Completed in %d hours, %d minutes and %d seconds"
          % (chours, cmins, csecs))
def find_all_music_files():
    """
    Collect the paths of the files to process and return them as a tuple.

    When a list file was given, each of its lines is taken as one path
    (surrounding whitespace stripped) and kept only if it is readable; this
    format matches the error output of this script. Otherwise the given music
    directory is walked recursively and every file in it is returned. With
    neither option set the result is empty.
    """
    found = []
    if options.files_list:
        with open(options.files_list, 'r') as listing:
            for raw_line in listing:
                candidate = raw_line.strip()
                if os.access(candidate, os.R_OK):
                    found.append(candidate)
    elif options.music_dir:
        for root, _dirs, names in os.walk(options.music_dir):
            found.extend(os.path.join(root, name) for name in names)
    return tuple(found)
def rename_file(file_path, artist, title):
    """
    Rename a file to "<artist> - <title><extension>" inside its own folder.

    The extension of the original file is kept, lower-cased. The characters
    ":", "?", "!" and "$" as well as path separators are replaced by "_",
    and "&" by "and", so that the new name is always a plain filename.

    Returns False when the file was renamed or already had the right name,
    otherwise a string describing why it was left untouched.
    """
    folder = os.path.dirname(file_path)
    extension = os.path.splitext(file_path)[1].lower()
    new_filename = "%s - %s%s" % (artist, title, extension)

    #A separator inside the artist or title (e.g. "AC/DC") would otherwise
    #point the new path into a subdirectory that does not exist.
    replacements = [(":", "_"), ("?", "_"), ("&", "and"), ("!", "_"),
                    ("$", "_"), ("/", "_")]
    if os.sep != "/":
        replacements.append((os.sep, "_"))
    for unwanted, substitute in replacements:
        new_filename = new_filename.replace(unwanted, substitute)

    #Only a Python 2 unicode object needs encoding to match byte-string
    #paths; a native str is used as it is.
    if not isinstance(new_filename, str):
        new_filename = new_filename.encode('utf-8')
    new_file_path = os.path.join(folder, new_filename)

    #Check if the filename is the same and skip if so
    if file_path == new_file_path:
        return False
    if os.path.exists(new_file_path):
        return "%s: tried to move to %s but it already exists" % (file_path, new_file_path)
    try:
        os.rename(file_path, new_file_path)
    except OSError:
        return "%s: tried to move to %s but there was an error" % (file_path, new_file_path)
    return False
def tag_corrector(manual_fix_list, file_path):
    """
    Fingerprint one file, pick the best last.fm match and rename the file.

    The current filename is ignored completely. The candidates returned for
    the fingerprint are sorted by rank (best first). A single candidate, or a
    best candidate with rank 1, is used directly. Otherwise candidates that
    rank far below their predecessor, or for which a last.fm track search
    finds nothing, are dropped; if exactly one is left it is used.

    manual_fix_list -- dict that is updated in place: when the choice stays
                       ambiguous, the remaining candidates are stored under
                       file_path for later manual selection
    file_path       -- path of the file to fingerprint and rename

    Returns False when the file was renamed (or deferred to manual
    selection), otherwise a string describing the problem.
    """
    #####Get the matches from the file#####
    if options.use_mad:
        matcher = lastfp.mad_match
    else:
        matcher = lastfp.gst_match
    #Retry in case of network issues
    for attempt in range(__RETRIES__):
        try:
            xml = matcher(__API_KEY__, file_path)
            fingerprint_matches = lastfp.parse_metadata(xml)
            break
        #File may be corrupt or just too short to fingerprint
        except lastfp.ExtractionError:
            return "%s: no fingerprint" % (file_path)
        except lastfp.NotFoundError:
            return "%s: no results found, try switching between MAD and GST" % (file_path)
        except (lastfp.CommunicationError, httplib.BadStatusLine):
            print("Connection lost on attempt %d, retrying" % (attempt + 1))
            time.sleep(__SLEEP__)
    else:
        #The loop ran out of attempts without ever reaching `break`
        return "%s: connection problem" % (file_path)
    #An empty result is not reported through the exceptions above
    if not fingerprint_matches:
        return "%s: no results found, try switching between MAD and GST" % (file_path)

    #####Get the tags from the matches#####
    #Sort by rank, best first. The result must be kept: sorted() returns a
    #new list and leaves its argument alone.
    fingerprint_matches = sorted(fingerprint_matches,
                                 key=lambda match: match['rank'],
                                 reverse=True)
    #Certain matches
    if len(fingerprint_matches) == 1 or fingerprint_matches[0]['rank'] == 1:
        lastfm_tags = fingerprint_matches[0]
    #Uncertain matches
    else:
        #Trim the insignificant matches from the end. The ratio test is
        #written as a multiplication so that a rank of zero cannot divide by
        #zero and integer ranks are not floor-divided.
        #NOTE(review): assumes ranks are non-negative numbers - confirm
        for i in range(1, len(fingerprint_matches)):
            better = fingerprint_matches[i - 1]['rank']
            worse = fingerprint_matches[i]['rank']
            if better > __RELATIVE_CERTAINTY__ * worse:
                del fingerprint_matches[i:]
                break
        #Trim the non-existent matches too. Survivors are collected in a new
        #list; removing from the list being iterated would skip candidates.
        confirmed = []
        for match in fingerprint_matches:
            #Retry in case of network issues
            for attempt in range(__RETRIES__):
                try:
                    #Setup last.fm connection
                    network = pylast.get_lastfm_network(api_key=__API_KEY__,
                                                        api_secret=__API_SECRET__)
                    tracksearch = network.search_for_track(
                        match['artist'],
                        match['title']).get_total_result_count()
                    break
                except httplib.BadStatusLine:
                    print("Connection lost on attempt %d, retrying shortly" % (attempt + 1))
                    time.sleep(__SLEEP__)
            else:
                return "%s: connection problem" % (file_path)
            #NOTE(review): assumes the result count is a number, not a
            #numeric string - confirm against the pylast version in use
            if tracksearch != 0:
                confirmed.append(match)
        fingerprint_matches = confirmed
        #Check if we narrowed the field to only one choice
        if len(fingerprint_matches) == 1:
            lastfm_tags = fingerprint_matches[0]
        else:
            #Can't do it automatically, so defer for later human input
            manual_fix_list[file_path] = fingerprint_matches
            return False

    #####Rename file#####
    return rename_file(file_path, lastfm_tags['artist'], lastfm_tags['title'])
def manually_choose_tags(file_path, fingerprint_matches):
    """
    Provide UI for manual selection of ambiguous music.

    Lists the candidate matches for file_path and asks the user to pick one
    by number, to play the file ("p") or to skip it ("s"). Any other input
    shows the menu again.

    file_path           -- path of the file being identified
    fingerprint_matches -- sequence of candidate dicts with the keys 'rank',
                           'track_mbid', 'artist' and 'title'

    Returns "<file_path>: skipped" when the user skips the file, otherwise
    the result of rename_file() for the chosen candidate.
    """
    #Local import: subprocess is only needed here
    import subprocess

    lastfm_tags = None
    while lastfm_tags is None:
        #Print choices. Each line is built as one string so that the call is
        #valid both as a print statement and as a print function; the spacing
        #reproduces what the multi-argument print statement emitted.
        print("Choose which is the best match for the file:")
        print(file_path)
        for i, match in enumerate(fingerprint_matches):
            print("\t%s \tRank= %s \tTrackMBID= %s \t%s - %s" % (
                i,
                match['rank'],
                match['track_mbid'],
                match['artist'],
                match['title']))
        print("\tp \tPlay the file with Totem")
        print("\ts \tSkip the file")
        choice = raw_input("Your choice? ")
        #Parse choice; isdigit() already rules out negative numbers
        if choice.isdigit() and int(choice) < len(fingerprint_matches):
            lastfm_tags = fingerprint_matches[int(choice)]
        elif choice in ("p", "P"):
            #Pass the path as a separate argument instead of through a shell
            #command line, so quotes or shell metacharacters in a filename
            #cannot be interpreted as commands.
            try:
                subprocess.call([__MEDIA_PLAYER__, file_path])
            except OSError:
                print("Could not start %s" % __MEDIA_PLAYER__)
        elif choice in ("s", "S"):
            return "%s: skipped" % (file_path)
    #####Rename file#####
    return rename_file(file_path, lastfm_tags['artist'], lastfm_tags['title'])
#Only start the renaming when executed as a script, not when imported
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment