mindsocket/lastmatchwithnums.py

## lastmatchwithnums.py
#!/home/roger/.virtualenvs/lastfm/bin/python -u
"""
File: lastmatchwithnums.py
Author: Roger Barnes

A simple program for using acoustid to fingerprint and look up metadata (esp. play counts)
for MP3 files via lastfm. Usage:

    $ python lastmatchwithnums.py [folder] ...

All mp3s in all folders (recursive) will be fingerprinted with the beets
acoustid plugin, then looked up (if possible) from last.fm for play and
listener counts.  Data is written into redis.

Finally, the data can be extracted and reported.  Feed into a symlink thusly:

    $ ./lastmatchwithnums.py | xargs -d '\n' -Ixxx ln -sf xxx ~/audiolinks/

Requirements, all pip installable (+ some dependent system packages, YMMV):
    beets
    redis
    pyacoustid

Technology note: The generator-pipeline-driven programming style in Matcher was inspired by
David Beazley's presentations on generators. - http://www.dabeaz.com/generators/
TODO - multiprocessing?

"""
import sys
import os
import pylast
from beets.autotag import mb
from redis import Redis
from beetsplug import chroma
from pprint import pprint

# last.fm API key registered specifically for this script.
# Keys are managed at http://last.fm/api/account
API_KEY = 'faf408096c145277a0e01e712ae4a5f2'

# pylast errors that the last.fm lookup code catches and treats as a failed
# lookup rather than letting them abort the run.
PYLAST_EXCEPTIONS = (pylast.WSError, pylast.MalformedResponseError, pylast.NetworkError)

import fnmatch

def gen_find(filepat, top):
    """Lazily yield the path of every file under ``top`` matching ``filepat``.

    ``top`` is walked recursively with ``os.walk``; ``filepat`` is a shell-style
    pattern (e.g. ``"*.mp3"``) applied to the bare file names only.  Each
    yielded value is the directory reported by ``os.walk`` joined with the
    matching file name.
    """
    for dirpath, _subdirs, filenames in os.walk(top):
        matching = fnmatch.filter(filenames, filepat)
        for filename in matching:
            yield os.path.join(dirpath, filename)

class Matcher(object):
    """Fingerprint MP3 files, look up last.fm statistics and keep everything in redis.

    Redis keys written by this class:

    * ``lastfmpaths`` -- set of ``"lastfm:<abs path>"`` keys, one per file seen.
    * ``lastfmdirs`` -- set of ``"lastfm:<abs dir>"`` values.
    * ``lastfmdir:lastfm:<abs dir>`` -- set of the file keys inside that directory.
    * ``lastfm:<abs path>`` -- hash holding ``id``, ``artist``, ``title``,
      ``playcount`` and ``listener_count`` for the file.
    * ``mbid_for:<abs path>`` -- cached id returned by the acoustid match.
    * ``matchrec:<id>`` -- hash holding ``id``, ``artist`` and ``title``.
    * ``lastfmscore`` / ``lastfmscore:lastfm:<abs dir>`` -- sorted sets that are
      rebuilt every time a report is printed.

    The statements in this class are written so that they parse on Python 2.6+
    and Python 3 alike (``except ... as`` and explicit stream writes instead of
    the ``print`` statement).
    """

    def __init__(self, **kwargs):
        """Create the redis client and the last.fm network object.

        Keyword arguments (all optional):
            redis_host: redis server host name, default ``'localhost'``.
            redis_port: redis server port, default ``6379``.
            redis_db: redis database number, default ``0``.
        """
        self.redis = Redis(
            host=kwargs.get('redis_host', 'localhost'),
            port=int(kwargs.get('redis_port', 6379)),
            db=int(kwargs.get('redis_db', 0)),
        )
        self.network = pylast.LastFMNetwork(api_key=API_KEY)

    def processpath(self, path):
        """Run every new ``*.mp3`` below ``path`` through the lookup pipeline.

        Each stage is a generator, so files flow through the pipeline one at a
        time; nothing happens until ``sum`` drains the last stage.

        Returns the number of files for which the last stage completed (each
        one contributes ``1``).
        """
        mp3files = gen_find("*.mp3", path)
        fullpaths = self._getfullpath(mp3files)
        # _addpath is falsy for files already registered in redis, which
        # short-circuits them out of the rest of the pipeline.
        newpaths = (fullpath for fullpath in fullpaths if self._addpath(fullpath))
        matches = self._getmatches(newpaths)
        matchrecs = self._getmatchrec(matches)
        return sum(self._lastfmlookup(matchrecs))

    def _getfullpath(self, files):
        """Yield the absolute, ``~``-expanded form of every path in ``files``."""
        for relfile in files:
            yield os.path.abspath(os.path.expanduser(relfile))

    def _addpath(self, path):
        """Register ``path`` (and its directory) in redis; not a generator.

        Returns the result of adding the file key to ``lastfmpaths``.  When it
        is falsy the file was already registered, and its stored record is
        echoed to stderr.
        """
        key = "lastfm:" + path
        dirkey = os.path.dirname(key)
        self.redis.sadd("lastfmdirs", dirkey)
        self.redis.sadd("lastfmdir:" + dirkey, key)
        added = self.redis.sadd("lastfmpaths", key)
        if not added:
            sys.stderr.write("%s exists %s\n" % (path, self.redis.hgetall(key)))
        return added

    def _getmatches(self, files):
        """Yield ``(path, matchid)`` for every file that can be identified.

        The id is taken from the ``mbid_for:<path>`` cache when present,
        otherwise from ``chroma.acoustid_match``.  Files that cannot be
        matched are reported on stderr (when matching raised) and skipped.
        """
        for path in files:
            cachekey = "mbid_for:" + path
            matchid = self.redis.get(cachekey)
            # 'None' is what gets cached if a failed match is written by a
            # client that stringifies values; treat it as "no cached id".
            if not matchid or matchid == 'None':
                try:
                    match = chroma.acoustid_match(path)
                    matchid = match[0] if match else None
                except (EOFError, AttributeError, IOError) as err:
                    sys.stderr.write("ERROR matching %s %s\n" % (path, err))
                    matchid = None

                if not matchid:
                    # Nothing to cache: redis has no null value, so writing
                    # None here would either fail or poison the cache.
                    continue
                self.redis.set(cachekey, matchid)
            yield (path, matchid)

    def _getmatchrec(self, pairs):
        """Yield ``(path, matchrec)`` for every ``(path, matchid)`` pair.

        ``matchrec`` is the ``matchrec:<id>`` hash from redis, or a new dict
        with ``id``, ``artist`` and ``title`` built from
        ``mb.track_for_id`` and then cached.  Pairs whose track lookup yields
        no usable artist/title are skipped.  A falsy ``matchid`` produces an
        empty record.
        """
        for path, matchid in pairs:
            if not matchid:
                yield (path, {})
                continue

            reckey = "matchrec:" + matchid
            matchrec = self.redis.hgetall(reckey)
            if not matchrec:
                mbtrack = mb.track_for_id(matchid)
                try:
                    matchrec = {
                        'id': matchid,
                        'artist': mbtrack.artist,
                        'title': mbtrack.title,
                    }
                except AttributeError:
                    # The lookup returned something without artist/title.
                    continue
                self.redis.hmset(reckey, matchrec)

            yield (path, matchrec)

    def _lastfmlookup(self, matchrecs):
        """Store last.fm counts for every ``(path, matchrec)`` and yield ``1`` each.

        The track is looked up by id first and by artist/title if that raises
        one of ``PYLAST_EXCEPTIONS``.  Records for which the counts cannot be
        fetched are skipped (nothing is yielded for them).  A record without
        an ``id`` is stored with ``id`` set to ``"NOMATCH"``.
        """
        for path, matchrec in matchrecs:
            key = "lastfm:" + path

            if 'id' in matchrec:
                try:
                    track = self.network.get_track_by_mbid(matchrec['id'])
                except PYLAST_EXCEPTIONS:
                    if 'artist' in matchrec and 'title' in matchrec:
                        track = self.network.get_track(matchrec['artist'], matchrec['title'])
                    else:
                        continue

                # Only copy the fields that are really there, so an
                # incomplete cached record cannot raise KeyError.
                for field in ('id', 'artist', 'title'):
                    if field in matchrec:
                        self.redis.hset(key, field, matchrec[field])

                # Fetch both counts before writing either, so a failure on the
                # second one cannot leave a half-filled record behind.
                try:
                    playcount = track.get_playcount()
                    listener_count = track.get_listener_count()
                except PYLAST_EXCEPTIONS:
                    continue
                self.redis.hset(key, 'playcount', playcount)
                self.redis.hset(key, 'listener_count', listener_count)
            else:
                self.redis.hset(key, 'id', "NOMATCH")

            sys.stderr.write("%s new %s\n" % (path, self.redis.hgetall(key)))
            yield 1

    def _printsorted(self, identifier, allkeys, calc=lambda play,listen:float(play) / float(listen)):
        """Rebuild the sorted set ``identifier`` and return it, best score first.

        Only records that have both ``playcount`` and ``listener_count`` and a
        playcount above 1000 are scored.  ``calc(playcount, listener_count)``
        produces the score (default: plays per listener); records for which it
        divides by zero are left out.

        Returns a list of ``(key, score)`` pairs in descending score order.
        """
        self.redis.delete(identifier)
        for key in allkeys:
            rec = self.redis.hgetall(key)
            if 'playcount' not in rec or 'listener_count' not in rec:
                continue
            if int(rec['playcount']) <= 1000:
                continue
            try:
                score = calc(rec['playcount'], rec['listener_count'])
            except ZeroDivisionError:
                # A track without listeners has no meaningful ratio.
                continue
            # NOTE(review): (name, member, score) is the argument order this
            # script has always used; confirm it against the installed redis
            # client before upgrading it.
            self.redis.zadd(identifier, key, score)

        scored = self.redis.zrange(identifier, 0, -1, withscores=True)
        scored.reverse()
        return scored

    def printsortedresults(self):
        """Print the path of every file whose plays-per-listener score exceeds 5."""
        allkeys = self.redis.smembers("lastfmpaths")
        for key, score in self._printsorted("lastfmscore", allkeys):
            if score > 5:
                # Strip only the leading "lastfm:" prefix; the path itself may
                # contain further colons.
                sys.stdout.write(key.split(':', 1)[1] + "\n")

    def printsortedresultsbydir(self):
        """Print, for each known directory, its two most-played files."""
        alldirs = self.redis.smembers("lastfmdirs")
        for dirkey in alldirs:
            allkeys = self.redis.smembers("lastfmdir:" + dirkey)
            scored = self._printsorted(
                "lastfmscore:" + dirkey,
                allkeys,
                calc=lambda play, listen: float(play),
            )
            for key, _score in scored[:2]:
                # Strip only the leading "lastfm:" prefix; the path itself may
                # contain further colons.
                sys.stdout.write(key.split(':', 1)[1] + "\n")


if __name__ == '__main__':
    # Every command-line argument is a folder to scan; the report is printed
    # afterwards even when no folders are given.
    matcher = Matcher(redis_host='localhost', redis_port=6379, redis_db=0)
    total = sum(matcher.processpath(folder) for folder in sys.argv[1:])

    # `total` is the number of newly processed files; reporting it is disabled:
    #print "Processed", total, "files"

    matcher.printsortedresultsbydir()
    # Alternative report across all files (disabled):
    #matcher.printsortedresults()
	#!/home/roger/.virtualenvs/lastfm/bin/python -u
	"""
	File: lastmatchwithnums.py
	Author: Roger Barnes

	A simple program for using acoustid to fingerprint and look up metadata (esp. play counts)
	for MP3 files via lastfm. Usage:

	$ python lastmatchwithnums.py [folder] ...

	All mp3s in all folders (recursive) will be fingerprinted with the beets
	acoustid plugin, then looked up (if possible) from last.fm for play and
	listener counts. Data is written into redis.

	Finally, the data can be extracted and reported. Feed into a symlink thusly:

	$ ./lastmatchwithnums.py | xargs -d '\n' -Ixxx ln -sf xxx ~/audiolinks/

	Requirements, all pip installable (+ some dependent system packages, YMMV):
	beets
	redis
	pyacoustid

	Technology note: The generator-pipeline-driven programming style in Matcher was inspired by
	David Beazley's presentations on generators. - http://www.dabeaz.com/generators/
	TODO - multiprocessing?

	"""
	import sys
	import os
	import pylast
	from beets.autotag import mb
	from redis import Redis
	from beetsplug import chroma
	from pprint import pprint

	# last.fm API key registered specifically for this script.
	# Keys are managed at http://last.fm/api/account
	API_KEY = 'faf408096c145277a0e01e712ae4a5f2'

	# pylast errors that the last.fm lookup code catches and treats as a failed
	# lookup rather than letting them abort the run.
	PYLAST_EXCEPTIONS = (pylast.WSError, pylast.MalformedResponseError, pylast.NetworkError)

	import fnmatch

	def gen_find(filepat, top):
	    """Lazily yield the path of every file under ``top`` matching ``filepat``.

	    ``top`` is walked recursively with ``os.walk``; ``filepat`` is a shell-style
	    pattern (e.g. ``"*.mp3"``) applied to the bare file names only.  Each
	    yielded value is the directory reported by ``os.walk`` joined with the
	    matching file name.
	    """
	    # The nesting below restores the block structure that had been flattened
	    # in this copy of the function.
	    for dirpath, _subdirs, filenames in os.walk(top):
	        for filename in fnmatch.filter(filenames, filepat):
	            yield os.path.join(dirpath, filename)

	class Matcher(object):
	    """Fingerprint MP3 files, look up last.fm statistics and keep everything in redis.

	    Redis keys written by this class:

	    * ``lastfmpaths`` -- set of ``"lastfm:<abs path>"`` keys, one per file seen.
	    * ``lastfmdirs`` -- set of ``"lastfm:<abs dir>"`` values.
	    * ``lastfmdir:lastfm:<abs dir>`` -- set of the file keys inside that directory.
	    * ``lastfm:<abs path>`` -- hash holding ``id``, ``artist``, ``title``,
	      ``playcount`` and ``listener_count`` for the file.
	    * ``mbid_for:<abs path>`` -- cached id returned by the acoustid match.
	    * ``matchrec:<id>`` -- hash holding ``id``, ``artist`` and ``title``.
	    * ``lastfmscore`` / ``lastfmscore:lastfm:<abs dir>`` -- sorted sets that are
	      rebuilt every time a report is printed.

	    The statements in this class are written so that they parse on Python 2.6+
	    and Python 3 alike (``except ... as`` and explicit stream writes instead of
	    the ``print`` statement).
	    """

	    def __init__(self, **kwargs):
	        """Create the redis client and the last.fm network object.

	        Keyword arguments (all optional):
	            redis_host: redis server host name, default ``'localhost'``.
	            redis_port: redis server port, default ``6379``.
	            redis_db: redis database number, default ``0``.
	        """
	        self.redis = Redis(
	            host=kwargs.get('redis_host', 'localhost'),
	            port=int(kwargs.get('redis_port', 6379)),
	            db=int(kwargs.get('redis_db', 0)),
	        )
	        self.network = pylast.LastFMNetwork(api_key=API_KEY)

	    def processpath(self, path):
	        """Run every new ``*.mp3`` below ``path`` through the lookup pipeline.

	        Each stage is a generator, so files flow through the pipeline one at a
	        time; nothing happens until ``sum`` drains the last stage.

	        Returns the number of files for which the last stage completed (each
	        one contributes ``1``).
	        """
	        mp3files = gen_find("*.mp3", path)
	        fullpaths = self._getfullpath(mp3files)
	        # _addpath is falsy for files already registered in redis, which
	        # short-circuits them out of the rest of the pipeline.
	        newpaths = (fullpath for fullpath in fullpaths if self._addpath(fullpath))
	        matches = self._getmatches(newpaths)
	        matchrecs = self._getmatchrec(matches)
	        return sum(self._lastfmlookup(matchrecs))

	    def _getfullpath(self, files):
	        """Yield the absolute, ``~``-expanded form of every path in ``files``."""
	        for relfile in files:
	            yield os.path.abspath(os.path.expanduser(relfile))

	    def _addpath(self, path):
	        """Register ``path`` (and its directory) in redis; not a generator.

	        Returns the result of adding the file key to ``lastfmpaths``.  When it
	        is falsy the file was already registered, and its stored record is
	        echoed to stderr.
	        """
	        key = "lastfm:" + path
	        dirkey = os.path.dirname(key)
	        self.redis.sadd("lastfmdirs", dirkey)
	        self.redis.sadd("lastfmdir:" + dirkey, key)
	        added = self.redis.sadd("lastfmpaths", key)
	        if not added:
	            sys.stderr.write("%s exists %s\n" % (path, self.redis.hgetall(key)))
	        return added

	    def _getmatches(self, files):
	        """Yield ``(path, matchid)`` for every file that can be identified.

	        The id is taken from the ``mbid_for:<path>`` cache when present,
	        otherwise from ``chroma.acoustid_match``.  Files that cannot be
	        matched are reported on stderr (when matching raised) and skipped.
	        """
	        for path in files:
	            cachekey = "mbid_for:" + path
	            matchid = self.redis.get(cachekey)
	            # 'None' is what gets cached if a failed match is written by a
	            # client that stringifies values; treat it as "no cached id".
	            if not matchid or matchid == 'None':
	                try:
	                    match = chroma.acoustid_match(path)
	                    matchid = match[0] if match else None
	                except (EOFError, AttributeError, IOError) as err:
	                    sys.stderr.write("ERROR matching %s %s\n" % (path, err))
	                    matchid = None

	                if not matchid:
	                    # Nothing to cache: redis has no null value, so writing
	                    # None here would either fail or poison the cache.
	                    continue
	                self.redis.set(cachekey, matchid)
	            yield (path, matchid)

	    def _getmatchrec(self, pairs):
	        """Yield ``(path, matchrec)`` for every ``(path, matchid)`` pair.

	        ``matchrec`` is the ``matchrec:<id>`` hash from redis, or a new dict
	        with ``id``, ``artist`` and ``title`` built from
	        ``mb.track_for_id`` and then cached.  Pairs whose track lookup yields
	        no usable artist/title are skipped.  A falsy ``matchid`` produces an
	        empty record.
	        """
	        for path, matchid in pairs:
	            if not matchid:
	                yield (path, {})
	                continue

	            reckey = "matchrec:" + matchid
	            matchrec = self.redis.hgetall(reckey)
	            if not matchrec:
	                mbtrack = mb.track_for_id(matchid)
	                try:
	                    matchrec = {
	                        'id': matchid,
	                        'artist': mbtrack.artist,
	                        'title': mbtrack.title,
	                    }
	                except AttributeError:
	                    # The lookup returned something without artist/title.
	                    continue
	                self.redis.hmset(reckey, matchrec)

	            yield (path, matchrec)

	    def _lastfmlookup(self, matchrecs):
	        """Store last.fm counts for every ``(path, matchrec)`` and yield ``1`` each.

	        The track is looked up by id first and by artist/title if that raises
	        one of ``PYLAST_EXCEPTIONS``.  Records for which the counts cannot be
	        fetched are skipped (nothing is yielded for them).  A record without
	        an ``id`` is stored with ``id`` set to ``"NOMATCH"``.
	        """
	        for path, matchrec in matchrecs:
	            key = "lastfm:" + path

	            if 'id' in matchrec:
	                try:
	                    track = self.network.get_track_by_mbid(matchrec['id'])
	                except PYLAST_EXCEPTIONS:
	                    if 'artist' in matchrec and 'title' in matchrec:
	                        track = self.network.get_track(matchrec['artist'], matchrec['title'])
	                    else:
	                        continue

	                # Only copy the fields that are really there, so an
	                # incomplete cached record cannot raise KeyError.
	                for field in ('id', 'artist', 'title'):
	                    if field in matchrec:
	                        self.redis.hset(key, field, matchrec[field])

	                # Fetch both counts before writing either, so a failure on the
	                # second one cannot leave a half-filled record behind.
	                try:
	                    playcount = track.get_playcount()
	                    listener_count = track.get_listener_count()
	                except PYLAST_EXCEPTIONS:
	                    continue
	                self.redis.hset(key, 'playcount', playcount)
	                self.redis.hset(key, 'listener_count', listener_count)
	            else:
	                self.redis.hset(key, 'id', "NOMATCH")

	            sys.stderr.write("%s new %s\n" % (path, self.redis.hgetall(key)))
	            yield 1

	    def _printsorted(self, identifier, allkeys, calc=lambda play,listen:float(play) / float(listen)):
	        """Rebuild the sorted set ``identifier`` and return it, best score first.

	        Only records that have both ``playcount`` and ``listener_count`` and a
	        playcount above 1000 are scored.  ``calc(playcount, listener_count)``
	        produces the score (default: plays per listener); records for which it
	        divides by zero are left out.

	        Returns a list of ``(key, score)`` pairs in descending score order.
	        """
	        self.redis.delete(identifier)
	        for key in allkeys:
	            rec = self.redis.hgetall(key)
	            if 'playcount' not in rec or 'listener_count' not in rec:
	                continue
	            if int(rec['playcount']) <= 1000:
	                continue
	            try:
	                score = calc(rec['playcount'], rec['listener_count'])
	            except ZeroDivisionError:
	                # A track without listeners has no meaningful ratio.
	                continue
	            # NOTE(review): (name, member, score) is the argument order this
	            # script has always used; confirm it against the installed redis
	            # client before upgrading it.
	            self.redis.zadd(identifier, key, score)

	        scored = self.redis.zrange(identifier, 0, -1, withscores=True)
	        scored.reverse()
	        return scored

	    def printsortedresults(self):
	        """Print the path of every file whose plays-per-listener score exceeds 5."""
	        allkeys = self.redis.smembers("lastfmpaths")
	        for key, score in self._printsorted("lastfmscore", allkeys):
	            if score > 5:
	                # Strip only the leading "lastfm:" prefix; the path itself may
	                # contain further colons.
	                sys.stdout.write(key.split(':', 1)[1] + "\n")

	    def printsortedresultsbydir(self):
	        """Print, for each known directory, its two most-played files."""
	        alldirs = self.redis.smembers("lastfmdirs")
	        for dirkey in alldirs:
	            allkeys = self.redis.smembers("lastfmdir:" + dirkey)
	            scored = self._printsorted(
	                "lastfmscore:" + dirkey,
	                allkeys,
	                calc=lambda play, listen: float(play),
	            )
	            for key, _score in scored[:2]:
	                # Strip only the leading "lastfm:" prefix; the path itself may
	                # contain further colons.
	                sys.stdout.write(key.split(':', 1)[1] + "\n")


	if __name__ == '__main__':
	    # Every command-line argument is a folder to scan; the report is printed
	    # afterwards even when no folders are given.  The nesting restores the
	    # block structure that had been flattened in this copy of the script.
	    matcher = Matcher(redis_host='localhost', redis_port=6379, redis_db=0)
	    total = sum(matcher.processpath(folder) for folder in sys.argv[1:])

	    # `total` is the number of newly processed files; reporting it is disabled:
	    #print "Processed", total, "files"

	    matcher.printsortedresultsbydir()
	    # Alternative report across all files (disabled):
	    #matcher.printsortedresults()