Created
May 10, 2009 16:43
-
-
Save quad/109667 to your computer and use it in GitHub Desktop.
What did you listen to the most on any given month? (via last.fm)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# | |
# any-month.py | |
# | |
# What did you listen to the most on any given month? (via last.fm) | |
# | |
# Example: | |
# | |
# $ find ~/public_html/mixtape -name index.xspf -print0 | xargs -0 ./any-month.py | |
# Wild Orchid Children where the mexican boys go | |
# Crystal Castles Reckless | |
# Natalie Portman's Shaved Head Sophisticated Side Ponytail | |
# Natalie Portman's Shaved Head Iceage Babeland | |
# Wild Orchid Children birth of a cabin | |
# Wolf Parade Fancy Claps | |
# Crystal Castles 1991 | |
# Portugal. The Man Church Mouth | |
# Feral Children The Beast=Goldmine | |
# Wolf Parade Grounds for Divorce | |
# Feral Children Zyghost | |
# The Sound of Animals Fighting I, The Swan | |
# The Sound of Animals Fighting The Ocean And The Sun | |
# Man Party The Party Will Come | |
# Man Party Sonny Boy | |
# UNKLE In a Broken Dream | |
# UNKLE 24 Frames | |
# Simon Viklund Rise of the Albatross | |
# Simon Viklund Bionic Commando Rearmed (Main Theme) | |
# Portugal. The Man Marching with 6 | |
# | |
import collections | |
import datetime | |
import difflib | |
import operator | |
import optparse | |
import urllib | |
import xml.etree.ElementTree as ET | |
# API key sent along with every last.fm web service request.
LFM_API_KEY = '068e57ae9465c496f8c4ceecdc4ca644'

# Command-line interface.  Positional arguments are XSPF playlists whose
# tracks get excluded from the report.
OPTIONS = optparse.OptionParser(
    usage='Usage: %prog [old-playlist.xspf ...]',
    description='What did you listen to the most on any given month? (via last.fm)',
    epilog='The XSPFs playlists, of course, contains tracks you want to exclude.')

# -u/--user: whose listening history to query.
OPTIONS.add_option(
    '-u', '--user',
    action='store',
    metavar='USER',
    default='quad',
    help='your last.fm user (default: quad)')

# -w/--when: month to report on; the default is computed once, at import
# time, from the current local date.
OPTIONS.add_option(
    '-w', '--when',
    action='store',
    metavar='YYYY-MM',
    default=datetime.datetime.now().strftime('%Y-%m'),
    help='the year and month to report on (default: this month)')
class XSPF(object):
    """Minimal reader for XSPF playlists.

    Only the ``album``, ``creator`` and ``title`` children of each
    ``track`` inside ``trackList`` are read; everything else in the
    playlist is ignored.
    """

    # ElementTree spells namespaced tags as '{namespace-uri}tag'.
    NS = '{http://xspf.org/ns/0/}'

    Track = collections.namedtuple('Track', 'album, creator, title')

    def __init__(self, filename):
        """Parse a playlist and store its tracks in ``self.tracks``.

        ``filename`` is handed straight to ``ElementTree.parse``, so it may
        be a path or an open file object.

        ``self.tracks`` is a list of ``Track`` tuples in document order.  A
        field is ``None`` when the corresponding element is absent from the
        track (or has no text); a playlist without a ``trackList`` yields an
        empty list.
        """
        tree = ET.parse(filename)
        track_list = tree.find(self.NS + 'trackList')
        # Compare against None explicitly: an Element without children is
        # falsy, so a plain truth test would misfire on an empty trackList.
        if track_list is None:
            track_nodes = []
        else:
            track_nodes = track_list.findall(self.NS + 'track')
        self.tracks = [
            self.Track(*[self._text(node, field)
                         for field in self.Track._fields])
            for node in track_nodes]

    @classmethod
    def _text(cls, node, tag):
        """Return the text of the namespaced child ``tag``, or None if absent."""
        child = node.find(cls.NS + tag)
        if child is None:
            return None
        return child.text
def lfm_call(method, **kwargs):
    """Make a web service call to last.fm.

    ``method`` is the API method name (e.g. 'user.getWeeklyChartList') and
    ``kwargs`` are sent as additional query parameters.  The module-level
    LFM_API_KEY is always appended.

    Returns the response parsed into an ``ElementTree``.
    """
    # Function-scope imports so the block works on both Python 3 and
    # Python 2, where these helpers live in different modules.
    try:
        from urllib.parse import urlencode
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlencode, urlopen

    # Encode every parameter, including the method name, in one pass so
    # nothing reaches the URL unescaped.
    params = [('method', method)]
    params.extend(sorted(kwargs.items()))
    params.append(('api_key', LFM_API_KEY))
    url = 'http://ws.audioscrobbler.com/2.0/?' + urlencode(params)

    response = urlopen(url)
    try:
        return ET.parse(response)
    finally:
        # ET.parse has consumed the body by now; release the connection.
        response.close()
def get_charts(user):
    """Return the time span of every weekly chart last.fm has for a user.

    Each entry of the returned list is a two-element list
    ``[start, end]`` of datetimes built with ``datetime.fromtimestamp``
    from the chart's ``from`` and ``to`` attributes.
    """
    response = lfm_call('user.getWeeklyChartList', user=user)
    to_datetime = datetime.datetime.fromtimestamp

    spans = []
    for node in response.find('weeklychartlist').findall('chart'):
        start = to_datetime(int(node.attrib['from']))
        end = to_datetime(int(node.attrib['to']))
        spans.append([start, end])
    return spans
def get_weekly_tracks(user, when_from, when_to):
    """Get a user's weekly track chart for the span between two datetimes.

    ``when_from`` and ``when_to`` are datetimes as produced by
    ``datetime.fromtimestamp``; they are converted back to epoch seconds
    for the request.

    Returns a dict mapping ``(artist, track name)`` to the play count
    reported for that chart.
    """
    import time

    def to_epoch(moment):
        # time.mktime is the portable inverse of datetime.fromtimestamp;
        # strftime('%s') is not a documented directive and is missing on
        # some platforms.
        return str(int(time.mktime(moment.timetuple())))

    # 'from' is a Python keyword, so the parameters go in via a dict.
    response = lfm_call('user.getWeeklyTrackChart',
                        **{'user': user,
                           'from': to_epoch(when_from),
                           'to': to_epoch(when_to)})
    track_nodes = response.find('weeklytrackchart').findall('track')
    return dict(((node.find('artist').text, node.find('name').text),
                 int(node.find('playcount').text))
                for node in track_nodes)
def get_pop_tracks(user, when):
    """Get a user's popular tracks for a specified month.

    ``when`` only needs ``year`` and ``month`` attributes.  A weekly chart
    belongs to the month in which it starts.

    Returns a list of ``((artist, title), playcount)`` pairs with the play
    counts of all matching weekly charts added up, sorted by descending
    play count.
    """
    # Pick the charts that start within the requested month.
    charts = [(c_from, c_to) for c_from, c_to in get_charts(user)
              if (c_from.year, c_from.month) == (when.year, when.month)]

    # Sum the play counts into one running total.  This leaves the weekly
    # dicts untouched and avoids re-copying the total for every week.
    totals = {}
    for c_from, c_to in charts:
        for key, playcount in get_weekly_tracks(user, c_from, c_to).items():
            totals[key] = totals.get(key, 0) + playcount

    # Most played first.
    return sorted(totals.items(), key=operator.itemgetter(1), reverse=True)
def mask_tracks(tracks, mask):
    """Drop every track that fuzzily matches an entry of a mask.

    ``tracks`` is an iterable of ``((artist, title), playcount)`` pairs and
    ``mask`` an iterable of ``(artist, title)`` pairs.  A track is dropped
    when its artist is a close match (``difflib.get_close_matches``,
    case-insensitive) for a mask artist and its title is a close match for
    one of that artist's mask titles.  Mask entries whose artist or title
    is None cannot match anything and are ignored.

    Returns the surviving ``((artist, title), playcount)`` pairs as a list,
    in their original order.
    """
    # Index the mask once: lowercased artist -> lowercased titles.
    titles_by_artist = {}
    for mask_artist, mask_title in mask:
        if mask_artist is None or mask_title is None:
            continue
        titles_by_artist.setdefault(mask_artist.lower(), []).append(
            mask_title.lower())
    known_artists = list(titles_by_artist)

    def is_masked(artist, title):
        title = title.lower()
        for candidate in difflib.get_close_matches(artist.lower(),
                                                   known_artists):
            if difflib.get_close_matches(title, titles_by_artist[candidate]):
                return True
        return False

    return [(key, playcount) for key, playcount in tracks
            if not is_masked(*key)]
def limit_artist_occurence(tracks, max_occurence):
    """Yield tracks in order, letting each artist through a limited number of times.

    ``tracks`` is an iterable of ``((artist, title), playcount)`` pairs.
    The first ``max_occurence`` entries of every artist are yielded
    unchanged; later entries of that artist are skipped.
    """
    seen = {}
    for key, playcount in tracks:
        artist, _title = key
        seen[artist] = seen.get(artist, 0) + 1
        if seen[artist] > max_occurence:
            continue
        yield key, playcount
def prettyprint_tracklist(tracks):
    """Print a formatted track list.

    ``tracks`` is a sequence of ``((artist, title), playcount)`` pairs.
    One line is printed per track: the artist, right-aligned to the width
    of the longest artist name, then a space and the title.  Play counts
    are not printed.  An empty sequence prints nothing.
    """
    if not tracks:
        return
    width = max(len(artist) for (artist, _title), _playcount in tracks)
    for (artist, title), _playcount in tracks:
        # Call form with a single argument behaves the same on Python 2
        # and Python 3; the bare print statement is Python-2-only syntax.
        print('%*s %s' % (width, artist, title))
def main():
    """Print the month's most played tracks, minus those already on playlists.

    Reads the options and playlist filenames from the command line, fetches
    the user's charts for the requested month, removes tracks found in the
    given XSPF playlists, allows at most two tracks per artist and prints
    the top 20 of what is left.
    """
    # Parse the command-line arguments.
    opts, playlists = OPTIONS.parse_args()

    # Validate the month before doing any file or network work, and report
    # a malformed value as a usage error instead of a traceback.
    try:
        when = datetime.datetime.strptime(opts.when, '%Y-%m').date()
    except ValueError:
        OPTIONS.error('--when must look like YYYY-MM, got %r' % (opts.when,))

    # Load the tracks to be masked from the playlists; the set drops
    # duplicates that appear across playlists.
    masks = set()
    for filename in playlists:
        for track in XSPF(filename).tracks:
            masks.add((track.creator, track.title))

    # Retrieve the most listened tracks for the month.
    tracks_pop = get_pop_tracks(opts.user, when)

    # Do some playlist post-processing, so as to not be too boring.
    tracks = mask_tracks(tracks_pop, list(masks))
    mix = list(limit_artist_occurence(tracks, 2))

    # Only print the top 20 tracks; slicing copes with shorter lists.
    prettyprint_tracklist(mix[:20])


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment