Skip to content

Instantly share code, notes, and snippets.

@reiddraper
Forked from jwheare/fetch.py
Created April 24, 2010 21:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save reiddraper/377977 to your computer and use it in GitHub Desktop.
Save reiddraper/377977 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
"""
Fetch all a user's Last.fm scrobbles by paging through their recent tracks
Usage: ./fetch.py <username> [<start_page> [<end_page>]]
Be aware: You may end up with duplicated data if the user is scrobbling
when you fetch for tracks. Make sure you check for dupes when you process
the XML later
"""
import urllib, sys, os, errno
# Change these if you like
# Base output directory. The username is appended directly to this string
# (no separator is inserted), so it must end with a path separator. Being a
# relative path, it is resolved against the current working directory.
OUTDIR = '../data/_ignore/scrobbles/'
# Last.fm API key sent as the `api_key` query parameter on every request.
API_KEY = 'b25b959554ed76058ac220b7b2e0a026' # Good ole test key
def get_page(user, page = 1):
    """Fetch one page of a user's recent tracks from the Last.fm web service.

    Calls the `user.getrecenttracks` method with `limit=200` and returns
    the response body exactly as read from the connection.

    Args:
        user: Last.fm username whose scrobbles are requested.
        page: Page number to request; defaults to 1.

    Returns:
        The raw response body (whatever `read()` yields for the response).

    Raises:
        IOError: if opening the URL fails twice in a row, or if reading
            the response fails.
    """
    query = urllib.urlencode(dict(
        method = 'user.getrecenttracks',
        user = user,
        api_key = API_KEY,
        limit = 200,
        page = page
    ))
    url = 'http://ws.audioscrobbler.com/2.0/?%s' % query
    try:
        tracks = urllib.urlopen(url)
    except IOError:
        # Try once more in case of flaky connections
        tracks = urllib.urlopen(url)
    try:
        return tracks.read()
    finally:
        # Close explicitly so a long run over many pages does not rely on
        # garbage collection to release each connection.
        tracks.close()
# Command line: <username> [<start_page> [<end_page>]]
args = sys.argv[1:]
if not args:
    # Without this guard a missing username surfaces as a bare IndexError.
    sys.exit('Usage: ./fetch.py <username> [<start_page> [<end_page>]]')
user = args[0]

# Get start and end from arguments
if len(args) > 1:
    start = int(args[1])
else:
    start = 1
if len(args) > 2:
    end = int(args[2])
else:
    # Fetch a page and use the <recenttracks totalPages> attribute
    import xml.etree.ElementTree as ET
    tree = ET.fromstring(get_page(user))
    recent = tree.find('recenttracks')
    total_pages = None
    if recent is not None:
        total_pages = recent.get('totalPages')
    if total_pages is None:
        # find()/get() return None when the element or attribute is absent.
        # NOTE(review): presumably the service answered with an error
        # document (e.g. unknown user) -- confirm against the Last.fm docs.
        sys.exit('could not determine the number of pages for %s: '
                 'no <recenttracks totalPages> in the response' % user)
    end = int(total_pages)

# Single-argument print(...) produces the same output on Python 2 and 3.
print('fetching pages %s till %s for %s' % (start, end, user))

# Create the output directory if needed
outdir = '%s%s' % (OUTDIR, user)
try:
    os.makedirs(outdir)
except OSError as exc: # Python >2.5
    # An already-existing directory is fine; anything else is a real error.
    if exc.errno != errno.EEXIST:
        raise

# Loop through the pages, fetch XML and write to disk
for page in range(start, end + 1):
    print('%s: %s of %s' % (user, page, end))
    # Fetch before opening the file so a failed request does not leave an
    # empty or truncated <page>.xml behind.
    xml_data = get_page(user, page)
    # Binary mode writes the payload exactly as received; the with-block
    # closes the file even if the write fails.
    with open('%s/%s.xml' % (outdir, page), 'wb') as out:
        out.write(xml_data)
print('done fetching for %s' % user)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment