Skip to content

Instantly share code, notes, and snippets.

@plamere
Last active August 29, 2015 14:04
Show Gist options
  • Save plamere/dba05ca4736e5049a20a to your computer and use it in GitHub Desktop.
Save plamere/dba05ca4736e5049a20a to your computer and use it in GitHub Desktop.
Parses the Outside Lands lineup page and generates a JSON file of artists along with their Spotify, Echo Nest, and Rdio IDs.
import sys
from bs4 import BeautifulSoup
import json
import pyen
# Module-level Echo Nest client; en_artist_lookup() sends its queries through it.
# NOTE(review): constructed with no arguments, so credentials presumably come
# from pyen's own defaults/environment -- confirm against the pyen docs.
en = pyen.Pyen()
def get_fid(artist, idspace):
    """Return the artist's foreign ID in the *idspace* catalog, or None.

    *artist* is a mapping that may carry a ``'foreign_ids'`` list whose
    entries have ``'catalog'`` and ``'foreign_id'`` keys.  The first entry
    whose catalog equals *idspace* wins; a missing or empty list, or no
    matching entry, yields None.
    """
    # Guard clause: nothing to search when the key is absent or the list empty.
    if 'foreign_ids' not in artist or len(artist['foreign_ids']) == 0:
        return None

    # Lazy scan so only the first matching entry's 'foreign_id' is read.
    hits = (
        entry['foreign_id']
        for entry in artist['foreign_ids']
        if entry['catalog'] == idspace
    )
    return next(hits, None)
def en_artist_lookup(name):
    """Search The Echo Nest for *name* and return its cross-service IDs.

    Issues an ``artist/search`` request through the module-level ``en``
    client, asking for the ``id:spotify`` and ``id:rdio-US`` buckets, and
    uses the first artist in the response.

    Returns a dict with the keys ``'echonest'``, ``'spotify'`` and ``'rdio'``
    (the latter two are None when the artist has no ID in that catalog), or
    an empty dict when the search returns no artists.

    The raw matched artist and a one-line summary are printed to stdout as
    progress output.
    """
    response = en.get('artist/search', name=name,
                      bucket=['id:spotify', 'id:rdio-US'])
    artists = response['artists']
    if not artists:
        return {}

    # Only the first search hit is considered.
    artist = artists[0]
    # Single-argument print(...) behaves the same on Python 2 and Python 3,
    # unlike the bare print statement, which is a SyntaxError on Python 3.
    print(artist)

    enid = artist['id']
    spid = get_fid(artist, 'spotify')
    rdio = get_fid(artist, 'rdio-US')
    if rdio:
        # Keep only the third colon-separated field of the Rdio foreign ID.
        rdio = rdio.split(':')[2]

    # Same space-separated layout the original print statement produced.
    print('%s / %s %s %s %s' % (name, artist['name'], enid, spid, rdio))

    return {
        'echonest': enid,
        'spotify': spid,
        'rdio': rdio,
    }
if __name__ == '__main__':
    # Script entry point: read the lineup HTML named on the command line,
    # look up every <a class="band"> anchor via en_artist_lookup(), and write
    # the collected records to lineup_2014.json.
    if len(sys.argv) < 2:
        # Fail with a readable message instead of a bare IndexError.
        sys.exit('usage: %s LINEUP_HTML' % sys.argv[0])

    # Context manager guarantees the input handle is closed (the original
    # never closed it).
    with open(sys.argv[1]) as f:
        html_doc = f.read()

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed on the machine.
    soup = BeautifulSoup(html_doc, 'html.parser')

    lineup = []
    for a in soup.find_all('a', class_='band'):
        name = a.text.strip()
        ids = en_artist_lookup(name)
        lineup.append({
            'artist': name,
            'link': a['href'],
            'ids': ids,
        })

    # Written only after every lookup has finished; the file is closed even
    # if serialization fails.  The trailing newline matches what
    # `print >> out` emitted.
    with open('lineup_2014.json', 'w') as out:
        out.write(json.dumps(lineup, indent=4) + '\n')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment