Skip to content

Instantly share code, notes, and snippets.

@jeffiar
Created January 23, 2016 07:33
Show Gist options
  • Save jeffiar/db6a310b818e89a7f1c7 to your computer and use it in GitHub Desktop.
Save jeffiar/db6a310b818e89a7f1c7 to your computer and use it in GitHub Desktop.
Download the lyrics for all of an artist's songs from Genius.
from bs4 import BeautifulSoup
import requests
import itertools
import sys
import os
def get_lyrics(url):
    """Download a song page and return its lyrics, ASCII-encoded.

    Args:
        url: Address of the song page to fetch.

    Returns:
        The text content of the page's ``<lyrics>`` element, encoded to
        ASCII with every non-ASCII character dropped.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the page contains no ``<lyrics>`` element.
    """
    page = requests.get(url)
    # Fail loudly on 4xx/5xx instead of parsing an error page as a song.
    page.raise_for_status()
    soup = BeautifulSoup(page.text, "lxml")
    lyrics_tag = soup.lyrics
    if lyrics_tag is None:
        # Attribute-style lookup yields None when the tag is absent; without
        # this guard the failure is an opaque AttributeError on None.
        raise ValueError("no <lyrics> element found at %s" % url)
    # 'ignore' silently discards characters that have no ASCII representation.
    return lyrics_tag.getText().encode('ascii', 'ignore')
def get_songs(artist_id, access_token=None):
    """Yield every song record the Genius API lists for an artist.

    Pages of ``/artists/<id>/songs`` are requested one after another,
    starting at page 1, until a page comes back with an empty ``songs``
    list. A dot is written to stdout for each page requested.

    Args:
        artist_id: Integer artist id; it is formatted with ``%d``.
        access_token: Optional API token. When omitted, the token that was
            embedded in the original script is used.

    Yields:
        Each element of ``response['songs']`` from the decoded JSON reply,
        in the order the API returns them.

    Raises:
        requests.HTTPError: If any page request returns an error status.
    """
    # HTTPS so the access token is not sent over the wire in clear text.
    url = 'https://api.genius.com/artists/%d/songs' % artist_id
    if access_token is None:
        # NOTE(review): hard-coded credential kept only as a backward-
        # compatible fallback; it is exposed in source and should be rotated
        # and supplied by the caller instead.
        access_token = 'sJN8Itm9GbO3EsIZjObxk04wd4lzv6i8Kt-NiARvZ1XuPkos5HtMR6l3E5uFqLgb'
    # Single-argument print(...) emits the same text under Python 2 and 3.
    print("Fetching songs...")
    for page_number in itertools.count(1):
        # Progress indicator: one dot per page, flushed so it shows at once.
        sys.stdout.write('.')
        sys.stdout.flush()
        params = {'access_token': access_token, 'page': page_number}
        r = requests.get(url, params=params)
        # Surface auth/rate-limit failures here rather than as a KeyError on
        # the missing 'response' key below.
        r.raise_for_status()
        songs = r.json()['response']['songs']
        if not songs:
            # An empty page marks the end of the listing.
            print("")
            print("Songs fetched.")
            return
        for song in songs:
            yield song
# Script body: download the lyrics of every song by one artist and store each
# song in its own file under ./data, named after the last segment of its URL.
artist_id = 123  # Dre's id
# Drain the generator up front so the total count can be reported.
song_urls = [song['url'] for song in get_songs(artist_id)]
print("")
print("Writing %d files..." % len(song_urls))
if not os.path.exists('data'):
    os.makedirs('data')
for url in song_urls:
    lyrics = get_lyrics(url)
    # Take the last path component instead of slicing off a fixed 18
    # characters, which only matched the exact prefix "http://genius.com/".
    song_name = url.rstrip('/').rsplit('/', 1)[-1]
    filename = os.path.join('data', song_name)
    print(song_name)
    # get_lyrics returns encoded bytes, so write through a binary handle.
    with open(filename, 'wb') as f:
        f.write(lyrics)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment