# imdkm/GeniusMetaData.py

## GeniusMetaData.py
# this code is inspired by Jon Evans' project.
# http://www.jw.pe/blog/post/quantifying-sufjan-stevens-with-the-genius-api-and-nltk/

import requests, json
from time import sleep

# constant values.
# Root URL that every request path is joined onto.
BASE_URL = "https://api.genius.com"
# Sent as the Bearer token in the Authorization header of every request;
# replace the placeholder with a real token before running.
CLIENT_ACCESS_TOKEN = "<YOUR TOKEN HERE>"
# Used as the search query, compared exactly against each hit's primary
# artist name, and embedded in both output file names; replace the placeholder.
ARTIST_NAME = "<ANY ARTIST NAME>"

# send request and get response in json format.
def _get(path, params=None, headers=None, timeout=30):
    """Send an authenticated GET request and return the decoded JSON body.

    Args:
        path: Endpoint path, joined onto ``BASE_URL`` with a ``/``.
        params: Optional mapping of query-string parameters.
        headers: Optional mapping of extra request headers. It is copied,
            so the caller's mapping is never modified.
        timeout: Seconds to wait for the server before giving up; handed
            straight to ``requests.get``.

    Returns:
        The response body parsed by ``response.json()``.

    Raises:
        requests.HTTPError: If ``raise_for_status()`` rejects the response.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    # generate request URL
    requrl = '/'.join([BASE_URL, path])

    # Work on a private copy: writing the token into the caller's dict would
    # leak the Authorization header into whatever else that dict is used for.
    request_headers = dict(headers) if headers else {}
    request_headers["Authorization"] = "Bearer {}".format(CLIENT_ACCESS_TOKEN)

    # Without a timeout a stalled connection would block the script forever.
    response = requests.get(
        url=requrl,
        params=params,
        headers=request_headers,
        timeout=timeout,
    )
    response.raise_for_status()

    return response.json()

def get_artist_songs(artist_id):
    """Collect the ids of every song whose primary artist is ``artist_id``.

    Requests ``artists/<artist_id>/songs/`` page by page, starting at page 1,
    and stops at the first page whose song list is empty.

    Args:
        artist_id: Artist id; compared with ``==`` against the
            ``primary_artist`` id of each returned song.

    Returns:
        A list of song ids, in the order the pages returned them.
    """
    endpoint = "artists/{}/songs/".format(artist_id)
    collected = []
    page = 1

    while True:
        reply = _get(path=endpoint, params={'page': page})
        batch = reply['response']['songs']
        if not batch:
            # an empty page means there is nothing left to fetch.
            break
        collected.extend(batch)
        page += 1

    # keep only the ids of songs where this artist is the primary artist.
    own_ids = []
    for entry in collected:
        if entry["primary_artist"]["id"] == artist_id:
            own_ids.append(entry["id"])
    return own_ids

def _artist_names(artists):
    """Return the ``name`` of every entry; a missing/null list gives ``[]``."""
    return [artist["name"] for artist in artists or []]


def get_song_information(song_ids):
    """Fetch the metadata of each song id and collect it into one dictionary.

    Args:
        song_ids: Iterable of song ids; each one is requested from
            ``songs/<id>``.

    Returns:
        A dict keyed by the 0-based position of each id in ``song_ids``.
        Every value is a dict with the keys ``title``, ``album``,
        ``release_date``, ``featured_artists``, ``producer_artists``,
        ``writer_artists``, ``genius_track_id`` and ``genius_album_id``.
        Songs without an album get ``"<single>"`` / ``"none"``; songs without
        a release date get ``"unidentified"``.
    """
    song_list = {}

    for i, song_id in enumerate(song_ids):
        print("id:" + str(song_id) + " start. ->")

        data = _get(path="songs/{}".format(song_id))["response"]["song"]
        album = data.get("album")

        song_list[i] = {
            "title": data["title"],
            "album": album["name"] if album else "<single>",
            "release_date": data.get("release_date") or "unidentified",
            # The credit lists may be empty, null or absent; all three cases
            # produce an empty list instead of raising.
            "featured_artists": _artist_names(data.get("featured_artists")),
            "producer_artists": _artist_names(data.get("producer_artists")),
            "writer_artists": _artist_names(data.get("writer_artists")),
            "genius_track_id": song_id,
            "genius_album_id": album["id"] if album else "none",
        }

        print("-> id:" + str(song_id) + " is finished. \n")
    return song_list

# # #

# Script flow: look up the artist id, collect the artist's song ids, save
# them, pause, fetch the metadata of every song, and dump it as JSON.
print("searching " + ARTIST_NAME + "'s artist id. \n")

# find artist id from given data: take the first search hit whose primary
# artist name equals ARTIST_NAME exactly.
find_id = _get("search", {'q': ARTIST_NAME})
for hit in find_id["response"]["hits"]:
    if hit["result"]["primary_artist"]["name"] == ARTIST_NAME:
        artist_id = hit["result"]["primary_artist"]["id"]
        break
else:
    # No hit matched: stop with a clear message instead of failing below
    # with a NameError on the never-assigned artist_id.
    raise SystemExit("-> could not find an artist named " + ARTIST_NAME + ".")

print("-> " + ARTIST_NAME + "'s id is " + str(artist_id) + "\n")

print("getting song ids. \n")

# get all song ids and make a list.
song_ids = get_artist_songs(artist_id)

with open("./" + ARTIST_NAME + " Genius Song IDs.text", "w") as f:
    # Serialise through the file handle; the ids are a list, not a string.
    json.dump(song_ids, f)

print(song_ids)
print("\n-> got all the song ids. take a break for a while \n")

# pause before sending the next burst of requests.
sleep(30)

print("getting meta data of each song. \n")

# finally, make a full list of songs with meta data.
full_list_of_songs = get_song_information(song_ids)

print("-> Finished! Dump the data into json data. \n")

with open("./" + ARTIST_NAME + " Songs.json", "w") as f:
    json.dump(full_list_of_songs, f)

print("-> Mission complete! Check it out!")
	# this code is inspired by Jon Evans' project.
	# http://www.jw.pe/blog/post/quantifying-sufjan-stevens-with-the-genius-api-and-nltk/

	import requests, json
	from time import sleep

	# constant values.
	# Root URL that every request path is joined onto.
	BASE_URL = "https://api.genius.com"
	# Sent as the Bearer token in the Authorization header of every request;
	# replace the placeholder with a real token before running.
	CLIENT_ACCESS_TOKEN = "<YOUR TOKEN HERE>"
	# Used as the search query, compared exactly against each hit's primary
	# artist name, and embedded in both output file names; replace the placeholder.
	ARTIST_NAME = "<ANY ARTIST NAME>"

	# send request and get response in json format.
	def _get(path, params=None, headers=None, timeout=30):
	    """Send an authenticated GET request and return the decoded JSON body.

	    Args:
	        path: Endpoint path, joined onto ``BASE_URL`` with a ``/``.
	        params: Optional mapping of query-string parameters.
	        headers: Optional mapping of extra request headers. It is copied,
	            so the caller's mapping is never modified.
	        timeout: Seconds to wait for the server before giving up; handed
	            straight to ``requests.get``.

	    Returns:
	        The response body parsed by ``response.json()``.

	    Raises:
	        requests.HTTPError: If ``raise_for_status()`` rejects the response.
	        requests.Timeout: If the server does not answer within ``timeout``.
	    """
	    # generate request URL
	    requrl = '/'.join([BASE_URL, path])

	    # Work on a private copy: writing the token into the caller's dict would
	    # leak the Authorization header into whatever else that dict is used for.
	    request_headers = dict(headers) if headers else {}
	    request_headers["Authorization"] = "Bearer {}".format(CLIENT_ACCESS_TOKEN)

	    # Without a timeout a stalled connection would block the script forever.
	    response = requests.get(
	        url=requrl,
	        params=params,
	        headers=request_headers,
	        timeout=timeout,
	    )
	    response.raise_for_status()

	    return response.json()

	def get_artist_songs(artist_id):
	    """Collect the ids of every song whose primary artist is ``artist_id``.

	    Requests ``artists/<artist_id>/songs/`` page by page, starting at page 1,
	    and stops at the first page whose song list is empty.

	    Args:
	        artist_id: Artist id; compared with ``==`` against the
	            ``primary_artist`` id of each returned song.

	    Returns:
	        A list of song ids, in the order the pages returned them.
	    """
	    # initialize variables & a list.
	    current_page = 1
	    next_page = True
	    songs = []

	    # main loop
	    while next_page:

	        path = "artists/{}/songs/".format(artist_id)
	        params = {'page': current_page}
	        data = _get(path=path, params=params)

	        page_songs = data['response']['songs']

	        if page_songs:
	            # add all the songs of current page,
	            # and increment current_page value for next loop.
	            songs += page_songs
	            current_page += 1
	        else:
	            # if page_songs is empty, quit.
	            next_page = False

	    # get all the song ids, excluding not-primary-artist songs.
	    songs = [song["id"] for song in songs
	             if song["primary_artist"]["id"] == artist_id]

	    return songs

	def _artist_names(artists):
	    """Return the ``name`` of every entry; a missing/null list gives ``[]``."""
	    return [artist["name"] for artist in artists or []]


	def get_song_information(song_ids):
	    """Fetch the metadata of each song id and collect it into one dictionary.

	    Args:
	        song_ids: Iterable of song ids; each one is requested from
	            ``songs/<id>``.

	    Returns:
	        A dict keyed by the 0-based position of each id in ``song_ids``.
	        Every value is a dict with the keys ``title``, ``album``,
	        ``release_date``, ``featured_artists``, ``producer_artists``,
	        ``writer_artists``, ``genius_track_id`` and ``genius_album_id``.
	        Songs without an album get ``"<single>"`` / ``"none"``; songs without
	        a release date get ``"unidentified"``.
	    """
	    song_list = {}

	    for i, song_id in enumerate(song_ids):
	        print("id:" + str(song_id) + " start. ->")

	        data = _get(path="songs/{}".format(song_id))["response"]["song"]
	        album = data.get("album")

	        song_list[i] = {
	            "title": data["title"],
	            "album": album["name"] if album else "<single>",
	            "release_date": data.get("release_date") or "unidentified",
	            # The credit lists may be empty, null or absent; all three cases
	            # produce an empty list instead of raising.
	            "featured_artists": _artist_names(data.get("featured_artists")),
	            "producer_artists": _artist_names(data.get("producer_artists")),
	            "writer_artists": _artist_names(data.get("writer_artists")),
	            "genius_track_id": song_id,
	            "genius_album_id": album["id"] if album else "none",
	        }

	        print("-> id:" + str(song_id) + " is finished. \n")
	    return song_list

	# # #

	# Script flow: look up the artist id, collect the artist's song ids, save
	# them, pause, fetch the metadata of every song, and dump it as JSON.
	print("searching " + ARTIST_NAME + "'s artist id. \n")

	# find artist id from given data: take the first search hit whose primary
	# artist name equals ARTIST_NAME exactly.
	find_id = _get("search", {'q': ARTIST_NAME})
	for hit in find_id["response"]["hits"]:
	    if hit["result"]["primary_artist"]["name"] == ARTIST_NAME:
	        artist_id = hit["result"]["primary_artist"]["id"]
	        break
	else:
	    # No hit matched: stop with a clear message instead of failing below
	    # with a NameError on the never-assigned artist_id.
	    raise SystemExit("-> could not find an artist named " + ARTIST_NAME + ".")

	print("-> " + ARTIST_NAME + "'s id is " + str(artist_id) + "\n")

	print("getting song ids. \n")

	# get all song ids and make a list.
	song_ids = get_artist_songs(artist_id)

	with open("./" + ARTIST_NAME + " Genius Song IDs.text", "w") as f:
	    # Serialise through the file handle; the ids are a list, not a string.
	    json.dump(song_ids, f)

	print(song_ids)
	print("\n-> got all the song ids. take a break for a while \n")

	# pause before sending the next burst of requests.
	sleep(30)

	print("getting meta data of each song. \n")

	# finally, make a full list of songs with meta data.
	full_list_of_songs = get_song_information(song_ids)

	print("-> Finished! Dump the data into json data. \n")

	with open("./" + ARTIST_NAME + " Songs.json", "w") as f:
	    json.dump(full_list_of_songs, f)

	print("-> Mission complete! Check it out!")