Skip to content

Instantly share code, notes, and snippets.

@imdkm
Last active February 15, 2024 20:17
Show Gist options
  • Save imdkm/a60247b59ff1881fa4bb8846a9b44c96 to your computer and use it in GitHub Desktop.
Save imdkm/a60247b59ff1881fa4bb8846a9b44c96 to your computer and use it in GitHub Desktop.
Get the metadata of every song by a given artist using the Genius API
# this code is inspired by Jon Evans' project.
# http://www.jw.pe/blog/post/quantifying-sufjan-stevens-with-the-genius-api-and-nltk/
import requests, json
from time import sleep
# Configuration constants; replace the two placeholder values before running.
# Root URL that _get() joins with each request path.
BASE_URL = "https://api.genius.com"
# Sent by _get() as the "Bearer" value of the Authorization header.
CLIENT_ACCESS_TOKEN = "<YOUR TOKEN HERE>"
# Used as the search query, compared exactly against each hit's primary
# artist name, and embedded in the names of the output files.
ARTIST_NAME = "<ANY ARTIST NAME>"
# send request and get response in json format.
def _get(path, params=None, headers=None, timeout=30):
    """Send an authenticated GET request to the API and return the parsed JSON.

    Args:
        path: Endpoint path relative to BASE_URL (e.g. "songs/123").
        params: Optional mapping of query-string parameters.
        headers: Optional mapping of extra request headers. It is copied,
            never modified; the Authorization header is always set from
            CLIENT_ACCESS_TOKEN.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within `timeout`.
    """
    # Strip a leading slash so "songs/1" and "/songs/1" build the same URL.
    request_url = '/'.join([BASE_URL, path.lstrip('/')])
    # Work on a copy so the caller's dict is not changed as a side effect.
    request_headers = dict(headers) if headers else {}
    request_headers['Authorization'] = "Bearer {}".format(CLIENT_ACCESS_TOKEN)
    # Without a timeout requests waits indefinitely on a stalled connection.
    response = requests.get(
        url=request_url,
        params=params,
        headers=request_headers,
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json()
def get_artist_songs(artist_id):
    """Return the ids of every song whose primary artist is `artist_id`.

    Requests the artist's song listing page by page, starting at page 1,
    until a page comes back with no songs. Songs on which the artist is not
    the primary artist are dropped from the result.
    """
    endpoint = "artists/{}/songs/".format(artist_id)
    collected = []
    page_number = 1
    while True:
        payload = _get(path=endpoint, params={'page': page_number})
        batch = payload['response']['songs']
        if not batch:
            # An empty page means the listing is exhausted.
            break
        collected.extend(batch)
        page_number += 1
    # Keep only the ids of songs where this artist is the primary artist.
    return [
        entry["id"]
        for entry in collected
        if entry["primary_artist"]["id"] == artist_id
    ]
def _credit_names(artists):
    """Return the "name" of each artist entry; a missing/None list gives []."""
    return [artist["name"] for artist in artists or []]


def get_song_information(song_ids):
    """Fetch each song's record and collect a summary of its metadata.

    Args:
        song_ids: Iterable of song ids to look up.

    Returns:
        A dict keyed by the position of the id in `song_ids` (0, 1, 2, ...).
        Each value is a dict with the keys "title", "album", "release_date",
        "featured_artists", "producer_artists", "writer_artists",
        "genius_track_id" and "genius_album_id". A song without an album
        gets "<single>" / "none" for the album name / id, and a song without
        a release date gets "unidentified".
    """
    song_list = {}
    for i, song_id in enumerate(song_ids):
        print("id:" + str(song_id) + " start. ->")
        data = _get(path="songs/{}".format(song_id))["response"]["song"]
        # .get() treats an absent key the same as an explicit null so the
        # fallbacks below apply in both cases.
        album = data.get("album")
        song_list[i] = {
            "title": data["title"],
            "album": album["name"] if album else "<single>",
            "release_date": data.get("release_date") or "unidentified",
            "featured_artists": _credit_names(data.get("featured_artists")),
            "producer_artists": _credit_names(data.get("producer_artists")),
            "writer_artists": _credit_names(data.get("writer_artists")),
            "genius_track_id": song_id,
            "genius_album_id": album["id"] if album else "none",
        }
        print("-> id:" + str(song_id) + " is finished. \n")
    return song_list
# # #
print("searching " + ARTIST_NAME + "'s artist id. \n")
# Search for the artist name and take the id from the first hit whose
# primary artist name matches ARTIST_NAME exactly.
find_id = _get("search", {'q': ARTIST_NAME})
for hit in find_id["response"]["hits"]:
    if hit["result"]["primary_artist"]["name"] == ARTIST_NAME:
        artist_id = hit["result"]["primary_artist"]["id"]
        break
else:
    # No hit matched: stop here with a clear message instead of failing
    # later with a NameError on the unbound artist_id.
    raise SystemExit(
        "-> could not find an artist named " + ARTIST_NAME
        + " in the search results."
    )
print("-> " + ARTIST_NAME + "'s id is " + str(artist_id) + "\n")
print("getting song ids. \n")
# Collect the ids of every song by the artist and save them to a file.
song_ids = get_artist_songs(artist_id)
with open("./" + ARTIST_NAME + " Genius Song IDs.text", "w") as f:
    # Written as a JSON array so the id list can be read back later.
    json.dump(song_ids, f)
print(song_ids)
print("\n-> got all the song ids. take a break for a while \n")
# Pause before starting the per-song requests.
sleep(30)
print("getting meta data of each song. \n")
# Fetch the metadata of every song and dump the result as JSON.
full_list_of_songs = get_song_information(song_ids)
print("-> Finished! Dump the data into json data. \n")
with open("./" + ARTIST_NAME + " Songs.json", "w") as f:
    json.dump(full_list_of_songs, f)
print("-> Mission complete! Check it out!")
@alexandar-pixel
Copy link

Hey man!
This looks like just the kind of thing I was looking for. I'm going to implement some of your ideas into my code. Thank you so much!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment