Skip to content

Instantly share code, notes, and snippets.

@DhavalThkkar
Created August 18, 2021 16:14
Show Gist options
  • Save DhavalThkkar/5a5f488afb3c0b73baa5d9bbdd68d44f to your computer and use it in GitHub Desktop.
Save DhavalThkkar/5a5f488afb3c0b73baa5d9bbdd68d44f to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Aug 18 18:08:03 2021
@author: thakkar
"""
# Tekore imports
import tekore as tk
# Data analysis imports
import modin.pandas as md
import pandas as pd
import numpy as np
from tqdm import tqdm
from collections import OrderedDict
# Util functions
def check_presence(target: dict, source: dict,
                   target_col: str, source_col: str, col_nest: str = '',
                   is_num: bool = False):
    """Copy one (optionally nested) field from *source* into *target*.

    Args:
        target: Mapping that receives the value; it is modified in place.
        source: Mapping the value is read from.
        target_col: Key written in ``target``.
        source_col: Key looked up in ``source``.
        col_nest: When non-empty, the value stored is
            ``source[source_col][col_nest]`` instead of ``source[source_col]``.
        is_num: Selects the placeholder used when the value is unavailable:
            ``np.nan`` if true, ``None`` otherwise.

    Returns:
        The same ``target`` object, so calls can be chained or reassigned.
    """
    # np.nan (lower case) exists in every NumPy release; the np.NaN alias
    # was removed in NumPy 2.0.
    fallback = np.nan if is_num else None

    if source_col not in source:
        target[target_col] = fallback
        return target

    value = source[source_col]
    if col_nest != '':
        try:
            value = value[col_nest]
        except (KeyError, TypeError):
            # The outer key is present but holds None / a non-mapping, or the
            # nested key is absent: treat it the same as a missing field.
            value = fallback

    target[target_col] = value
    return target
# Load the tekore client configuration from config.txt and request a user
# token limited to the playlist-read-private scope.
# NOTE(review): prompt_for_user_token is presumably interactive (browser
# login plus pasting the redirect URL) -- confirm before running unattended.
conf = tk.config_from_file('config.txt')
scope = tk.scope.playlist_read_private
token = tk.prompt_for_user_token(*conf, scope=scope)
# Data processing: read the artist table through modin and derive each
# artist's bare id as the last ':'-separated segment of its spotify_uri.
data = md.read_csv('Artist-Genres-URIs.csv')
data['id'] = data['spotify_uri'].apply(lambda x: x.split(':')[-1])
# Client used for the API calls further down in this script.
# NOTE(review): max_limits_on / chunked_on are tekore client options,
# presumably "always request the maximum page size" and "split long id
# lists into several requests" -- confirm against the tekore docs.
spotify = tk.Spotify(token, max_limits_on=True, chunked_on=True)
def extract_details(artists, source_id):
    """Build a DataFrame describing the artists related to one source artist.

    Args:
        artists: Sequence of artist objects, each exposing ``asbuiltin()``
            that returns a mapping with (some of) the keys ``uri``, ``name``,
            ``id`` and ``genres``.
        source_id: Bare Spotify id (string) of the artist the lookup was
            made for.

    Returns:
        A ``pandas.DataFrame`` with the columns ``source_artist_uri``,
        ``artist_uri``, ``artist_name``, ``id`` and ``genres``: one row per
        related artist, or a single placeholder row (``None`` uri/name,
        empty id, empty genre list) when ``artists`` is empty, so that every
        source artist is still represented in the output.
    """
    source_uri = 'spotify:artist:' + source_id

    if not artists:
        cols = ['source_artist_uri', 'artist_uri', 'artist_name', 'id', 'genres']
        # Build the placeholder row in one go.  Filling an empty frame cell
        # by cell with .loc is fragile: a list value such as [] is treated as
        # a sequence to broadcast rather than as the content of one cell.
        placeholder = {
            'source_artist_uri': source_uri,
            'artist_uri': None,
            'artist_name': None,
            'id': '',
            'genres': [],
        }
        return pd.DataFrame([placeholder], columns=cols)

    # (output column, key in the artist mapping)
    field_map = (
        ('artist_uri', 'uri'),
        ('artist_name', 'name'),
        ('id', 'id'),
        ('genres', 'genres'),
    )
    rows = []
    for artist in artists:
        response = artist.asbuiltin()
        row = OrderedDict()
        row['source_artist_uri'] = source_uri
        for target_col, source_col in field_map:
            row = check_presence(row, response, target_col, source_col)
        rows.append(row)
    return pd.DataFrame(rows)
# Artists: fetch the related artists of every source artist and keep one
# DataFrame per source artist.
artist_dfs = []
# Iterate the id column directly instead of indexing data['id'][i] for each
# i in range(len(data)): the column is selected once and no per-row label
# lookup is needed.  A plain loop with append (rather than a comprehension)
# keeps the frames collected so far if a request fails part-way through.
for source_id in tqdm(data['id']):
    artists = spotify.artist_related_artists(source_id)
    artist_dfs.append(extract_details(artists, source_id))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment