Created
August 18, 2021 16:14
-
-
Save DhavalThkkar/5a5f488afb3c0b73baa5d9bbdd68d44f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8 -*- | |
""" | |
Created on Wed Aug 18 18:08:03 2021 | |
@author: thakkar | |
""" | |
# Tekore imports | |
import tekore as tk | |
# Data analysis imports | |
import modin.pandas as md | |
import pandas as pd | |
import numpy as np | |
from tqdm import tqdm | |
from collections import OrderedDict | |
# Util functions | |
def check_presence(target: dict, source: dict,
                   target_col: str, source_col: str, col_nest: str = '',
                   is_num: bool = False):
    """Copy one field from ``source`` into ``target``, defaulting when absent.

    Parameters
    ----------
    target : dict
        Mapping that receives the value; it is mutated in place.
    source : dict
        Mapping to read from.
    target_col : str
        Key to write in ``target``.
    source_col : str
        Key to read from ``source``.
    col_nest : str, optional
        When non-empty, the value stored is ``source[source_col][col_nest]``
        instead of ``source[source_col]``.
    is_num : bool, optional
        Selects the placeholder used for missing data: ``np.nan`` when True,
        ``None`` otherwise.

    Returns
    -------
    dict
        The same ``target`` object that was passed in.
    """
    # ``np.NaN`` was removed in NumPy 2.0; ``np.nan`` works on every version.
    missing = np.nan if is_num else None

    if source_col not in source:
        target[target_col] = missing
        return target

    value = source[source_col]
    if col_nest != '':
        try:
            value = value[col_nest]
        except (KeyError, TypeError):
            # The parent is present but is None / not subscriptable, or it
            # lacks the nested key: treat that the same as a missing field.
            value = missing

    target[target_col] = value
    return target
# Build the tekore credentials from the local config file and obtain a user
# token for the private-playlist read scope.
conf = tk.config_from_file('config.txt')
scope = tk.scope.playlist_read_private
token = tk.prompt_for_user_token(*conf, scope=scope)

# Client used for every Spotify request issued further down in the script.
spotify = tk.Spotify(token, max_limits_on=True, chunked_on=True)

# Data processing: load the artist table and derive the bare Spotify id,
# i.e. the text after the last ':' of each 'spotify_uri' value.
data = md.read_csv('Artist-Genres-URIs.csv')
data['id'] = data['spotify_uri'].apply(lambda uri: uri.rsplit(':', 1)[-1])
def extract_details(artists, source_id):
    """Build a DataFrame describing the artists related to ``source_id``.

    Parameters
    ----------
    artists : sequence
        Related-artist objects; each must provide ``asbuiltin()`` returning a
        mapping from which 'uri', 'name', 'id' and 'genres' are read.
    source_id : str
        Bare Spotify id of the source artist; stored on every row as
        ``'spotify:artist:' + source_id``.

    Returns
    -------
    pandas.DataFrame
        One row per related artist with the columns 'source_artist_uri',
        'artist_uri', 'artist_name', 'id' and 'genres'. When ``artists`` is
        empty, a single placeholder row is returned so the source artist is
        still represented (no uri/name, empty id, empty genre list).
    """
    cols = ['source_artist_uri', 'artist_uri', 'artist_name', 'id', 'genres']
    source_uri = 'spotify:artist:' + source_id

    if not artists:
        # Build the placeholder from a record instead of cell-by-cell ``.loc``
        # writes: assigning a list through ``.loc`` is treated by pandas as an
        # iterable to align, not as a single cell value.
        placeholder = {
            'source_artist_uri': source_uri,
            'artist_uri': None,
            'artist_name': None,
            'id': '',
            'genres': [],
        }
        return pd.DataFrame([placeholder], columns=cols)

    rows = []
    for artist in artists:
        response = artist.asbuiltin()
        res_detail = OrderedDict()
        res_detail['source_artist_uri'] = source_uri
        res_detail = check_presence(res_detail, response, 'artist_uri', 'uri')
        res_detail = check_presence(res_detail, response, 'artist_name', 'name')
        res_detail = check_presence(res_detail, response, 'id', 'id')
        res_detail = check_presence(res_detail, response, 'genres', 'genres')
        rows.append(res_detail)

    # Same explicit column order as the placeholder branch.
    return pd.DataFrame(rows, columns=cols)
# Artists
# Query the related artists of every source id and keep one DataFrame per
# source artist. The 'id' column is iterated directly rather than indexed by
# position through range(len(data)), which avoids a label lookup per row.
artist_dfs = []
for source_id in tqdm(data['id']):
    artists = spotify.artist_related_artists(source_id)
    artist_dfs.append(extract_details(artists, source_id))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment