Created
January 21, 2022 18:56
-
-
Save jpurrutia/4745291b63059c627b6c5e0d7e0dde76 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import psycopg2 | |
from psycopg2 import extras | |
# ? might import this upstream in utils if I add the distinct function | |
# have to think about this and the dependency relationships | |
import pandas as pd | |
from database.db import local_dbc | |
from utils import get_distinct_artist_id | |
import spotipy | |
from spotipy.oauth2 import SpotifyClientCredentials | |
uri_prefix = 'spotify:artist:' | |
spotify = spotify = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials()) | |
# open postgres connection | |
d = local_dbc() | |
# get distinct artist list | |
rows = get_distinct_artist_id() | |
#breakpoint() | |
# close initial db connection | |
d.close() | |
#distinct_df = pd.DataFrame(distinct_artists,rows ) | |
distinct_df = pd.DataFrame(rows, columns=['artist_name']) | |
def get_artist_album_data(url_prefix, df, col, idx): | |
results = spotify.artist_albums(url_prefix+df[col][idx], album_type='album') | |
albums = results['items'] | |
while results['next']: | |
results = spotify.next(results) | |
albums.extend(results['items']) | |
return albums | |
# im working on parallelizing this - I think I need to breakdown the functions like the PBP | |
# test it out | |
album_name_list = [] | |
album_id_list = [] | |
album_url_list = [] | |
album_release_date_list = [] | |
album_name_list = [] | |
album_release_date_precision_list = [] | |
album_total_tracks_list = [] | |
album_artist_id_list = [] | |
album_type_list = [] | |
album_external_url_list = [] | |
album_images_list = [] | |
#iterate over list of artists names | |
for idx, val in enumerate(distinct_df['artist_name']): | |
#get albums for each artist | |
# returns a list | |
albums = get_artist_album_data(uri_prefix, distinct_df, 'artist_name', idx) | |
for album in albums: | |
album_name_list.append(album['name']) | |
album_id_list.append(album['id']) | |
album_url_list.append(album['uri']) | |
album_artist_id_list.append(album['artists'][0]['id']) | |
album_release_date_list.append(album['release_date']) | |
album_release_date_precision_list.append(album['release_date_precision']) | |
album_total_tracks_list.append(album['total_tracks']) | |
album_type_list.append(album['type']) | |
album_external_url_list.append(album['external_urls']['spotify']) | |
try: | |
album_images_list.append(album['images'][0]['url']) | |
except IndexError: | |
album_images_list.append('No Image') | |
# you should never do this (except: pass) | |
pass | |
#I'm missing image | |
my_dict = {'album_id': album_id_list, | |
'album_name': album_name_list, | |
'album_url': album_url_list, | |
'album_release_date': album_release_date_list, | |
'album_release_date_precision': album_release_date_precision_list, | |
'album_total_tracks': album_total_tracks_list, | |
'album_artist_id': album_artist_id_list, | |
'album_type': album_type_list, | |
'album_external_url': album_external_url_list, | |
'album_image_url': album_images_list} | |
df = pd.DataFrame(my_dict) | |
d = local_dbc() | |
cur = d.cursor() | |
table = 'recordfi.spotify_albums' | |
cols = ','.join(list(df.columns)) | |
tuples = [tuple(x) for x in df.to_numpy()] | |
sql = "INSERT INTO %s(%s) VALUES %%s" % (table, cols) | |
extras.execute_values(cur, sql, tuples) | |
#print('Third Loop exited...') | |
#print(datetime.now() - begin_time) | |
d.commit() | |
d.close() | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment