Skip to content

Instantly share code, notes, and snippets.

@jpurrutia
Created January 21, 2022 18:56
Show Gist options
  • Save jpurrutia/4745291b63059c627b6c5e0d7e0dde76 to your computer and use it in GitHub Desktop.
Save jpurrutia/4745291b63059c627b6c5e0d7e0dde76 to your computer and use it in GitHub Desktop.
import psycopg2
from psycopg2 import extras
# ? might import this upstream in utils if I add the distinct function
# have to think about this and the dependency relationships
import pandas as pd
from database.db import local_dbc
from utils import get_distinct_artist_id
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
uri_prefix = 'spotify:artist:'
spotify = spotify = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials())
# open postgres connection
d = local_dbc()
# get distinct artist list
rows = get_distinct_artist_id()
#breakpoint()
# close initial db connection
d.close()
#distinct_df = pd.DataFrame(distinct_artists,rows )
distinct_df = pd.DataFrame(rows, columns=['artist_name'])
def get_artist_album_data(url_prefix, df, col, idx):
results = spotify.artist_albums(url_prefix+df[col][idx], album_type='album')
albums = results['items']
while results['next']:
results = spotify.next(results)
albums.extend(results['items'])
return albums
# im working on parallelizing this - I think I need to breakdown the functions like the PBP
# test it out
album_name_list = []
album_id_list = []
album_url_list = []
album_release_date_list = []
album_name_list = []
album_release_date_precision_list = []
album_total_tracks_list = []
album_artist_id_list = []
album_type_list = []
album_external_url_list = []
album_images_list = []
#iterate over list of artists names
for idx, val in enumerate(distinct_df['artist_name']):
#get albums for each artist
# returns a list
albums = get_artist_album_data(uri_prefix, distinct_df, 'artist_name', idx)
for album in albums:
album_name_list.append(album['name'])
album_id_list.append(album['id'])
album_url_list.append(album['uri'])
album_artist_id_list.append(album['artists'][0]['id'])
album_release_date_list.append(album['release_date'])
album_release_date_precision_list.append(album['release_date_precision'])
album_total_tracks_list.append(album['total_tracks'])
album_type_list.append(album['type'])
album_external_url_list.append(album['external_urls']['spotify'])
try:
album_images_list.append(album['images'][0]['url'])
except IndexError:
album_images_list.append('No Image')
# you should never do this (except: pass)
pass
#I'm missing image
my_dict = {'album_id': album_id_list,
'album_name': album_name_list,
'album_url': album_url_list,
'album_release_date': album_release_date_list,
'album_release_date_precision': album_release_date_precision_list,
'album_total_tracks': album_total_tracks_list,
'album_artist_id': album_artist_id_list,
'album_type': album_type_list,
'album_external_url': album_external_url_list,
'album_image_url': album_images_list}
df = pd.DataFrame(my_dict)
d = local_dbc()
cur = d.cursor()
table = 'recordfi.spotify_albums'
cols = ','.join(list(df.columns))
tuples = [tuple(x) for x in df.to_numpy()]
sql = "INSERT INTO %s(%s) VALUES %%s" % (table, cols)
extras.execute_values(cur, sql, tuples)
#print('Third Loop exited...')
#print(datetime.now() - begin_time)
d.commit()
d.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment