jpurrutia/get_album_data_using_artist_id.py

## get_album_data_using_artist_id.py
import psycopg2
from psycopg2 import extras
# ? might import this upstream in utils if I add the distinct function
# have to think about this and the dependency relationships
import pandas as pd
from database.db import local_dbc
from utils import get_distinct_artist_id

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

uri_prefix = 'spotify:artist:'
spotify = spotify = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials())

# open postgres connection
d = local_dbc()

# get distinct artist list
rows = get_distinct_artist_id()
#breakpoint()
# close initial db connection
d.close()

#distinct_df = pd.DataFrame(distinct_artists,rows )
distinct_df = pd.DataFrame(rows, columns=['artist_name'])


def get_artist_album_data(url_prefix, df, col, idx):
    results = spotify.artist_albums(url_prefix+df[col][idx], album_type='album')
    albums = results['items']
    while results['next']:
        results = spotify.next(results)
        albums.extend(results['items'])
    return albums


# im working on parallelizing this - I think I need to breakdown the functions like the PBP
# test it out

album_name_list = []
album_id_list = []
album_url_list = []
album_release_date_list = []
album_name_list = []
album_release_date_precision_list = []
album_total_tracks_list = []
album_artist_id_list = []
album_type_list = []
album_external_url_list = []
album_images_list = []

#iterate over list of artists names
for idx, val in enumerate(distinct_df['artist_name']):
    #get albums for each artist

    # returns a list
    albums = get_artist_album_data(uri_prefix, distinct_df, 'artist_name', idx)

    for album in albums:
        album_name_list.append(album['name'])
        album_id_list.append(album['id'])
        album_url_list.append(album['uri'])
        album_artist_id_list.append(album['artists'][0]['id'])
        album_release_date_list.append(album['release_date'])
        album_release_date_precision_list.append(album['release_date_precision'])
        album_total_tracks_list.append(album['total_tracks'])
        album_type_list.append(album['type'])
        album_external_url_list.append(album['external_urls']['spotify'])
        try:
            album_images_list.append(album['images'][0]['url'])
        except IndexError:
            album_images_list.append('No Image')
        # you should never do this (except: pass)
            pass

#I'm missing image
my_dict = {'album_id': album_id_list,
           'album_name': album_name_list,
           'album_url': album_url_list,
           'album_release_date': album_release_date_list,
           'album_release_date_precision': album_release_date_precision_list,
           'album_total_tracks': album_total_tracks_list,
           'album_artist_id': album_artist_id_list,
           'album_type': album_type_list,
           'album_external_url': album_external_url_list,
           'album_image_url': album_images_list}

df = pd.DataFrame(my_dict)

d = local_dbc()

cur = d.cursor()
table = 'recordfi.spotify_albums'
cols = ','.join(list(df.columns))
tuples = [tuple(x) for x in df.to_numpy()]
sql = "INSERT INTO %s(%s) VALUES %%s" % (table, cols)

extras.execute_values(cur, sql, tuples)
#print('Third Loop exited...')
#print(datetime.now() - begin_time)

d.commit()
d.close()
	import psycopg2
	from psycopg2 import extras
	# ? might import this upstream in utils if I add the distinct function
	# have to think about this and the dependency relationships
	import pandas as pd
	from database.db import local_dbc
	from utils import get_distinct_artist_id

	import spotipy
	from spotipy.oauth2 import SpotifyClientCredentials

	uri_prefix = 'spotify:artist:'
	spotify = spotify = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials())

	# open postgres connection
	d = local_dbc()

	# get distinct artist list
	rows = get_distinct_artist_id()
	#breakpoint()
	# close initial db connection
	d.close()

	#distinct_df = pd.DataFrame(distinct_artists,rows )
	distinct_df = pd.DataFrame(rows, columns=['artist_name'])


	def get_artist_album_data(url_prefix, df, col, idx):
	results = spotify.artist_albums(url_prefix+df[col][idx], album_type='album')
	albums = results['items']
	while results['next']:
	results = spotify.next(results)
	albums.extend(results['items'])
	return albums


	# im working on parallelizing this - I think I need to breakdown the functions like the PBP
	# test it out

	album_name_list = []
	album_id_list = []
	album_url_list = []
	album_release_date_list = []
	album_name_list = []
	album_release_date_precision_list = []
	album_total_tracks_list = []
	album_artist_id_list = []
	album_type_list = []
	album_external_url_list = []
	album_images_list = []

	#iterate over list of artists names
	for idx, val in enumerate(distinct_df['artist_name']):
	#get albums for each artist

	# returns a list
	albums = get_artist_album_data(uri_prefix, distinct_df, 'artist_name', idx)

	for album in albums:
	album_name_list.append(album['name'])
	album_id_list.append(album['id'])
	album_url_list.append(album['uri'])
	album_artist_id_list.append(album['artists'][0]['id'])
	album_release_date_list.append(album['release_date'])
	album_release_date_precision_list.append(album['release_date_precision'])
	album_total_tracks_list.append(album['total_tracks'])
	album_type_list.append(album['type'])
	album_external_url_list.append(album['external_urls']['spotify'])
	try:
	album_images_list.append(album['images'][0]['url'])
	except IndexError:
	album_images_list.append('No Image')
	# you should never do this (except: pass)
	pass

	#I'm missing image
	my_dict = {'album_id': album_id_list,
	'album_name': album_name_list,
	'album_url': album_url_list,
	'album_release_date': album_release_date_list,
	'album_release_date_precision': album_release_date_precision_list,
	'album_total_tracks': album_total_tracks_list,
	'album_artist_id': album_artist_id_list,
	'album_type': album_type_list,
	'album_external_url': album_external_url_list,
	'album_image_url': album_images_list}

	df = pd.DataFrame(my_dict)

	d = local_dbc()

	cur = d.cursor()
	table = 'recordfi.spotify_albums'
	cols = ','.join(list(df.columns))
	tuples = [tuple(x) for x in df.to_numpy()]
	sql = "INSERT INTO %s(%s) VALUES %%s" % (table, cols)

	extras.execute_values(cur, sql, tuples)
	#print('Third Loop exited...')
	#print(datetime.now() - begin_time)

	d.commit()
	d.close()