Created
December 18, 2017 10:03
-
-
Save silviutofan92/70acd7bdb80acff82213de480f10e010 to your computer and use it in GitHub Desktop.
DSS Spotify Plugin
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Code for custom code recipe from notebook Spotify Plugin (imported from a Python recipe)
# To finish creating your custom recipe from your original PySpark recipe, you need to:
# - Declare the input and output roles in recipe.json
# - Replace the dataset names by roles access in your code
# - Declare, if any, the params of your custom recipe in recipe.json
# - Replace the hardcoded param values by access to the configuration map
# See sample code below for how to do that.
# The code of your original recipe is included afterwards for convenience.
# Please also see the "recipe.json" file for more information.
# Inputs and outputs are defined by roles. In the recipe's I/O tab, the user can associate one
# or more datasets to each input and output role.
# Roles need to be defined in recipe.json, in the inputRoles and outputRoles fields.
# For outputs, the process is the same:
#output_A_names = get_output_names_for_role('main_output')
#output_A_datasets = [dataiku.Dataset(name) for name in output_A_names]
# The configuration consists of the parameters set up by the user in the recipe Settings tab.
# Parameters must be added to the recipe.json file so that DSS can prompt the user for values in
# the Settings tab of the recipe. The field "params" holds a list of all the params for which the
# user will be prompted for values.
# The configuration is simply a map of parameters, and retrieving the value of one of them is simply:
#my_variable = get_recipe_config()['parameter_name']
# For optional parameters, you should provide a default value in case the parameter is not present:
#my_variable = get_recipe_config().get('parameter_name', None)
# Note about typing:
# The configuration of the recipe is passed through a JSON object
# As such, INT parameters of the recipe are received in the get_recipe_config() dict as a Python float.
# If you absolutely require a Python int, use int(get_recipe_config()["my_int_param"])
#############################
# Your original recipe
#############################
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
import base64

import requests
import six
import pandas as pd

import dataiku
from dataiku import pandasutils as pdu
from dataiku.customrecipe import *
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE | |
#Read the Spotify API credentials supplied in the recipe Settings tab
config = get_recipe_config()
client_id = config['PLUGIN_CLIENT_ID']
client_secret = config['PLUGIN_CLIENT_SECRET']
#Spotify OAuth endpoints
authorization_base_url = 'https://accounts.spotify.com/authorize'
token_url = 'https://accounts.spotify.com/api/token'
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
#Single HTTP session reused for every Spotify API call
session = requests.Session()
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE | |
#Define function to create headers | |
def _make_authorization_headers(client_id, client_secret): | |
auth_header = base64.b64encode(six.text_type(client_id + ':' + client_secret).encode('ascii')) | |
return {'Authorization': 'Basic %s' % auth_header.decode('ascii')} | |
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE | |
#Exchange the client credentials for an API access token
basic_auth = _make_authorization_headers(client_id, client_secret)
response = requests.post(token_url,
                         data={'grant_type': 'client_credentials'},
                         headers=basic_auth,
                         verify=True)
token_info = response.json()
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
#Bearer header attached to every subsequent API request
headers = {'Authorization': 'Bearer %s' % token_info['access_token']}
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE | |
#Placeholder identifiers (reassigned before any real use)
id = ''
ids = ''
#Spotify Web API endpoints used throughout this recipe
#List the browse categories (e.g. id = 'holidays')
request_url1 = 'https://api.spotify.com/v1/browse/categories'
#Playlists belonging to the 'holidays' category
request_url2 = 'https://api.spotify.com/v1/browse/categories/holidays/playlists'
#Artists related to a given artist id
request_url3 = 'https://api.spotify.com/v1/artists/%s/related-artists'
#Audio analysis for a single track
request_url4 = 'https://api.spotify.com/v1/audio-analysis/%s'
#Audio features for a comma-separated list of track ids
request_url5 = 'https://api.spotify.com/v1/audio-features/?ids=%s'
#Metadata for a single track
request_url6 = 'https://api.spotify.com/v1/tracks/%s'
#All tracks of a given user's playlist
request_url7 = 'https://api.spotify.com/v1/users/%s/playlists/%s/tracks'
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE | |
#User-facing recipe parameters (declared in recipe.json)
plugin_config = get_recipe_config()
PLUGIN_ITEM_IDENT = plugin_config['PLUGIN_ITEM_IDENT']
PLUGIN_SPOTIFY_USER = plugin_config['PLUGIN_SPOTIFY_USER']
PLUGIN_SPOTIFY_ITEM = plugin_config['PLUGIN_SPOTIFY_ITEM']
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE | |
#Build one row per playlist track: name, id, artist and first related artist.
#Rows are accumulated in a list and converted to a DataFrame once, replacing
#the per-row DataFrame.append (deprecated, removed in pandas 2.0, quadratic).
pno = PLUGIN_ITEM_IDENT  # recipe-level identifier stamped on every row
#Retrieve all tracks of the configured playlist
r = session.request('GET', request_url7 % (PLUGIN_SPOTIFY_USER, PLUGIN_SPOTIFY_ITEM), headers=headers)
#Comma-separated track ids, consumed by the batch audio-features request below
track_ids = ''
rows = []
for item in r.json()['items']:
    track = item['track']
    track_id = track['id']  # renamed from `id` to avoid shadowing the builtin
    track_ids += str(track_id) + ','
    lead_artist = track['album']['artists'][0]
    #One extra API call per track: first related artist of the album's lead artist
    related = session.request('GET', request_url3 % (lead_artist['id']), headers=headers).json()['artists']
    related_artist = related[0]['name'] if related else ''
    rows.append({'song_col': track['name'],
                 'artist_col': lead_artist['name'],
                 'id_col': track_id,
                 'related_artist': related_artist,
                 'myid': pno})
#Explicit column order keeps the schema stable even for an empty playlist
df1 = pd.DataFrame(rows, columns=['song_col', 'artist_col', 'id_col', 'related_artist', 'myid'])
#Audio-feature columns to extract for every track
feature_cols = ['danceability', 'energy', 'key', 'loudness', 'mode',
                'speechiness', 'acousticness', 'instrumentalness',
                'liveness', 'valence', 'tempo', 'time_signature', 'duration_ms']
#Batch audio-features lookup for every collected track id (trailing comma stripped)
audio_features = session.request('GET', request_url5 % (track_ids[:-1]), headers=headers).json()['audio_features']
#Single DataFrame build replaces the deprecated, quadratic per-row DataFrame.append
df2 = pd.DataFrame([{col: song[col] for col in feature_cols} for song in audio_features],
                   columns=feature_cols)
#Positional join: the audio-features endpoint returns results in request order,
#which matches the playlist order used to build df1. reset_index guards against
#duplicate indexes produced by per-row appends upstream.
df = pd.concat([df1.reset_index(drop=True), df2.reset_index(drop=True)], axis=1)
# Persist the combined dataframe into the recipe's main output dataset
output_name = get_output_names_for_role('main_output')[0]
dataiku.Dataset(output_name).write_with_schema(df)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment