Skip to content

Instantly share code, notes, and snippets.

@silviutofan92
Created December 18, 2017 10:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save silviutofan92/70acd7bdb80acff82213de480f10e010 to your computer and use it in GitHub Desktop.
Save silviutofan92/70acd7bdb80acff82213de480f10e010 to your computer and use it in GitHub Desktop.
DSS Spotify Plugin
# Code for custom code recipe recipe from notebook Spotify Plugin (imported from a Python recipe)
# To finish creating your custom recipe from your original PySpark recipe, you need to:
# - Declare the input and output roles in recipe.json
# - Replace the dataset names by roles access in your code
# - Declare, if any, the params of your custom recipe in recipe.json
# - Replace the hardcoded params values by access to the configuration map
# See sample code below for how to do that.
# The code of your original recipe is included afterwards for convenience.
# Please also see the "recipe.json" file for more information.
# Inputs and outputs are defined by roles. In the recipe's I/O tab, the user can associate one
# or more dataset to each input and output role.
# Roles need to be defined in recipe.json, in the inputRoles and outputRoles fields.
# For outputs, the process is the same:
#output_A_names = get_output_names_for_role('main_output')
#output_A_datasets = [dataiku.Dataset(name) for name in output_A_names]
# The configuration consists of the parameters set up by the user in the recipe Settings tab.
# Parameters must be added to the recipe.json file so that DSS can prompt the user for values in
# the Settings tab of the recipe. The field "params" holds a list of all the params for which the
# user will be prompted for values.
# The configuration is simply a map of parameters, and retrieving the value of one of them is simply:
#my_variable = get_recipe_config()['parameter_name']
# For optional parameters, you should provide a default value in case the parameter is not present:
#my_variable = get_recipe_config().get('parameter_name', None)
# Note about typing:
# The configuration of the recipe is passed through a JSON object
# As such, INT parameters of the recipe are received in the get_recipe_config() dict as a Python float.
# If you absolutely require a Python int, use int(get_recipe_config()["my_int_param"])
#############################
# Your original recipe
#############################
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
import dataiku
from dataiku.customrecipe import *
from dataiku import pandasutils as pdu
import pandas as pd
import requests
import base64
import six
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Spotify application credentials, read from the recipe configuration map.
client_id, client_secret = (
    get_recipe_config()[setting]
    for setting in ('PLUGIN_CLIENT_ID', 'PLUGIN_CLIENT_SECRET')
)
# Spotify Accounts service endpoints; token_url is the one the token request POSTs to.
authorization_base_url, token_url = (
    'https://accounts.spotify.com/authorize',
    'https://accounts.spotify.com/api/token',
)
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Shared HTTP session used for the Spotify Web API GET requests further down.
session = requests.Session()
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
#Define function to create headers
def _make_authorization_headers(client_id, client_secret):
auth_header = base64.b64encode(six.text_type(client_id + ':' + client_secret).encode('ascii'))
return {'Authorization': 'Basic %s' % auth_header.decode('ascii')}
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
#Get API token
# Request an access token using the 'client_credentials' grant type, authenticating
# with the Basic header built from the application's client id and secret.
payload = {'grant_type': 'client_credentials'}
headers = _make_authorization_headers(client_id, client_secret)
# The timeout keeps the recipe from hanging indefinitely if the token endpoint stalls.
response = requests.post(token_url, data=payload, headers=headers, verify=True, timeout=30)
# Fail here, with the HTTP status, instead of with a KeyError on 'access_token' below.
response.raise_for_status()
token_info = response.json()
if 'access_token' not in token_info:
    raise ValueError('Spotify token response did not contain an access_token: %r' % (token_info,))
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
#Create request headers
# From this point on `headers` carries the bearer token sent with every Web API request.
headers = {'Authorization': 'Bearer {0}'.format(token_info['access_token'])}
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
#Define requests
# Spotify Web API endpoints. A '%s' placeholder is filled in with an id (or ids)
# through %-formatting at the point where the request is made.
#Get list of categories
request_url1 = 'https://api.spotify.com/v1/browse/categories'
#Get playlists for a category (the category id 'holidays' is hard-coded here)
request_url2 = 'https://api.spotify.com/v1/browse/categories/holidays/playlists'
#Get an artist's related artists (placeholder: artist id)
request_url3 = 'https://api.spotify.com/v1/artists/%s/related-artists'
#Get audio analysis for 1 track (placeholder: track id)
request_url4 = 'https://api.spotify.com/v1/audio-analysis/%s'
#Get audio features for multiple tracks (placeholder: comma-separated track ids)
request_url5 = 'https://api.spotify.com/v1/audio-features/?ids=%s'
#Get track info (placeholder: track id)
request_url6 = 'https://api.spotify.com/v1/tracks/%s'
#Get all tracks in a playlist (placeholders: user id, playlist id)
request_url7 = 'https://api.spotify.com/v1/users/%s/playlists/%s/tracks'
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Recipe parameters: PLUGIN_ITEM_IDENT is copied into the 'myid' output column;
# PLUGIN_SPOTIFY_USER and PLUGIN_SPOTIFY_ITEM fill the user / playlist slots of the
# playlist-tracks URL.
PLUGIN_ITEM_IDENT, PLUGIN_SPOTIFY_USER, PLUGIN_SPOTIFY_ITEM = (
    get_recipe_config()[param_name]
    for param_name in ('PLUGIN_ITEM_IDENT', 'PLUGIN_SPOTIFY_USER', 'PLUGIN_SPOTIFY_ITEM')
)
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
#Build one output row per playlist track: song, artist, first related artist, audio features.
### Define plugin vars
pno = PLUGIN_ITEM_IDENT

# Explicit column lists keep the output schema fixed even when the playlist is
# empty or a track has no audio features.
TRACK_COLUMNS = ['song_col', 'artist_col', 'id_col', 'related_artist', 'myid']
AUDIO_FEATURE_COLUMNS = [
    'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
    'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
    'time_signature', 'duration_ms',
]

#Retrieve songs from playlist
# NOTE(review): only the items of this single response are read; presumably longer
# playlists are paginated by the API -- confirm against the Web API reference.
playlist_response = session.request(
    'GET', request_url7 % (PLUGIN_SPOTIFY_USER, PLUGIN_SPOTIFY_ITEM), headers=headers)
playlist_response.raise_for_status()

#Retrieve name, ID, artist
# Rows are accumulated as plain dicts and turned into a DataFrame once at the end;
# growing a DataFrame row by row is quadratic and DataFrame.append no longer exists
# in pandas 2.x.
track_rows = []
for item in playlist_response.json()['items']:
    track = item['track']
    # The artist is taken from the track's album entry (first listed artist).
    album_artist = track['album']['artists'][0]
    #Retrieve an artist's related artists
    related_response = session.request(
        'GET', request_url3 % (album_artist['id']), headers=headers)
    related_response.raise_for_status()
    related_artists = related_response.json()['artists']
    track_rows.append({
        'song_col': track['name'],
        'artist_col': album_artist['name'],
        'id_col': track['id'],
        # Only the first related artist is kept; empty string when there is none.
        'related_artist': related_artists[0]['name'] if related_artists else '',
        'myid': pno,
    })

#Get audio features for all collected tracks with a single request
if track_rows:
    track_ids = ','.join(str(row['id_col']) for row in track_rows)
    features_response = session.request('GET', request_url5 % track_ids, headers=headers)
    features_response.raise_for_status()
    audio_features = features_response.json()['audio_features']
else:
    # Nothing to look up; avoid sending a request with an empty id list.
    audio_features = []

# Features are matched to tracks by position in the response list.
# NOTE(review): an entry may presumably be null when Spotify has no features for
# an id -- such rows get missing values instead of crashing the recipe.
for row, features in zip(track_rows, audio_features):
    features = features or {}
    for column in AUDIO_FEATURE_COLUMNS:
        row[column] = features.get(column)

df = pd.DataFrame(track_rows, columns=TRACK_COLUMNS + AUDIO_FEATURE_COLUMNS)

# Write the output to the output dataset
main_output_name = get_output_names_for_role('main_output')[0]
output_dataset = dataiku.Dataset(main_output_name)
output_dataset.write_with_schema(df)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment