Created
December 18, 2017 10:03
-
-
Save silviutofan92/70acd7bdb80acff82213de480f10e010 to your computer and use it in GitHub Desktop.
DSS Spotify Plugin
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Code for custom code recipe from notebook Spotify Plugin (imported from a Python recipe)
# To finish creating your custom recipe from your original PySpark recipe, you need to:
# - Declare the input and output roles in recipe.json
# - Replace the dataset names by roles access in your code
# - Declare, if any, the params of your custom recipe in recipe.json
# - Replace the hardcoded param values by access to the configuration map
# See sample code below for how to do that.
# The code of your original recipe is included afterwards for convenience.
# Please also see the "recipe.json" file for more information.
# Inputs and outputs are defined by roles. In the recipe's I/O tab, the user can associate one
# or more datasets to each input and output role.
# Roles need to be defined in recipe.json, in the inputRoles and outputRoles fields.
# For outputs, the process is the same:
#output_A_names = get_output_names_for_role('main_output')
#output_A_datasets = [dataiku.Dataset(name) for name in output_A_names]
# The configuration consists of the parameters set up by the user in the recipe Settings tab.
# Parameters must be added to the recipe.json file so that DSS can prompt the user for values in
# the Settings tab of the recipe. The field "params" holds a list of all the params for which the
# user will be prompted for values.
# The configuration is simply a map of parameters, and retrieving the value of one of them is simply:
#my_variable = get_recipe_config()['parameter_name']
# For optional parameters, you should provide a default value in case the parameter is not present:
#my_variable = get_recipe_config().get('parameter_name', None)
# Note about typing:
# The configuration of the recipe is passed through a JSON object
# As such, INT parameters of the recipe are received in the get_recipe_config() dict as a Python float.
# If you absolutely require a Python int, use int(get_recipe_config()["my_int_param"])
#############################
# Your original recipe
#############################
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
import base64

import requests
import six
import pandas as pd

import dataiku
from dataiku import pandasutils as pdu
from dataiku.customrecipe import *
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE | |
#Read the Spotify API credentials supplied in the recipe Settings tab
config = get_recipe_config()
client_id = config['PLUGIN_CLIENT_ID']
client_secret = config['PLUGIN_CLIENT_SECRET']
#Spotify OAuth endpoints
authorization_base_url = 'https://accounts.spotify.com/authorize'
token_url = 'https://accounts.spotify.com/api/token'
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
#Single HTTP session reused for every Spotify API call
session = requests.Session()
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE | |
#Define function to create headers | |
def _make_authorization_headers(client_id, client_secret): | |
auth_header = base64.b64encode(six.text_type(client_id + ':' + client_secret).encode('ascii')) | |
return {'Authorization': 'Basic %s' % auth_header.decode('ascii')} | |
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE | |
#Exchange the client credentials for an API access token
basic_auth = _make_authorization_headers(client_id, client_secret)
response = requests.post(token_url,
                         data={'grant_type': 'client_credentials'},
                         headers=basic_auth,
                         verify=True)
token_info = response.json()
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
#Bearer header attached to every subsequent API request
headers = {'Authorization': 'Bearer %s' % token_info['access_token']}
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE | |
#Placeholder identifiers (reassigned before any real use)
id = ''
ids = ''
#Spotify Web API endpoints used throughout this recipe
#List the browse categories (e.g. id = 'holidays')
request_url1 = 'https://api.spotify.com/v1/browse/categories'
#Playlists belonging to the 'holidays' category
request_url2 = 'https://api.spotify.com/v1/browse/categories/holidays/playlists'
#Artists related to a given artist id
request_url3 = 'https://api.spotify.com/v1/artists/%s/related-artists'
#Audio analysis for a single track
request_url4 = 'https://api.spotify.com/v1/audio-analysis/%s'
#Audio features for a comma-separated list of track ids
request_url5 = 'https://api.spotify.com/v1/audio-features/?ids=%s'
#Metadata for a single track
request_url6 = 'https://api.spotify.com/v1/tracks/%s'
#All tracks of a given user's playlist
request_url7 = 'https://api.spotify.com/v1/users/%s/playlists/%s/tracks'
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE | |
#User-facing recipe parameters (declared in recipe.json)
plugin_config = get_recipe_config()
PLUGIN_ITEM_IDENT = plugin_config['PLUGIN_ITEM_IDENT']
PLUGIN_SPOTIFY_USER = plugin_config['PLUGIN_SPOTIFY_USER']
PLUGIN_SPOTIFY_ITEM = plugin_config['PLUGIN_SPOTIFY_ITEM']
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE | |
#Build one row per playlist track: name, id, artist and first related artist.
#Rows are accumulated in a list and converted to a DataFrame once, replacing
#the per-row DataFrame.append (deprecated, removed in pandas 2.0, quadratic).
pno = PLUGIN_ITEM_IDENT  # recipe-level identifier stamped on every row
#Retrieve all tracks of the configured playlist
r = session.request('GET', request_url7 % (PLUGIN_SPOTIFY_USER, PLUGIN_SPOTIFY_ITEM), headers=headers)
#Comma-separated track ids, consumed by the batch audio-features request below
track_ids = ''
rows = []
for item in r.json()['items']:
    track = item['track']
    track_id = track['id']  # renamed from `id` to avoid shadowing the builtin
    track_ids += str(track_id) + ','
    lead_artist = track['album']['artists'][0]
    #One extra API call per track: first related artist of the album's lead artist
    related = session.request('GET', request_url3 % (lead_artist['id']), headers=headers).json()['artists']
    related_artist = related[0]['name'] if related else ''
    rows.append({'song_col': track['name'],
                 'artist_col': lead_artist['name'],
                 'id_col': track_id,
                 'related_artist': related_artist,
                 'myid': pno})
#Explicit column order keeps the schema stable even for an empty playlist
df1 = pd.DataFrame(rows, columns=['song_col', 'artist_col', 'id_col', 'related_artist', 'myid'])
#Audio-feature columns to extract for every track
feature_cols = ['danceability', 'energy', 'key', 'loudness', 'mode',
                'speechiness', 'acousticness', 'instrumentalness',
                'liveness', 'valence', 'tempo', 'time_signature', 'duration_ms']
#Batch audio-features lookup for every collected track id (trailing comma stripped)
audio_features = session.request('GET', request_url5 % (track_ids[:-1]), headers=headers).json()['audio_features']
#Single DataFrame build replaces the deprecated, quadratic per-row DataFrame.append
df2 = pd.DataFrame([{col: song[col] for col in feature_cols} for song in audio_features],
                   columns=feature_cols)
#Positional join: the audio-features endpoint returns results in request order,
#which matches the playlist order used to build df1. reset_index guards against
#duplicate indexes produced by per-row appends upstream.
df = pd.concat([df1.reset_index(drop=True), df2.reset_index(drop=True)], axis=1)
# Persist the combined dataframe into the recipe's main output dataset
output_name = get_output_names_for_role('main_output')[0]
dataiku.Dataset(output_name).write_with_schema(df)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment