Skip to content

Instantly share code, notes, and snippets.

@AmolMavuduru
Last active January 21, 2021 22:45
Show Gist options
  • Save AmolMavuduru/9eb1b185b70a0d7432a761e57a60cf28 to your computer and use it in GitHub Desktop.
Save AmolMavuduru/9eb1b185b70a0d7432a761e57a60cf28 to your computer and use it in GitHub Desktop.
Functions for generating song recommendations using Spotify data. Sample code for my Medium article: "How to build an amazing music recommendation algorithm."
import difflib
from collections import defaultdict

import numpy as np
from scipy.spatial.distance import cdist
# Names of the numeric feature columns that are averaged and scaled when
# comparing songs; the order here fixes the order of every feature vector.
number_cols = [
    'valence',
    'year',
    'acousticness',
    'danceability',
    'duration_ms',
    'energy',
    'explicit',
    'instrumentalness',
    'key',
    'liveness',
    'loudness',
    'mode',
    'popularity',
    'speechiness',
    'tempo',
]
def get_song_data(song, spotify_data):
    """
    Look up a single song in ``spotify_data``.

    ``song`` is a dictionary carrying the song's ``'name'`` and release
    ``'year'``. The first row of ``spotify_data`` whose ``'name'`` and
    ``'year'`` columns both equal those values is returned. When no row
    matches, the lookup is delegated to ``find_song(name, year)`` (defined
    outside this block) and whatever it returns is passed back unchanged.
    """
    same_name = spotify_data['name'] == song['name']
    same_year = spotify_data['year'] == song['year']
    matches = spotify_data[same_name & same_year]

    # No local match: fall back to the external lookup.
    if matches.empty:
        return find_song(song['name'], song['year'])

    return matches.iloc[0]
def get_mean_vector(song_list, spotify_data):
    """
    Gets the mean vector for a list of songs.

    Every song is resolved through ``get_song_data``. Songs that resolve to
    ``None`` are reported with a printed warning and left out of the mean.

    Args:
        song_list: iterable of dictionaries, each with at least the 'name'
            and 'year' keys expected by ``get_song_data``.
        spotify_data: the song table handed to ``get_song_data``
            (presumably a pandas DataFrame containing the ``number_cols``
            columns -- confirm against the caller).

    Returns:
        The element-wise mean (``np.mean`` over axis 0) of the
        ``number_cols`` values of all songs that were found.

    Raises:
        ValueError: if no song could be resolved (empty ``song_list`` or
            every lookup returned ``None``). Averaging zero vectors would
            otherwise yield a NaN scalar that only fails later, far from
            the real cause.
    """
    song_vectors = []
    for song in song_list:
        song_data = get_song_data(song, spotify_data)
        if song_data is None:
            print('Warning: {} does not exist in Spotify or in database'.format(song['name']))
            continue
        song_vectors.append(song_data[number_cols].values)

    if not song_vectors:
        raise ValueError('Cannot compute a mean vector: none of the given songs could be found')

    song_matrix = np.array(song_vectors)
    return np.mean(song_matrix, axis=0)
def flatten_dict_list(dict_list):
    """
    Utility function for flattening a list of dictionaries.

    Turns ``[{'a': 1, 'b': 2}, {'a': 3, 'b': 4}]`` into
    ``{'a': [1, 3], 'b': [2, 4]}``.

    Args:
        dict_list: iterable of dictionaries. It may be empty, and the
            dictionaries do not have to share the same keys.

    Returns:
        A ``defaultdict`` mapping each key seen in any dictionary to the
        list of its values, in input order. An empty input gives an empty
        mapping. Looking up a key that never occurred raises ``KeyError``.
    """
    flattened_dict = defaultdict(list)
    for dictionary in dict_list:
        for key, value in dictionary.items():
            flattened_dict[key].append(value)

    # Switch off auto-creation so a lookup of an absent key raises KeyError
    # instead of silently inserting an empty list into the result.
    flattened_dict.default_factory = None
    return flattened_dict
def recommend_songs(song_list, spotify_data, n_songs=10):
    """
    Recommends songs based on a list of previous songs that a user has listened to.

    The mean feature vector of the input songs is compared, by cosine
    distance on scaled features, against every row of ``spotify_data``.
    The closest rows whose name is not one of the input song names are
    returned.

    Args:
        song_list: list of dictionaries, each with at least 'name' and
            'year' keys, describing songs the user has listened to.
        spotify_data: song table with the ``number_cols`` feature columns
            plus 'name', 'year' and 'artists' (presumably a pandas
            DataFrame -- confirm against the caller).
        n_songs: maximum number of recommendations to return.

    Returns:
        A list of up to ``n_songs`` dictionaries with the keys 'name',
        'year' and 'artists', ordered from closest to farthest.
    """
    metadata_cols = ['name', 'year', 'artists']
    song_dict = flatten_dict_list(song_list)

    song_center = get_mean_vector(song_list, spotify_data)
    # The first step of the module-level pipeline (defined outside this
    # block) is used as the feature scaler.
    scaler = song_cluster_pipeline.steps[0][1]
    scaled_data = scaler.transform(spotify_data[number_cols])
    scaled_song_center = scaler.transform(song_center.reshape(1, -1))

    # cdist returns a (1, n_rows) matrix; keep the single row.
    distances = cdist(scaled_song_center, scaled_data, 'cosine')[0]
    ranked = np.argsort(distances)

    # Drop the input songs BEFORE truncating to n_songs. Truncating first
    # returns fewer than n_songs results whenever an input song is itself
    # among the nearest neighbours (which it usually is).
    already_heard = spotify_data['name'].isin(song_dict['name']).values
    ranked = ranked[~already_heard[ranked]][:n_songs]

    rec_songs = spotify_data.iloc[ranked]
    return rec_songs[metadata_cols].to_dict(orient='records')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment