Last active
January 21, 2021 22:45
-
-
Save AmolMavuduru/9eb1b185b70a0d7432a761e57a60cf28 to your computer and use it in GitHub Desktop.
Functions for generating song recommendations using Spotify data. Sample code for my Medium article: "How to build an amazing music recommendation algorithm."
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import difflib
from collections import defaultdict

import numpy as np
from scipy.spatial.distance import cdist
# Names of the numeric feature columns that are pulled out of the Spotify
# table to build each song's feature vector (see get_mean_vector and
# recommend_songs, which both index the data with this list).
number_cols = [
    'valence', 'year', 'acousticness', 'danceability', 'duration_ms',
    'energy', 'explicit', 'instrumentalness', 'key', 'liveness',
    'loudness', 'mode', 'popularity', 'speechiness', 'tempo',
]
def get_song_data(song, spotify_data):
    """
    Gets the song data for a specific song.

    Args:
        song: Dictionary with at least the keys 'name' and 'year'
            identifying the song.
        spotify_data: Table (indexed like a pandas DataFrame) with 'name'
            and 'year' columns.

    Returns:
        The first row of spotify_data whose 'name' and 'year' both equal
        the values in song. When no row matches, the result of
        find_song(song['name'], song['year']) is returned instead.
    """
    matches = spotify_data[
        (spotify_data['name'] == song['name'])
        & (spotify_data['year'] == song['year'])
    ]
    # Check for "no match" explicitly rather than catching the IndexError
    # that .iloc[0] raises on an empty selection.
    if len(matches) > 0:
        return matches.iloc[0]

    # find_song is defined outside this section of the file; presumably it
    # looks the song up in an external source -- confirm against its
    # definition.
    return find_song(song['name'], song['year'])
def get_mean_vector(song_list, spotify_data):
    """
    Gets the mean vector for a list of songs.

    Args:
        song_list: Iterable of song dictionaries (each with 'name' and
            'year' keys, as expected by get_song_data).
        spotify_data: Table passed through to get_song_data.

    Returns:
        1-D NumPy array holding the element-wise mean of the number_cols
        features over every song that could be looked up.

    Raises:
        ValueError: If none of the songs could be looked up, because the
            mean of zero vectors is undefined.
    """
    song_vectors = []
    for song in song_list:
        song_data = get_song_data(song, spotify_data)
        if song_data is None:
            print('Warning: {} does not exist in Spotify or in database'.format(song['name']))
            continue
        # Coerce to a flat float vector. ravel() also tolerates the lookup
        # handing back a one-row table instead of a single row (the
        # fallback helper is not visible here -- TODO confirm its return
        # type).
        song_vector = np.asarray(song_data[number_cols], dtype=float).ravel()
        song_vectors.append(song_vector)

    if not song_vectors:
        # Previously this fell through to np.mean over an empty array,
        # which yields nan and only fails later with an unrelated error.
        raise ValueError('Cannot compute a mean vector: none of the given songs were found')

    song_matrix = np.array(song_vectors)
    return np.mean(song_matrix, axis=0)
def flatten_dict_list(dict_list):
    """
    Utility function for flattening a list of dictionaries.

    Args:
        dict_list: Iterable of dictionaries.

    Returns:
        A defaultdict(list) mapping every key seen in any dictionary to the
        list of values it had, in input order. An empty input yields an
        empty mapping, and keys absent from the first dictionary are
        accepted.
    """
    # defaultdict(list) creates each value list on first use, so there is
    # no need to pre-seed keys from the first dictionary (which failed on
    # an empty list and on keys that only appear in later dictionaries).
    flattened_dict = defaultdict(list)
    for dictionary in dict_list:
        for key, value in dictionary.items():
            flattened_dict[key].append(value)
    return flattened_dict
def recommend_songs(song_list, spotify_data, n_songs=10):
    """
    Recommends songs based on a list of previous songs that a user has listened to.

    Args:
        song_list: List of song dictionaries with 'name' and 'year' keys.
        spotify_data: Table containing the number_cols feature columns plus
            the 'name', 'year' and 'artists' metadata columns.
        n_songs: Maximum number of recommendations to return.

    Returns:
        List of dictionaries (one per recommended song) with the keys
        'name', 'year' and 'artists', ordered from closest to farthest by
        cosine distance to the mean feature vector of the input songs.
        Songs whose name appears in song_list are never returned.
    """
    metadata_cols = ['name', 'year', 'artists']
    song_dict = flatten_dict_list(song_list)

    song_center = get_mean_vector(song_list, spotify_data)
    # song_cluster_pipeline is a module-level object defined outside this
    # section; its first step is used as the feature scaler (presumably an
    # already-fitted scikit-learn pipeline -- confirm).
    scaler = song_cluster_pipeline.steps[0][1]
    scaled_data = scaler.transform(spotify_data[number_cols])
    scaled_song_center = scaler.transform(song_center.reshape(1, -1))

    # cdist returns a (1, n_rows) matrix; take its only row.
    distances = cdist(scaled_song_center, scaled_data, 'cosine')[0]
    ranked = np.argsort(distances)

    # Drop the input songs BEFORE truncating to n_songs. Filtering after
    # the cut returned fewer than n_songs results whenever an input song
    # was among its own nearest neighbours.
    is_input = spotify_data['name'].isin(song_dict['name']).to_numpy()
    index = ranked[~is_input[ranked]][:n_songs]

    rec_songs = spotify_data.iloc[index]
    return rec_songs[metadata_cols].to_dict(orient='records')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment