Skip to content

Instantly share code, notes, and snippets.

@victorkohler
Created March 14, 2019 21:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save victorkohler/5060654ed76536e3662ae1304b02c48f to your computer and use it in GitHub Desktop.
Save victorkohler/5060654ed76536e3662ae1304b02c48f to your computer and use it in GitHub Desktop.
#-----------------------
# FIND SIMILAR ARTISTS
#-----------------------
def find_similar_artists(artist=None, num_items=10):
    """Find the artists most similar to a given artist.

    The score of every artist is the dot product between its latent factor
    vector and the factor vector of the requested artist, plus the per-item
    bias. The requested artist itself is not excluded from the result.

    Args:
        artist (str): The name of the artist we want to find similar artists
            for. It is compared for exact equality against
            ``item_lookup.artist``; if several rows match, the first is used.
        num_items (int): How many similar artists we want to return.

    Returns:
        similar (pandas.DataFrame): DataFrame with the columns ``artist`` and
            ``score`` holding the ``num_items`` highest scoring artists,
            best first.

    Raises:
        ValueError: If ``artist`` is not present in ``item_lookup``.
    """
    # Grab our Item matrix V (indexed by item id along the first axis).
    item_vecs = get_variable(graph, session, 'item_factors')

    # Grab our item bias as a flat vector.
    item_bi = get_variable(graph, session, 'item_bias').reshape(-1)

    # Resolve the artist name to its item id. Select the first match
    # explicitly instead of calling int() on a Series, which is deprecated
    # in pandas and fails with an unhelpful TypeError on zero/many matches.
    matches = item_lookup.loc[item_lookup.artist == artist, 'artist_id']
    if matches.empty:
        raise ValueError(
            'Artist {!r} not found in item_lookup'.format(artist))
    item_id = int(matches.iloc[0])

    # Factor vector of the requested artist.
    item_vec = item_vecs[item_id]

    # Score every artist against the requested one: V . v + bias.
    scores = (item_vecs.dot(item_vec) + item_bi).reshape(-1)

    # Indices of the num_items highest scores, best first.
    top_idx = np.argsort(scores)[::-1][:num_items]

    # Use the lookup table to turn the indices back into artist names.
    # NOTE(review): artist_id is compared as a string here, as in the
    # original code - confirm item_lookup.artist_id is stored as str.
    artists = [
        item_lookup.artist.loc[item_lookup.artist_id == str(idx)].iloc[0]
        for idx in top_idx
    ]
    artist_scores = [scores[idx] for idx in top_idx]

    similar = pd.DataFrame({'artist': artists, 'score': artist_scores})
    return similar
# Example usage: show the artists scored as most similar to 'beyoncé',
# using the function's default number of results.
print(find_similar_artists('beyoncé'))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment