Last active
August 22, 2017 17:42
-
-
Save victorkohler/f7c5b7de9e82df1b84cfde005c92fe8f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Let's say we want to recommend artists for the user with ID 2023.
user_id = 2023

#------------------------------
# GET ITEMS CONSUMED BY USER
#------------------------------

# Let's print out what the user has listened to. The column indices of the
# non-zero entries in the user's row are converted to strings because they
# are matched against item_lookup.artist_id with isin() below.
consumed_idx = data_sparse[user_id, :].nonzero()[1].astype(str)
consumed_items = item_lookup.loc[item_lookup.artist_id.isin(consumed_idx)]

# Parenthesised so the line is valid on both Python 2 and Python 3.
print(consumed_items)
#------------------------------
# CREATE USER RECOMMENDATIONS
#------------------------------
def recommend(user_id, data_sparse, user_vecs, item_vecs, item_lookup, num_items=10):
    """Recommend items for a given user given a trained model.

    Scores every item with the dot product of the user's vector and the
    item vectors, min-max scales the scores to [0, 1], drops the items the
    user has already consumed and returns the best remaining ones.

    Args:
        user_id (int): Row index of the user in data_sparse and user_vecs.
        data_sparse (csr_matrix): Our original training data (users x items).
            Any non-zero entry in the user's row counts as "consumed".
        user_vecs (csr_matrix): The trained user x features vectors. Must be
            sparse: .toarray() is called on the result of the dot product.
        item_vecs (csr_matrix): The trained item x features vectors.
        item_lookup (pandas.DataFrame): Used to map artist ids to artist
            names. Needs an ``artist`` column and an ``artist_id`` column;
            ``artist_id`` is compared against ``str(column_index)``.
        num_items (int): How many recommendations we want to return.

    Returns:
        recommendations (pandas.DataFrame): DataFrame with columns ``artist``
            and ``score``, best score first. It has at most num_items rows;
            fewer when the user has fewer than num_items unconsumed items.

    Raises:
        IndexError: If a recommended item index has no row in item_lookup.
    """
    # Mask of the items the user has already interacted with. Using "!= 0"
    # matches the .nonzero() definition of "consumed" used elsewhere in this
    # script.
    consumed = data_sparse[user_id, :].toarray().reshape(-1) != 0

    # This is where we calculate the recommendation by taking the
    # dot-product of the user vector with all the item vectors. The cast to
    # float keeps the division below a true division on Python 2 as well.
    rec_vector = user_vecs[user_id, :].dot(item_vecs.T).toarray()
    rec_vector = rec_vector.reshape(-1).astype(float)

    # Let's scale our scores between 0 and 1 to make it all easier to
    # interpret. When every score is identical there is no range to scale
    # by, so all scaled scores are 0.
    lowest = rec_vector.min()
    spread = rec_vector.max() - lowest
    if spread > 0:
        rec_vector_scaled = (rec_vector - lowest) / spread
    else:
        rec_vector_scaled = np.zeros(rec_vector.shape)

    # Rank only the items the user has NOT consumed. Zeroing the consumed
    # scores instead would not be enough: 0 is also the scaled score of the
    # worst unconsumed item, so consumed items could still be returned.
    candidates = np.flatnonzero(~consumed)
    ranking = np.argsort(rec_vector_scaled[candidates])[::-1]
    item_idx = candidates[ranking][:num_items]

    # Look up the actual artist name for each recommended artist index.
    artists = [
        item_lookup.artist.loc[item_lookup.artist_id == str(idx)].iloc[0]
        for idx in item_idx
    ]
    scores = [rec_vector_scaled[idx] for idx in item_idx]

    # Create a new dataframe with recommended artist names and scores.
    recommendations = pd.DataFrame({'artist': artists, 'score': scores})

    return recommendations
# Let's generate and print our recommendations for the user selected above,
# using the default of 10 items.
recommendations = recommend(user_id, data_sparse, user_vecs, item_vecs, item_lookup)

# Parenthesised so the line is valid on both Python 2 and Python 3.
print(recommendations)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment