Last active
August 1, 2017 09:35
-
-
Save victorkohler/afeafac11e82ec24c3f855c182148d2b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#------------------------
# USER-ITEM CALCULATIONS
#------------------------
# Relies on names defined earlier in the script: `pd` (pandas), `data`,
# `data_items` and `data_matrix`.
# NOTE(review): presumably `data_matrix` is the square item-item similarity
# matrix (artists on both axes) and `data_items` is `data` without its `user`
# column, sharing the same row labels -- confirm against the code above.

# Construct a new dataframe with the 10 closest neighbours (most similar)
# for each artist: one row per column of data_matrix, columns labelled 1..10.
data_neighbours = pd.DataFrame(index=data_matrix.columns, columns=range(1, 11))
for i in range(len(data_matrix.columns)):
    # `i` is a position, not a label, so use .iloc on both sides. Sorting the
    # i-th column in descending order and keeping the first 10 row labels
    # yields that artist's neighbours, best match first.
    data_neighbours.iloc[i, :10] = (
        data_matrix.iloc[:, i].sort_values(ascending=False).head(10).index
    )

user = 5985
# Row label of the first row in `data` whose `user` column equals `user`.
# Raises IndexError if there is no such row.
user_index = data[data.user == user].index.tolist()[0]

# Get the artists the user has played (columns with a value above zero).
# `user_index` is a row label taken from `data`, hence the label-based .loc.
known_user_likes = data_items.loc[user_index]
known_user_likes = known_user_likes[known_user_likes > 0].index.values

# Construct the neighbourhood from the most similar items to the
# ones our user has already liked.
most_similar_to_likes = data_neighbours.loc[known_user_likes]
# Flatten the per-artist neighbour rows and de-duplicate them.
similar_list = list({
    item
    for sublist in most_similar_to_likes.values.tolist()
    for item in sublist
})
# Restrict the matrix to the neighbourhood on both axes.
neighbourhood = data_matrix.loc[similar_list, similar_list]

# A user vector containing only the neighbourhood items and
# the known user likes.
user_vector = data_items.loc[user_index, similar_list]

# Calculate the score: for each neighbourhood item, the dot product of its
# row with the user vector, divided by the sum of that row.
score = neighbourhood.dot(user_vector).div(neighbourhood.sum(axis=1))

# Drop the known likes so only unseen artists are ranked.
score = score.drop(known_user_likes)

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(known_user_likes)
print(score.nlargest(20))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment