Skip to content

Instantly share code, notes, and snippets.

@victorkohler
Last active August 1, 2017 09:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save victorkohler/afeafac11e82ec24c3f855c182148d2b to your computer and use it in GitHub Desktop.
Save victorkohler/afeafac11e82ec24c3f855c182148d2b to your computer and use it in GitHub Desktop.
#------------------------
# USER-ITEM CALCULATIONS
#------------------------
# NOTE(review): this section relies on `pd`, `data`, `data_items` and
# `data_matrix` being defined earlier in the file. The indexers below assume
# `data_matrix` is labelled by artist on both axes and that `data_items`
# shares the row labels of `data` -- confirm against the code that builds them.

# Construct a new dataframe with the 10 closest neighbours (most similar)
# for each artist. Columns are labelled 1..10 (rank of the neighbour).
data_neighbours = pd.DataFrame(index=data_matrix.columns, columns=range(1, 11))
for i in range(len(data_matrix.columns)):
    # Positional access (.iloc) replaces the removed `.ix` indexer: take the
    # i-th similarity column, order it from most to least similar and keep
    # the labels of the first 10 entries.
    closest = data_matrix.iloc[:, i].sort_values(ascending=False)[:10].index
    data_neighbours.iloc[i, :10] = closest

user = 5985
matching_rows = data[data.user == user].index.tolist()
if not matching_rows:
    # Same exception type the bare `[0]` lookup used to raise, but with a
    # message that says what actually went wrong.
    raise IndexError("user %s not found in data" % user)
user_index = matching_rows[0]

# Get the artists the user has played (columns with a value above zero).
known_user_likes = data_items.loc[user_index]
known_user_likes = known_user_likes[known_user_likes > 0].index.values

# Construct the neighbourhood from the most similar items to the
# ones our user has already liked.
most_similar_to_likes = data_neighbours.loc[known_user_likes]
similar_list = most_similar_to_likes.values.tolist()
# Flatten the per-artist neighbour lists and drop duplicates.
similar_list = list({item for sublist in similar_list for item in sublist})
neighbourhood = data_matrix.loc[similar_list, similar_list]

# A user vector containing only the neighbourhood items and
# the known user likes.
user_vector = data_items.loc[user_index].loc[similar_list]

# Calculate the score: similarity-weighted sum of the user's values for each
# neighbourhood item, divided by that item's total similarity.
score = neighbourhood.dot(user_vector).div(neighbourhood.sum(axis=1))

# Drop the known likes. errors='ignore' keeps this from failing if a liked
# artist did not end up in the neighbourhood.
score = score.drop(known_user_likes, errors='ignore')

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(known_user_likes)
print(score.nlargest(20))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment