Skip to content

Instantly share code, notes, and snippets.

@victorkohler
Last active February 25, 2019 03:45
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save victorkohler/f48ea6512058719ba52053851fedc745 to your computer and use it in GitHub Desktop.
Save victorkohler/f48ea6512058719ba52053851fedc745 to your computer and use it in GitHub Desktop.
import sys
import pandas as pd
import numpy as np
import scipy.sparse as sparse
from scipy.sparse.linalg import spsolve
import random
from sklearn.preprocessing import MinMaxScaler
import implicit
# Read the tab-separated play-count file. The second column is not
# used, so discard it and name the three remaining columns.
# NOTE(review): read_table infers a header row by default; if the TSV
# has no header line, its first record is consumed as column names
# before they are overwritten below -- confirm against the file.
raw_data = pd.read_table('data/usersha1-artmbid-artname-plays.tsv')
raw_data = raw_data.drop(raw_data.columns[1], axis=1)
raw_data.columns = ['user', 'artist', 'plays']
# Drop every row that contains a NaN, and work on an independent copy
# so the column assignments below do not touch raw_data.
data = raw_data.dropna().copy()
# Turn user and artist into categoricals and expose their integer
# category codes as the numeric user_id / artist_id columns.
for column in ('user', 'artist'):
    data[column] = data[column].astype("category")
    data[column + '_id'] = data[column].cat.codes
# The implicit library expects data as an item-user matrix, so we
# build two matrices from the same play counts: one for fitting the
# model (item-user) and one for recommendations (user-item).
play_counts = data['plays'].astype(float)
artist_ids = data['artist_id']
user_ids = data['user_id']
sparse_item_user = sparse.csr_matrix((play_counts, (artist_ids, user_ids)))
sparse_user_item = sparse.csr_matrix((play_counts, (user_ids, artist_ids)))
# Scale the raw play counts by alpha to get the confidence values
# the model is trained on.
alpha_val = 15
data_conf = (sparse_item_user * alpha_val).astype('double')
# Initialize the ALS model and fit it on the confidence matrix.
model = implicit.als.AlternatingLeastSquares(
    factors=20,
    regularization=0.1,
    iterations=20,
)
model.fit(data_conf)
#---------------------
# FIND SIMILAR ITEMS
#---------------------
# Find the 10 artists most similar to Jay-Z
item_id = 147068 #Jay-Z
n_similar = 10
# Use implicit to get similar items. Each result is unpacked below as
# an (artist_id, score) pair.
similar = model.similar_items(item_id, n_similar)
# artist_id holds the category codes of data.artist, so a code indexes
# straight into the category list -- no need to scan every row of the
# DataFrame for each lookup.
artist_names = data.artist.cat.categories
# Print the names of our most similar artists. print() with a single
# argument behaves the same on Python 2 and Python 3.
for idx, score in similar:
    print(artist_names[idx])
#------------------------------
# CREATE USER RECOMMENDATIONS
#------------------------------
# Create recommendations for user with id 2025
user_id = 2025
# Use the implicit recommender. Each result is unpacked below as an
# (artist_id, score) pair.
recommended = model.recommend(user_id, sparse_user_item)
# artist_id holds the category codes of data.artist, so a code indexes
# straight into the category list -- no need to scan every row of the
# DataFrame for each lookup.
artist_names = data.artist.cat.categories
artists = []
scores = []
# Get artist names from ids
for idx, score in recommended:
    artists.append(artist_names[idx])
    scores.append(score)
# Create a dataframe of artist names and scores
recommendations = pd.DataFrame({'artist': artists, 'score': scores})
# print() with a single argument behaves the same on Python 2 and 3.
print(recommendations)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment