Last active
February 25, 2019 03:45
-
-
Save victorkohler/f48ea6512058719ba52053851fedc745 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random
import sys

import implicit
import numpy as np
import pandas as pd
import scipy.sparse as sparse
from scipy.sparse.linalg import spsolve
from sklearn.preprocessing import MinMaxScaler
# Load the play-count data.
# NOTE(review): read_table treats the first row as a header by default and the
# column names are overwritten below -- confirm the TSV really has a header
# row, otherwise its first record is silently consumed as column labels.
raw_data = pd.read_table('data/usersha1-artmbid-artname-plays.tsv')

# Discard the second column (presumably the artist MBID, judging by the file
# name) and give the remaining three columns readable names.
raw_data = raw_data.drop(raw_data.columns[1], axis=1)
raw_data.columns = ['user', 'artist', 'plays']

# Drop every row containing a NaN value. The copy makes `data` an independent
# frame, so the column assignments below do not act on a view of `raw_data`.
data = raw_data.dropna().copy()

# Create numeric user_id and artist_id columns from the categorical codes.
data['user'] = data['user'].astype("category")
data['artist'] = data['artist'].astype("category")
data['user_id'] = data['user'].cat.codes
data['artist_id'] = data['artist'].cat.codes

# Build two sparse matrices from the same play counts: an item-user matrix
# (rows = artists) used for fitting the model, and a user-item matrix
# (rows = users) used when generating recommendations.
# NOTE(review): which orientation `fit` expects depends on the installed
# version of the implicit library -- confirm against its documentation.
plays = data['plays'].astype(float)
sparse_item_user = sparse.csr_matrix((plays, (data['artist_id'], data['user_id'])))
sparse_user_item = sparse.csr_matrix((plays, (data['user_id'], data['artist_id'])))

# Initialize the ALS model.
model = implicit.als.AlternatingLeastSquares(factors=20, regularization=0.1, iterations=20)

# Calculate the confidence by scaling the play counts by our alpha value.
alpha_val = 15
data_conf = (sparse_item_user * alpha_val).astype('double')

# Fit the model on the confidence-weighted item-user matrix.
model.fit(data_conf)
#---------------------
# FIND SIMILAR ITEMS
#---------------------

# Find the 10 most similar to Jay-Z
item_id = 147068  # Jay-Z
n_similar = 10

# Use implicit to get similar items.
# NOTE(review): the loop below expects an iterable of (artist_id, score)
# pairs; confirm the installed implicit version returns that shape.
similar = model.similar_items(item_id, n_similar)

# artist_id was taken from the categorical codes of data['artist'], so the
# category index maps an id straight back to its name without scanning the
# whole frame for every lookup.
artist_names = data['artist'].cat.categories

# Print the names of our most similar artists
for idx, _ in similar:
    print(artist_names[idx])
#------------------------------
# CREATE USER RECOMMENDATIONS
#------------------------------

# Create recommendations for user with id 2025
user_id = 2025

# Use the implicit recommender.
# NOTE(review): the loop below expects an iterable of (artist_id, score)
# pairs; confirm the installed implicit version returns that shape.
recommended = model.recommend(user_id, sparse_user_item)

# artist_id was taken from the categorical codes of data['artist'], so the
# category index maps an id straight back to its name without scanning the
# whole frame for every lookup.
artist_names = data['artist'].cat.categories

artists = []
scores = []

# Get artist names from ids
for idx, score in recommended:
    artists.append(artist_names[idx])
    scores.append(score)

# Create a dataframe of artist names and scores
recommendations = pd.DataFrame({'artist': artists, 'score': scores})

print(recommendations)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment