This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
from sklearn.metrics.pairwise import cosine_similarity | |
from scipy import sparse | |
#------------------ | |
# LOAD THE DATASET | |
#------------------ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#------------------------ | |
# ITEM-ITEM CALCULATIONS | |
#------------------------ | |
# Normalize every user vector to unit length before computing similarities.
# The magnitude (Euclidean norm) of a vector is sqrt(x1^2 + x2^2 + ...).
magnitude = np.sqrt((data_items ** 2).sum(axis=1))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#------------------------ | |
# USER-ITEM CALCULATIONS | |
#------------------------ | |
#------------------------
# USER-ITEM CALCULATIONS
#------------------------
user = 5985  # The id of the user for whom we want to generate recommendations
user_index = data[data.user == user].index.tolist()[0]  # Get the frame index
# Get the artists the user has liked.
# .loc replaces the deprecated .ix indexer (removed in pandas 1.0); the index
# value comes from data.index, so label-based lookup is the correct semantics.
known_user_likes = data_items.loc[user_index]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#------------------------ | |
# USER-ITEM CALCULATIONS | |
#------------------------ | |
# Construct a new dataframe with the 10 closest neighbours (most similar) | |
# for each artist. | |
#------------------------
# USER-ITEM CALCULATIONS
#------------------------
# Construct a new dataframe with the 10 closest neighbours (most similar)
# for each artist.
data_neighbours = pd.DataFrame(index=data_matrix.columns, columns=range(1, 11))
# range replaces Python-2-only xrange; .iloc replaces the deprecated .ix
# indexer (removed in pandas 1.0) for purely positional access.
for i in range(len(data_matrix.columns)):
    # Column i holds this artist's similarity to every artist; keep the
    # names of the 10 highest-scoring ones (includes the artist itself).
    data_neighbours.iloc[i, :10] = data_matrix.iloc[:, i].sort_values(ascending=False)[:10].index
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
from sklearn.metrics.pairwise import cosine_similarity | |
from scipy import sparse | |
#------------------ | |
# LOAD THE DATASET | |
#------------------ |
We can't make this file beautiful and searchable because it's too large.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
user,a perfect circle,abba,ac/dc,adam green,aerosmith,afi,air,alanis morissette,alexisonfire,alicia keys,all that remains,amon amarth,amy macdonald,amy winehouse,anti-flag,aphex twin,apocalyptica,arcade fire,arch enemy,arctic monkeys,as i lay dying,atb,atreyu,audioslave,avril lavigne,babyshambles,bad religion,beastie boys,beatsteaks,beck,beirut,belle and sebastian,beyonce,billy talent,bjork,black eyed peas,black sabbath,blind guardian,blink-182,bloc party,bloodhound gang,blur,boards of canada,bob dylan,bob marley,bob marley & the wailers,breaking benjamin,bright eyes,bring me the horizon,britney spears,bruce springsteen,bullet for my valentine,caliban,cascada,cat power,children of bodom,chimaira,christina aguilera,clueso,cocorosie,coldplay,crystal castles,cypress hill,daft punk,damien rice,dark tranquillity,david bowie,david guetta,death cab for cutie,deep purple,deftones,deichkind,depeche mode,dido,die apokalyptischen reiter,die toten hosen,digitalism,dimmu borgir,dire straits,disturbed,dream theater,dredg,d |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
import pandas as pd | |
import numpy as np | |
import scipy.sparse as sparse | |
from scipy.sparse.linalg import spsolve | |
from sklearn.preprocessing import MinMaxScaler | |
#------------------------- | |
# LOAD AND PREP THE DATA |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def implicit_als(sparse_data, alpha_val=40, iterations=10, lambda_val=0.1, features=10): | |
""" Implementation of Alternating Least Squares with implicit data. We iteratively | |
compute the user (x_u) and item (y_i) vectors using the following formulas: | |
x_u = ((Y.T*Y + Y.T*(Cu - I) * Y) + lambda*I)^-1 * (X.T * Cu * p(u)) | |
y_i = ((X.T*X + X.T*(Ci - I) * X) + lambda*I)^-1 * (Y.T * Ci * p(i)) | |
Args: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" Continuation of implicit_als function""" | |
# Start main loop. For each iteration we first compute X and then Y | |
for i in xrange(iterations): | |
print 'iteration %d of %d' % (i+1, iterations) | |
# Precompute Y-transpose-Y and X-transpose-X | |
yTy = Y.T.dot(Y) | |
xTx = X.T.dot(X) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
user_vecs, item_vecs = implicit_als(data_sparse, iterations=20, features=20, alpha_val=40) | |
OlderNewer