Skip to content

Instantly share code, notes, and snippets.

@archie
Created January 17, 2012 14:58
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save archie/1626941 to your computer and use it in GitHub Desktop.
Save archie/1626941 to your computer and use it in GitHub Desktop.
Simple Matrix Factorization
#!/usr/bin/python
#
# Created by Albert Au Yeung (2010)
# Updated by Marcus Ljungblad (2012)
#
# An implementation of matrix factorization
#
try:
import numpy
except:
print "This implementation requires the numpy module."
exit(0)
###############################################################################
"""
INPUT:
MatrixToFactorise : a matrix to be factorized, dimension N x M
UsersPreferences : an initial matrix of dimension N x K
MoviesFeatures : an initial matrix of dimension M x K
NumberOfLatentFeatures : the number of latent features
MaxSteps : the maximum number of steps to perform the optimisation
LearningRate : the learning rate
RegularizationConstant : the regularization parameter
OUTPUT:
the final matrices P and Q
"""
def calculate_mean_squared_error_of_estimate(MatrixToFactorise, UsersPreferences, MoviesFeatures,
NumberOfLatentFeatures, RegularizationConstant):
Error = 0
for user in xrange(len(MatrixToFactorise)):
for movie in xrange(len(MatrixToFactorise[user])):
if MatrixToFactorise[user][movie] > 0:
Error = Error + pow(MatrixToFactorise[user][movie] -
numpy.dot(UsersPreferences[user,:],
MoviesFeatures[:,movie]), 2)
for feature in xrange(NumberOfLatentFeatures):
Error = Error + (RegularizationConstant/2) * \
(pow(UsersPreferences[user][feature],2) +
pow(MoviesFeatures[feature][movie], 2))
return Error
def matrix_factorization(MatrixToFactorise, UsersPreferences, MoviesFeatures,
NumberOfLatentFeatures, MaxSteps=5000,
LearningRate=0.0002, RegularizationConstant=0.02):
MoviesFeatures = MoviesFeatures.T
for step in xrange(MaxSteps):
for user in xrange(len(MatrixToFactorise)):
for movie in xrange(len(MatrixToFactorise[user])):
if MatrixToFactorise[user][movie] > 0:
estimatedUserMovieFactors = MatrixToFactorise[user][movie] - \
numpy.dot(UsersPreferences[user,:], MoviesFeatures[:,movie])
for feature in xrange(NumberOfLatentFeatures):
UsersPreferences[user][feature] = UsersPreferences[user][feature] + \
LearningRate * (2 * estimatedUserMovieFactors *
MoviesFeatures[feature][movie] -
RegularizationConstant * UsersPreferences[user][feature])
MoviesFeatures[feature][movie] = MoviesFeatures[feature][movie] + \
LearningRate * (2 * estimatedUserMovieFactors *
UsersPreferences[user][feature] -
RegularizationConstant * MoviesFeatures[feature][movie])
# if approximation is good enough, stop iterating
ApproximationError = calculate_mean_squared_error_of_estimate(MatrixToFactorise,
UsersPreferences,
MoviesFeatures,
NumberOfLatentFeatures,
RegularizationConstant)
if ApproximationError < 0.001:
break
return UsersPreferences, MoviesFeatures.T
def factorize(InputMatrix, NumberOfLatentFeatures):
InputMatrix = numpy.array(InputMatrix)
HeightOfMatrix = len(InputMatrix)
WidthOfMatrix = len(InputMatrix[0])
InitialUserPreferences = numpy.random.rand(HeightOfMatrix, NumberOfLatentFeatures)
InitialMovieFeatures = numpy.random.rand(WidthOfMatrix, NumberOfLatentFeatures)
PredictedUserPreferences, PredictedMovieFeatures = matrix_factorization(InputMatrix,
InitialUserPreferences,
InitialMovieFeatures,
NumberOfLatentFeatures)
PredictedValues = numpy.dot(PredictedUserPreferences, PredictedMovieFeatures.T)
return PredictedValues
def run_example():
InputMatrix = [
[2,4,4,0,1],
[3,5,0,0,1],
[0,4,2,1,0],
[1,0,1,3,3]
]
predictedvalues = factorize(InputMatrix, 5)
print numpy.around(predictedvalues, decimals=2)
if __name__ == "__main__":
run_example()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment