Created
January 17, 2012 14:58
-
-
Save archie/1626941 to your computer and use it in GitHub Desktop.
Simple Matrix Factorization
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# | |
# Created by Albert Au Yeung (2010) | |
# Updated by Marcus Ljungblad (2012) | |
# | |
# An implementation of matrix factorization | |
# | |
try: | |
import numpy | |
except: | |
print "This implementation requires the numpy module." | |
exit(0) | |
############################################################################### | |
""" | |
INPUT: | |
MatrixToFactorise : a matrix to be factorized, dimension N x M | |
UsersPreferences : an initial matrix of dimension N x K | |
MoviesFeatures : an initial matrix of dimension M x K | |
NumberOfLatentFeatures : the number of latent features | |
MaxSteps : the maximum number of steps to perform the optimisation | |
LearningRate : the learning rate | |
RegularizationConstant : the regularization parameter | |
OUTPUT: | |
the final matrices P and Q | |
""" | |
def calculate_mean_squared_error_of_estimate(MatrixToFactorise, UsersPreferences, MoviesFeatures, | |
NumberOfLatentFeatures, RegularizationConstant): | |
Error = 0 | |
for user in xrange(len(MatrixToFactorise)): | |
for movie in xrange(len(MatrixToFactorise[user])): | |
if MatrixToFactorise[user][movie] > 0: | |
Error = Error + pow(MatrixToFactorise[user][movie] - | |
numpy.dot(UsersPreferences[user,:], | |
MoviesFeatures[:,movie]), 2) | |
for feature in xrange(NumberOfLatentFeatures): | |
Error = Error + (RegularizationConstant/2) * \ | |
(pow(UsersPreferences[user][feature],2) + | |
pow(MoviesFeatures[feature][movie], 2)) | |
return Error | |
def matrix_factorization(MatrixToFactorise, UsersPreferences, MoviesFeatures, | |
NumberOfLatentFeatures, MaxSteps=5000, | |
LearningRate=0.0002, RegularizationConstant=0.02): | |
MoviesFeatures = MoviesFeatures.T | |
for step in xrange(MaxSteps): | |
for user in xrange(len(MatrixToFactorise)): | |
for movie in xrange(len(MatrixToFactorise[user])): | |
if MatrixToFactorise[user][movie] > 0: | |
estimatedUserMovieFactors = MatrixToFactorise[user][movie] - \ | |
numpy.dot(UsersPreferences[user,:], MoviesFeatures[:,movie]) | |
for feature in xrange(NumberOfLatentFeatures): | |
UsersPreferences[user][feature] = UsersPreferences[user][feature] + \ | |
LearningRate * (2 * estimatedUserMovieFactors * | |
MoviesFeatures[feature][movie] - | |
RegularizationConstant * UsersPreferences[user][feature]) | |
MoviesFeatures[feature][movie] = MoviesFeatures[feature][movie] + \ | |
LearningRate * (2 * estimatedUserMovieFactors * | |
UsersPreferences[user][feature] - | |
RegularizationConstant * MoviesFeatures[feature][movie]) | |
# if approximation is good enough, stop iterating | |
ApproximationError = calculate_mean_squared_error_of_estimate(MatrixToFactorise, | |
UsersPreferences, | |
MoviesFeatures, | |
NumberOfLatentFeatures, | |
RegularizationConstant) | |
if ApproximationError < 0.001: | |
break | |
return UsersPreferences, MoviesFeatures.T | |
def factorize(InputMatrix, NumberOfLatentFeatures): | |
InputMatrix = numpy.array(InputMatrix) | |
HeightOfMatrix = len(InputMatrix) | |
WidthOfMatrix = len(InputMatrix[0]) | |
InitialUserPreferences = numpy.random.rand(HeightOfMatrix, NumberOfLatentFeatures) | |
InitialMovieFeatures = numpy.random.rand(WidthOfMatrix, NumberOfLatentFeatures) | |
PredictedUserPreferences, PredictedMovieFeatures = matrix_factorization(InputMatrix, | |
InitialUserPreferences, | |
InitialMovieFeatures, | |
NumberOfLatentFeatures) | |
PredictedValues = numpy.dot(PredictedUserPreferences, PredictedMovieFeatures.T) | |
return PredictedValues | |
def run_example(): | |
InputMatrix = [ | |
[2,4,4,0,1], | |
[3,5,0,0,1], | |
[0,4,2,1,0], | |
[1,0,1,3,3] | |
] | |
predictedvalues = factorize(InputMatrix, 5) | |
print numpy.around(predictedvalues, decimals=2) | |
if __name__ == "__main__": | |
run_example() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment