tomokishii/README.md

## README.md

      
    Raw
  

              README.md
            
          
    Collaborative Filtering Tutorial Codes

The original codes comes from "Coursera Machine Learning" by prof. Andrew Ng, the program assignment of week 9.

I implemented this by Python,

1.Numpy + Scipy.Optimize

2.TensorFlow

ex8_cofi.py: Numpy + Scipy.Optimize code
check_costfun.py: support functions for ex8_cofi.py
cofi_tf.py: TensorFlow code


## check_costfun.py
#
#   check_costfun.py
#       date. 11/8/2016
#

import numpy as np

## =============== cofiCostFunc.m part ==================================
def cofi_costfunc(Y, R, shapes, lambda_c, params=None):
    #  [J, grad] = COFICOSTFUNC(params, Y, R, num_users, num_movies, ...
    #   num_features, lambda) returns the cost and gradient for the
    #   collaborative filtering problem.
    # Unfold the U and W matrices from params
    num_users = shapes[0]
    num_movies = shapes[1]
    num_features = shapes[2]
    boundary = num_movies * num_features
    params_c1 = np.copy(params[:boundary])
    params_c2 = np.copy(params[boundary:])
    X = params_c1.reshape((num_movies, num_features))
    Theta = params_c2.reshape((num_users, num_features))

    # You need to return the following values correctly
    Jcost = 0.5 * (((np.dot(X, Theta.T) - Y) * R) ** 2).sum() \
        + 0.5 * lambda_c * ((X.ravel() **2).sum() + (Theta.ravel() **2).sum())

    return Jcost

def cofi_costfunc_grad(Y, R, shapes, lambda_c, params=None):
    #  [J, grad] = COFICOSTFUNC(params, Y, R, num_users, num_movies, ...
    #   num_features, lambda) returns the cost and gradient for the
    #   collaborative filtering problem.
    # Unfold the U and W matrices from params
    num_users = shapes[0]
    num_movies = shapes[1]
    num_features = shapes[2]
    boundary = num_movies * num_features
    params_c1 = np.copy(params[:boundary])
    params_c2 = np.copy(params[boundary:])
    X = params_c1.reshape((num_movies, num_features))
    Theta = params_c2.reshape((num_users, num_features))

    # You need to return the following values correctly
    X_grad = np.dot(((np.dot(X, Theta.T) - Y) * R), Theta) + lambda_c * X
    Tgsub = (np.dot(X, Theta.T) - Y) * R
    Theta_grad = np.dot(Tgsub.T, X) + lambda_c * Theta
    Jgrad = np.concatenate((X_grad.ravel(), Theta_grad.ravel()))

    return Jgrad

def compute_numerical_grad(cost_fn, Y, R, shapes, lambda_c, theta):
    # COMPUTENUMERICALGRADIENT Computes the gradient using "finite differences"
    # and gives us a numerical estimate of the gradient.#
#   check_costfun.py
#       date. 11/8/2016
#

import numpy as np

## =============== cofiCostFunc.m part ==================================
def cofi_costfunc(Y, R, shapes, lambda_c, params=None):
    #  [J, grad] = COFICOSTFUNC(params, Y, R, num_users, num_movies, ...
    #   num_features, lambda) returns the cost and gradient for the
    #   collaborative filtering problem.
    # Unfold the U and W matrices from params
    num_users = shapes[0]
    num_movies = shapes[1]
    num_features = shapes[2]
    boundary = num_movies * num_features
    params_c1 = np.copy(params[:boundary])
    params_c2 = np.copy(params[boundary:])
    X = params_c1.reshape((num_movies, num_features))
    Theta = params_c2.reshape((num_users, num_features))

    # You need to return the following values correctly
    Jcost = 0.5 * (((np.dot(X, Theta.T) - Y) * R) ** 2).sum() \
        + 0.5 * lambda_c * ((X.ravel() **2).sum() + (Theta.ravel() **2).sum())

    return Jcost

def cofi_costfunc_grad(Y, R, shapes, lambda_c, params=None):
    #  [J, grad] = COFICOSTFUNC(params, Y, R, num_users, num_movies, ...
    #   num_features, lambda) returns the cost and gradient for the
    #   collaborative filtering problem.
    # Unfold the U and W matrices from params
    num_users = shapes[0]
    num_movies = shapes[1]
    num_features = shapes[2]
    boundary = num_movies * num_features
    params_c1 = np.copy(params[:boundary])
    params_c2 = np.copy(params[boundary:])
    X = params_c1.reshape((num_movies, num_features))
    Theta = params_c2.reshape((num_users, num_features))

    # You need to return the following values correctly
    X_grad = np.dot(((np.dot(X, Theta.T) - Y) * R), Theta) + lambda_c * X
    Tgsub = (np.dot(X, Theta.T) - Y) * R
    Theta_grad = np.dot(Tgsub.T, X) + lambda_c * Theta
    Jgrad = np.concatenate((X_grad.ravel(), Theta_grad.ravel()))

    return Jgrad

def compute_numerical_grad(cost_fn, Y, R, shapes, lambda_c, theta):
    # COMPUTENUMERICALGRADIENT Computes the gradient using "finite differences"
    # and gives us a numerical estimate of the gradient.
    # numgrad = COMPUTENUMERICALGRADIENT(J, theta) computes the numerical
    # gradient of the function J around theta. Calling y = J(theta) should
    # return the function value at theta.

    # Notes: The following code implements numerical gradient checking, and
    #        returns the numerical gradient.It sets numgrad(i) to (a numerical
    #        approximation of) the partial derivative of J with respect to the
    #        i-th input argument, evaluated at theta. (i.e., numgrad(i) should
    #        be the (approximately) the partial derivative of J with respect
    #       to theta(i).)

    numgrad = np.zeros_like(theta)
    perturb = np.zeros_like(theta)
    eps = 1.e-4

    for p in range(len(theta)):
        # Set perturbation vector
        perturb[p] = eps
        params1 = theta - perturb
        params2 = theta + perturb
        loss1 = cost_fn(Y, R, shapes, lambda_c, params1)
        loss2 = cost_fn(Y, R, shapes, lambda_c, params2)
        # Compute Numerical Gradient
        numgrad[p] = (loss2 - loss1) / (2. * eps)
        perturb[p] = 0.

    return numgrad
#

def check_costfn(lambda_c=0.0):

    # Create small problem
    X_t = np.random.rand(4, 3)
    Theta_t = np.random.rand(5, 3)

    # Zap out most entries
    Y = np.dot(X_t, Theta_t.T)
    Y[np.random.rand(Y.shape[0], Y.shape[1]) > 0.5] = 0.
    R = np.zeros_like(Y)
    R[Y != 0.] = 1.

    # Run Gradient Checking
    X = np.random.randn(4, 3)
    Theta = np.random.randn(5, 3)
    num_users = Y.shape[1]
    num_movies = Y.shape[0]
    num_features = Theta_t.shape[1]
    theta_arg = np.concatenate((X.ravel(), Theta.ravel()))
    shapes = [num_users, num_movies, num_features]

    numgrad = compute_numerical_grad(
                cofi_costfunc, Y, R, shapes, lambda_c, theta_arg
              )

    grad_ret = cofi_costfunc_grad(Y, R, shapes, lambda_c, theta_arg)
    grad = grad_ret.ravel()

    print(' [numgrad : grad] ')
    for i in range(len(numgrad)):
        print(' {0:>10.4f} : {1:>10.4f} '.format(numgrad[i],grad[i]))

    print('The above two columns you get should be very similar.\n', \
         '(Left-Your Numerical Gradient, Right-Analytical Gradient)\n\n')

    diff = np.linalg.norm(numgrad-grad) / np.linalg.norm(numgrad+grad)

    print('If your backpropagation implementation is correct, then \n' \
         'the relative difference will be small (less than 1e-9). \n' \
         '\nRelative Difference: ', diff)
#

    # numgrad = COMPUTENUMERICALGRADIENT(J, theta) computes the numerical
    # gradient of the function J around theta. Calling y = J(theta) should
    # return the function value at theta.

    # Notes: The following code implements numerical gradient checking, and
    #        returns the numerical gradient.It sets numgrad(i) to (a numerical
    #        approximation of) the partial derivative of J with respect to the
    #        i-th input argument, evaluated at theta. (i.e., numgrad(i) should
    #        be the (approximately) the partial derivative of J with respect
    #       to theta(i).)

    numgrad = np.zeros_like(theta)
    perturb = np.zeros_like(theta)
    eps = 1.e-4

    for p in range(len(theta)):
        # Set perturbation vector
        perturb[p] = eps
        params1 = theta - perturb
        params2 = theta + perturb
        loss1 = cost_fn(Y, R, shapes, lambda_c, params1)
        loss2 = cost_fn(Y, R, shapes, lambda_c, params2)
        # Compute Numerical Gradient
        numgrad[p] = (loss2 - loss1) / (2. * eps)
        perturb[p] = 0.

    return numgrad
#

def check_costfn(lambda_c=0.0):

    # Create small problem
    X_t = np.random.rand(4, 3)
    Theta_t = np.random.rand(5, 3)

    # Zap out most entries
    Y = np.dot(X_t, Theta_t.T)
    Y[np.random.rand(Y.shape[0], Y.shape[1]) > 0.5] = 0.
    R = np.zeros_like(Y)
    R[Y != 0.] = 1.

    # Run Gradient Checking
    X = np.random.randn(4, 3)
    Theta = np.random.randn(5, 3)
    num_users = Y.shape[1]
    num_movies = Y.shape[0]
    num_features = Theta_t.shape[1]
    theta_arg = np.concatenate((X.ravel(), Theta.ravel()))
    shapes = [num_users, num_movies, num_features]

    numgrad = compute_numerical_grad(
                cofi_costfunc, Y, R, shapes, lambda_c, theta_arg
              )

    grad_ret = cofi_costfunc_grad(Y, R, shapes, lambda_c, theta_arg)
    grad = grad_ret.ravel()

    print(' [numgrad : grad] ')
    for i in range(len(numgrad)):
        print(' {0:>10.4f} : {1:>10.4f} '.format(numgrad[i],grad[i]))

    print('The above two columns you get should be very similar.\n', \
         '(Left-Your Numerical Gradient, Right-Analytical Gradient)\n\n')

    diff = np.linalg.norm(numgrad-grad) / np.linalg.norm(numgrad+grad)

    print('If your backpropagation implementation is correct, then \n' \
         'the relative difference will be small (less than 1e-9). \n' \
         '\nRelative Difference: ', diff)
#

## cofi_tf.py
#
#   -*- coding: utf-8 -*-

#   cofi_tf.py
#        date. 11/15/2016
#

import os
import numpy as np
import scipy.io as sio
import tensorflow as tf

WARM_START = False

def load_data(fn='./scipyopt/data/ex8_movies.mat'):
    if os.path.exists(fn):
        data = sio.loadmat(fn)
    else:
        print('File not extits.')
        data = None

    return data
#

def load_movielist(fn='./scipyopt/data/movie_ids.txt'):
    mlist = []
    sep = ' '
    with open(fn, 'rt', encoding="utf-8", errors="surrogateescape") as fp:
        while True:
            try:
                line = fp.readline()
                line = line.rstrip('\n')
            except:
                bytes = fp.read().encode('utf-8', 'replace')
                line = bytes.decode('utf-8')
            if not line:
                break

            words = line.split(' ')
            id = int(words[0])
            title = sep.join(words[1:-1])

            mlist.append((id, title))
            lastline = line

    mlist_len = len(mlist)

    print('\n\n {} movies data is loaded.'.format(mlist_len))
    print(' ******** example ********')
    if mlist_len >= 5:
        for i in range(5):
            print('    {0} : {1} '.format(i, mlist[i]))
    print(' *************************')

    return mlist
#


def Ypred(X, Theta):
    '''
      calculate rating with parameter X and Theta
      args.:
        X:      movie contents parameter
        Theta:  user characteristic parameter
    '''
    feat_dim1 = tf.shape(X)[1]
    feat_dim2 = tf.shape(Theta)[1]

    tf.assert_equal(feat_dim1, feat_dim2)
    rating = tf.matmul(X, tf.transpose(Theta))

    return rating
#

if __name__ == '__main__':
    # Data load
    data = load_data()
    Y_np = data['Y']       # rating matrix Y
    R_np = data['R']       # rating matrix R
    num_users = Y_np.shape[1]
    num_movies = Y_np.shape[0]
    num_features = 10

    # Parameter definition
    if WARM_START:
        data_param = load_data(fn='./scipyopt/data/ex8_movieParams.mat')
        X_np = data_param['X'].astype(np.float32)
        Theta_np = data_param['Theta'].astype(np.float32)
        x = tf.Variable(X_np)
        theta = tf.Variable(Theta_np)
    else:   # COLD START
        x = tf.Variable(tf.random_normal([num_movies, num_features],
            mean=0.0, stddev=0.05))
        theta = tf.Variable(tf.random_normal([num_users, num_features],
            mean=0.0, stddev=0.05))

    Y_ph = tf.placeholder(tf.float32, shape=[None, num_users])
    R_ph = tf.placeholder(tf.float32, shape=[None, num_users])

    # Cost Function, etc.
    cost = tf.reduce_sum(((Ypred(x, theta) - Y_ph) * R_ph) ** 2)
    L2_sqr = tf.nn.l2_loss(x) + tf.nn.l2_loss(theta)
    lambda_c = 1.0      # L2 norm coefficient
    loss = cost + lambda_c * L2_sqr

    lr = 1.e-5
    train_step = tf.train.GradientDescentOptimizer(lr).minimize(loss)
    init_op = tf.initialize_all_variables()

    # Train
    with tf.Session() as sess:
        sess.run(init_op)

        for i in range(10001):
            sess.run(train_step, feed_dict={Y_ph: Y_np, R_ph: Y_np})

            if i % 1000 == 0:
                loss_mon = loss.eval({Y_ph: Y_np, R_ph: Y_np})
                print(' step, loss = {:6d}: {:10.1f}'.format(i, loss_mon))

        # evaluate ranking with final parameters
        ymat = Ypred(x, theta).eval()
        sio.savemat('./ymat_tf.mat', {'Y': ymat})
        print('ymat is saved to "ymat_tf.mat".')


## ex8_cofi.py
## Machine Learning Online Class
#
#   ex8_cofi.py     date. 11/8/2016
#
#  Exercise 8 | Anomaly Detection and Collaborative Filtering
#

import numpy as np
import scipy.io as sio
import scipy.optimize as spopt
from check_costfun import check_costfn
from check_costfun import cofi_costfunc, cofi_costfunc_grad

def load_movielist(fn='../data/movie_ids.txt'):
    mlist = []
    sep = ' '
    with open(fn, 'rt', encoding="utf-8", errors="surrogateescape") as fp:
        while True:
            try:
                line = fp.readline()
                line = line.rstrip('\n')
            except:
                bytes = fp.read().encode('utf-8', 'replace')
                line = bytes.decode('utf-8')
            if not line:
                break

            words = line.split(' ')
            id = int(words[0])
            title = sep.join(words[1:-1])

            mlist.append((id, title))
            lastline = line

    mlist_len = len(mlist)

    print('\n\n {} movies data is loaded.'.format(mlist_len))
    print(' ******** example ********')
    if mlist_len >= 5:
        for i in range(5):
            print('    {0} : {1} '.format(i, mlist[i]))
    print(' *************************')

    return mlist
#


## =============== Part 1: Loading movie ratings dataset ================
#  You will start by loading the movie ratings dataset to understand the
#  structure of the data.
#
print('Loading movie ratings dataset.\n\n')

#  Load data
dataset = sio.loadmat('../data/ex8_movies.mat')
Y = dataset['Y']
R = dataset['R']


#  Y is a 1682x943 matrix, containing ratings (1-5) of 1682 movies on
#  943 users
#
#  R is a 1682x943 matrix, where R(i,j) = 1 if and only if user j gave a
#  rating to movie i

#  From the matrix, we can compute statistics like average rating.
print('Average rating for movie 1 (Toy Story): {:.3f} / 5 \n\n'.format(
        np.mean(Y[0, R[0, :]])))


## ============ Part 2: Collaborative Filtering Cost Function ===========
#  You will now implement the cost function for collaborative filtering.
#  To help you debug your cost function, we have included set of weights
#  that we trained on that. Specifically, you should complete the code in
#  cofiCostFunc.m to return J.
#  Load pre-trained weights (X, Theta, num_users, num_movies, num_features)

dataset = sio.loadmat('../data/ex8_movieParams.mat')
X = dataset['X']
Theta = dataset['Theta']

#  Reduce the data set size so that this runs faster
num_users = 4
num_movies = 5
num_features = 3
X = X[:num_movies, :num_features]
Theta = Theta[:num_users, :num_features]
Y = Y[:num_movies, :num_users]
R = R[:num_movies, :num_users]
shapes = [num_users, num_movies, num_features]
param_flat = np.concatenate((X.ravel(), Theta.ravel()))

#  Evaluate cost function
J = cofi_costfunc(Y, R, shapes, 0, param_flat)

print('Cost at loaded parameters: {:.4f} '.format(J))
print('(this value should be about 22.22)\n')


## ============== Part 3: Collaborative Filtering Gradient ==============
#  Once your cost function matches up with ours, you should now implement
#  the collaborative filtering gradient function. Specifically, you should
#  complete the code in cofiCostFunc.m to return the grad argument.
#
print('\nChecking Gradients (without regularization) ... \n')

#  Check gradients by running checkNNGradients
# check_costfn()


## ========= Part 4: Collaborative Filtering Cost Regularization ========
#  Now, you should implement regularization for the cost function for
#  collaborative filtering. You can implement it by adding the cost of
#  regularization to the original cost computation.
#

#  Evaluate cost function
J = cofi_costfunc(Y, R, shapes, 1.5, param_flat)
print('Cost at loaded parameters (lambda = 1.5): {:.4f} '.format(J))
print('(this value should be about 31.34)\n')


## ======= Part 5: Collaborative Filtering Gradient Regularization ======
#  Once your cost matches up with ours, you should proceed to implement
#  regularization for the gradient.
#

#
print('\nChecking Gradients (with regularization) ... \n')

#  Check gradients by running checkNNGradients
check_costfn(1.5)


## ============== Part 6: Entering ratings for a new user ===============
#  Before we will train the collaborative filtering model, we will first
#  add ratings that correspond to a new user that we just observed. This
#  part of the code will also allow you to put in your own ratings for the
#  movies in our dataset!
#
movieList = load_movielist()


#  Initialize my ratings
my_ratings = np.zeros((1682, 1))

# Check the file movie_idx.txt for id of each movie in our dataset
# For example, Toy Story (1995) has ID 1, so to rate it "4", you can set
my_ratings[0] = 4

# Or suppose did not enjoy Silence of the Lambs (1991), you can set
my_ratings[97] = 2

# We have selected a few movies we liked / did not like and the ratings we
# gave are as follows:
my_ratings[6] = 3
my_ratings[11] = 5
my_ratings[53] = 4
my_ratings[63]= 5
my_ratings[65]= 3
my_ratings[68] = 5
my_ratings[182] = 4
my_ratings[225] = 5
my_ratings[354]= 5

print('\n\nNew user ratings:\n')
for i in range(len(my_ratings)):
    if my_ratings[i] > 0:
        print('Rated {0} for {1}'.format(my_ratings[i], movieList[i]))


## ================== Part 7: Learning Movie Ratings ====================
#  Now, you will train the collaborative filtering model on a movie rating
#  dataset of 1682 movies and 943 users
#

print('\nTraining collaborative filtering...\n')

#  Re-Load data
dataset = sio.loadmat('../data/ex8_movies.mat')
Y = dataset['Y']
R = dataset['R']

#  Y is a 1682x943 matrix, containing ratings (1-5) of 1682 movies by
#  943 users
#
#  R is a 1682x943 matrix, where R(i,j) = 1 if and only if user j gave a
#  rating to movie i

#  Add our own ratings to the data matrix
Y = np.hstack([my_ratings, Y])
R1 = my_ratings.copy()
R1[R1 != 0] = 1
R = np.hstack([R1, R])

#  Normalize Ratings
m, n = Y.shape
Ymean = np.zeros((m, 1))
Ynormed = np.zeros_like(Y)
for i in range(m):
    idx = [j for j in range(R.shape[1]) if R[i, j] != 0]
    Ymean[i] = np.mean(Y[i, idx])
    Ynormed[i, idx] = Y[i, idx] - Ymean[i]


#  Useful Values
num_users = Y.shape[1]
num_movies = Y.shape[0]
num_features = 10
shapes = [num_users, num_movies, num_features]

# Set Initial Parameters (Theta, X)
X = np.random.randn(num_movies, num_features)
Theta = np.random.randn(num_users, num_features)

theta_ini = np.concatenate((X.flatten(), Theta.flatten()))

def compute_cost_sp(theta):
    global Y, R, shapes, lambda_c
    j =  cofi_costfunc(Y, R, shapes, lambda_c, params=theta)

    return j

def compute_grad_sp(theta):
    global Y, R, shapes, lambda_c
    j_grad = cofi_costfunc_grad(Y, R, shapes, lambda_c, params=theta)

    return j_grad


# Set options for Scipy Optimize minimize
options = {'gtol': 1.e-6, 'disp': True}
lambda_c = 1.5

res1 = spopt.minimize(compute_cost_sp, theta_ini, method='CG', \
         jac=compute_grad_sp, options=options)
theta =  res1.x

# Unfold the returned theta back into U and W
X = theta[:num_movies*num_features].reshape((num_movies, num_features))
Theta = theta[num_movies*num_features:].reshape((num_users, num_features))

print('Recommender system learning completed.\n')


## ================== Part 8: Recommendation for you ====================
#  After training the model, you can now make recommendations by computing
#  the predictions matrix.
#

p = np.dot(X, Theta.T)
p_orig = p[:, 1:]
sio.savemat('../data/pmat.mat', {'Y': p_orig})

my_predictions = p[:, 0].ravel() + Ymean.ravel()

movieList = load_movielist()
movieList = [item[1] for item in movieList]

r = np.sort(my_predictions)
ix = np.argsort(my_predictions)
r = r[::-1]
ix = ix[::-1]

print('\nTop recommendations for you: ')
for i in range(10):
    j = ix[i]
    print('Predicting rating {0:10.4f} for movie {1:s}'.format( \
                my_predictions[j], movieList[j]))


print('\nOriginal ratings provided: ')
for i in range(len(my_ratings)):
    if my_ratings[i] > 0:
        print('Rated {0:10.2f} for {1:s}'.format(
                float(my_ratings[i]), movieList[i]))
	#
	# check_costfun.py
	# date. 11/8/2016
	#

	import numpy as np

	## =============== cofiCostFunc.m part ==================================
	def cofi_costfunc(Y, R, shapes, lambda_c, params=None):
	# [J, grad] = COFICOSTFUNC(params, Y, R, num_users, num_movies, ...
	# num_features, lambda) returns the cost and gradient for the
	# collaborative filtering problem.
	# Unfold the U and W matrices from params
	num_users = shapes[0]
	num_movies = shapes[1]
	num_features = shapes[2]
	boundary = num_movies * num_features
	params_c1 = np.copy(params[:boundary])
	params_c2 = np.copy(params[boundary:])
	X = params_c1.reshape((num_movies, num_features))
	Theta = params_c2.reshape((num_users, num_features))

	# You need to return the following values correctly
	Jcost = 0.5 * (((np.dot(X, Theta.T) - Y) * R) ** 2).sum() \
	+ 0.5 * lambda_c * ((X.ravel() 2).sum() + (Theta.ravel() 2).sum())

	return Jcost

	def cofi_costfunc_grad(Y, R, shapes, lambda_c, params=None):
	# [J, grad] = COFICOSTFUNC(params, Y, R, num_users, num_movies, ...
	# num_features, lambda) returns the cost and gradient for the
	# collaborative filtering problem.
	# Unfold the U and W matrices from params
	num_users = shapes[0]
	num_movies = shapes[1]
	num_features = shapes[2]
	boundary = num_movies * num_features
	params_c1 = np.copy(params[:boundary])
	params_c2 = np.copy(params[boundary:])
	X = params_c1.reshape((num_movies, num_features))
	Theta = params_c2.reshape((num_users, num_features))

	# You need to return the following values correctly
	X_grad = np.dot(((np.dot(X, Theta.T) - Y) * R), Theta) + lambda_c * X
	Tgsub = (np.dot(X, Theta.T) - Y) * R
	Theta_grad = np.dot(Tgsub.T, X) + lambda_c * Theta
	Jgrad = np.concatenate((X_grad.ravel(), Theta_grad.ravel()))

	return Jgrad

	def compute_numerical_grad(cost_fn, Y, R, shapes, lambda_c, theta):
	# COMPUTENUMERICALGRADIENT Computes the gradient using "finite differences"
	# and gives us a numerical estimate of the gradient.#
	# check_costfun.py
	# date. 11/8/2016
	#

	import numpy as np

	## =============== cofiCostFunc.m part ==================================
	def cofi_costfunc(Y, R, shapes, lambda_c, params=None):
	# [J, grad] = COFICOSTFUNC(params, Y, R, num_users, num_movies, ...
	# num_features, lambda) returns the cost and gradient for the
	# collaborative filtering problem.
	# Unfold the U and W matrices from params
	num_users = shapes[0]
	num_movies = shapes[1]
	num_features = shapes[2]
	boundary = num_movies * num_features
	params_c1 = np.copy(params[:boundary])
	params_c2 = np.copy(params[boundary:])
	X = params_c1.reshape((num_movies, num_features))
	Theta = params_c2.reshape((num_users, num_features))

	# You need to return the following values correctly
	Jcost = 0.5 * (((np.dot(X, Theta.T) - Y) * R) ** 2).sum() \
	+ 0.5 * lambda_c * ((X.ravel() 2).sum() + (Theta.ravel() 2).sum())

	return Jcost

	def cofi_costfunc_grad(Y, R, shapes, lambda_c, params=None):
	# [J, grad] = COFICOSTFUNC(params, Y, R, num_users, num_movies, ...
	# num_features, lambda) returns the cost and gradient for the
	# collaborative filtering problem.
	# Unfold the U and W matrices from params
	num_users = shapes[0]
	num_movies = shapes[1]
	num_features = shapes[2]
	boundary = num_movies * num_features
	params_c1 = np.copy(params[:boundary])
	params_c2 = np.copy(params[boundary:])
	X = params_c1.reshape((num_movies, num_features))
	Theta = params_c2.reshape((num_users, num_features))

	# You need to return the following values correctly
	X_grad = np.dot(((np.dot(X, Theta.T) - Y) * R), Theta) + lambda_c * X
	Tgsub = (np.dot(X, Theta.T) - Y) * R
	Theta_grad = np.dot(Tgsub.T, X) + lambda_c * Theta
	Jgrad = np.concatenate((X_grad.ravel(), Theta_grad.ravel()))

	return Jgrad

	def compute_numerical_grad(cost_fn, Y, R, shapes, lambda_c, theta):
	# COMPUTENUMERICALGRADIENT Computes the gradient using "finite differences"
	# and gives us a numerical estimate of the gradient.
	# numgrad = COMPUTENUMERICALGRADIENT(J, theta) computes the numerical
	# gradient of the function J around theta. Calling y = J(theta) should
	# return the function value at theta.

	# Notes: The following code implements numerical gradient checking, and
	# returns the numerical gradient.It sets numgrad(i) to (a numerical
	# approximation of) the partial derivative of J with respect to the
	# i-th input argument, evaluated at theta. (i.e., numgrad(i) should
	# be the (approximately) the partial derivative of J with respect
	# to theta(i).)

	numgrad = np.zeros_like(theta)
	perturb = np.zeros_like(theta)
	eps = 1.e-4

	for p in range(len(theta)):
	# Set perturbation vector
	perturb[p] = eps
	params1 = theta - perturb
	params2 = theta + perturb
	loss1 = cost_fn(Y, R, shapes, lambda_c, params1)
	loss2 = cost_fn(Y, R, shapes, lambda_c, params2)
	# Compute Numerical Gradient
	numgrad[p] = (loss2 - loss1) / (2. * eps)
	perturb[p] = 0.

	return numgrad
	#

	def check_costfn(lambda_c=0.0):

	# Create small problem
	X_t = np.random.rand(4, 3)
	Theta_t = np.random.rand(5, 3)

	# Zap out most entries
	Y = np.dot(X_t, Theta_t.T)
	Y[np.random.rand(Y.shape[0], Y.shape[1]) > 0.5] = 0.
	R = np.zeros_like(Y)
	R[Y != 0.] = 1.

	# Run Gradient Checking
	X = np.random.randn(4, 3)
	Theta = np.random.randn(5, 3)
	num_users = Y.shape[1]
	num_movies = Y.shape[0]
	num_features = Theta_t.shape[1]
	theta_arg = np.concatenate((X.ravel(), Theta.ravel()))
	shapes = [num_users, num_movies, num_features]

	numgrad = compute_numerical_grad(
	cofi_costfunc, Y, R, shapes, lambda_c, theta_arg
	)

	grad_ret = cofi_costfunc_grad(Y, R, shapes, lambda_c, theta_arg)
	grad = grad_ret.ravel()

	print(' [numgrad : grad] ')
	for i in range(len(numgrad)):
	print(' {0:>10.4f} : {1:>10.4f} '.format(numgrad[i],grad[i]))

	print('The above two columns you get should be very similar.\n', \
	'(Left-Your Numerical Gradient, Right-Analytical Gradient)\n\n')

	diff = np.linalg.norm(numgrad-grad) / np.linalg.norm(numgrad+grad)

	print('If your backpropagation implementation is correct, then \n' \
	'the relative difference will be small (less than 1e-9). \n' \
	'\nRelative Difference: ', diff)
	#

	# numgrad = COMPUTENUMERICALGRADIENT(J, theta) computes the numerical
	# gradient of the function J around theta. Calling y = J(theta) should
	# return the function value at theta.

	# Notes: The following code implements numerical gradient checking, and
	# returns the numerical gradient.It sets numgrad(i) to (a numerical
	# approximation of) the partial derivative of J with respect to the
	# i-th input argument, evaluated at theta. (i.e., numgrad(i) should
	# be the (approximately) the partial derivative of J with respect
	# to theta(i).)

	numgrad = np.zeros_like(theta)
	perturb = np.zeros_like(theta)
	eps = 1.e-4

	for p in range(len(theta)):
	# Set perturbation vector
	perturb[p] = eps
	params1 = theta - perturb
	params2 = theta + perturb
	loss1 = cost_fn(Y, R, shapes, lambda_c, params1)
	loss2 = cost_fn(Y, R, shapes, lambda_c, params2)
	# Compute Numerical Gradient
	numgrad[p] = (loss2 - loss1) / (2. * eps)
	perturb[p] = 0.

	return numgrad
	#

	def check_costfn(lambda_c=0.0):

	# Create small problem
	X_t = np.random.rand(4, 3)
	Theta_t = np.random.rand(5, 3)

	# Zap out most entries
	Y = np.dot(X_t, Theta_t.T)
	Y[np.random.rand(Y.shape[0], Y.shape[1]) > 0.5] = 0.
	R = np.zeros_like(Y)
	R[Y != 0.] = 1.

	# Run Gradient Checking
	X = np.random.randn(4, 3)
	Theta = np.random.randn(5, 3)
	num_users = Y.shape[1]
	num_movies = Y.shape[0]
	num_features = Theta_t.shape[1]
	theta_arg = np.concatenate((X.ravel(), Theta.ravel()))
	shapes = [num_users, num_movies, num_features]

	numgrad = compute_numerical_grad(
	cofi_costfunc, Y, R, shapes, lambda_c, theta_arg
	)

	grad_ret = cofi_costfunc_grad(Y, R, shapes, lambda_c, theta_arg)
	grad = grad_ret.ravel()

	print(' [numgrad : grad] ')
	for i in range(len(numgrad)):
	print(' {0:>10.4f} : {1:>10.4f} '.format(numgrad[i],grad[i]))

	print('The above two columns you get should be very similar.\n', \
	'(Left-Your Numerical Gradient, Right-Analytical Gradient)\n\n')

	diff = np.linalg.norm(numgrad-grad) / np.linalg.norm(numgrad+grad)

	print('If your backpropagation implementation is correct, then \n' \
	'the relative difference will be small (less than 1e-9). \n' \
	'\nRelative Difference: ', diff)
	#
	#
	# -- coding: utf-8 --

	# cofi_tf.py
	# date. 11/15/2016
	#

	import os
	import numpy as np
	import scipy.io as sio
	import tensorflow as tf

	WARM_START = False

	def load_data(fn='./scipyopt/data/ex8_movies.mat'):
	if os.path.exists(fn):
	data = sio.loadmat(fn)
	else:
	print('File not extits.')
	data = None

	return data
	#

	def load_movielist(fn='./scipyopt/data/movie_ids.txt'):
	mlist = []
	sep = ' '
	with open(fn, 'rt', encoding="utf-8", errors="surrogateescape") as fp:
	while True:
	try:
	line = fp.readline()
	line = line.rstrip('\n')
	except:
	bytes = fp.read().encode('utf-8', 'replace')
	line = bytes.decode('utf-8')
	if not line:
	break

	words = line.split(' ')
	id = int(words[0])
	title = sep.join(words[1:-1])

	mlist.append((id, title))
	lastline = line

	mlist_len = len(mlist)

	print('\n\n {} movies data is loaded.'.format(mlist_len))
	print(' ****** example ******')
	if mlist_len >= 5:
	for i in range(5):
	print(' {0} : {1} '.format(i, mlist[i]))
	print(' *************************')

	return mlist
	#


	def Ypred(X, Theta):
	'''
	calculate rating with parameter X and Theta
	args.:
	X: movie contents parameter
	Theta: user characteristic parameter
	'''
	feat_dim1 = tf.shape(X)[1]
	feat_dim2 = tf.shape(Theta)[1]

	tf.assert_equal(feat_dim1, feat_dim2)
	rating = tf.matmul(X, tf.transpose(Theta))

	return rating
	#

	if __name__ == '__main__':
	# Data load
	data = load_data()
	Y_np = data['Y'] # rating matrix Y
	R_np = data['R'] # rating matrix R
	num_users = Y_np.shape[1]
	num_movies = Y_np.shape[0]
	num_features = 10

	# Parameter definition
	if WARM_START:
	data_param = load_data(fn='./scipyopt/data/ex8_movieParams.mat')
	X_np = data_param['X'].astype(np.float32)
	Theta_np = data_param['Theta'].astype(np.float32)
	x = tf.Variable(X_np)
	theta = tf.Variable(Theta_np)
	else: # COLD START
	x = tf.Variable(tf.random_normal([num_movies, num_features],
	mean=0.0, stddev=0.05))
	theta = tf.Variable(tf.random_normal([num_users, num_features],
	mean=0.0, stddev=0.05))

	Y_ph = tf.placeholder(tf.float32, shape=[None, num_users])
	R_ph = tf.placeholder(tf.float32, shape=[None, num_users])

	# Cost Function, etc.
	cost = tf.reduce_sum(((Ypred(x, theta) - Y_ph) * R_ph) ** 2)
	L2_sqr = tf.nn.l2_loss(x) + tf.nn.l2_loss(theta)
	lambda_c = 1.0 # L2 norm coefficient
	loss = cost + lambda_c * L2_sqr

	lr = 1.e-5
	train_step = tf.train.GradientDescentOptimizer(lr).minimize(loss)
	init_op = tf.initialize_all_variables()

	# Train
	with tf.Session() as sess:
	sess.run(init_op)

	for i in range(10001):
	sess.run(train_step, feed_dict={Y_ph: Y_np, R_ph: Y_np})

	if i % 1000 == 0:
	loss_mon = loss.eval({Y_ph: Y_np, R_ph: Y_np})
	print(' step, loss = {:6d}: {:10.1f}'.format(i, loss_mon))

	# evaluate ranking with final parameters
	ymat = Ypred(x, theta).eval()
	sio.savemat('./ymat_tf.mat', {'Y': ymat})
	print('ymat is saved to "ymat_tf.mat".')
	## Machine Learning Online Class
	#
	# ex8_cofi.py date. 11/8/2016
	#
	# Exercise 8 \| Anomaly Detection and Collaborative Filtering
	#

	import numpy as np
	import scipy.io as sio
	import scipy.optimize as spopt
	from check_costfun import check_costfn
	from check_costfun import cofi_costfunc, cofi_costfunc_grad

	def load_movielist(fn='../data/movie_ids.txt'):
	mlist = []
	sep = ' '
	with open(fn, 'rt', encoding="utf-8", errors="surrogateescape") as fp:
	while True:
	try:
	line = fp.readline()
	line = line.rstrip('\n')
	except:
	bytes = fp.read().encode('utf-8', 'replace')
	line = bytes.decode('utf-8')
	if not line:
	break

	words = line.split(' ')
	id = int(words[0])
	title = sep.join(words[1:-1])

	mlist.append((id, title))
	lastline = line

	mlist_len = len(mlist)

	print('\n\n {} movies data is loaded.'.format(mlist_len))
	print(' ****** example ******')
	if mlist_len >= 5:
	for i in range(5):
	print(' {0} : {1} '.format(i, mlist[i]))
	print(' *************************')

	return mlist
	#


	## =============== Part 1: Loading movie ratings dataset ================
	# You will start by loading the movie ratings dataset to understand the
	# structure of the data.
	#
	print('Loading movie ratings dataset.\n\n')

	# Load data
	dataset = sio.loadmat('../data/ex8_movies.mat')
	Y = dataset['Y']
	R = dataset['R']


	# Y is a 1682x943 matrix, containing ratings (1-5) of 1682 movies on
	# 943 users
	#
	# R is a 1682x943 matrix, where R(i,j) = 1 if and only if user j gave a
	# rating to movie i

	# From the matrix, we can compute statistics like average rating.
	print('Average rating for movie 1 (Toy Story): {:.3f} / 5 \n\n'.format(
	np.mean(Y[0, R[0, :]])))



	## ============ Part 2: Collaborative Filtering Cost Function ===========
	# You will now implement the cost function for collaborative filtering.
	# To help you debug your cost function, we have included set of weights
	# that we trained on that. Specifically, you should complete the code in
	# cofiCostFunc.m to return J.
	# Load pre-trained weights (X, Theta, num_users, num_movies, num_features)

	dataset = sio.loadmat('../data/ex8_movieParams.mat')
	X = dataset['X']
	Theta = dataset['Theta']

	# Reduce the data set size so that this runs faster
	num_users = 4
	num_movies = 5
	num_features = 3
	X = X[:num_movies, :num_features]
	Theta = Theta[:num_users, :num_features]
	Y = Y[:num_movies, :num_users]
	R = R[:num_movies, :num_users]
	shapes = [num_users, num_movies, num_features]
	param_flat = np.concatenate((X.ravel(), Theta.ravel()))

	# Evaluate cost function
	J = cofi_costfunc(Y, R, shapes, 0, param_flat)

	print('Cost at loaded parameters: {:.4f} '.format(J))
	print('(this value should be about 22.22)\n')



	## ============== Part 3: Collaborative Filtering Gradient ==============
	# Once your cost function matches up with ours, you should now implement
	# the collaborative filtering gradient function. Specifically, you should
	# complete the code in cofiCostFunc.m to return the grad argument.
	#
	print('\nChecking Gradients (without regularization) ... \n')

	# Check gradients by running checkNNGradients
	# check_costfn()


	## ========= Part 4: Collaborative Filtering Cost Regularization ========
	# Now, you should implement regularization for the cost function for
	# collaborative filtering. You can implement it by adding the cost of
	# regularization to the original cost computation.
	#

	# Evaluate cost function
	J = cofi_costfunc(Y, R, shapes, 1.5, param_flat)
	print('Cost at loaded parameters (lambda = 1.5): {:.4f} '.format(J))
	print('(this value should be about 31.34)\n')


	## ======= Part 5: Collaborative Filtering Gradient Regularization ======
	# Once your cost matches up with ours, you should proceed to implement
	# regularization for the gradient.
	#

	#
	print('\nChecking Gradients (with regularization) ... \n')

	# Check gradients by running checkNNGradients
	check_costfn(1.5)


	## ============== Part 6: Entering ratings for a new user ===============
	# Before we will train the collaborative filtering model, we will first
	# add ratings that correspond to a new user that we just observed. This
	# part of the code will also allow you to put in your own ratings for the
	# movies in our dataset!
	#
	movieList = load_movielist()


	# Initialize my ratings
	my_ratings = np.zeros((1682, 1))

	# Check the file movie_idx.txt for id of each movie in our dataset
	# For example, Toy Story (1995) has ID 1, so to rate it "4", you can set
	my_ratings[0] = 4

	# Or suppose did not enjoy Silence of the Lambs (1991), you can set
	my_ratings[97] = 2

	# We have selected a few movies we liked / did not like and the ratings we
	# gave are as follows:
	my_ratings[6] = 3
	my_ratings[11] = 5
	my_ratings[53] = 4
	my_ratings[63]= 5
	my_ratings[65]= 3
	my_ratings[68] = 5
	my_ratings[182] = 4
	my_ratings[225] = 5
	my_ratings[354]= 5

	print('\n\nNew user ratings:\n')
	for i in range(len(my_ratings)):
	if my_ratings[i] > 0:
	print('Rated {0} for {1}'.format(my_ratings[i], movieList[i]))


	## ================== Part 7: Learning Movie Ratings ====================
	# Now, you will train the collaborative filtering model on a movie rating
	# dataset of 1682 movies and 943 users
	#

	print('\nTraining collaborative filtering...\n')

	# Re-Load data
	dataset = sio.loadmat('../data/ex8_movies.mat')
	Y = dataset['Y']
	R = dataset['R']

	# Y is a 1682x943 matrix, containing ratings (1-5) of 1682 movies by
	# 943 users
	#
	# R is a 1682x943 matrix, where R(i,j) = 1 if and only if user j gave a
	# rating to movie i

	# Add our own ratings to the data matrix
	Y = np.hstack([my_ratings, Y])
	R1 = my_ratings.copy()
	R1[R1 != 0] = 1
	R = np.hstack([R1, R])

	# Normalize Ratings
	m, n = Y.shape
	Ymean = np.zeros((m, 1))
	Ynormed = np.zeros_like(Y)
	for i in range(m):
	idx = [j for j in range(R.shape[1]) if R[i, j] != 0]
	Ymean[i] = np.mean(Y[i, idx])
	Ynormed[i, idx] = Y[i, idx] - Ymean[i]


	# Useful Values
	num_users = Y.shape[1]
	num_movies = Y.shape[0]
	num_features = 10
	shapes = [num_users, num_movies, num_features]

	# Set Initial Parameters (Theta, X)
	X = np.random.randn(num_movies, num_features)
	Theta = np.random.randn(num_users, num_features)

	theta_ini = np.concatenate((X.flatten(), Theta.flatten()))

	def compute_cost_sp(theta):
	global Y, R, shapes, lambda_c
	j = cofi_costfunc(Y, R, shapes, lambda_c, params=theta)

	return j

	def compute_grad_sp(theta):
	global Y, R, shapes, lambda_c
	j_grad = cofi_costfunc_grad(Y, R, shapes, lambda_c, params=theta)

	return j_grad


	# Set options for Scipy Optimize minimize
	options = {'gtol': 1.e-6, 'disp': True}
	lambda_c = 1.5

	res1 = spopt.minimize(compute_cost_sp, theta_ini, method='CG', \
	jac=compute_grad_sp, options=options)
	theta = res1.x

	# Unfold the returned theta back into U and W
	X = theta[:num_movies*num_features].reshape((num_movies, num_features))
	Theta = theta[num_movies*num_features:].reshape((num_users, num_features))

	print('Recommender system learning completed.\n')


	## ================== Part 8: Recommendation for you ====================
	# After training the model, you can now make recommendations by computing
	# the predictions matrix.
	#

	p = np.dot(X, Theta.T)
	p_orig = p[:, 1:]
	sio.savemat('../data/pmat.mat', {'Y': p_orig})

	my_predictions = p[:, 0].ravel() + Ymean.ravel()

	movieList = load_movielist()
	movieList = [item[1] for item in movieList]

	r = np.sort(my_predictions)
	ix = np.argsort(my_predictions)
	r = r[::-1]
	ix = ix[::-1]

	print('\nTop recommendations for you: ')
	for i in range(10):
	j = ix[i]
	print('Predicting rating {0:10.4f} for movie {1:s}'.format( \
	my_predictions[j], movieList[j]))


	print('\nOriginal ratings provided: ')
	for i in range(len(my_ratings)):
	if my_ratings[i] > 0:
	print('Rated {0:10.2f} for {1:s}'.format(
	float(my_ratings[i]), movieList[i]))