Created
January 22, 2017 14:48
-
-
Save gmodena/aa07a0be84d944c3808ed22dd00f7acf to your computer and use it in GitHub Desktop.
An order 2 Factorization Machine implemented on top of tensorflow
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# An order-2 Factorization Machine implemented on top of TensorFlow (1.x).
#
# Steffen Rendle (2012): Factorization Machines with libFM,
# in ACM Trans. Intell. Syst. Technol., 3(3), May.
# http://doi.acm.org/10.1145/2168752.2168771
import tensorflow as tf
import numpy as np

N_EPOCHS = 1000  # number of full-batch gradient steps

# Toy design matrix: first column is a numeric feature, the remaining
# columns are one-hot indicators (4 samples x 9 features).
x_data = np.matrix([[19., 0., 0., 0., 1., 1., 0., 0., 0.],
                    [33., 0., 0., 1., 0., 0., 1., 0., 0.],
                    [55., 0., 1., 0., 0., 0., 0., 1., 0.],
                    [20., 1., 0., 0., 0., 0., 0., 0., 1.]],
                   dtype=np.float32)

# Dummy targets: all ones, reshaped to (n, 1) to match the placeholder.
y_data = np.repeat(1.0, x_data.shape[0])
y_data.shape += (1,)

n, p = x_data.shape
k = 5  # number of latent factors per feature

X = tf.placeholder('float', shape=[n, p], name='X')
y = tf.placeholder('float', shape=[n, 1], name='y')

# Model parameters: global bias w0, per-feature weights W,
# and the (k x p) interaction factor matrix V (small random init
# so the factors are not symmetric at the start of training).
w0 = tf.Variable(tf.zeros([1]))
W = tf.Variable(tf.zeros([p]))
V = tf.Variable(tf.random_normal([k, p], stddev=0.01))

# Eq. 3 from Rendle, 2012:
#   y_hat = w0 + sum_i w_i x_i
#         + 0.5 * sum_f [ (sum_i v_{f,i} x_i)^2 - sum_i v_{f,i}^2 x_i^2 ]
linear_terms = tf.add(w0,
                      tf.reduce_sum(
                          tf.multiply(W, X), 1, keep_dims=True),
                      name='linear_terms')
interactions = tf.multiply(0.5,
                           tf.reduce_sum(
                               tf.subtract(
                                   tf.pow(tf.matmul(X, tf.transpose(V)), 2),
                                   # FIX: the second term of eq. 3 needs V
                                   # squared; the original tf.pow(V, 1) was
                                   # a no-op and made the math wrong.
                                   tf.matmul(tf.pow(X, 2),
                                             tf.transpose(tf.pow(V, 2)))),
                               1, keep_dims=True),
                           name='interactions')
y_hat = tf.add(linear_terms, interactions, name='y_hat')

# L2-regularized sum-of-squares loss over W and V.
lambda_w = tf.constant(0.01, name='lambda_w')
lambda_v = tf.constant(0.01, name='lambda_v')
l2_norm = tf.reduce_sum(
    tf.add(
        tf.multiply(lambda_w, tf.pow(W, 2)),
        tf.multiply(lambda_v, tf.pow(V, 2))))
# tf.subtract replaces the pre-1.0 tf.sub, consistent with the
# tf.multiply / tf.global_variables_initializer API used elsewhere here.
error = tf.reduce_sum(tf.square(tf.subtract(y_hat, y)))
cost = tf.add(error, l2_norm)

# Train with Adagrad (per-parameter adaptive learning rates).
eta = tf.constant(0.01, name='eta')
optimizer = tf.train.AdagradOptimizer(eta).minimize(cost)

# Launch the graph.
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(N_EPOCHS):
        # Reshuffle rows each epoch. Training is full-batch, so this only
        # permutes the rows fed to the graph; it does not subsample.
        indices = np.arange(n)
        np.random.shuffle(indices)
        x_data, y_data = x_data[indices], y_data[indices]
        sess.run(optimizer, feed_dict={X: x_data, y: y_data})

    print('Error: ', sess.run(error, feed_dict={X: x_data, y: y_data}))
    print('Predictions:', sess.run(y_hat, feed_dict={X: x_data, y: y_data}))
    print('Learnt weights:', sess.run(W, feed_dict={X: x_data, y: y_data}))
    print('Learnt factors:', sess.run(V, feed_dict={X: x_data, y: y_data}))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment