@kristijanbartol
Last active May 3, 2018 15:19
Optimizing f(x) = 3*x using a single parameter (the model can therefore be written as m(a) = a*x). Testing the custom gradients functionality in TensorFlow.
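For reference, the squared-error loss for m(a) = a*x is L = (a*x - y)^2, so dL/da = 2*(a*x - y)*x; the custom gradient only has to supply the local derivative dp/da = x, and TensorFlow chains it with the upstream gradient. Below is a minimal, standalone sketch of the same py_func + gradient_override_map pattern (assuming TensorFlow 1.x; the gradient name 'ProdGrad@Sketch' is purely illustrative) that checks the override by comparing the reported gradient against the expected value of x.

import numpy as np
import tensorflow as tf

def _prod_grad(op, grad):
    # d(a*x)/da = x; the gradient w.r.t. x is not needed here, so it is zeroed out
    return grad * op.inputs[1], grad * 0.

# illustrative name; it only has to be unique within the graph
tf.RegisterGradient('ProdGrad@Sketch')(_prod_grad)

a = tf.constant(1.5)
x = tf.constant(4.0)

with tf.get_default_graph().gradient_override_map({'PyFunc': 'ProdGrad@Sketch'}):
    p = tf.py_func(lambda a_, x_: (a_ * x_).astype(np.float32), [a, x], [tf.float32], stateful=True)[0]

with tf.Session() as sess:
    # expected gradient: the value of x, i.e. 4.0
    print(sess.run(tf.gradients(p, a)[0]))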
import tensorflow as tf
from tensorflow.python.framework import ops
import numpy as np
import time
ZERO_TOL = 1e-8
LOSS_TOL = 1e-3
SAMPLES = 100
EPOCHS = 100000
train_input = np.random.rand(SAMPLES)
train_label = 3 * train_input
class MyException(Exception):
    pass
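
# _my_linear_grad below implements the analytical gradient of p = a * x:
# dp/da = x (= op.inputs[1]) and dp/dx = a; only the gradient w.r.t. a is propagated.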
def _my_linear_grad(op, grad):
    # the second returned value (gradient w.r.t. x) is not needed, so it is simply zeroed out
    return grad * op.inputs[1], grad * 0.

def my_linear(a, x):
    return (a * x).astype(np.float32)
learning_rate = 1e-3
beta1 = 0.9999
x = tf.placeholder(dtype=tf.float32, shape=(), name='x')
y = tf.placeholder(dtype=tf.float32, shape=(), name='y')
a = tf.get_variable('a', dtype=tf.float32, initializer=1.)
tf_a = tf.get_variable('tf_a', dtype=tf.float32, initializer=1.)
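# 'a' is trained through the custom py_func op, 'tf_a' through the native TF graph;
# both start at 1.0, so their losses should match at every step (within ZERO_TOL).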
with ops.op_scope([a, x], name="MyLinear") as name:
    # custom gradient op name shouldn't conflict with any other TF op name
    unique_name = 'PyFuncGrad@Unique'
    # tf.RegisterGradient registers _my_linear_grad as the backward pass for ops whose gradient type is unique_name
    tf.RegisterGradient(unique_name)(_my_linear_grad)
    g = tf.get_default_graph()
    # context manager used to override gradients for nodes created in its block
    with g.gradient_override_map({"PyFunc": unique_name}):
        # my_linear is used for the forward pass - my_linear and _my_linear_grad are wrapped inside a single TF node
        p = tf.py_func(my_linear, [a, x], [tf.float32], stateful=True, name=name)

tf_p = tf_a * x
loss = tf.reduce_mean(tf.square(p - y))
tf_loss = tf.reduce_mean(tf.square(tf_p - y))
train_vars = [var for var in tf.trainable_variables()]
optim = tf.train.AdamOptimizer(learning_rate, beta1)
# compute_gradients returns a list, so the gradients for tf_loss can simply be concatenated onto it
grads_and_vars = optim.compute_gradients(loss, var_list=train_vars)
grads_and_vars += optim.compute_gradients(tf_loss, var_list=train_vars)
train_op = optim.apply_gradients(grads_and_vars)
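# each loss only produces a gradient for its own variable; the other entry in each
# list is None (no dependency) and is skipped by apply_gradients, so a single
# train_op updates both a and tf_a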
tf.summary.scalar('loss', loss)
with tf.Session() as sess:
    train_writer = tf.summary.FileWriter('board', sess.graph)
    merge = tf.summary.merge_all()
    sess.run(tf.global_variables_initializer())
    try:
        for epoch in range(EPOCHS):
            overall_loss = 0.
            # update using each sample separately
            for i in range(SAMPLES):
                result = sess.run([loss, tf_loss, a, tf_a, merge, train_op], feed_dict={
                    x: train_input[i],
                    y: train_label[i]
                })
                if np.abs(result[0] - result[1]) > ZERO_TOL:
                    print('Invalid update!\nExpected: {}, Actual: {}'.format(result[1], result[0]))
                    raise MyException
                print('epoch: {}, iter: {}, loss: {}\na: {}\n'.format(epoch, i, result[0], result[2]))
                overall_loss += result[0]
            overall_loss /= float(SAMPLES)
            print('overall_loss: {}'.format(overall_loss))
            #time.sleep(2.0)
            # [NOTE] the check only runs once per epoch, so the break is slightly delayed
            if overall_loss < LOSS_TOL:
                print('Found parameter!\n---------------\n')
                break
    except MyException:
        pass
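
With the gradient override in place, the custom-op loss and the native TF loss are expected to stay within ZERO_TOL of each other at every step; any divergence prints the mismatch and raises MyException to stop training.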