Skip to content

Instantly share code, notes, and snippets.

@PENGZhaoqing
Last active November 9, 2017 03:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save PENGZhaoqing/7a1b5d8919eae55a1de6063fbbb1fcec to your computer and use it in GitHub Desktop.
Mxnet manually gradients computation Vs. Graph loss symbol auto computation
import mxnet as mx
import numpy as np
import mxnet.ndarray as nd
# Fixed weights for the first fully-connected layer: 20 hidden units x 4 inputs.
# Hard-coded so every network below starts from identical parameters.
fc1_weight = [[0.58294852, 0.42608512, 0.96363545, 0.24708573],
[0.4417113, 0.10523346, 0.80098576, 0.46849809],
[0.08320606, 0.02893325, 0.55576215, 0.52159727],
[0.675476, 0.84439869, 0.1482909, 0.26626008],
[0.52650772, 0.07457639, 0.44785518, 0.71549555],
[0.52523266, 0.19377938, 0.02757852, 0.97459566],
[0.92856014, 0.68095418, 0.26694229, 0.68441936],
[0.77398185, 0.99398437, 0.35615027, 0.82315567],
[0.42741064, 0.05671271, 0.67730508, 0.00568796],
[0.289452, 0.90104051, 0.01030181, 0.69920418],
[0.29214084, 0.38468911, 0.988573, 0.48585752],
[0.27455112, 0.27176394, 0.54156989, 0.84917456],
[0.57979248, 0.73031199, 0.88747021, 0.59988834],
[0.59616888, 0.93295241, 0.01882551, 0.53378031],
[0.61438601, 0.4328007, 0.87544448, 0.25117127],
[0.99582249, 0.90169104, 0.95259686, 0.28756852],
[0.34949697, 0.58722187, 0.56623397, 0.36667131],
[0.42461918, 0.93779852, 0.85555812, 0.08616323],
[0.54120875, 0.74651195, 0.76785371, 0.14003219],
[0.04704561, 0.42374879, 0.34553214, 0.39982563]]
# Weights for the second fully-connected layer: 1 output unit x 20 hidden units.
fc2_weight = [[0.93034432, 0.97610128, 0.73023039, 0.66139541, 0.20614977, 0.71942276,
0.45235542, 0.97821716, 0.43852386, 0.41171249, 0.99458696, 0.15474394,
0.9290056, 0.42087649, 0.12136203, 0.24829071, 0.60336717, 0.42120195,
0.96704948, 0.03118107]]
# Batch of 10 input rows, 4 features each (matches fc1's input width).
# NOTE: the name shadows the builtin `input`; kept because the functions below
# and the driver code refer to it by this name.
input = [[0.0883304, 0.57117354, 0.85744546, 0.10524275],
[0.73636623, 0.77042306, 0.49162071, 0.77236564],
[0.89761976, 0.67380134, 0.93971695, 0.01783999],
[0.229045, 0.93465984, 0.03231421, 0.83666546],
[0.64807769, 0.66681773, 0.29663677, 0.69582481],
[0.67980967, 0.64803839, 0.75006107, 0.13230413],
[0.10044154, 0.99758292, 0.41892161, 0.09607127],
[0.85938775, 0.40909924, 0.58771238, 0.0030594],
[0.15055842, 0.03993676, 0.48678655, 0.38202192],
[0.40731432, 0.57124535, 0.99926446, 0.11714415]]
# Regression targets: one scalar per input row (10 x 1).
target = [[0.40862913],
[0.46507598],
[0.73679818],
[0.754124],
[0.32920501],
[0.6278464],
[0.80039361],
[0.52620516],
[0.5376822],
[0.34798077]]
# Move the Python lists onto GPU 0 as MXNet NDArrays; all five networks run
# on the same device (mx.gpu(0)) so the data must live there too.
target = nd.array(target, ctx=mx.gpu(0))
input = nd.array(input, ctx=mx.gpu(0))
# Shared initial parameters, keyed by the argument names MXNet derives from
# the symbol names ('fc1'/'fc2' with no_bias=True -> only *_weight entries).
init_params = {
'fc1_weight': nd.array(fc1_weight, ctx=mx.gpu(0)),
'fc2_weight': nd.array(fc2_weight, ctx=mx.gpu(0))
}
def network1(input, target):
    """Manual-gradient variant: squared-error loss differentiated by hand.

    Builds data -> fc1(20) -> fc2(1) with no bias terms, runs a forward pass,
    then injects the hand-computed loss gradient 2*(Y - target) into
    ``backward`` via ``out_grads`` instead of using a loss symbol.

    Parameters
    ----------
    input : mx.nd.NDArray, shape (10, 4), on mx.gpu(0)
    target : mx.nd.NDArray, shape (10, 1), on mx.gpu(0)

    Returns
    -------
    numpy.ndarray
        The gradient accumulated on fc1's weight (shape (20, 4)).
    """
    data = mx.symbol.Variable('data')
    fc1 = mx.symbol.FullyConnected(data=data, name='fc1', num_hidden=20, no_bias=True)
    fc2 = mx.symbol.FullyConnected(data=fc1, name='fc2', num_hidden=1, no_bias=True)
    model = mx.mod.Module(fc2, data_names=('data',),
                          label_names=None, context=mx.gpu(0))
    model.bind(data_shapes=[('data', (10, 4))], label_shapes=None,
               inputs_need_grad=True, grad_req="write")
    # force_init so repeated calls all start from the same shared init_params.
    model.init_params(arg_params=init_params, force_init=True)
    model.init_optimizer()
    model.forward(mx.io.DataBatch(data=[input], label=None), is_train=True)
    Y = model.get_outputs()[0]
    # d/dY of sum((Y - target)^2) is 2*(Y - target): the manual loss gradient.
    model.backward(out_grads=[2 * (Y - target)])
    # Reach into the executor to read the parameter gradients directly.
    grad_dict = model._exec_group.execs[0].grad_dict
    return grad_dict['fc1_weight'].asnumpy()
def network2(input, target):
    """Auto-gradient variant: scalar sum-of-squares loss built into the graph.

    Same network as ``network1`` (data -> fc1(20) -> fc2(1), no biases), but
    the loss sum((fc2 - target)^2) is expressed symbolically and wrapped in
    ``MakeLoss`` so ``backward()`` needs no explicit ``out_grads``. ``fc2`` is
    also exposed through ``BlockGrad`` purely as an extra (gradient-free)
    output, grouped with the loss.

    Parameters
    ----------
    input : mx.nd.NDArray, shape (10, 4), on mx.gpu(0)
    target : mx.nd.NDArray, shape (10, 1), on mx.gpu(0)

    Returns
    -------
    numpy.ndarray
        The gradient accumulated on fc1's weight (shape (20, 4)).
    """
    data = mx.symbol.Variable('data')
    target_sym = mx.symbol.Variable('target')
    fc1 = mx.symbol.FullyConnected(data=data, name='fc1', num_hidden=20, no_bias=True)
    fc2 = mx.symbol.FullyConnected(data=fc1, name='fc2', num_hidden=1, no_bias=True)
    # Scalar loss: sum over the batch of squared errors.
    loss = mx.symbol.sum(mx.symbol.square(fc2 - target_sym))
    loss = mx.symbol.MakeLoss(loss)
    # BlockGrad lets fc2 be read as an output without contributing gradients.
    out = mx.symbol.BlockGrad(fc2)
    loss = mx.symbol.Group([loss, out])
    # The target enters as a second *data* input (not a label).
    model = mx.mod.Module(loss, data_names=('data', 'target'),
                          label_names=None, context=mx.gpu(0))
    model.bind(data_shapes=[('data', (10, 4)), ('target', (10, 1))],
               label_shapes=None, inputs_need_grad=True, grad_req="write")
    model.init_params(arg_params=init_params, force_init=True)
    model.init_optimizer()
    model.forward(mx.io.DataBatch(data=[input, target], label=None), is_train=True)
    # MakeLoss supplies the head gradient; no out_grads needed.
    model.backward()
    grad_dict = model._exec_group.execs[0].grad_dict
    return grad_dict['fc1_weight'].asnumpy()
def network3(input, target):
    """Auto-gradient variant: per-element squared loss (no explicit sum).

    Identical to ``network2`` except the loss passed to ``MakeLoss`` is the
    elementwise square(fc2 - target) of shape (10, 1) rather than a scalar
    sum, and no BlockGrad output is grouped in. Comparing its fc1 gradient
    against ``network2``'s shows whether MakeLoss treats a non-scalar loss
    the same as its explicit sum.

    Parameters
    ----------
    input : mx.nd.NDArray, shape (10, 4), on mx.gpu(0)
    target : mx.nd.NDArray, shape (10, 1), on mx.gpu(0)

    Returns
    -------
    numpy.ndarray
        The gradient accumulated on fc1's weight (shape (20, 4)).
    """
    data = mx.symbol.Variable('data')
    target_sym = mx.symbol.Variable('target')
    fc1 = mx.symbol.FullyConnected(data=data, name='fc1', num_hidden=20, no_bias=True)
    fc2 = mx.symbol.FullyConnected(data=fc1, name='fc2', num_hidden=1, no_bias=True)
    # Elementwise squared error, shape (10, 1) — left unsummed on purpose.
    loss = mx.symbol.square(fc2 - target_sym)
    loss = mx.symbol.MakeLoss(loss)
    model = mx.mod.Module(loss, data_names=('data', 'target'),
                          label_names=None, context=mx.gpu(0))
    model.bind(data_shapes=[('data', (10, 4)), ('target', (10, 1))],
               label_shapes=None, inputs_need_grad=True, grad_req="write")
    model.init_params(arg_params=init_params, force_init=True)
    model.init_optimizer()
    model.forward(mx.io.DataBatch(data=[input, target], label=None), is_train=True)
    model.backward()
    grad_dict = model._exec_group.execs[0].grad_dict
    return grad_dict['fc1_weight'].asnumpy()
def network4(input):
    """Auto-gradient variant of a policy-gradient-style objective: sum(-fc2).

    Same network (data -> fc1(20) -> fc2(1), no biases) with the scalar loss
    sum(-fc2) wrapped in ``MakeLoss``; ``backward()`` then propagates the
    implied head gradient. ``network5`` computes the same thing by passing
    a -1 gradient manually.

    Parameters
    ----------
    input : mx.nd.NDArray, shape (10, 4), on mx.gpu(0)

    Returns
    -------
    numpy.ndarray
        The gradient accumulated on fc1's weight (shape (20, 4)).
    """
    data = mx.symbol.Variable('data')
    fc1 = mx.symbol.FullyConnected(data=data, name='fc1', num_hidden=20, no_bias=True)
    fc2 = mx.symbol.FullyConnected(data=fc1, name='fc2', num_hidden=1, no_bias=True)
    # Maximize the network output by minimizing its negated sum.
    loss = mx.symbol.MakeLoss(mx.symbol.sum(-fc2))
    model = mx.mod.Module(loss, data_names=('data',),
                          label_names=None, context=mx.gpu(0))
    model.bind(data_shapes=[('data', (10, 4))], label_shapes=None,
               inputs_need_grad=True, grad_req="write")
    model.init_params(arg_params=init_params, force_init=True)
    model.init_optimizer()
    model.forward(mx.io.DataBatch(data=[input], label=None), is_train=True)
    model.backward()
    grad_dict = model._exec_group.execs[0].grad_dict
    return grad_dict['fc1_weight'].asnumpy()
def network5(input):
    """Manual-gradient counterpart of ``network4``.

    The module's symbol is just the raw network output fc2 (no loss symbol);
    after the forward pass, a constant gradient of -1 per output element is
    injected through ``out_grads`` — the hand-derived gradient of sum(-fc2)
    with respect to fc2.

    Parameters
    ----------
    input : mx.nd.NDArray, shape (10, 4), on mx.gpu(0)

    Returns
    -------
    numpy.ndarray
        The gradient accumulated on fc1's weight (shape (20, 4)).
    """
    data = mx.symbol.Variable('data')
    fc1 = mx.symbol.FullyConnected(data=data, name='fc1', num_hidden=20, no_bias=True)
    fc2 = mx.symbol.FullyConnected(data=fc1, name='fc2', num_hidden=1, no_bias=True)
    model = mx.mod.Module(fc2, data_names=('data',),
                          label_names=None, context=mx.gpu(0))
    model.bind(data_shapes=[('data', (10, 4))], label_shapes=None,
               inputs_need_grad=True, grad_req="write")
    model.init_params(arg_params=init_params, force_init=True)
    model.init_optimizer()
    model.forward(mx.io.DataBatch(data=[input], label=None), is_train=True)
    # d(sum(-fc2))/d(fc2) = -1 for every output element, shape (10, 1).
    gradient = -1 * nd.ones((10, 1), ctx=mx.gpu(0))
    model.backward(out_grads=[gradient])
    grad_dict = model._exec_group.execs[0].grad_dict
    return grad_dict['fc1_weight'].asnumpy()
# Compare the fc1 weight gradients pairwise: each printed array should be
# (near-)zero if manual out_grads and the symbolic loss graphs agree.
# Parenthesized single-argument print works identically on Python 2 and 3.
print(network1(input, target) - network2(input, target))
print(network2(input, target) - network3(input, target))
print(network4(input) - network5(input))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment