Skip to content

Instantly share code, notes, and snippets.

@jermainewang
Last active June 11, 2017 02:11
Show Gist options
  • Save jermainewang/c80f6d493b9860495c3e22bafcffe229 to your computer and use it in GitHub Desktop.
Save jermainewang/c80f6d493b9860495c3e22bafcffe229 to your computer and use it in GitHub Desktop.
Naive lstm implementation
import mxnet as mx
import mxnet.ndarray as nd
#import minpy.ndarray as nd
context = mx.gpu(0)
def linear(X, W, bias):
    """Return the affine map ``nd.dot(X, W) + bias``."""
    projected = nd.dot(X, W)
    return projected + bias
def sigmoid(x):
    """Logistic sigmoid of ``x``, computed through the identity 0.5 * (tanh(x / 2) + 1)."""
    shifted = nd.tanh(.5 * x) + 1
    return .5 * shifted
from mxnet.context import Context
Context.default_ctx = context
def gaussian(shape):
    """Return a random array of the given ``shape`` allocated on ``context``.

    NOTE: despite the name, the values are drawn with ``nd.uniform``, not from
    a normal distribution.
    """
    sample = nd.uniform(shape=shape, ctx=context)
    return sample
# Hidden width: the h and c states in step() are created with shape (N, D).
D = 10
# Input-to-hidden weights, one per gate (i, f, o, g as named in step()).
# Shape (7, D) so that nd.dot(patch, Wx*) maps 7 input values to D units.
WX_SHAPE = (7, D)
Wxi = gaussian(shape=WX_SHAPE)
Wxf = gaussian(shape=WX_SHAPE)
Wxo = gaussian(shape=WX_SHAPE)
Wxg = gaussian(shape=WX_SHAPE)
# Input-to-hidden biases, zero-initialised.
bX_SHAPE = (D,)
bxi = nd.zeros(shape=bX_SHAPE)
bxf = nd.zeros(shape=bX_SHAPE)
bxo = nd.zeros(shape=bX_SHAPE)
bxg = nd.zeros(shape=bX_SHAPE)
# Hidden-to-hidden weights, one per gate.
WH_SHAPE = (D, D)
Whi = gaussian(shape=WH_SHAPE)
Whf = gaussian(shape=WH_SHAPE)
Who = gaussian(shape=WH_SHAPE)
Whg = gaussian(shape=WH_SHAPE)
# Hidden-to-hidden biases, zero-initialised.
bH_SHAPE = (D,)
bhi = nd.zeros(shape=bH_SHAPE)
bhf = nd.zeros(shape=bH_SHAPE)
bho = nd.zeros(shape=bH_SHAPE)
bhg = nd.zeros(shape=bH_SHAPE)
# Final projection from D hidden units to 10 outputs; applied by
# linear(h, W, b) at the end of step().
W = gaussian(shape=(D, 10))
b = nd.zeros(shape=(10,))
# N is the leading dimension of X and of the h/c states (presumably the batch size).
N = 4
# Random input of N sequences, each 784 // 7 = 112 steps of 7 values.
# (784 is presumably a flattened 28x28 image - confirm.)
X = gaussian(shape=(N, 784 // 7, 7))
from time import time
def step():
    """Run one forward pass of the naive LSTM over every time step of ``X``.

    The hidden and cell states start at zero and are updated once per slice
    along axis 1 of ``X``; the last hidden state is then projected with
    ``linear(h, W, b)``.

    Returns:
        The projected output of the last hidden state. (The original computed
        this value and discarded it; callers that ignore the return value are
        unaffected.)
    """
    num_steps = X.shape[1]
    num_features = X.shape[2]
    h = nd.zeros((N, D))
    c = nd.zeros((N, D))
    # The loop index is named ``t`` so it is not rebound by the input gate.
    for t in range(num_steps):
        # slice_axis keeps the sliced axis, giving (N, 1, features). Drop it so
        # the input projection is (N, D) and adds element-wise to the hidden
        # projection instead of being a 3-D array of a different shape.
        patch = nd.slice_axis(X, axis=1, begin=t, end=t + 1)
        patch = patch.reshape((N, num_features))
        in_gate = sigmoid(linear(patch, Wxi, bxi) + linear(h, Whi, bhi))
        forget_gate = sigmoid(linear(patch, Wxf, bxf) + linear(h, Whf, bhf))
        out_gate = sigmoid(linear(patch, Wxo, bxo) + linear(h, Who, bho))
        candidate = nd.tanh(linear(patch, Wxg, bxg) + linear(h, Whg, bhg))
        c = forget_gate * c + in_gate * candidate
        h = out_gate * nd.tanh(c)
    return linear(h, W, b)
def main():
    """Benchmark ``step()`` and print the mean seconds per call.

    Runs 30 calls in total; the first 10 are warm-up and are excluded from the
    measurement, so the printed value is the average over the last 20 calls.
    """
    warmup_iters = 10
    total_iters = 30
    for index in range(total_iters):
        if index == warmup_iters:
            # MXNet queues operations asynchronously: drain the warm-up work
            # first so it is not billed to the timed region.
            nd.waitall()
            t0 = time()
        step()
    # Wait for all queued operations to finish; without this the timer would
    # only measure how long it took to enqueue the work.
    nd.waitall()
    print((time() - t0) / (total_iters - warmup_iters))


if __name__ == '__main__':
    main()
# To profile instead of timing, replace the call above with:
#import cProfile
#cProfile.run('main()')
import mxnet as mx
import mxnet.ndarray as nd
from mxnet.context import Context
#import minpy.ndarray as nd
context = mx.gpu(0)
Context.default_ctx = context
def linear(X, W, bias):
    """Return the affine map ``nd.dot(X, W) + bias``."""
    projected = nd.dot(X, W)
    return projected + bias
def sigmoid(x):
    """Logistic sigmoid of ``x``, computed through the identity 0.5 * (tanh(x / 2) + 1)."""
    shifted = nd.tanh(.5 * x) + 1
    return .5 * shifted
def gaussian(shape):
    """Return a random array of the given ``shape`` allocated on ``context``.

    NOTE: despite the name, the values are drawn with ``nd.uniform``, not from
    a normal distribution.
    """
    sample = nd.uniform(shape=shape, ctx=context)
    return sample
# Hidden width: the h and c states in step() are created with shape (N, D),
# and the symbolic layers use num_hidden=D.
D = 10
# Input-to-hidden weights, one per gate (i, f, o, g as named in step()).
# NOTE(review): (D, 7) presumably matches FullyConnected's
# (num_hidden, input_dim) weight layout - confirm.
WX_SHAPE = (D, 7)
Wxi = gaussian(shape=WX_SHAPE)
Wxf = gaussian(shape=WX_SHAPE)
Wxo = gaussian(shape=WX_SHAPE)
Wxg = gaussian(shape=WX_SHAPE)
# Input-to-hidden biases, zero-initialised.
bX_SHAPE = (D,)
bxi = nd.zeros(shape=bX_SHAPE)
bxf = nd.zeros(shape=bX_SHAPE)
bxo = nd.zeros(shape=bX_SHAPE)
bxg = nd.zeros(shape=bX_SHAPE)
# Hidden-to-hidden weights, one per gate.
WH_SHAPE = (D, D)
Whi = gaussian(shape=WH_SHAPE)
Whf = gaussian(shape=WH_SHAPE)
Who = gaussian(shape=WH_SHAPE)
Whg = gaussian(shape=WH_SHAPE)
# Hidden-to-hidden biases, zero-initialised.
bH_SHAPE = (D,)
bhi = nd.zeros(shape=bH_SHAPE)
bhf = nd.zeros(shape=bH_SHAPE)
bho = nd.zeros(shape=bH_SHAPE)
bhg = nd.zeros(shape=bH_SHAPE)
# Final projection from D hidden units to 10 outputs; applied by
# linear(h, W, b) at the end of step().
W = gaussian(shape=(D, 10))
b = nd.zeros(shape=(10,))
# N is the leading dimension of X and of the h/c states (presumably the batch size).
N = 4
# Random input of N sequences, each 784 // 7 = 112 steps of 7 values.
X = gaussian(shape=(N, 784 // 7, 7))
def linear_sym():
    """Build a symbolic FullyConnected layer with ``D`` outputs over the variable 'X'."""
    data = mx.symbol.Variable('X')
    return mx.symbol.FullyConnected(data, num_hidden=D)
def make_func(sym, shapes):
    """Bind ``sym`` to an executor on ``context`` and wrap it in a callable.

    Args:
        sym: the symbol to bind.
        shapes: mapping of input variable name to shape, forwarded to
            ``infer_shape`` and ``simple_bind``.

    Returns:
        A function that runs one inference-mode forward pass and returns the
        executor's first output.
    """
    # The inferred shapes are not used further; the call is kept as-is.
    _arg_shapes, _out_shapes, _aux_shapes = sym.infer_shape(**shapes)
    bound = sym.simple_bind(context, 'write', **shapes)

    def func(*args):
        # NOTE: ``args`` is accepted but ignored. Copying the arguments into
        # the executor is disabled, so the forward pass runs on whatever the
        # executor's argument arrays currently hold:
        #for arg, executor_arg in zip(args, bound.arg_arrays):
        #arg.copyto(executor_arg)
        bound.forward(is_train=False)
        first_output = bound.outputs[0]
        return first_output

    return func
# Pre-bound executors used by step(): linear_x is bound for an (N, 1, 7)
# input patch and linear_h for the (N, D) hidden state.
linear_x = make_func(linear_sym(), {'X': (N, 1, 7)})
linear_h = make_func(linear_sym(), {'X': (N, D)})
from time import time
def step():
    """Run one LSTM-style forward pass over every slice of ``X`` along axis 1.

    The gate pre-activations come from the executor-backed ``linear_x`` and
    ``linear_h`` callables. The final projection ``linear(h, W, b)`` is
    computed but not returned.
    """
    hidden = nd.zeros((N, D))
    cell = nd.zeros((N, D))
    for t in range(784 // 7):
        patch = nd.slice_axis(X, axis=1, begin=t, end=(t + 1))
        in_gate = sigmoid(linear_x(patch, Wxi, bxi) + linear_h(hidden, Whi, bhi))
        forget_gate = sigmoid(linear_x(patch, Wxf, bxf) + linear_h(hidden, Whf, bhf))
        out_gate = sigmoid(linear_x(patch, Wxo, bxo) + linear_h(hidden, Who, bho))
        candidate = nd.tanh(linear_x(patch, Wxg, bxg) + linear_h(hidden, Whg, bhg))
        cell = forget_gate * cell + in_gate * candidate
        hidden = out_gate * nd.tanh(cell)
    linear(hidden, W, b)
def main():
    """Benchmark ``step()`` and print the mean seconds per call.

    Runs 30 calls in total; the first 10 are warm-up and are excluded from the
    measurement, so the printed value is the average over the last 20 calls.
    """
    warmup_iters = 10
    total_iters = 30
    for index in range(total_iters):
        if index == warmup_iters:
            # MXNet queues operations asynchronously: drain the warm-up work
            # first so it is not billed to the timed region.
            nd.waitall()
            t0 = time()
        step()
    # Wait for all queued operations to finish; without this the timer would
    # only measure how long it took to enqueue the work.
    nd.waitall()
    print((time() - t0) / (total_iters - warmup_iters))


if __name__ == '__main__':
    main()
# To profile instead of timing, replace the call above with:
#import cProfile
#cProfile.run('main()')
import mxnet as mx
import mxnet.ndarray as nd
from mxnet.context import Context
#import minpy.ndarray as nd
context = mx.gpu(0)
Context.default_ctx = context
def linear(X, W, bias):
    """Return the affine map ``nd.dot(X, W) + bias``."""
    projected = nd.dot(X, W)
    return projected + bias
def sigmoid(x):
    """Logistic sigmoid of ``x``, computed through the identity 0.5 * (tanh(x / 2) + 1).

    NOTE: the name ``sigmoid`` is rebound further down this script to an
    executor-backed callable created with ``make_func``.
    """
    shifted = nd.tanh(.5 * x) + 1
    return .5 * shifted
def gaussian(shape):
    """Return a random array of the given ``shape`` allocated on ``context``.

    NOTE: despite the name, the values are drawn with ``nd.uniform``, not from
    a normal distribution.
    """
    sample = nd.uniform(shape=shape, ctx=context)
    return sample
# Hidden width: the h and c states in step() are created with shape (N, D),
# and the symbolic layers use num_hidden=D.
D = 10
# Input-to-hidden weights, one per gate (i, f, o, g as named in step()).
# NOTE(review): (D, 7) presumably matches FullyConnected's
# (num_hidden, input_dim) weight layout - confirm.
WX_SHAPE = (D, 7)
Wxi = gaussian(shape=WX_SHAPE)
Wxf = gaussian(shape=WX_SHAPE)
Wxo = gaussian(shape=WX_SHAPE)
Wxg = gaussian(shape=WX_SHAPE)
# Input-to-hidden biases, zero-initialised.
bX_SHAPE = (D,)
bxi = nd.zeros(shape=bX_SHAPE)
bxf = nd.zeros(shape=bX_SHAPE)
bxo = nd.zeros(shape=bX_SHAPE)
bxg = nd.zeros(shape=bX_SHAPE)
# Hidden-to-hidden weights, one per gate.
WH_SHAPE = (D, D)
Whi = gaussian(shape=WH_SHAPE)
Whf = gaussian(shape=WH_SHAPE)
Who = gaussian(shape=WH_SHAPE)
Whg = gaussian(shape=WH_SHAPE)
# Hidden-to-hidden biases, zero-initialised.
bH_SHAPE = (D,)
bhi = nd.zeros(shape=bH_SHAPE)
bhf = nd.zeros(shape=bH_SHAPE)
bho = nd.zeros(shape=bH_SHAPE)
bhg = nd.zeros(shape=bH_SHAPE)
# Final projection from D hidden units to 10 outputs; applied by
# linear(h, W, b) at the end of step().
W = gaussian(shape=(D, 10))
b = nd.zeros(shape=(10,))
# N is the leading dimension of X and of the h/c states (presumably the batch size).
N = 4
# Random input of N sequences, each 784 // 7 = 112 steps of 7 values.
X = gaussian(shape=(N, 784 // 7, 7))
def linear_sym():
    """Build a symbolic FullyConnected layer with ``D`` outputs over the variable 'X'."""
    data = mx.symbol.Variable('X')
    return mx.symbol.FullyConnected(data, num_hidden=D)
def sigmoid_sym():
    """Build a symbolic sigmoid Activation over the variable 'X'."""
    data = mx.symbol.Variable('X')
    return mx.symbol.Activation(data, act_type='sigmoid')
def make_func(sym, shapes):
    """Bind ``sym`` to an executor on ``context`` and wrap it in a callable.

    Args:
        sym: the symbol to bind.
        shapes: mapping of input variable name to shape, forwarded to
            ``infer_shape`` and ``simple_bind``.

    Returns:
        A function that runs one inference-mode forward pass and returns the
        executor's first output.
    """
    # The inferred shapes are not used further; the call is kept as-is.
    _arg_shapes, _out_shapes, _aux_shapes = sym.infer_shape(**shapes)
    bound = sym.simple_bind(context, 'write', **shapes)

    def func(*args):
        # NOTE: ``args`` is accepted but ignored. Copying the arguments into
        # the executor is disabled, so the forward pass runs on whatever the
        # executor's argument arrays currently hold:
        #for arg, executor_arg in zip(args, bound.arg_arrays):
        #arg.copyto(executor_arg)
        bound.forward(is_train=False)
        first_output = bound.outputs[0]
        return first_output

    return func
# Pre-bound executors used by step(): linear_x is bound for an (N, 1, 7)
# input patch and linear_h for the (N, D) hidden state.
linear_x = make_func(linear_sym(), {'X': (N, 1, 7)})
linear_h = make_func(linear_sym(), {'X': (N, D)})
# Rebinds the name `sigmoid`, replacing the nd-based function defined earlier
# in this script with an executor-backed one.
sigmoid = make_func(sigmoid_sym(), {'X': (N, D)})
from time import time
def step():
    """Run one LSTM-style forward pass over every slice of ``X`` along axis 1.

    The gate pre-activations come from the executor-backed ``linear_x`` and
    ``linear_h`` callables, and ``sigmoid`` is whatever that name is bound to
    at call time. The final projection ``linear(h, W, b)`` is computed but not
    returned.
    """
    hidden = nd.zeros((N, D))
    cell = nd.zeros((N, D))
    for t in range(784 // 7):
        patch = nd.slice_axis(X, axis=1, begin=t, end=(t + 1))
        in_gate = sigmoid(linear_x(patch, Wxi, bxi) + linear_h(hidden, Whi, bhi))
        forget_gate = sigmoid(linear_x(patch, Wxf, bxf) + linear_h(hidden, Whf, bhf))
        out_gate = sigmoid(linear_x(patch, Wxo, bxo) + linear_h(hidden, Who, bho))
        candidate = nd.tanh(linear_x(patch, Wxg, bxg) + linear_h(hidden, Whg, bhg))
        cell = forget_gate * cell + in_gate * candidate
        hidden = out_gate * nd.tanh(cell)
    linear(hidden, W, b)
def main():
    """Benchmark ``step()`` and print the mean seconds per call.

    Runs 30 calls in total; the first 10 are warm-up and are excluded from the
    measurement, so the printed value is the average over the last 20 calls.
    """
    warmup_iters = 10
    total_iters = 30
    for index in range(total_iters):
        if index == warmup_iters:
            # MXNet queues operations asynchronously: drain the warm-up work
            # first so it is not billed to the timed region.
            nd.waitall()
            t0 = time()
        step()
    # Wait for all queued operations to finish; without this the timer would
    # only measure how long it took to enqueue the work.
    nd.waitall()
    print((time() - t0) / (total_iters - warmup_iters))


if __name__ == '__main__':
    main()
# To profile instead of timing, replace the call above with:
#import cProfile
#cProfile.run('main()')
import mxnet as mx
import mxnet.ndarray as nd
from mxnet.context import Context
#import minpy.ndarray as nd
context = mx.gpu(0)
Context.default_ctx = context
def linear(X, W, bias):
    """Return the affine map ``nd.dot(X, W) + bias``."""
    projected = nd.dot(X, W)
    return projected + bias
def sigmoid(x):
    """Logistic sigmoid of ``x``, computed through the identity 0.5 * (tanh(x / 2) + 1).

    NOTE: the name ``sigmoid`` is rebound further down this script to an
    executor-backed callable created with ``make_func``.
    """
    shifted = nd.tanh(.5 * x) + 1
    return .5 * shifted
def gaussian(shape):
    """Return a random array of the given ``shape`` allocated on ``context``.

    NOTE: despite the name, the values are drawn with ``nd.uniform``, not from
    a normal distribution.
    """
    sample = nd.uniform(shape=shape, ctx=context)
    return sample
# Hidden width: the h and c states in step() are created with shape (N, D),
# and the symbolic layers use num_hidden=D.
D = 10
# Input-to-hidden weights, one per gate (i, f, o, g as named in step()).
# NOTE(review): (D, 7) presumably matches FullyConnected's
# (num_hidden, input_dim) weight layout - confirm.
WX_SHAPE = (D, 7)
Wxi = gaussian(shape=WX_SHAPE)
Wxf = gaussian(shape=WX_SHAPE)
Wxo = gaussian(shape=WX_SHAPE)
Wxg = gaussian(shape=WX_SHAPE)
# Input-to-hidden biases, zero-initialised.
bX_SHAPE = (D,)
bxi = nd.zeros(shape=bX_SHAPE)
bxf = nd.zeros(shape=bX_SHAPE)
bxo = nd.zeros(shape=bX_SHAPE)
bxg = nd.zeros(shape=bX_SHAPE)
# Hidden-to-hidden weights, one per gate.
WH_SHAPE = (D, D)
Whi = gaussian(shape=WH_SHAPE)
Whf = gaussian(shape=WH_SHAPE)
Who = gaussian(shape=WH_SHAPE)
Whg = gaussian(shape=WH_SHAPE)
# Hidden-to-hidden biases, zero-initialised.
bH_SHAPE = (D,)
bhi = nd.zeros(shape=bH_SHAPE)
bhf = nd.zeros(shape=bH_SHAPE)
bho = nd.zeros(shape=bH_SHAPE)
bhg = nd.zeros(shape=bH_SHAPE)
# Final projection from D hidden units to 10 outputs; applied by
# linear(h, W, b) at the end of step().
W = gaussian(shape=(D, 10))
b = nd.zeros(shape=(10,))
# N is the leading dimension of X and of the h/c states (presumably the batch size).
N = 4
# Random input of N sequences, each 784 // 7 = 112 steps of 7 values.
X = gaussian(shape=(N, 784 // 7, 7))
def linear_sym():
    """Build a symbolic FullyConnected layer with ``D`` outputs over the variable 'X'."""
    data = mx.symbol.Variable('X')
    return mx.symbol.FullyConnected(data, num_hidden=D)
def sigmoid_sym():
    """Build a symbolic sigmoid Activation over the variable 'X'."""
    data = mx.symbol.Variable('X')
    return mx.symbol.Activation(data, act_type='sigmoid')
def gate_sym(act_type='sigmoid'):
    """Build one fused gate: Activation(FullyConnected(X) + FullyConnected(H)).

    Args:
        act_type: activation type passed to ``mx.symbol.Activation``.

    Returns:
        The activation symbol over the variables 'X' and 'H'.
    """
    data = mx.symbol.Variable('X')
    hidden = mx.symbol.Variable('H')
    from_input = mx.symbol.FullyConnected(data, num_hidden=D)
    from_hidden = mx.symbol.FullyConnected(hidden, num_hidden=D)
    return mx.symbol.Activation(from_input + from_hidden, act_type=act_type)
def make_func(sym, shapes):
    """Bind ``sym`` to an executor on ``context`` and wrap it in a callable.

    Prints ``sym.list_arguments()`` once at bind time.

    Args:
        sym: the symbol to bind.
        shapes: mapping of input variable name to shape, forwarded to
            ``infer_shape`` and ``simple_bind``.

    Returns:
        A function that runs one inference-mode forward pass and returns the
        executor's first output.
    """
    # The inferred shapes are not used further; the call is kept as-is.
    _arg_shapes, _out_shapes, _aux_shapes = sym.infer_shape(**shapes)
    bound = sym.simple_bind(context, 'write', **shapes)
    print(sym.list_arguments())

    def func(*args):
        # NOTE: ``args`` is accepted but ignored. Copying the arguments into
        # the executor is disabled, so the forward pass runs on whatever the
        # executor's argument arrays currently hold:
        #for arg, executor_arg in zip(args, bound.arg_arrays):
        #arg.copyto(executor_arg)
        bound.forward(is_train=False)
        first_output = bound.outputs[0]
        return first_output

    return func
# Executors for the unfused pieces. step() in this script only calls gate and
# gate_g; these three are still bound here.
linear_x = make_func(linear_sym(), {'X': (N, 1, 7)})
linear_h = make_func(linear_sym(), {'X': (N, D)})
# Rebinds the name `sigmoid`, replacing the nd-based function defined earlier.
sigmoid = make_func(sigmoid_sym(), {'X': (N, D)})
# Fused gate executors: sigmoid activation for i/f/o, tanh for g.
gate = make_func(gate_sym(), {'X': (N, 1, 7), 'H': (N, D)})
gate_g = make_func(gate_sym('tanh'), {'X': (N, 1, 7), 'H': (N, D)})
from time import time
def step():
    """Run one LSTM-style forward pass over every slice of ``X`` along axis 1.

    Each gate is a single call to a fused executor (``gate`` or ``gate_g``)
    that stands in for activation(linear_x(patch, ...) + linear_h(h, ...)).
    The final projection ``linear(h, W, b)`` is computed but not returned.
    """
    hidden = nd.zeros((N, D))
    cell = nd.zeros((N, D))
    for t in range(784 // 7):
        patch = nd.slice_axis(X, axis=1, begin=t, end=(t + 1))
        in_gate = gate(patch, Wxi, bxi, hidden, Whi, bhi)
        forget_gate = gate(patch, Wxf, bxf, hidden, Whf, bhf)
        out_gate = gate(patch, Wxo, bxo, hidden, Who, bho)
        candidate = gate_g(patch, Wxg, bxg, hidden, Whg, bhg)
        cell = forget_gate * cell + in_gate * candidate
        hidden = out_gate * nd.tanh(cell)
    linear(hidden, W, b)
def main():
    """Benchmark ``step()`` and print the mean seconds per call.

    Runs 30 calls in total; the first 10 are warm-up and are excluded from the
    measurement, so the printed value is the average over the last 20 calls.
    """
    warmup_iters = 10
    total_iters = 30
    for index in range(total_iters):
        if index == warmup_iters:
            # MXNet queues operations asynchronously: drain the warm-up work
            # first so it is not billed to the timed region.
            nd.waitall()
            t0 = time()
        step()
    # Wait for all queued operations to finish; without this the timer would
    # only measure how long it took to enqueue the work.
    nd.waitall()
    print((time() - t0) / (total_iters - warmup_iters))


if __name__ == '__main__':
    main()
# To profile instead of timing, replace the call above with:
#import cProfile
#cProfile.run('main()')
import mxnet as mx
import mxnet.ndarray as nd
from mxnet.context import Context
#import minpy.ndarray as nd
context = mx.gpu(0)
Context.default_ctx = context
def linear(X, W, bias):
    """Return the affine map ``nd.dot(X, W) + bias``."""
    projected = nd.dot(X, W)
    return projected + bias
def sigmoid(x):
    """Logistic sigmoid of ``x``, computed through the identity 0.5 * (tanh(x / 2) + 1).

    NOTE: the name ``sigmoid`` is rebound further down this script to an
    executor-backed callable created with ``make_func``.
    """
    shifted = nd.tanh(.5 * x) + 1
    return .5 * shifted
def gaussian(shape):
    """Return a random array of the given ``shape`` allocated on ``context``.

    NOTE: despite the name, the values are drawn with ``nd.uniform``, not from
    a normal distribution.
    """
    sample = nd.uniform(shape=shape, ctx=context)
    return sample
# Hidden width: the h and c states in step() are created with shape (N, D),
# and the symbolic layers use num_hidden=D.
D = 10
# Input-to-hidden weights, one per gate (i, f, o, g).
# NOTE(review): (D, 7) presumably matches FullyConnected's
# (num_hidden, input_dim) weight layout - confirm.
WX_SHAPE = (D, 7)
Wxi = gaussian(shape=WX_SHAPE)
Wxf = gaussian(shape=WX_SHAPE)
Wxo = gaussian(shape=WX_SHAPE)
Wxg = gaussian(shape=WX_SHAPE)
# Input-to-hidden biases, zero-initialised.
bX_SHAPE = (D,)
bxi = nd.zeros(shape=bX_SHAPE)
bxf = nd.zeros(shape=bX_SHAPE)
bxo = nd.zeros(shape=bX_SHAPE)
bxg = nd.zeros(shape=bX_SHAPE)
# Hidden-to-hidden weights, one per gate.
WH_SHAPE = (D, D)
Whi = gaussian(shape=WH_SHAPE)
Whf = gaussian(shape=WH_SHAPE)
Who = gaussian(shape=WH_SHAPE)
Whg = gaussian(shape=WH_SHAPE)
# Hidden-to-hidden biases, zero-initialised.
bH_SHAPE = (D,)
bhi = nd.zeros(shape=bH_SHAPE)
bhf = nd.zeros(shape=bH_SHAPE)
bho = nd.zeros(shape=bH_SHAPE)
bhg = nd.zeros(shape=bH_SHAPE)
# Final projection from D hidden units to 10 outputs; applied by
# linear(h, W, b) at the end of step().
W = gaussian(shape=(D, 10))
b = nd.zeros(shape=(10,))
# N is the leading dimension of X and of the h/c states (presumably the batch size).
N = 4
# Random input of N sequences, each 784 // 7 = 112 steps of 7 values.
X = gaussian(shape=(N, 784 // 7, 7))
def linear_sym():
    """Build a symbolic FullyConnected layer with ``D`` outputs over the variable 'X'."""
    data = mx.symbol.Variable('X')
    return mx.symbol.FullyConnected(data, num_hidden=D)
def sigmoid_sym():
    """Build a symbolic sigmoid Activation over the variable 'X'."""
    data = mx.symbol.Variable('X')
    return mx.symbol.Activation(data, act_type='sigmoid')
def gate_sym(act_type='sigmoid'):
    """Build one fused gate: Activation(FullyConnected(X) + FullyConnected(H)).

    Args:
        act_type: activation type passed to ``mx.symbol.Activation``.

    Returns:
        The activation symbol over the variables 'X' and 'H'.
    """
    data = mx.symbol.Variable('X')
    hidden = mx.symbol.Variable('H')
    from_input = mx.symbol.FullyConnected(data, num_hidden=D)
    from_hidden = mx.symbol.FullyConnected(hidden, num_hidden=D)
    return mx.symbol.Activation(from_input + from_hidden, act_type=act_type)
def lstm_cell_sym():
    """Build a whole LSTM cell as a single symbol over the variables 'X', 'H' and 'C'.

    Returns:
        The symbol for the new hidden state, o * tanh(f * C + i * g). Only the
        hidden state is returned; the updated cell state is not exposed as an
        output.
    """
    def _gate_sym(X, H, name, act_type='sigmoid'):
        # One gate: the two FullyConnected layers are named 'x<name>' and
        # 'h<name>' so their weight and bias arguments get distinct names.
        pre_activation = (
            mx.symbol.FullyConnected(X, num_hidden=D, name='x%s' % name)
            + mx.symbol.FullyConnected(H, num_hidden=D, name='h%s' % name)
        )
        return mx.symbol.Activation(pre_activation, act_type=act_type)

    data = mx.symbol.Variable('X')
    hidden = mx.symbol.Variable('H')
    cell = mx.symbol.Variable('C')
    in_gate = _gate_sym(data, hidden, 'i')
    forget_gate = _gate_sym(data, hidden, 'f')
    out_gate = _gate_sym(data, hidden, 'o')
    candidate = _gate_sym(data, hidden, 'g', act_type='tanh')
    next_cell = forget_gate * cell + in_gate * candidate
    return out_gate * mx.symbol.Activation(next_cell, act_type='tanh')
def make_func(sym, shapes):
    """Bind ``sym`` to an executor on ``context`` and wrap it in a callable.

    Prints ``sym.list_arguments()`` once at bind time.

    Args:
        sym: the symbol to bind.
        shapes: mapping of input variable name to shape, forwarded to
            ``infer_shape`` and ``simple_bind``.

    Returns:
        A function that runs one inference-mode forward pass and returns the
        executor's first output.
    """
    # The inferred shapes are not used further; the call is kept as-is.
    _arg_shapes, _out_shapes, _aux_shapes = sym.infer_shape(**shapes)
    bound = sym.simple_bind(context, 'write', **shapes)
    print(sym.list_arguments())

    def func(*args):
        # NOTE: ``args`` is accepted but ignored. Copying the arguments into
        # the executor is disabled, so the forward pass runs on whatever the
        # executor's argument arrays currently hold:
        #for arg, executor_arg in zip(args, bound.arg_arrays):
        #arg.copyto(executor_arg)
        bound.forward(is_train=False)
        first_output = bound.outputs[0]
        return first_output

    return func
# Executors for the unfused and per-gate variants. step() in this script only
# calls lstm_cell; the others are still bound here.
linear_x = make_func(linear_sym(), {'X': (N, 1, 7)})
linear_h = make_func(linear_sym(), {'X': (N, D)})
# Rebinds the name `sigmoid`, replacing the nd-based function defined earlier.
sigmoid = make_func(sigmoid_sym(), {'X': (N, D)})
gate = make_func(gate_sym(), {'X': (N, 1, 7), 'H': (N, D)})
gate_g = make_func(gate_sym('tanh'), {'X': (N, 1, 7), 'H': (N, D)})
# Fully fused cell: one executor takes the patch, hidden state and cell state.
lstm_cell = make_func(lstm_cell_sym(), {'X': (N, 1, 7), 'H': (N, D), 'C': (N, D)})
from time import time, sleep
def step():
    """Run the fused LSTM cell once per slice of ``X`` along axis 1.

    Each iteration is a single ``lstm_cell`` call whose result becomes the new
    hidden state. The cell state ``c`` is passed in but never reassigned in
    this loop. The final projection ``linear(h, W, b)`` is computed but not
    returned.
    """
    hidden = nd.zeros((N, D))
    cell = nd.zeros((N, D))
    for t in range(784 // 7):
        patch = nd.slice_axis(X, axis=1, begin=t, end=(t + 1))
        hidden = lstm_cell(
            patch, Wxo, bxo, hidden, Who, bho,
            Wxf, bxf, Whf, bhf, cell,
            Wxi, bxi, Whi, bhi,
            Wxg, bxg, Whg, bhg
        )
    linear(hidden, W, b)
def main():
    """Benchmark ``step()``, print the mean seconds per call, then pause.

    Runs 30 calls in total; the first 10 are warm-up and are excluded from the
    measurement, so the printed value is the average over the last 20 calls.
    """
    warmup_iters = 10
    total_iters = 30
    for index in range(total_iters):
        if index == warmup_iters:
            # MXNet queues operations asynchronously: drain the warm-up work
            # first so it is not billed to the timed region.
            nd.waitall()
            t0 = time()
        step()
    # Wait for all queued operations to finish; without this the timer would
    # only measure how long it took to enqueue the work.
    nd.waitall()
    print((time() - t0) / (total_iters - warmup_iters))
    # NOTE(review): the purpose of this pause is not evident from the script;
    # it is kept unchanged.
    sleep(3)


if __name__ == '__main__':
    main()
# To profile instead of timing, replace the call above with:
#import cProfile
#cProfile.run('main()')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment