Last active
June 11, 2017 02:11
-
-
Save jermainewang/c80f6d493b9860495c3e22bafcffe229 to your computer and use it in GitHub Desktop.
Naive lstm implementation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import mxnet as mx
import mxnet.ndarray as nd
# Alternative array backend that was tried at some point:
#import minpy.ndarray as nd

# Device that the random parameters below are explicitly created on.
context = mx.gpu(0)
def linear(X, W, bias):
    """Return the affine map ``nd.dot(X, W) + bias``."""
    projected = nd.dot(X, W)
    return projected + bias
def sigmoid(x):
    """Logistic sigmoid written via tanh: ``0.5 * (tanh(0.5 * x) + 1)``."""
    half_tanh = nd.tanh(.5 * x)
    return .5 * (half_tanh + 1)
from mxnet.context import Context

# Point MXNet's class-level default context at the GPU chosen above.
# Presumably this is what lets ctx-less allocations such as nd.zeros(...)
# land on the same device -- depends on the MXNet version honoring this attribute.
Context.default_ctx = context
def gaussian(shape):
    """Return a random array of *shape* allocated on ``context``.

    NOTE: despite the name, the values are produced by ``nd.uniform``.
    """
    return nd.uniform(ctx=context, shape=shape)
# Model dimensions and parameters. Each gate (i, f, o, g) has its own
# input-to-hidden and hidden-to-hidden weight/bias set.
D = 10  # width of the hidden/cell state

# Input-to-hidden parameters; (7, D) so that nd.dot(patch, W) contracts the
# width-7 axis.
WX_SHAPE = (7, D)
Wxi, Wxf, Wxo, Wxg = (gaussian(shape=WX_SHAPE) for _ in range(4))
bX_SHAPE = (D,)
bxi, bxf, bxo, bxg = (nd.zeros(shape=bX_SHAPE) for _ in range(4))

# Hidden-to-hidden parameters.
WH_SHAPE = (D, D)
Whi, Whf, Who, Whg = (gaussian(shape=WH_SHAPE) for _ in range(4))
bH_SHAPE = (D,)
bhi, bhf, bho, bhg = (nd.zeros(shape=bH_SHAPE) for _ in range(4))

# Final projection applied to the last hidden state.
W = gaussian(shape=(D, 10))
b = nd.zeros(shape=(10,))

N = 4  # leading dimension of X and of the h/c state
# 784 // 7 = 112 slices of width 7 along axis 1.
X = gaussian(shape=(N, 784 // 7, 7))

from time import time
def step():
    """Run one forward pass of the hand-written LSTM over every slice of X.

    Starts from zero hidden and cell state, applies the four gates to each
    slice of X along axis 1, then projects the final hidden state with (W, b).

    Returns:
        The result of ``linear(h, W, b)`` for the last hidden state. The
        original computed this value and discarded it; returning it is
        harmless for callers that ignore it.
    """
    h = nd.zeros((N, D))
    c = nd.zeros((N, D))
    # The loop index has its own name: the original reused `i` for both the
    # index and the input gate. The step count is read from X instead of
    # repeating the literal 784 // 7.
    for t in range(X.shape[1]):
        # NOTE(review): slice_axis presumably keeps axis 1, making patch 3-D
        # while h is 2-D -- confirm the gate sums broadcast as intended.
        patch = nd.slice_axis(X, axis=1, begin=t, end=t + 1)
        i = sigmoid(linear(patch, Wxi, bxi) + linear(h, Whi, bhi))
        f = sigmoid(linear(patch, Wxf, bxf) + linear(h, Whf, bhf))
        o = sigmoid(linear(patch, Wxo, bxo) + linear(h, Who, bho))
        g = nd.tanh(linear(patch, Wxg, bxg) + linear(h, Whg, bhg))
        c = f * c + i * g
        h = o * nd.tanh(c)
    return linear(h, W, b)
def main():
    """Time ``step()`` and print the mean wall-clock seconds per timed call.

    Runs 30 calls in total: the first 10 are warm-up and excluded from the
    measurement, and the printed value is the average over the remaining 20.
    """
    warmup_calls = 10
    timed_calls = 20

    for _ in range(warmup_calls):
        step()
    # MXNet queues operators asynchronously. Drain the queue before starting
    # the clock so warm-up work does not leak into the measurement.
    nd.waitall()

    t0 = time()
    for _ in range(timed_calls):
        step()
    # Wait until the timed work has actually executed, not merely been queued.
    nd.waitall()
    print((time() - t0) / timed_calls)
# Script entry point. To profile instead of timing, swap the call for:
#   import cProfile
#   cProfile.run('main()')
if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import mxnet as mx
import mxnet.ndarray as nd
from mxnet.context import Context
# Alternative array backend that was tried at some point:
#import minpy.ndarray as nd

# Device used both for explicit allocations and for binding executors.
context = mx.gpu(0)
# Presumably makes ctx-less allocations (e.g. nd.zeros) default to the same
# device -- depends on the MXNet version honoring this class attribute.
Context.default_ctx = context
def linear(X, W, bias):
    """Affine transform of X: matrix product with W via ``nd.dot``, plus bias."""
    product = nd.dot(X, W)
    return product + bias
def sigmoid(x):
    """Sigmoid of *x* computed through the identity ``0.5 * (tanh(0.5 * x) + 1)``."""
    shifted = nd.tanh(.5 * x) + 1
    return .5 * shifted
def gaussian(shape):
    """Create a randomly filled array of *shape* on ``context``.

    NOTE: the sampler used is ``nd.uniform``, not a normal distribution.
    """
    return nd.uniform(ctx=context, shape=shape)
# Model dimensions and parameters; one weight/bias set per gate (i, f, o, g).
D = 10  # width of the hidden/cell state, also used as num_hidden

# Input-to-hidden parameters. (D, 7) is presumably the
# (num_hidden, input_dim) layout that FullyConnected weights use.
WX_SHAPE = (D, 7)
Wxi, Wxf, Wxo, Wxg = (gaussian(shape=WX_SHAPE) for _ in range(4))
bX_SHAPE = (D,)
bxi, bxf, bxo, bxg = (nd.zeros(shape=bX_SHAPE) for _ in range(4))

# Hidden-to-hidden parameters.
WH_SHAPE = (D, D)
Whi, Whf, Who, Whg = (gaussian(shape=WH_SHAPE) for _ in range(4))
bH_SHAPE = (D,)
bhi, bhf, bho, bhg = (nd.zeros(shape=bH_SHAPE) for _ in range(4))

# Final projection applied to the last hidden state.
W = gaussian(shape=(D, 10))
b = nd.zeros(shape=(10,))

N = 4  # leading dimension of X and of the h/c state
# 784 // 7 = 112 slices of width 7 along axis 1.
X = gaussian(shape=(N, 784 // 7, 7))
def linear_sym():
    """Build a symbol applying one FullyConnected layer (D outputs) to variable 'X'."""
    data = mx.symbol.Variable('X')
    return mx.symbol.FullyConnected(data, num_hidden=D)
def make_func(sym, shapes):
    """Bind *sym* on ``context`` and return a forward-only callable.

    Args:
        sym: symbol to bind.
        shapes: mapping of input name to shape, forwarded as keyword
            arguments to ``infer_shape`` and ``simple_bind``.

    Returns:
        A function that runs ``executor.forward(is_train=False)`` and returns
        the executor's first output. Its positional arguments are accepted
        but unused: the copy into the executor's argument arrays is disabled,
        so the output reflects whatever those arrays already hold.
    """
    # The inferred shapes are not used afterwards; the call is kept as-is.
    _arg_shapes, _out_shapes, _aux_shapes = sym.infer_shape(**shapes)
    bound = sym.simple_bind(context, 'write', **shapes)

    def func(*args):
        # The arg.copyto(executor_arg) loop was left commented out in the
        # original, presumably to keep copies out of the measurement.
        bound.forward(is_train=False)
        first_output = bound.outputs[0]
        return first_output

    return func
# Pre-bound executors for the two projections used in step(): one bound for
# an (N, 1, 7) input, one for an (N, D) input.
_PATCH_SHAPE = (N, 1, 7)
_STATE_SHAPE = (N, D)
linear_x = make_func(linear_sym(), {'X': _PATCH_SHAPE})
linear_h = make_func(linear_sym(), {'X': _STATE_SHAPE})

from time import time
def step():
    """Run one forward pass of the LSTM using the pre-bound linear executors.

    Starts from zero hidden and cell state, evaluates the four gates for each
    slice of X along axis 1 via ``linear_x``/``linear_h``, then projects the
    final hidden state with (W, b).

    Returns:
        The result of ``linear(h, W, b)``. The original computed this value
        and discarded it; returning it is harmless for callers that ignore it.
    """
    h = nd.zeros((N, D))
    c = nd.zeros((N, D))
    # The loop index has its own name: the original reused `i` for both the
    # index and the input gate. The step count is read from X instead of
    # repeating the literal 784 // 7.
    for t in range(X.shape[1]):
        patch = nd.slice_axis(X, axis=1, begin=t, end=t + 1)
        # NOTE(review): linear_x/linear_h come from make_func, whose returned
        # callable ignores its arguments -- confirm that is intended here.
        i = sigmoid(linear_x(patch, Wxi, bxi) + linear_h(h, Whi, bhi))
        f = sigmoid(linear_x(patch, Wxf, bxf) + linear_h(h, Whf, bhf))
        o = sigmoid(linear_x(patch, Wxo, bxo) + linear_h(h, Who, bho))
        g = nd.tanh(linear_x(patch, Wxg, bxg) + linear_h(h, Whg, bhg))
        c = f * c + i * g
        h = o * nd.tanh(c)
    return linear(h, W, b)
def main():
    """Time ``step()`` and print the mean wall-clock seconds per timed call.

    Runs 30 calls in total: 10 warm-up calls that are not measured, followed
    by 20 timed calls whose average duration is printed.
    """
    warmup_calls = 10
    timed_calls = 20

    for _ in range(warmup_calls):
        step()
    # MXNet executes asynchronously; flush pending work so the clock starts
    # from an idle engine.
    nd.waitall()

    t0 = time()
    for _ in range(timed_calls):
        step()
    # Without this barrier only the time to enqueue the work is measured.
    nd.waitall()
    print((time() - t0) / timed_calls)
# Script entry point. To profile instead of timing, swap the call for:
#   import cProfile
#   cProfile.run('main()')
if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import mxnet as mx
import mxnet.ndarray as nd
from mxnet.context import Context
# Alternative array backend that was tried at some point:
#import minpy.ndarray as nd

# Device used both for explicit allocations and for binding executors.
context = mx.gpu(0)
# Presumably makes ctx-less allocations (e.g. nd.zeros) default to the same
# device -- depends on the MXNet version honoring this class attribute.
Context.default_ctx = context
def linear(X, W, bias):
    """Compute ``nd.dot(X, W)`` and add ``bias`` to the result."""
    weighted = nd.dot(X, W)
    return weighted + bias
def sigmoid(x):
    """Sigmoid of *x* expressed as ``0.5 * (tanh(0.5 * x) + 1)``.

    NOTE: later in this script the module-level name ``sigmoid`` is rebound
    to an executor-backed callable, which replaces this definition.
    """
    half_tanh = nd.tanh(.5 * x)
    return .5 * (half_tanh + 1)
def gaussian(shape):
    """Return a random array of *shape* on ``context``.

    NOTE: sampling is done with ``nd.uniform`` even though the name says gaussian.
    """
    return nd.uniform(ctx=context, shape=shape)
# Model dimensions and parameters; one weight/bias set per gate (i, f, o, g).
D = 10  # width of the hidden/cell state, also used as num_hidden

# Input-to-hidden parameters. (D, 7) is presumably the
# (num_hidden, input_dim) layout that FullyConnected weights use.
WX_SHAPE = (D, 7)
Wxi, Wxf, Wxo, Wxg = (gaussian(shape=WX_SHAPE) for _ in range(4))
bX_SHAPE = (D,)
bxi, bxf, bxo, bxg = (nd.zeros(shape=bX_SHAPE) for _ in range(4))

# Hidden-to-hidden parameters.
WH_SHAPE = (D, D)
Whi, Whf, Who, Whg = (gaussian(shape=WH_SHAPE) for _ in range(4))
bH_SHAPE = (D,)
bhi, bhf, bho, bhg = (nd.zeros(shape=bH_SHAPE) for _ in range(4))

# Final projection applied to the last hidden state.
W = gaussian(shape=(D, 10))
b = nd.zeros(shape=(10,))

N = 4  # leading dimension of X and of the h/c state
# 784 // 7 = 112 slices of width 7 along axis 1.
X = gaussian(shape=(N, 784 // 7, 7))
def linear_sym():
    """Return a FullyConnected symbol with D hidden units over the variable 'X'."""
    data = mx.symbol.Variable('X')
    return mx.symbol.FullyConnected(data, num_hidden=D)
def sigmoid_sym():
    """Return a symbol applying a sigmoid Activation to the variable 'X'."""
    data = mx.symbol.Variable('X')
    return mx.symbol.Activation(data, act_type='sigmoid')
def make_func(sym, shapes):
    """Bind *sym* on ``context`` and return a forward-only callable.

    Args:
        sym: symbol to bind.
        shapes: mapping of input name to shape, forwarded as keyword
            arguments to ``infer_shape`` and ``simple_bind``.

    Returns:
        A function that runs ``executor.forward(is_train=False)`` and returns
        the executor's first output. Its positional arguments are accepted
        but unused: the copy into the executor's argument arrays is disabled,
        so the output reflects whatever those arrays already hold.
    """
    # The inferred shapes are not used afterwards; the call is kept as-is.
    _arg_shapes, _out_shapes, _aux_shapes = sym.infer_shape(**shapes)
    bound = sym.simple_bind(context, 'write', **shapes)

    def func(*args):
        # The arg.copyto(executor_arg) loop was left commented out in the
        # original, presumably to keep copies out of the measurement.
        bound.forward(is_train=False)
        first_output = bound.outputs[0]
        return first_output

    return func
# Pre-bound executors used by step().
_PATCH_SHAPE = (N, 1, 7)
_STATE_SHAPE = (N, D)
linear_x = make_func(linear_sym(), {'X': _PATCH_SHAPE})
linear_h = make_func(linear_sym(), {'X': _STATE_SHAPE})
# Rebinds the module-level name `sigmoid`, replacing the tanh-based function
# defined earlier in this script with an executor-backed callable.
sigmoid = make_func(sigmoid_sym(), {'X': _STATE_SHAPE})

from time import time
def step():
    """Run one forward pass of the LSTM using executor-backed linear/sigmoid.

    Starts from zero hidden and cell state, evaluates the four gates for each
    slice of X along axis 1, then projects the final hidden state with (W, b).

    Returns:
        The result of ``linear(h, W, b)``. The original computed this value
        and discarded it; returning it is harmless for callers that ignore it.
    """
    h = nd.zeros((N, D))
    c = nd.zeros((N, D))
    # The loop index has its own name: the original reused `i` for both the
    # index and the input gate. The step count is read from X instead of
    # repeating the literal 784 // 7.
    for t in range(X.shape[1]):
        patch = nd.slice_axis(X, axis=1, begin=t, end=t + 1)
        # NOTE(review): linear_x, linear_h and sigmoid come from make_func,
        # whose returned callable ignores its arguments -- confirm intent.
        i = sigmoid(linear_x(patch, Wxi, bxi) + linear_h(h, Whi, bhi))
        f = sigmoid(linear_x(patch, Wxf, bxf) + linear_h(h, Whf, bhf))
        o = sigmoid(linear_x(patch, Wxo, bxo) + linear_h(h, Who, bho))
        g = nd.tanh(linear_x(patch, Wxg, bxg) + linear_h(h, Whg, bhg))
        c = f * c + i * g
        h = o * nd.tanh(c)
    return linear(h, W, b)
def main():
    """Time ``step()`` and print the mean wall-clock seconds per timed call.

    Runs 30 calls in total: 10 warm-up calls that are not measured, followed
    by 20 timed calls whose average duration is printed.
    """
    warmup_calls = 10
    timed_calls = 20

    for _ in range(warmup_calls):
        step()
    # MXNet executes asynchronously; flush pending work so the clock starts
    # from an idle engine.
    nd.waitall()

    t0 = time()
    for _ in range(timed_calls):
        step()
    # Without this barrier only the time to enqueue the work is measured.
    nd.waitall()
    print((time() - t0) / timed_calls)
# Script entry point. To profile instead of timing, swap the call for:
#   import cProfile
#   cProfile.run('main()')
if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import mxnet as mx
import mxnet.ndarray as nd
from mxnet.context import Context
# Alternative array backend that was tried at some point:
#import minpy.ndarray as nd

# Device used both for explicit allocations and for binding executors.
context = mx.gpu(0)
# Presumably makes ctx-less allocations (e.g. nd.zeros) default to the same
# device -- depends on the MXNet version honoring this class attribute.
Context.default_ctx = context
def linear(X, W, bias):
    """Return ``nd.dot(X, W)`` shifted by ``bias``."""
    projected = nd.dot(X, W)
    return projected + bias
def sigmoid(x):
    """Sigmoid of *x* expressed as ``0.5 * (tanh(0.5 * x) + 1)``.

    NOTE: later in this script the module-level name ``sigmoid`` is rebound
    to an executor-backed callable, which replaces this definition.
    """
    half_tanh = nd.tanh(.5 * x)
    return .5 * (half_tanh + 1)
def gaussian(shape):
    """Return a random array of *shape* on ``context``.

    NOTE: sampling is done with ``nd.uniform`` even though the name says gaussian.
    """
    return nd.uniform(ctx=context, shape=shape)
# Model dimensions and parameters; one weight/bias set per gate (i, f, o, g).
D = 10  # width of the hidden/cell state, also used as num_hidden

# Input-to-hidden parameters. (D, 7) is presumably the
# (num_hidden, input_dim) layout that FullyConnected weights use.
WX_SHAPE = (D, 7)
Wxi, Wxf, Wxo, Wxg = (gaussian(shape=WX_SHAPE) for _ in range(4))
bX_SHAPE = (D,)
bxi, bxf, bxo, bxg = (nd.zeros(shape=bX_SHAPE) for _ in range(4))

# Hidden-to-hidden parameters.
WH_SHAPE = (D, D)
Whi, Whf, Who, Whg = (gaussian(shape=WH_SHAPE) for _ in range(4))
bH_SHAPE = (D,)
bhi, bhf, bho, bhg = (nd.zeros(shape=bH_SHAPE) for _ in range(4))

# Final projection applied to the last hidden state.
W = gaussian(shape=(D, 10))
b = nd.zeros(shape=(10,))

N = 4  # leading dimension of X and of the h/c state
# 784 // 7 = 112 slices of width 7 along axis 1.
X = gaussian(shape=(N, 784 // 7, 7))
def linear_sym():
    """Return a FullyConnected symbol with D hidden units over the variable 'X'."""
    data = mx.symbol.Variable('X')
    return mx.symbol.FullyConnected(data, num_hidden=D)
def sigmoid_sym():
    """Return a symbol applying a sigmoid Activation to the variable 'X'."""
    data = mx.symbol.Variable('X')
    return mx.symbol.Activation(data, act_type='sigmoid')
def gate_sym(act_type='sigmoid'):
    """Build one fused gate: ``Activation(FC(X) + FC(H))``.

    Args:
        act_type: activation applied to the summed projections
            ('sigmoid' by default).

    Returns:
        The symbol for the activated sum of two D-unit FullyConnected layers,
        one over variable 'X' and one over variable 'H'.
    """
    X = mx.symbol.Variable('X')
    H = mx.symbol.Variable('H')
    from_input = mx.symbol.FullyConnected(X, num_hidden=D)
    from_state = mx.symbol.FullyConnected(H, num_hidden=D)
    pre_activation = from_input + from_state
    return mx.symbol.Activation(pre_activation, act_type=act_type)
def make_func(sym, shapes):
    """Bind *sym* on ``context`` and return a forward-only callable.

    Prints ``sym.list_arguments()`` once at bind time.

    Args:
        sym: symbol to bind.
        shapes: mapping of input name to shape, forwarded as keyword
            arguments to ``infer_shape`` and ``simple_bind``.

    Returns:
        A function that runs ``executor.forward(is_train=False)`` and returns
        the executor's first output. Its positional arguments are accepted
        but unused: the copy into the executor's argument arrays is disabled,
        so the output reflects whatever those arrays already hold.
    """
    # The inferred shapes are not used afterwards; the call is kept as-is.
    _arg_shapes, _out_shapes, _aux_shapes = sym.infer_shape(**shapes)
    bound = sym.simple_bind(context, 'write', **shapes)
    # Shows the order in which the bound symbol lists its arguments.
    print(sym.list_arguments())

    def func(*args):
        # The arg.copyto(executor_arg) loop was left commented out in the
        # original, presumably to keep copies out of the measurement.
        bound.forward(is_train=False)
        first_output = bound.outputs[0]
        return first_output

    return func
# Pre-bound executors. step() in this version only calls `gate` and `gate_g`;
# the first three are still bound, as in the original.
_PATCH_SHAPE = (N, 1, 7)
_STATE_SHAPE = (N, D)
linear_x = make_func(linear_sym(), {'X': _PATCH_SHAPE})
linear_h = make_func(linear_sym(), {'X': _STATE_SHAPE})
# Rebinds the module-level name `sigmoid`, replacing the tanh-based function
# defined earlier in this script.
sigmoid = make_func(sigmoid_sym(), {'X': _STATE_SHAPE})
gate = make_func(gate_sym(), {'X': _PATCH_SHAPE, 'H': _STATE_SHAPE})
gate_g = make_func(gate_sym('tanh'), {'X': _PATCH_SHAPE, 'H': _STATE_SHAPE})

from time import time
def step():
    """Run one forward pass of the LSTM using the fused gate executors.

    Each gate is a single call to ``gate`` (sigmoid) or ``gate_g`` (tanh),
    replacing the separate linear and activation calls of the earlier
    versions. Starts from zero hidden and cell state and finishes by
    projecting the final hidden state with (W, b).

    Returns:
        The result of ``linear(h, W, b)``. The original computed this value
        and discarded it; returning it is harmless for callers that ignore it.
    """
    h = nd.zeros((N, D))
    c = nd.zeros((N, D))
    # The loop index has its own name: the original reused `i` for both the
    # index and the input gate. The step count is read from X instead of
    # repeating the literal 784 // 7.
    for t in range(X.shape[1]):
        patch = nd.slice_axis(X, axis=1, begin=t, end=t + 1)
        # NOTE(review): gate/gate_g come from make_func, whose returned
        # callable ignores its arguments -- confirm that is intended here.
        i = gate(patch, Wxi, bxi, h, Whi, bhi)
        f = gate(patch, Wxf, bxf, h, Whf, bhf)
        o = gate(patch, Wxo, bxo, h, Who, bho)
        g = gate_g(patch, Wxg, bxg, h, Whg, bhg)
        c = f * c + i * g
        h = o * nd.tanh(c)
    return linear(h, W, b)
def main():
    """Time ``step()`` and print the mean wall-clock seconds per timed call.

    Runs 30 calls in total: 10 warm-up calls that are not measured, followed
    by 20 timed calls whose average duration is printed.
    """
    warmup_calls = 10
    timed_calls = 20

    for _ in range(warmup_calls):
        step()
    # MXNet executes asynchronously; flush pending work so the clock starts
    # from an idle engine.
    nd.waitall()

    t0 = time()
    for _ in range(timed_calls):
        step()
    # Without this barrier only the time to enqueue the work is measured.
    nd.waitall()
    print((time() - t0) / timed_calls)
# Script entry point. To profile instead of timing, swap the call for:
#   import cProfile
#   cProfile.run('main()')
if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import mxnet as mx
import mxnet.ndarray as nd
from mxnet.context import Context
# Alternative array backend that was tried at some point:
#import minpy.ndarray as nd

# Device used both for explicit allocations and for binding executors.
context = mx.gpu(0)
# Presumably makes ctx-less allocations (e.g. nd.zeros) default to the same
# device -- depends on the MXNet version honoring this class attribute.
Context.default_ctx = context
def linear(X, W, bias):
    """Return the affine map ``nd.dot(X, W) + bias``."""
    product = nd.dot(X, W)
    return product + bias
def sigmoid(x):
    """Sigmoid of *x* expressed as ``0.5 * (tanh(0.5 * x) + 1)``.

    NOTE: later in this script the module-level name ``sigmoid`` is rebound
    to an executor-backed callable, which replaces this definition.
    """
    half_tanh = nd.tanh(.5 * x)
    return .5 * (half_tanh + 1)
def gaussian(shape):
    """Return a random array of *shape* on ``context``.

    NOTE: sampling is done with ``nd.uniform`` even though the name says gaussian.
    """
    return nd.uniform(ctx=context, shape=shape)
# Model dimensions and parameters; one weight/bias set per gate (i, f, o, g).
D = 10  # width of the hidden/cell state, also used as num_hidden

# Input-to-hidden parameters. (D, 7) is presumably the
# (num_hidden, input_dim) layout that FullyConnected weights use.
WX_SHAPE = (D, 7)
Wxi, Wxf, Wxo, Wxg = (gaussian(shape=WX_SHAPE) for _ in range(4))
bX_SHAPE = (D,)
bxi, bxf, bxo, bxg = (nd.zeros(shape=bX_SHAPE) for _ in range(4))

# Hidden-to-hidden parameters.
WH_SHAPE = (D, D)
Whi, Whf, Who, Whg = (gaussian(shape=WH_SHAPE) for _ in range(4))
bH_SHAPE = (D,)
bhi, bhf, bho, bhg = (nd.zeros(shape=bH_SHAPE) for _ in range(4))

# Final projection applied to the last hidden state.
W = gaussian(shape=(D, 10))
b = nd.zeros(shape=(10,))

N = 4  # leading dimension of X and of the h/c state
# 784 // 7 = 112 slices of width 7 along axis 1.
X = gaussian(shape=(N, 784 // 7, 7))
def linear_sym():
    """Return a FullyConnected symbol with D hidden units over the variable 'X'."""
    data = mx.symbol.Variable('X')
    return mx.symbol.FullyConnected(data, num_hidden=D)
def sigmoid_sym():
    """Return a symbol applying a sigmoid Activation to the variable 'X'."""
    data = mx.symbol.Variable('X')
    return mx.symbol.Activation(data, act_type='sigmoid')
def gate_sym(act_type='sigmoid'):
    """Build one fused gate: ``Activation(FC(X) + FC(H))``.

    Args:
        act_type: activation applied to the summed projections
            ('sigmoid' by default).

    Returns:
        The symbol for the activated sum of two D-unit FullyConnected layers,
        one over variable 'X' and one over variable 'H'.
    """
    X = mx.symbol.Variable('X')
    H = mx.symbol.Variable('H')
    from_input = mx.symbol.FullyConnected(X, num_hidden=D)
    from_state = mx.symbol.FullyConnected(H, num_hidden=D)
    pre_activation = from_input + from_state
    return mx.symbol.Activation(pre_activation, act_type=act_type)
def lstm_cell_sym():
    """Build the symbol for one whole LSTM step and return the new hidden state.

    Inputs are the variables 'X', 'H' and 'C'. Every gate owns two named
    FullyConnected layers, 'x<gate>' and 'h<gate>', for gate in i, f, o, g.
    Only the hidden state is returned; the updated cell state is not exposed
    as an output of the symbol.
    """
    def _gate_sym(X, H, name, act_type='sigmoid'):
        # One gate: activation(FC(X) + FC(H)), layers named after the gate.
        from_input = mx.symbol.FullyConnected(X, num_hidden=D, name='x%s' % name)
        from_state = mx.symbol.FullyConnected(H, num_hidden=D, name='h%s' % name)
        return mx.symbol.Activation(from_input + from_state, act_type=act_type)

    X = mx.symbol.Variable('X')
    H = mx.symbol.Variable('H')
    prev_cell = mx.symbol.Variable('C')
    # Gates are created in the original order (i, f, o, g) and combined with
    # the same operand order, so the argument listing of the result is unchanged.
    in_gate = _gate_sym(X, H, 'i')
    forget_gate = _gate_sym(X, H, 'f')
    out_gate = _gate_sym(X, H, 'o')
    candidate = _gate_sym(X, H, 'g', act_type='tanh')
    next_cell = forget_gate * prev_cell + in_gate * candidate
    return out_gate * mx.symbol.Activation(next_cell, act_type='tanh')
def make_func(sym, shapes):
    """Bind *sym* on ``context`` and return a forward-only callable.

    Prints ``sym.list_arguments()`` once at bind time.

    Args:
        sym: symbol to bind.
        shapes: mapping of input name to shape, forwarded as keyword
            arguments to ``infer_shape`` and ``simple_bind``.

    Returns:
        A function that runs ``executor.forward(is_train=False)`` and returns
        the executor's first output. Its positional arguments are accepted
        but unused: the copy into the executor's argument arrays is disabled,
        so the output reflects whatever those arrays already hold.
    """
    # The inferred shapes are not used afterwards; the call is kept as-is.
    _arg_shapes, _out_shapes, _aux_shapes = sym.infer_shape(**shapes)
    bound = sym.simple_bind(context, 'write', **shapes)
    # Shows the order in which the bound symbol lists its arguments.
    print(sym.list_arguments())

    def func(*args):
        # The arg.copyto(executor_arg) loop was left commented out in the
        # original, presumably to keep copies out of the measurement.
        bound.forward(is_train=False)
        first_output = bound.outputs[0]
        return first_output

    return func
# Pre-bound executors. step() in this version only calls `lstm_cell`; the
# others are still bound, as in the original.
_PATCH_SHAPE = (N, 1, 7)
_STATE_SHAPE = (N, D)
linear_x = make_func(linear_sym(), {'X': _PATCH_SHAPE})
linear_h = make_func(linear_sym(), {'X': _STATE_SHAPE})
# Rebinds the module-level name `sigmoid`, replacing the tanh-based function
# defined earlier in this script.
sigmoid = make_func(sigmoid_sym(), {'X': _STATE_SHAPE})
gate = make_func(gate_sym(), {'X': _PATCH_SHAPE, 'H': _STATE_SHAPE})
gate_g = make_func(gate_sym('tanh'), {'X': _PATCH_SHAPE, 'H': _STATE_SHAPE})
lstm_cell = make_func(
    lstm_cell_sym(),
    {'X': _PATCH_SHAPE, 'H': _STATE_SHAPE, 'C': _STATE_SHAPE},
)

from time import time, sleep
def step():
    """Run one forward pass using the single fused ``lstm_cell`` executor.

    Starts from zero hidden and cell state, calls ``lstm_cell`` once per slice
    of X along axis 1, then applies ``linear(h, W, b)`` to the final hidden
    state. The projection result is not returned.

    NOTE(review): `c` is passed to every call but never reassigned, so it
    stays all-zero here -- confirm whether the cell state should be carried.
    """
    h = nd.zeros((N, D))
    c = nd.zeros((N, D))
    for t in range(784 // 7):
        patch = nd.slice_axis(X, axis=1, begin=t, end=t + 1)
        # Arguments are grouped per gate in the order o, f, (c), i, g.
        h = lstm_cell(
            patch, Wxo, bxo, h, Who, bho,
            Wxf, bxf, Whf, bhf,
            c, Wxi, bxi, Whi, bhi,
            Wxg, bxg, Whg, bhg,
        )
    linear(h, W, b)
def main():
    """Time ``step()``, print the mean seconds per timed call, then sleep 3 s.

    Runs 30 calls in total: 10 warm-up calls that are not measured, followed
    by 20 timed calls whose average duration is printed.
    """
    warmup_calls = 10
    timed_calls = 20

    for _ in range(warmup_calls):
        step()
    # MXNet executes asynchronously; flush pending work so the clock starts
    # from an idle engine.
    nd.waitall()

    t0 = time()
    for _ in range(timed_calls):
        step()
    # Without this barrier only the time to enqueue the work is measured.
    nd.waitall()
    print((time() - t0) / timed_calls)
    # Kept from the original; its purpose is not evident from this script.
    sleep(3)
# Script entry point. To profile instead of timing, swap the call for:
#   import cProfile
#   cProfile.run('main()')
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment