# jermainewang/lstm.py

## lstm.py
import mxnet as mx
import mxnet.ndarray as nd
#import minpy.ndarray as nd
context = mx.gpu(0)

def linear(X, W, bias):
    """Affine map: the ``nd.dot`` product of ``X`` and ``W``, plus ``bias``."""
    projected = nd.dot(X, W)
    return projected + bias

def sigmoid(x):
    """Logistic function of ``x``, evaluated as ``0.5 * (tanh(x / 2) + 1)``."""
    squashed = nd.tanh(.5 * x)
    return .5 * (squashed + 1)

from mxnet.context import Context
Context.default_ctx = context

def gaussian(shape):
    """Return a random array of the given ``shape`` allocated on ``context``.

    NOTE(review): despite the name, the values are drawn by ``nd.uniform``
    with the library's default bounds, not from a normal distribution.
    """
    return nd.uniform(ctx=context, shape=shape)

# Width of the hidden and cell states.
D = 10

# Input-to-hidden weights, one per gate (i, f, o, g); used as nd.dot(x, W).
WX_SHAPE = (7, D)
Wxi, Wxf, Wxo, Wxg = (gaussian(shape=WX_SHAPE) for _ in range(4))

# Input-to-hidden biases, initialised to zero.
bX_SHAPE = (D,)
bxi, bxf, bxo, bxg = (nd.zeros(shape=bX_SHAPE) for _ in range(4))

# Hidden-to-hidden weights, one per gate.
WH_SHAPE = (D, D)
Whi, Whf, Who, Whg = (gaussian(shape=WH_SHAPE) for _ in range(4))

# Hidden-to-hidden biases, initialised to zero.
bH_SHAPE = (D,)
bhi, bhf, bho, bhg = (nd.zeros(shape=bH_SHAPE) for _ in range(4))

# Output projection applied to the last hidden state.
W = gaussian(shape=(D, 10))
b = nd.zeros(shape=(10,))

# Random input: N sequences, 784 // 7 positions along axis 1, 7 values each.
N = 4
X = gaussian(shape=(N, 784 // 7, 7))

from time import time

def step():
    """Run one LSTM forward pass over every position along axis 1 of ``X``.

    Starts from zero hidden and cell states of shape ``(N, D)``, applies the
    input, forget and output gates plus the candidate update at each time
    step, and projects the final hidden state with ``W`` and ``b``.

    Returns:
        The result of ``linear(h, W, b)`` for the final hidden state. It used
        to be computed and discarded; returning it lets callers inspect or
        wait on it.
    """
    h = nd.zeros((N, D))
    c = nd.zeros((N, D))

    # The time index is ``t`` so that it is not overwritten by the input
    # gate, which is named ``i`` inside the loop body.
    for t in range(X.shape[1]):
        # slice_axis keeps a length-1 time axis; flatten it away so the input
        # projection is 2-D like the hidden projection it is added to.
        patch = nd.slice_axis(X, axis=1, begin=t, end=(t + 1))
        patch = patch.reshape((X.shape[0], X.shape[2]))
        i = sigmoid(linear(patch, Wxi, bxi) + linear(h, Whi, bhi))
        f = sigmoid(linear(patch, Wxf, bxf) + linear(h, Whf, bhf))
        o = sigmoid(linear(patch, Wxo, bxo) + linear(h, Who, bho))
        g = nd.tanh(linear(patch, Wxg, bxg) + linear(h, Whg, bhg))
        c = f * c + i * g
        h = o * nd.tanh(c)
    return linear(h, W, b)

def main(iterations=30, warmup=10):
    """Time ``step()`` and print the mean seconds per timed call.

    Args:
        iterations: Total number of ``step()`` calls (default 30).
        warmup: Leading calls that are executed but not timed (default 10).

    Raises:
        ValueError: If ``warmup`` is not in ``[0, iterations)``, which would
            otherwise leave the start time unbound or divide by zero.
    """
    if not 0 <= warmup < iterations:
        raise ValueError('warmup must satisfy 0 <= warmup < iterations')
    for index in range(iterations):
        if index == warmup:
            # MXNet queues NDArray work asynchronously; drain the warm-up
            # calls so they are not billed to the timed region.
            nd.waitall()
            t0 = time()
        step()
    # Block until the queued work has finished; otherwise only the time
    # needed to enqueue the operations would be measured.
    nd.waitall()
    print((time() - t0) / (iterations - warmup))

# Run the benchmark only when this file is executed as a script.
if __name__ == '__main__':
    main()
    # Uncomment the two lines below to run main() under cProfile.
    #import cProfile
    #cProfile.run('main()')

## lstm1.py
import mxnet as mx
import mxnet.ndarray as nd
from mxnet.context import Context
#import minpy.ndarray as nd
context = mx.gpu(0)
Context.default_ctx = context

def linear(X, W, bias):
    """Affine map: the ``nd.dot`` product of ``X`` and ``W``, plus ``bias``."""
    projected = nd.dot(X, W)
    return projected + bias

def sigmoid(x):
    """Logistic function of ``x``, evaluated as ``0.5 * (tanh(x / 2) + 1)``."""
    squashed = nd.tanh(.5 * x)
    return .5 * (squashed + 1)

def gaussian(shape):
    """Return a random array of the given ``shape`` allocated on ``context``.

    NOTE(review): despite the name, the values are drawn by ``nd.uniform``
    with the library's default bounds, not from a normal distribution.
    """
    return nd.uniform(ctx=context, shape=shape)

# Width of the hidden and cell states.
D = 10

# Input-to-hidden weights, one per gate (i, f, o, g), stored as (D, 7).
WX_SHAPE = (D, 7)
Wxi, Wxf, Wxo, Wxg = (gaussian(shape=WX_SHAPE) for _ in range(4))

# Input-to-hidden biases, initialised to zero.
bX_SHAPE = (D,)
bxi, bxf, bxo, bxg = (nd.zeros(shape=bX_SHAPE) for _ in range(4))

# Hidden-to-hidden weights, one per gate.
WH_SHAPE = (D, D)
Whi, Whf, Who, Whg = (gaussian(shape=WH_SHAPE) for _ in range(4))

# Hidden-to-hidden biases, initialised to zero.
bH_SHAPE = (D,)
bhi, bhf, bho, bhg = (nd.zeros(shape=bH_SHAPE) for _ in range(4))

# Output projection applied to the last hidden state.
W = gaussian(shape=(D, 10))
b = nd.zeros(shape=(10,))

# Random input: N sequences, 784 // 7 positions along axis 1, 7 values each.
N = 4
X = gaussian(shape=(N, 784 // 7, 7))

def linear_sym():
    """Build a FullyConnected symbol with ``D`` hidden units over variable 'X'."""
    return mx.symbol.FullyConnected(mx.symbol.Variable('X'), num_hidden=D)

def make_func(sym, shapes, copy_inputs=False):
    """Bind ``sym`` to an executor and wrap its forward pass in a callable.

    Args:
        sym: Symbol to bind.
        shapes: Mapping from input name to shape, forwarded to
            ``infer_shape`` and ``simple_bind``.
        copy_inputs: When true, the positional arguments of the returned
            function are copied, by position, into the executor's argument
            arrays before each run. The default ``False`` keeps the existing
            behavior: arguments are accepted but ignored, and the executor
            runs on whatever its bound arrays already hold.

    Returns:
        ``func(*args)``, which runs an inference-mode forward pass and
        returns the executor's first output.
    """
    # The inferred shapes were never used; the call is kept, presumably as
    # an early consistency check of ``shapes``.
    sym.infer_shape(**shapes)
    executor = sym.simple_bind(context, 'write', **shapes)

    def func(*args):
        if copy_inputs:
            if len(args) != len(executor.arg_arrays):
                raise ValueError('expected %d arguments, got %d'
                                 % (len(executor.arg_arrays), len(args)))
            for arg, executor_arg in zip(args, executor.arg_arrays):
                arg.copyto(executor_arg)
        executor.forward(is_train=False)
        return executor.outputs[0]
    return func

# Executor-backed projections: one bound to an (N, 1, 7) input and one
# bound to an (N, D) input.
linear_x = make_func(sym=linear_sym(), shapes={'X': (N, 1, 7)})
linear_h = make_func(sym=linear_sym(), shapes={'X': (N, D)})

from time import time

def step():
    """Run one pass of the LSTM recurrence over every position of ``X``.

    NOTE(review): ``linear_x`` and ``linear_h`` come from ``make_func``; as
    written in this file those callables do not read their arguments, so the
    values passed here may never reach the executors — confirm.

    Returns:
        The result of ``linear(h, W, b)`` for the final hidden state
        (previously computed and discarded).
    """
    h = nd.zeros((N, D))
    c = nd.zeros((N, D))

    # The time index is ``t`` so that it is not overwritten by the input
    # gate, which is named ``i`` inside the loop body.
    for t in range(X.shape[1]):
        patch = nd.slice_axis(X, axis=1, begin=t, end=(t + 1))
        i = sigmoid(linear_x(patch, Wxi, bxi) + linear_h(h, Whi, bhi))
        f = sigmoid(linear_x(patch, Wxf, bxf) + linear_h(h, Whf, bhf))
        o = sigmoid(linear_x(patch, Wxo, bxo) + linear_h(h, Who, bho))
        g = nd.tanh(linear_x(patch, Wxg, bxg) + linear_h(h, Whg, bhg))
        c = f * c + i * g
        h = o * nd.tanh(c)
    return linear(h, W, b)

def main(iterations=30, warmup=10):
    """Time ``step()`` and print the mean seconds per timed call.

    Args:
        iterations: Total number of ``step()`` calls (default 30).
        warmup: Leading calls that are executed but not timed (default 10).

    Raises:
        ValueError: If ``warmup`` is not in ``[0, iterations)``, which would
            otherwise leave the start time unbound or divide by zero.
    """
    if not 0 <= warmup < iterations:
        raise ValueError('warmup must satisfy 0 <= warmup < iterations')
    for index in range(iterations):
        if index == warmup:
            # MXNet queues NDArray work asynchronously; drain the warm-up
            # calls so they are not billed to the timed region.
            nd.waitall()
            t0 = time()
        step()
    # Block until the queued work has finished; otherwise only the time
    # needed to enqueue the operations would be measured.
    nd.waitall()
    print((time() - t0) / (iterations - warmup))

# Run the benchmark only when this file is executed as a script.
if __name__ == '__main__':
    main()
    # Uncomment the two lines below to run main() under cProfile.
    #import cProfile
    #cProfile.run('main()')

## lstm2.py
import mxnet as mx
import mxnet.ndarray as nd
from mxnet.context import Context
#import minpy.ndarray as nd
context = mx.gpu(0)
Context.default_ctx = context

def linear(X, W, bias):
    """Affine map: the ``nd.dot`` product of ``X`` and ``W``, plus ``bias``."""
    projected = nd.dot(X, W)
    return projected + bias

def sigmoid(x):
    """Logistic function of ``x``, evaluated as ``0.5 * (tanh(x / 2) + 1)``."""
    squashed = nd.tanh(.5 * x)
    return .5 * (squashed + 1)

def gaussian(shape):
    """Return a random array of the given ``shape`` allocated on ``context``.

    NOTE(review): despite the name, the values are drawn by ``nd.uniform``
    with the library's default bounds, not from a normal distribution.
    """
    return nd.uniform(ctx=context, shape=shape)

# Width of the hidden and cell states.
D = 10

# Input-to-hidden weights, one per gate (i, f, o, g), stored as (D, 7).
WX_SHAPE = (D, 7)
Wxi, Wxf, Wxo, Wxg = (gaussian(shape=WX_SHAPE) for _ in range(4))

# Input-to-hidden biases, initialised to zero.
bX_SHAPE = (D,)
bxi, bxf, bxo, bxg = (nd.zeros(shape=bX_SHAPE) for _ in range(4))

# Hidden-to-hidden weights, one per gate.
WH_SHAPE = (D, D)
Whi, Whf, Who, Whg = (gaussian(shape=WH_SHAPE) for _ in range(4))

# Hidden-to-hidden biases, initialised to zero.
bH_SHAPE = (D,)
bhi, bhf, bho, bhg = (nd.zeros(shape=bH_SHAPE) for _ in range(4))

# Output projection applied to the last hidden state.
W = gaussian(shape=(D, 10))
b = nd.zeros(shape=(10,))

# Random input: N sequences, 784 // 7 positions along axis 1, 7 values each.
N = 4
X = gaussian(shape=(N, 784 // 7, 7))

def linear_sym():
    """Build a FullyConnected symbol with ``D`` hidden units over variable 'X'."""
    return mx.symbol.FullyConnected(mx.symbol.Variable('X'), num_hidden=D)

def sigmoid_sym():
    """Build a sigmoid Activation symbol over a variable named 'X'."""
    return mx.symbol.Activation(mx.symbol.Variable('X'), act_type='sigmoid')

def make_func(sym, shapes, copy_inputs=False):
    """Bind ``sym`` to an executor and wrap its forward pass in a callable.

    Args:
        sym: Symbol to bind.
        shapes: Mapping from input name to shape, forwarded to
            ``infer_shape`` and ``simple_bind``.
        copy_inputs: When true, the positional arguments of the returned
            function are copied, by position, into the executor's argument
            arrays before each run. The default ``False`` keeps the existing
            behavior: arguments are accepted but ignored, and the executor
            runs on whatever its bound arrays already hold.

    Returns:
        ``func(*args)``, which runs an inference-mode forward pass and
        returns the executor's first output.
    """
    # The inferred shapes were never used; the call is kept, presumably as
    # an early consistency check of ``shapes``.
    sym.infer_shape(**shapes)
    executor = sym.simple_bind(context, 'write', **shapes)

    def func(*args):
        if copy_inputs:
            if len(args) != len(executor.arg_arrays):
                raise ValueError('expected %d arguments, got %d'
                                 % (len(executor.arg_arrays), len(args)))
            for arg, executor_arg in zip(args, executor.arg_arrays):
                arg.copyto(executor_arg)
        executor.forward(is_train=False)
        return executor.outputs[0]
    return func

# Executor-backed projections: one bound to an (N, 1, 7) input and one
# bound to an (N, D) input.
linear_x = make_func(sym=linear_sym(), shapes={'X': (N, 1, 7)})
linear_h = make_func(sym=linear_sym(), shapes={'X': (N, D)})
# Rebinds the module-level name ``sigmoid`` to an executor-backed callable,
# replacing the tanh-based function defined earlier.
sigmoid = make_func(sym=sigmoid_sym(), shapes={'X': (N, D)})

from time import time

def step():
    """Run one pass of the LSTM recurrence over every position of ``X``.

    NOTE(review): ``linear_x``, ``linear_h`` and ``sigmoid`` come from
    ``make_func``; as written in this file those callables do not read their
    arguments, so the values passed here may never reach the executors —
    confirm.

    Returns:
        The result of ``linear(h, W, b)`` for the final hidden state
        (previously computed and discarded).
    """
    h = nd.zeros((N, D))
    c = nd.zeros((N, D))

    # The time index is ``t`` so that it is not overwritten by the input
    # gate, which is named ``i`` inside the loop body.
    for t in range(X.shape[1]):
        patch = nd.slice_axis(X, axis=1, begin=t, end=(t + 1))
        i = sigmoid(linear_x(patch, Wxi, bxi) + linear_h(h, Whi, bhi))
        f = sigmoid(linear_x(patch, Wxf, bxf) + linear_h(h, Whf, bhf))
        o = sigmoid(linear_x(patch, Wxo, bxo) + linear_h(h, Who, bho))
        g = nd.tanh(linear_x(patch, Wxg, bxg) + linear_h(h, Whg, bhg))
        c = f * c + i * g
        h = o * nd.tanh(c)
    return linear(h, W, b)

def main(iterations=30, warmup=10):
    """Time ``step()`` and print the mean seconds per timed call.

    Args:
        iterations: Total number of ``step()`` calls (default 30).
        warmup: Leading calls that are executed but not timed (default 10).

    Raises:
        ValueError: If ``warmup`` is not in ``[0, iterations)``, which would
            otherwise leave the start time unbound or divide by zero.
    """
    if not 0 <= warmup < iterations:
        raise ValueError('warmup must satisfy 0 <= warmup < iterations')
    for index in range(iterations):
        if index == warmup:
            # MXNet queues NDArray work asynchronously; drain the warm-up
            # calls so they are not billed to the timed region.
            nd.waitall()
            t0 = time()
        step()
    # Block until the queued work has finished; otherwise only the time
    # needed to enqueue the operations would be measured.
    nd.waitall()
    print((time() - t0) / (iterations - warmup))

# Run the benchmark only when this file is executed as a script.
if __name__ == '__main__':
    main()
    # Uncomment the two lines below to run main() under cProfile.
    #import cProfile
    #cProfile.run('main()')

## lstm3.py
import mxnet as mx
import mxnet.ndarray as nd
from mxnet.context import Context
#import minpy.ndarray as nd
context = mx.gpu(0)
Context.default_ctx = context

def linear(X, W, bias):
    """Affine map: the ``nd.dot`` product of ``X`` and ``W``, plus ``bias``."""
    projected = nd.dot(X, W)
    return projected + bias

def sigmoid(x):
    """Logistic function of ``x``, evaluated as ``0.5 * (tanh(x / 2) + 1)``."""
    squashed = nd.tanh(.5 * x)
    return .5 * (squashed + 1)

def gaussian(shape):
    """Return a random array of the given ``shape`` allocated on ``context``.

    NOTE(review): despite the name, the values are drawn by ``nd.uniform``
    with the library's default bounds, not from a normal distribution.
    """
    return nd.uniform(ctx=context, shape=shape)

# Width of the hidden and cell states.
D = 10

# Input-to-hidden weights, one per gate (i, f, o, g), stored as (D, 7).
WX_SHAPE = (D, 7)
Wxi, Wxf, Wxo, Wxg = (gaussian(shape=WX_SHAPE) for _ in range(4))

# Input-to-hidden biases, initialised to zero.
bX_SHAPE = (D,)
bxi, bxf, bxo, bxg = (nd.zeros(shape=bX_SHAPE) for _ in range(4))

# Hidden-to-hidden weights, one per gate.
WH_SHAPE = (D, D)
Whi, Whf, Who, Whg = (gaussian(shape=WH_SHAPE) for _ in range(4))

# Hidden-to-hidden biases, initialised to zero.
bH_SHAPE = (D,)
bhi, bhf, bho, bhg = (nd.zeros(shape=bH_SHAPE) for _ in range(4))

# Output projection applied to the last hidden state.
W = gaussian(shape=(D, 10))
b = nd.zeros(shape=(10,))

# Random input: N sequences, 784 // 7 positions along axis 1, 7 values each.
N = 4
X = gaussian(shape=(N, 784 // 7, 7))

def linear_sym():
    """Build a FullyConnected symbol with ``D`` hidden units over variable 'X'."""
    return mx.symbol.FullyConnected(mx.symbol.Variable('X'), num_hidden=D)

def sigmoid_sym():
    """Build a sigmoid Activation symbol over a variable named 'X'."""
    return mx.symbol.Activation(mx.symbol.Variable('X'), act_type='sigmoid')

def gate_sym(act_type='sigmoid'):
    """Build one gate: ``act_type`` applied to FC('X') + FC('H'), ``D`` units each."""
    data = mx.symbol.Variable('X')
    hidden = mx.symbol.Variable('H')
    # The input projection is created before the hidden one, as before.
    from_input = mx.symbol.FullyConnected(data, num_hidden=D)
    from_hidden = mx.symbol.FullyConnected(hidden, num_hidden=D)
    pre_activation = from_input + from_hidden
    return mx.symbol.Activation(pre_activation, act_type=act_type)

def make_func(sym, shapes, copy_inputs=False):
    """Bind ``sym`` to an executor and wrap its forward pass in a callable.

    Prints ``sym.list_arguments()`` once at bind time.

    Args:
        sym: Symbol to bind.
        shapes: Mapping from input name to shape, forwarded to
            ``infer_shape`` and ``simple_bind``.
        copy_inputs: When true, the positional arguments of the returned
            function are copied, by position, into the executor's argument
            arrays before each run. The default ``False`` keeps the existing
            behavior: arguments are accepted but ignored, and the executor
            runs on whatever its bound arrays already hold.

    Returns:
        ``func(*args)``, which runs an inference-mode forward pass and
        returns the executor's first output.
    """
    # The inferred shapes were never used; the call is kept, presumably as
    # an early consistency check of ``shapes``.
    sym.infer_shape(**shapes)
    executor = sym.simple_bind(context, 'write', **shapes)
    print(sym.list_arguments())

    def func(*args):
        if copy_inputs:
            if len(args) != len(executor.arg_arrays):
                raise ValueError('expected %d arguments, got %d'
                                 % (len(executor.arg_arrays), len(args)))
            for arg, executor_arg in zip(args, executor.arg_arrays):
                arg.copyto(executor_arg)
        executor.forward(is_train=False)
        return executor.outputs[0]
    return func

# Executor-backed projections: one bound to an (N, 1, 7) input and one
# bound to an (N, D) input.
linear_x = make_func(sym=linear_sym(), shapes={'X': (N, 1, 7)})
linear_h = make_func(sym=linear_sym(), shapes={'X': (N, D)})
# Rebinds the module-level name ``sigmoid`` to an executor-backed callable,
# replacing the tanh-based function defined earlier.
sigmoid = make_func(sym=sigmoid_sym(), shapes={'X': (N, D)})
# Whole-gate executors: sigmoid by default, tanh for the candidate update.
gate = make_func(sym=gate_sym(), shapes={'X': (N, 1, 7), 'H': (N, D)})
gate_g = make_func(sym=gate_sym('tanh'), shapes={'X': (N, 1, 7), 'H': (N, D)})

from time import time

def step():
    """Run one pass of the LSTM recurrence over every position of ``X``.

    Each gate is evaluated by a single executor call (``gate`` for the
    sigmoid gates, ``gate_g`` for the tanh candidate).

    NOTE(review): ``gate`` and ``gate_g`` come from ``make_func``; as written
    in this file those callables do not read their arguments, so the values
    passed here may never reach the executors — confirm.

    Returns:
        The result of ``linear(h, W, b)`` for the final hidden state
        (previously computed and discarded).
    """
    h = nd.zeros((N, D))
    c = nd.zeros((N, D))

    # The time index is ``t`` so that it is not overwritten by the input
    # gate, which is named ``i`` inside the loop body.
    for t in range(X.shape[1]):
        patch = nd.slice_axis(X, axis=1, begin=t, end=(t + 1))
        i = gate(patch, Wxi, bxi, h, Whi, bhi)
        f = gate(patch, Wxf, bxf, h, Whf, bhf)
        o = gate(patch, Wxo, bxo, h, Who, bho)
        g = gate_g(patch, Wxg, bxg, h, Whg, bhg)
        c = f * c + i * g
        h = o * nd.tanh(c)
    return linear(h, W, b)

def main(iterations=30, warmup=10):
    """Time ``step()`` and print the mean seconds per timed call.

    Args:
        iterations: Total number of ``step()`` calls (default 30).
        warmup: Leading calls that are executed but not timed (default 10).

    Raises:
        ValueError: If ``warmup`` is not in ``[0, iterations)``, which would
            otherwise leave the start time unbound or divide by zero.
    """
    if not 0 <= warmup < iterations:
        raise ValueError('warmup must satisfy 0 <= warmup < iterations')
    for index in range(iterations):
        if index == warmup:
            # MXNet queues NDArray work asynchronously; drain the warm-up
            # calls so they are not billed to the timed region.
            nd.waitall()
            t0 = time()
        step()
    # Block until the queued work has finished; otherwise only the time
    # needed to enqueue the operations would be measured.
    nd.waitall()
    print((time() - t0) / (iterations - warmup))

# Run the benchmark only when this file is executed as a script.
if __name__ == '__main__':
    main()
    # Uncomment the two lines below to run main() under cProfile.
    #import cProfile
    #cProfile.run('main()')

## lstm4.py
import mxnet as mx
import mxnet.ndarray as nd
from mxnet.context import Context
#import minpy.ndarray as nd
context = mx.gpu(0)
Context.default_ctx = context

def linear(X, W, bias):
    """Affine map: the ``nd.dot`` product of ``X`` and ``W``, plus ``bias``."""
    projected = nd.dot(X, W)
    return projected + bias

def sigmoid(x):
    """Logistic function of ``x``, evaluated as ``0.5 * (tanh(x / 2) + 1)``."""
    squashed = nd.tanh(.5 * x)
    return .5 * (squashed + 1)

def gaussian(shape):
    """Return a random array of the given ``shape`` allocated on ``context``.

    NOTE(review): despite the name, the values are drawn by ``nd.uniform``
    with the library's default bounds, not from a normal distribution.
    """
    return nd.uniform(ctx=context, shape=shape)

# Width of the hidden and cell states.
D = 10

# Input-to-hidden weights, one per gate (i, f, o, g), stored as (D, 7).
WX_SHAPE = (D, 7)
Wxi, Wxf, Wxo, Wxg = (gaussian(shape=WX_SHAPE) for _ in range(4))

# Input-to-hidden biases, initialised to zero.
bX_SHAPE = (D,)
bxi, bxf, bxo, bxg = (nd.zeros(shape=bX_SHAPE) for _ in range(4))

# Hidden-to-hidden weights, one per gate.
WH_SHAPE = (D, D)
Whi, Whf, Who, Whg = (gaussian(shape=WH_SHAPE) for _ in range(4))

# Hidden-to-hidden biases, initialised to zero.
bH_SHAPE = (D,)
bhi, bhf, bho, bhg = (nd.zeros(shape=bH_SHAPE) for _ in range(4))

# Output projection applied to the last hidden state.
W = gaussian(shape=(D, 10))
b = nd.zeros(shape=(10,))

# Random input: N sequences, 784 // 7 positions along axis 1, 7 values each.
N = 4
X = gaussian(shape=(N, 784 // 7, 7))

def linear_sym():
    """Build a FullyConnected symbol with ``D`` hidden units over variable 'X'."""
    return mx.symbol.FullyConnected(mx.symbol.Variable('X'), num_hidden=D)

def sigmoid_sym():
    """Build a sigmoid Activation symbol over a variable named 'X'."""
    return mx.symbol.Activation(mx.symbol.Variable('X'), act_type='sigmoid')

def gate_sym(act_type='sigmoid'):
    """Build one gate: ``act_type`` applied to FC('X') + FC('H'), ``D`` units each."""
    data = mx.symbol.Variable('X')
    hidden = mx.symbol.Variable('H')
    # The input projection is created before the hidden one, as before.
    from_input = mx.symbol.FullyConnected(data, num_hidden=D)
    from_hidden = mx.symbol.FullyConnected(hidden, num_hidden=D)
    pre_activation = from_input + from_hidden
    return mx.symbol.Activation(pre_activation, act_type=act_type)

def lstm_cell_sym():
    """Build a single LSTM cell symbol over the variables 'X', 'H' and 'C'.

    Returns:
        The new hidden-state symbol ``o * tanh(f * C + i * g)``. The updated
        cell state is computed inside the graph but is not an output.
    """
    def _make_gate(data, hidden, suffix, act_type='sigmoid'):
        # Input projection ('x<suffix>') plus hidden projection
        # ('h<suffix>'), passed through the requested activation.
        from_input = mx.symbol.FullyConnected(
            data, num_hidden=D, name='x%s' % suffix)
        from_hidden = mx.symbol.FullyConnected(
            hidden, num_hidden=D, name='h%s' % suffix)
        return mx.symbol.Activation(from_input + from_hidden,
                                    act_type=act_type)

    data = mx.symbol.Variable('X')
    hidden = mx.symbol.Variable('H')
    cell = mx.symbol.Variable('C')
    in_gate = _make_gate(data, hidden, 'i')
    forget_gate = _make_gate(data, hidden, 'f')
    out_gate = _make_gate(data, hidden, 'o')
    candidate = _make_gate(data, hidden, 'g', act_type='tanh')
    # Operand order is kept exactly as before so the graph is unchanged.
    next_cell = forget_gate * cell + in_gate * candidate
    return out_gate * mx.symbol.Activation(next_cell, act_type='tanh')

def make_func(sym, shapes, copy_inputs=False):
    """Bind ``sym`` to an executor and wrap its forward pass in a callable.

    Prints ``sym.list_arguments()`` once at bind time.

    Args:
        sym: Symbol to bind.
        shapes: Mapping from input name to shape, forwarded to
            ``infer_shape`` and ``simple_bind``.
        copy_inputs: When true, the positional arguments of the returned
            function are copied, by position, into the executor's argument
            arrays before each run. The default ``False`` keeps the existing
            behavior: arguments are accepted but ignored, and the executor
            runs on whatever its bound arrays already hold.

    Returns:
        ``func(*args)``, which runs an inference-mode forward pass and
        returns the executor's first output.
    """
    # The inferred shapes were never used; the call is kept, presumably as
    # an early consistency check of ``shapes``.
    sym.infer_shape(**shapes)
    executor = sym.simple_bind(context, 'write', **shapes)
    print(sym.list_arguments())

    def func(*args):
        if copy_inputs:
            if len(args) != len(executor.arg_arrays):
                raise ValueError('expected %d arguments, got %d'
                                 % (len(executor.arg_arrays), len(args)))
            for arg, executor_arg in zip(args, executor.arg_arrays):
                arg.copyto(executor_arg)
        executor.forward(is_train=False)
        return executor.outputs[0]
    return func

# Executor-backed projections: one bound to an (N, 1, 7) input and one
# bound to an (N, D) input.
linear_x = make_func(sym=linear_sym(), shapes={'X': (N, 1, 7)})
linear_h = make_func(sym=linear_sym(), shapes={'X': (N, D)})
# Rebinds the module-level name ``sigmoid`` to an executor-backed callable,
# replacing the tanh-based function defined earlier.
sigmoid = make_func(sym=sigmoid_sym(), shapes={'X': (N, D)})
# Whole-gate executors: sigmoid by default, tanh for the candidate update.
gate = make_func(sym=gate_sym(), shapes={'X': (N, 1, 7), 'H': (N, D)})
gate_g = make_func(sym=gate_sym('tanh'), shapes={'X': (N, 1, 7), 'H': (N, D)})
# Entire LSTM cell as one executor, taking input, hidden and cell state.
lstm_cell = make_func(sym=lstm_cell_sym(),
                      shapes={'X': (N, 1, 7), 'H': (N, D), 'C': (N, D)})

from time import time, sleep

def step():
    """Run the fused ``lstm_cell`` executor once per position of ``X``.

    NOTE(review): ``c`` is never reassigned inside the loop (``lstm_cell``
    returns only the hidden state), so the same zero array is passed at
    every step — confirm whether the cell state was meant to be carried.

    NOTE(review): ``lstm_cell`` comes from ``make_func``; as written in this
    file that callable does not read its arguments — confirm.

    Returns:
        The result of ``linear(h, W, b)`` for the final hidden state
        (previously computed and discarded).
    """
    h = nd.zeros((N, D))
    c = nd.zeros((N, D))

    for t in range(X.shape[1]):
        patch = nd.slice_axis(X, axis=1, begin=t, end=(t + 1))
        # The call is already parenthesised, so no backslash is needed.
        h = lstm_cell(patch, Wxo, bxo, h, Who, bho, Wxf, bxf, Whf, bhf,
                      c, Wxi, bxi, Whi, bhi, Wxg, bxg, Whg, bhg)
    return linear(h, W, b)

def main(iterations=30, warmup=10):
    """Time ``step()``, print the mean seconds per timed call, then sleep 3 s.

    Args:
        iterations: Total number of ``step()`` calls (default 30).
        warmup: Leading calls that are executed but not timed (default 10).

    Raises:
        ValueError: If ``warmup`` is not in ``[0, iterations)``, which would
            otherwise leave the start time unbound or divide by zero.
    """
    if not 0 <= warmup < iterations:
        raise ValueError('warmup must satisfy 0 <= warmup < iterations')
    for index in range(iterations):
        if index == warmup:
            # MXNet queues NDArray work asynchronously; drain the warm-up
            # calls so they are not billed to the timed region.
            nd.waitall()
            t0 = time()
        step()
    # Block until the queued work has finished; otherwise only the time
    # needed to enqueue the operations would be measured.
    nd.waitall()
    print((time() - t0) / (iterations - warmup))
    # Kept from the original; the reason for the pause is not stated here.
    sleep(3)

# Run the benchmark only when this file is executed as a script.
if __name__ == '__main__':
    main()
    # Uncomment the two lines below to run main() under cProfile.
    #import cProfile
    #cProfile.run('main()')
	import mxnet as mx
	import mxnet.ndarray as nd
	#import minpy.ndarray as nd
	context = mx.gpu(0)

	def linear(X, W, bias):
		"""Affine map: the ``nd.dot`` product of ``X`` and ``W``, plus ``bias``."""
		# The body is nested under the def; it had lost its indentation.
		return nd.dot(X, W) + bias

	def sigmoid(x):
		"""Logistic function of ``x``, evaluated as ``0.5 * (tanh(x / 2) + 1)``."""
		# The body is nested under the def; it had lost its indentation.
		return .5 * (nd.tanh(.5 * x) + 1)

	from mxnet.context import Context
	Context.default_ctx = context

	def gaussian(shape):
		"""Return a random array of the given ``shape`` allocated on ``context``.

		NOTE(review): despite the name, the values are drawn by ``nd.uniform``
		with the library's default bounds, not from a normal distribution.
		"""
		# The body is nested under the def; it had lost its indentation.
		return nd.uniform(shape=shape, ctx=context)

	# Width of the hidden and cell states.
	D = 10

	# Input-to-hidden weights, one per gate (i, f, o, g); used as nd.dot(x, W).
	WX_SHAPE = (7, D)
	Wxi, Wxf, Wxo, Wxg = (gaussian(shape=WX_SHAPE) for _ in range(4))

	# Input-to-hidden biases, initialised to zero.
	bX_SHAPE = (D,)
	bxi, bxf, bxo, bxg = (nd.zeros(shape=bX_SHAPE) for _ in range(4))

	# Hidden-to-hidden weights, one per gate.
	WH_SHAPE = (D, D)
	Whi, Whf, Who, Whg = (gaussian(shape=WH_SHAPE) for _ in range(4))

	# Hidden-to-hidden biases, initialised to zero.
	bH_SHAPE = (D,)
	bhi, bhf, bho, bhg = (nd.zeros(shape=bH_SHAPE) for _ in range(4))

	# Output projection applied to the last hidden state.
	W = gaussian(shape=(D, 10))
	b = nd.zeros(shape=(10,))

	# Random input: N sequences, 784 // 7 positions along axis 1, 7 values each.
	N = 4
	X = gaussian(shape=(N, 784 // 7, 7))

	from time import time

	def step():
		"""Run one LSTM forward pass over every position along axis 1 of ``X``.

		The nesting of the function body and of the loop body, which had been
		flattened, is restored here.

		Returns:
			The result of ``linear(h, W, b)`` for the final hidden state. It
			used to be computed and discarded.
		"""
		h = nd.zeros((N, D))
		c = nd.zeros((N, D))

		# The time index is ``t`` so that it is not overwritten by the input
		# gate, which is named ``i`` inside the loop body.
		for t in range(X.shape[1]):
			# slice_axis keeps a length-1 time axis; flatten it away so the
			# input projection is 2-D like the hidden projection.
			patch = nd.slice_axis(X, axis=1, begin=t, end=(t + 1))
			patch = patch.reshape((X.shape[0], X.shape[2]))
			i = sigmoid(linear(patch, Wxi, bxi) + linear(h, Whi, bhi))
			f = sigmoid(linear(patch, Wxf, bxf) + linear(h, Whf, bhf))
			o = sigmoid(linear(patch, Wxo, bxo) + linear(h, Who, bho))
			g = nd.tanh(linear(patch, Wxg, bxg) + linear(h, Whg, bhg))
			c = f * c + i * g
			h = o * nd.tanh(c)
		return linear(h, W, b)

	def main(iterations=30, warmup=10):
		"""Time ``step()`` and print the mean seconds per timed call.

		The nesting of the loop and of the warm-up check, which had been
		flattened, is restored here.

		Args:
			iterations: Total number of ``step()`` calls (default 30).
			warmup: Leading calls that are executed but not timed (default 10).

		Raises:
			ValueError: If ``warmup`` is not in ``[0, iterations)``.
		"""
		if not 0 <= warmup < iterations:
			raise ValueError('warmup must satisfy 0 <= warmup < iterations')
		for index in range(iterations):
			if index == warmup:
				# MXNet queues NDArray work asynchronously; drain the warm-up
				# calls so they are not billed to the timed region.
				nd.waitall()
				t0 = time()
			step()
		# Block until the queued work has finished; otherwise only the time
		# needed to enqueue the operations would be measured.
		nd.waitall()
		print((time() - t0) / (iterations - warmup))

	# Run the benchmark only when this file is executed as a script; the
	# call is nested under the guard, which had lost its indentation.
	if __name__ == '__main__':
		main()
		# Uncomment the two lines below to run main() under cProfile.
		#import cProfile
		#cProfile.run('main()')