##### convnet0207.py

import numpy as np
import theano
import theano.tensor as T
import theano.tensor.signal.downsample as Tsd

import nnet0207 as nnet


class Layer():

    # afunc: activation function (see nnet)
    # Xdim:  dimension of the input image ( Xrow, Xcol )
    # Xnch:  number of the input channels
    # Wdim:  dimension of the convolution filters ( Wrow, Wcol )
    # Wnch:  number of the filter channels
    # ds:    downsampling scale for max-pooling ( ds_vertical, ds_horizontal )
    # Wini_range: parameter for weight initialization (see nnet)
    #
    def __init__( self, afunc, Xdim, Xnch, Wdim, Wnch, ds, Wini_range ):

        # parameters of the input
        Xrow, Xcol = Xdim
        Xshape = ( Xnch, Xrow, Xcol )
        self.Xshape = Xshape

        # parameters of the convolution layer
        Wrow, Wcol = Wdim
        Wshape = ( Wnch, Xnch, Wrow, Wcol )
        self.Wshape = Wshape
        self.ds = ds
        Yrow, Ycol = Xrow - Wrow + 1, Xcol - Wcol + 1
        Yshape = ( Wnch, Yrow, Ycol )
        self.Yshape = Yshape

        # parameters of the pooling layer
        Zrow = int( np.ceil( float( Yrow ) / ds[0] ) )
        Zcol = int( np.ceil( float( Ycol ) / ds[1] ) )
        Zshape = ( Wnch, Zrow, Zcol )
        self.Zshape = Zshape
        self.Dout = Wnch * Zrow * Zcol

        # theano shared variables
        self.W  = theano.shared( nnet.random( Wshape, Wini_range ) )
        self.dW = theano.shared( np.zeros( Wshape ) )

        # activation function of the layer
        self.afunc = nnet.d_afunc[afunc]


    def output( self, X ):

        # X: Ndat x Xnch x Xrow x Xcol
        Xshape = ( None, self.Xshape[0], self.Xshape[1], self.Xshape[2] )
        Wshape = self.Wshape
        Yconv = T.nnet.conv.conv2d( X, self.W, image_shape = Xshape, filter_shape = Wshape )  # Ndat x Wnch x Yrow x Ycol
        Ypool = Tsd.max_pool_2d( Yconv, self.ds )  # Ndat x Wnch x Zrow x Zcol
        if self.afunc == 'linear':
            Z = Ypool
        else:
            Z = self.afunc( Ypool )

        return Z
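A quick sanity check of the shape arithmetic in Layer.__init__: a valid convolution shrinks each spatial dimension by the filter size minus one, and the pooling stage keeps partial border blocks, hence the ceil. With the 28x28 single-channel MNIST input and the ( 5, 5 ) / 16-channel / ( 4, 4 ) configuration used by the training script below, that gives 24x24 feature maps, 6x6 pooled maps, and Dout = 16 * 6 * 6 = 576. The helper conv_pool_shapes below is not part of the gist; it is a minimal pure-NumPy sketch that just replays the same formulas:

import numpy as np

def conv_pool_shapes( Xdim, Wdim, Wnch, ds ):
    # valid convolution: output shrinks by the filter size minus one
    Yrow, Ycol = Xdim[0] - Wdim[0] + 1, Xdim[1] - Wdim[1] + 1
    # max-pooling keeps partial border blocks, hence the ceil
    Zrow = int( np.ceil( float( Yrow ) / ds[0] ) )
    Zcol = int( np.ceil( float( Ycol ) / ds[1] ) )
    return ( Yrow, Ycol ), ( Zrow, Zcol ), Wnch * Zrow * Zcol

print conv_pool_shapes( ( 28, 28 ), ( 5, 5 ), 16, ( 4, 4 ) )
# => ((24, 24), (6, 6), 576)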
##### convnet_2layer0207.py

import numpy as np
import theano
import theano.tensor as T

import nnet0207 as nnet
import convnet0207 as convnet


class MLP():

    # Xdim: ( Xrow, Xcol )   W1dim: ( W1row, W1col )
    # ds1:  ( ds_v, ds_h ) downscale factor
    #
    def __init__( self, Xdim, Xnch, W1dim, W1nch, ds1, K ):

        # layers
        self.L1 = convnet.Layer( 'linear', Xdim, Xnch, W1dim, W1nch, ds1, 0.1 )
        #self.L1 = convnet.Layer( 'ReLu', Xdim, Xnch, W1dim, W1nch, ds1, 0.1 )
        self.H = self.L1.Dout
        self.L2 = nnet.Layer( 'softmax', self.H, K, 0.1 )

        # theano functions
        self.output = self._Tfunc_output()
        self.cost   = self._Tfunc_cost()
        self.train  = self._Tfunc_train()


    ### theano function for output computation
    #
    def _Tfunc_output( self ):
        X = T.tensor4()  # Ndat x Xnch x Xrow x Xcol
        Y, Z = _T_output( self.L1, self.L2, X )
        return theano.function( [ X ], [ Y, Z ] )

    ### theano function for cost computation
    #
    def _Tfunc_cost( self ):
        Z = T.dmatrix()  # N x K
        t = T.dmatrix()  # N x K
        cost = _T_cost( Z, t )
        return theano.function( [ Z, t ], cost )

    ### theano function for gradient descent learning
    #
    def _Tfunc_train( self ):
        W1, dW1 = self.L1.W, self.L1.dW
        W2, dW2, b2, db2 = self.L2.W, self.L2.dW, self.L2.b, self.L2.db

        X   = T.tensor4( 'X' )
        t   = T.dmatrix( 't' )  # N x K
        eta = T.dscalar( 'eta' )
        mu  = T.dscalar( 'mu' )

        Y2, Z2 = _T_output( self.L1, self.L2, X )
        cost = T.mean( _T_cost( Z2, t ) )
        gradW1, gradW2, gradb2 = T.grad( cost, [ W1, W2, b2 ] )

        dW1_new = -eta * gradW1 + mu * dW1
        dW2_new = -eta * gradW2 + mu * dW2
        db2_new = -eta * gradb2 + mu * db2
        W1_new = W1 + dW1_new
        W2_new = W2 + dW2_new
        b2_new = b2 + db2_new

        updatesList = [
            ( W1, W1_new ), ( dW1, dW1_new ),
            ( W2, W2_new ), ( b2, b2_new ), ( dW2, dW2_new ), ( db2, db2_new ) ]

        return theano.function( [ X, t, eta, mu ], cost, updates = updatesList )


def _T_output( L1, L2, X ):
    Z1 = L1.output( X )
    Y2, Z2 = L2.output( Z1.reshape( ( Z1.shape[0], -1 ) ) )
    return Y2, Z2


def _T_cost( Z, t ):
    return T.nnet.categorical_crossentropy( Z, t )
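_Tfunc_train compiles the classical momentum update: each velocity dW accumulates mu times its previous value plus a step of -eta times the gradient, and the weights then move by the velocity. A minimal NumPy sketch of the same rule on a toy quadratic (the arrays and cost here are made up, purely to show the update order):

import numpy as np

W  = np.array( [ 2.0, -3.0 ] )   # toy "weights"
dW = np.zeros_like( W )          # velocity, initialized to zero as in Layer
eta, mu = 0.05, 0.8

for step in range( 100 ):
    grad = 2.0 * W               # gradient of cost = W[0]**2 + W[1]**2
    dW = -eta * grad + mu * dW   # same formula as dW1_new / dW2_new above
    W  = W + dW

print W   # converges towards the minimum at (0, 0)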
##### convnet_3layer0207.py

import numpy as np
import theano
import theano.tensor as T

import nnet0207 as nnet
import convnet0207 as convnet


class MLP():

    # Xdim:  ( Xrow, Xcol )
    # W1dim: ( Wrow, Wcol )   ds1: ( ds_v, ds_h ) downscale factor
    # W2dim: ( Wrow, Wcol )   ds2: ( ds_v, ds_h ) downscale factor
    #
    def __init__( self, Xdim, Xnch, W1dim, W1nch, ds1, W2dim, W2nch, ds2, K ):

        # layers
        self.L1 = convnet.Layer( 'linear', Xdim, Xnch, W1dim, W1nch, ds1, 0.1 )
        Z1nch = self.L1.Zshape[0]
        Z1dim = self.L1.Zshape[1:]
        self.H1 = self.L1.Dout
        self.L2 = convnet.Layer( 'linear', Z1dim, Z1nch, W2dim, W2nch, ds2, 0.1 )
        self.H2 = self.L2.Dout
        self.L3 = nnet.Layer( 'softmax', self.H2, K, 0.1 )

        # theano functions
        self.output = self._Tfunc_output()
        self.cost   = self._Tfunc_cost()
        self.train  = self._Tfunc_train()


    ### theano function for output computation
    #
    def _Tfunc_output( self ):
        X = T.tensor4()  # Ndat x Xnch x Xrow x Xcol
        Y, Z = _T_output( self.L1, self.L2, self.L3, X )
        return theano.function( [ X ], [ Y, Z ] )

    ### theano function for cost computation
    #
    def _Tfunc_cost( self ):
        Z = T.dmatrix()  # N x K
        t = T.dmatrix()  # N x K
        cost = _T_cost( Z, t )
        return theano.function( [ Z, t ], cost )

    ### theano function for gradient descent learning
    #
    def _Tfunc_train( self ):
        W1, dW1 = self.L1.W, self.L1.dW
        W2, dW2 = self.L2.W, self.L2.dW
        W3, dW3, b3, db3 = self.L3.W, self.L3.dW, self.L3.b, self.L3.db

        X   = T.tensor4( 'X' )
        t   = T.dmatrix( 't' )  # N x K
        eta = T.dscalar( 'eta' )
        mu  = T.dscalar( 'mu' )

        Y3, Z3 = _T_output( self.L1, self.L2, self.L3, X )
        cost = T.mean( _T_cost( Z3, t ) )
        gradW1, gradW2, gradW3, gradb3 = T.grad( cost, [ W1, W2, W3, b3 ] )

        dW1_new = -eta * gradW1 + mu * dW1
        dW2_new = -eta * gradW2 + mu * dW2
        dW3_new = -eta * gradW3 + mu * dW3
        db3_new = -eta * gradb3 + mu * db3
        W1_new = W1 + dW1_new
        W2_new = W2 + dW2_new
        W3_new = W3 + dW3_new
        b3_new = b3 + db3_new

        updatesList = [
            ( W1, W1_new ), ( dW1, dW1_new ),
            ( W2, W2_new ), ( dW2, dW2_new ),
            ( W3, W3_new ), ( b3, b3_new ), ( dW3, dW3_new ), ( db3, db3_new ) ]

        return theano.function( [ X, t, eta, mu ], cost, updates = updatesList )


def _T_output( L1, L2, L3, X ):
    Z1 = L1.output( X )
    Z2 = L2.output( Z1 )
    Y3, Z3 = L3.output( Z2.reshape( ( Z2.shape[0], -1 ) ) )
    return Y3, Z3


def _T_cost( Z, t ):
    return T.nnet.categorical_crossentropy( Z, t )
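Note that in _T_output the 4-D tensor Z1 is fed to L2 directly; only the last conv output Z2 is flattened for the softmax layer. Chaining the shape formulas, the commented-out configuration in the training script below (both layers with 5x5 filters, 16 channels, ( 4, 4 ) pooling, on 28x28 MNIST) gives 28 -> 24 -> 6 after layer 1, then 6 - 5 + 1 = 2 -> ceil( 2/4 ) = 1 after layer 2, so only H2 = 16 * 1 * 1 = 16 units feed the softmax. Reusing the hypothetical conv_pool_shapes helper sketched after convnet0207.py:

Y1, Z1dim, H1 = conv_pool_shapes( ( 28, 28 ), ( 5, 5 ), 16, ( 4, 4 ) )
Y2, Z2dim, H2 = conv_pool_shapes( Z1dim, ( 5, 5 ), 16, ( 4, 4 ) )
print H1, H2   # => 576 16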
##### mlp_2layer0207.py

import numpy as np
import theano
import theano.tensor as T

import nnet0207 as nnet


class MLP():

    def __init__( self, D, H, K ):

        # layers
        self.L1 = nnet.Layer( 'ReLu', D, H, 0.1 )
        self.L2 = nnet.Layer( 'softmax', H, K, 0.1 )

        # theano functions
        self.output = self._Tfunc_output()
        self.cost   = self._Tfunc_cost()
        self.train  = self._Tfunc_train()


    ### theano function for output computation
    #
    def _Tfunc_output( self ):
        X = T.dmatrix()  # N x D
        Y, Z = _T_output( self.L1, self.L2, X )
        return theano.function( [ X ], [ Y, Z ] )

    ### theano function for cost computation
    #
    def _Tfunc_cost( self ):
        Z = T.dmatrix()  # N x K
        t = T.dmatrix()  # N x K
        cost = _T_cost( Z, t )
        return theano.function( [ Z, t ], cost )

    ### theano function for gradient descent learning
    #
    def _Tfunc_train( self ):
        W1, dW1, b1, db1 = self.L1.W, self.L1.dW, self.L1.b, self.L1.db
        W2, dW2, b2, db2 = self.L2.W, self.L2.dW, self.L2.b, self.L2.db

        X   = T.dmatrix( 'X' )  # N x D
        t   = T.dmatrix( 't' )  # N x K
        eta = T.dscalar( 'eta' )
        mu  = T.dscalar( 'mu' )

        Y2, Z2 = _T_output( self.L1, self.L2, X )
        cost = T.mean( _T_cost( Z2, t ) )
        gradW1, gradb1, gradW2, gradb2 = T.grad( cost, [ W1, b1, W2, b2 ] )

        dW1_new = -eta * gradW1 + mu * dW1
        db1_new = -eta * gradb1 + mu * db1
        dW2_new = -eta * gradW2 + mu * dW2
        db2_new = -eta * gradb2 + mu * db2
        W1_new = W1 + dW1_new
        b1_new = b1 + db1_new
        W2_new = W2 + dW2_new
        b2_new = b2 + db2_new

        updatesList = [
            ( W1, W1_new ), ( b1, b1_new ), ( dW1, dW1_new ), ( db1, db1_new ),
            ( W2, W2_new ), ( b2, b2_new ), ( dW2, dW2_new ), ( db2, db2_new ) ]

        return theano.function( [ X, t, eta, mu ], cost, updates = updatesList )


def _T_output( L1, L2, X ):
    Y1, Z1 = L1.output( X )
    Y2, Z2 = L2.output( Z1 )
    return Y2, Z2


def _T_cost( Z, t ):
    return T.nnet.categorical_crossentropy( Z, t )
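The forward pass that _T_output builds here is just an affine map, a ReLU, another affine map, and a row-wise softmax. A hedged NumPy transcription, where W1, b1, W2, b2 are stand-in arrays rather than the Theano shared variables (the max subtraction is only for numerical stability; it does not change the softmax):

import numpy as np

def forward( X, W1, b1, W2, b2 ):
    Y1 = np.dot( X, W1.T ) + b1                    # N x H, as in nnet.Layer.output
    Z1 = np.maximum( Y1, 0.0 )                     # ReLU, as in d_afunc['ReLu']
    Y2 = np.dot( Z1, W2.T ) + b2                   # N x K
    E  = np.exp( Y2 - Y2.max( axis = 1, keepdims = True ) )
    Z2 = E / E.sum( axis = 1, keepdims = True )    # row-wise softmax
    return Y2, Z2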
##### mlp_3layer0207.py

import numpy as np
import theano
import theano.tensor as T

import nnet0207 as nnet


class MLP():

    def __init__( self, D, H1, H2, K ):

        # layers
        self.L1 = nnet.Layer( 'ReLu', D, H1, 0.1 )
        self.L2 = nnet.Layer( 'ReLu', H1, H2, 0.1 )
        self.L3 = nnet.Layer( 'softmax', H2, K, 0.1 )

        # theano functions
        self.output = self._Tfunc_output()
        self.cost   = self._Tfunc_cost()
        self.train  = self._Tfunc_train()


    ### theano function for output computation
    #
    def _Tfunc_output( self ):
        X = T.dmatrix()  # N x D
        Y, Z = _T_output( self.L1, self.L2, self.L3, X )
        return theano.function( [ X ], [ Y, Z ] )

    ### theano function for cost computation
    #
    def _Tfunc_cost( self ):
        Z = T.dmatrix()  # N x K
        t = T.dmatrix()  # N x K
        cost = _T_cost( Z, t )
        return theano.function( [ Z, t ], cost )

    ### theano function for gradient descent learning
    #
    def _Tfunc_train( self ):
        W1, dW1, b1, db1 = self.L1.W, self.L1.dW, self.L1.b, self.L1.db
        W2, dW2, b2, db2 = self.L2.W, self.L2.dW, self.L2.b, self.L2.db
        W3, dW3, b3, db3 = self.L3.W, self.L3.dW, self.L3.b, self.L3.db

        X   = T.dmatrix( 'X' )  # N x D
        t   = T.dmatrix( 't' )  # N x K
        eta = T.dscalar( 'eta' )
        mu  = T.dscalar( 'mu' )

        Y3, Z3 = _T_output( self.L1, self.L2, self.L3, X )
        cost = T.mean( _T_cost( Z3, t ) )
        grad = T.grad( cost, [ W1, b1, W2, b2, W3, b3 ] )
        gradW1, gradb1, gradW2, gradb2, gradW3, gradb3 = grad

        dW1_new = -eta * gradW1 + mu * dW1
        db1_new = -eta * gradb1 + mu * db1
        dW2_new = -eta * gradW2 + mu * dW2
        db2_new = -eta * gradb2 + mu * db2
        dW3_new = -eta * gradW3 + mu * dW3
        db3_new = -eta * gradb3 + mu * db3
        W1_new = W1 + dW1_new
        b1_new = b1 + db1_new
        W2_new = W2 + dW2_new
        b2_new = b2 + db2_new
        W3_new = W3 + dW3_new
        b3_new = b3 + db3_new

        updatesList = [
            ( W1, W1_new ), ( b1, b1_new ), ( dW1, dW1_new ), ( db1, db1_new ),
            ( W2, W2_new ), ( b2, b2_new ), ( dW2, dW2_new ), ( db2, db2_new ),
            ( W3, W3_new ), ( b3, b3_new ), ( dW3, dW3_new ), ( db3, db3_new ),
        ]

        return theano.function( [ X, t, eta, mu ], cost, updates = updatesList )


def _T_output( L1, L2, L3, X ):
    Y1, Z1 = L1.output( X )
    Y2, Z2 = L2.output( Z1 )
    Y3, Z3 = L3.output( Z2 )
    return Y3, Z3


def _T_cost( Z, t ):
    return T.nnet.categorical_crossentropy( Z, t )
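_T_cost wraps T.nnet.categorical_crossentropy, which for the one-hot targets used throughout this gist reduces to minus the log of the probability assigned to the true class. A minimal NumPy equivalent for that one-hot case:

import numpy as np

def categorical_crossentropy( Z, t ):
    # Z: N x K predicted probabilities, t: N x K one-hot targets
    return -np.sum( t * np.log( Z ), axis = 1 )   # one value per row

Z = np.array( [ [ 0.7, 0.2, 0.1 ] ] )
t = np.array( [ [ 1, 0, 0 ] ] )
print categorical_crossentropy( Z, t )   # => [ 0.35667494 ], i.e. -log 0.7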
##### MNIST training script (convnet)

import numpy as np
import scipy as sp

import mnist0117 as mnist
import convnet_2layer0207 as convnet_2layer
import convnet_3layer0207 as convnet_3layer


def gendat( LT ):

    mn = mnist.MNIST( LT )
    label = mn.getLabel()
    N = label.shape[0]
    K = 10
    tmp = mn.getImage() / 255.0  # => in [0,1]
    X = tmp.reshape( ( tmp.shape[0], 1, tmp.shape[1], tmp.shape[2] ) )
    t = np.zeros( ( N, K ), dtype = bool )
    for ik in range( K ):
        t[label == ik, ik] = True

    return X, label, t


def errorrate( mlp, X, t, label ):

    Y, Z = mlp.output( X )
    mnLL = np.mean( mlp.cost( Z, t ) )
    er = np.mean( label != np.argmax( Z, axis = 1 ) )

    return mnLL, er


if __name__ == "__main__":

    np.random.seed( 0 )

    ##### setting the training data & the validation data
    #
    X, label, t = gendat( 'L' )
    XL, labelL, tL = X[:50000], label[:50000], t[:50000]
    XV, labelV, tV = X[50000:], label[50000:], t[50000:]
    NL, Xnch, Xrow, Xcol = XL.shape
    NV, Xnch, Xrow, Xcol = XV.shape
    K = t.shape[1]
    Xdim = ( Xrow, Xcol )

    ##### mini-batch indices for stochastic gradient descent
    #
    idx = np.random.permutation( NL )
    batchsize = 100
    nbatch = NL / batchsize
    assert( NL % batchsize == 0 )
    idxB = np.zeros( ( nbatch, NL ), dtype = bool )
    for ib in range( nbatch ):
        idxB[ib, idx.reshape( ( nbatch, batchsize ) )[ib, :]] = True

    ##### training
    #
    W1dim, W1nch, ds1 = ( 5, 5 ), 16, ( 4, 4 )
    W2dim, W2nch, ds2 = None, None, None
    #W2dim, W2nch, ds2 = ( 5, 5 ), 16, ( 4, 4 )
    eta, mu = 0.05, 0.8
    nepoch = 50

    if W2dim is None:
        mlp = convnet_2layer.MLP( Xdim, Xnch, W1dim, W1nch, ds1, K )
        print '### 2-layer convnet'
        print '# Xdim:', Xdim, ' Xnch:', Xnch, ' W1dim:', W1dim, ' W1nch:', W1nch, ' ds1:', ds1, ' H:', mlp.H
    else:
        mlp = convnet_3layer.MLP( Xdim, Xnch, W1dim, W1nch, ds1, W2dim, W2nch, ds2, K )
        print '### 3-layer convnet'
        print '# Xdim:', Xdim, ' Xnch:', Xnch, ' W1dim:', W1dim, ' W1nch:', W1nch, ' ds1:', ds1, ' H1:', mlp.H1
        print '# W2dim:', W2dim, ' W2nch:', W2nch, ' ds2:', ds2, ' H2:', mlp.H2

    print '### training: NL = ', NL, ' NV = ', NV, ' K = ', K, ' batchsize = ', batchsize

    for i in range( nepoch ):
        # printing error rates etc.
        if i % 10 == 0:
            mnLLL, erL = errorrate( mlp, XL, tL, labelL )
            mnLLV, erV = errorrate( mlp, XV, tV, labelV )
            print i, mnLLL, erL * 100, erV * 100

        # training (selecting each batch in random order)
        for ib in np.random.permutation( nbatch ):
            ii = idxB[ib, :]
            mlp.train( XL[ii], tL[ii], eta, mu )

    i = nepoch
    mnLLL, erL = errorrate( mlp, XL, tL, labelL )
    mnLLV, erV = errorrate( mlp, XV, tV, labelV )
    print i, mnLLL, erL * 100, erV * 100

    ##### setting the test data
    #
    XT, labelT, tT = gendat( 'T' )
    NT, Xnch, Xrow, Xcol = XT.shape
    print '# NT = ', NT
    mnLLT, erT = errorrate( mlp, XT, tT, labelT )
    print i, mnLLT, erL * 100, erV * 100, erT * 100
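gendat builds the one-hot target matrix t column by column: the loop sets t[n, k] = True exactly when label[n] == k. A tiny worked example with made-up labels:

import numpy as np

label = np.array( [ 3, 0, 3, 1 ] )
K = 4
t = np.zeros( ( label.shape[0], K ), dtype = bool )
for ik in range( K ):
    t[label == ik, ik] = True
print t.astype( int )
# [[0 0 0 1]
#  [1 0 0 0]
#  [0 0 0 1]
#  [0 1 0 0]]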
##### MNIST training script (MLP)

import numpy as np
import scipy as sp

import mnist0117 as mnist


def gendat( LT ):

    mn = mnist.MNIST( LT )
    label = mn.getLabel()
    N = label.shape[0]
    K = 10
    X = mn.getImage().reshape( ( N, -1 ) ) / 255.0  # => in [0,1]
    t = np.zeros( ( N, K ), dtype = bool )
    for ik in range( K ):
        t[label == ik, ik] = True

    return X, label, t


def errorrate( mlp, X, t, label ):

    Y, Z = mlp.output( X )
    mnLL = np.mean( mlp.cost( Z, t ) )
    er = np.mean( label != np.argmax( Z, axis = 1 ) )

    return mnLL, er


if __name__ == "__main__":

    import mlp_2layer0207 as mlp_2layer
    import mlp_3layer0207 as mlp_3layer

    np.random.seed( 0 )

    ##### setting the training data & the validation data
    #
    X, label, t = gendat( 'L' )
    XL, labelL, tL = X[:50000], label[:50000], t[:50000]
    XV, labelV, tV = X[50000:], label[50000:], t[50000:]
    NL, D = XL.shape
    NV, D = XV.shape
    K = t.shape[1]

    ##### mini-batch indices for stochastic gradient descent
    #
    idx = np.random.permutation( NL )
    batchsize = 1000
    nbatch = NL / batchsize
    assert( NL % batchsize == 0 )
    idxB = np.zeros( ( nbatch, NL ), dtype = bool )
    for ib in range( nbatch ):
        idxB[ib, idx.reshape( ( nbatch, batchsize ) )[ib, :]] = True

    ##### training
    #
    #H1, H2 = 500, 0
    H1, H2 = 500, 1000
    #H1, H2 = 1000, 500
    eta = 0.5
    mu = 0.8
    nepoch = 20

    if H2 <= 0:
        mlp = mlp_2layer.MLP( D, H1, K )
        print '### 2-layer MLP: D = ', D, ' H = ', H1, ' K = ', K
    else:
        mlp = mlp_3layer.MLP( D, H1, H2, K )
        print '### 3-layer MLP: D = ', D, ' H1 = ', H1, ' H2 = ', H2, ' K = ', K

    print '### training: NL = ', NL, ' NV = ', NV, ' D = ', D, ' K = ', K, ' batchsize = ', batchsize

    for i in range( nepoch ):
        # printing error rates etc.
        if i % 10 == 0:
            mnLLL, erL = errorrate( mlp, XL, tL, labelL )
            mnLLV, erV = errorrate( mlp, XV, tV, labelV )
            print i, mnLLL, erL * 100, erV * 100

        # training (selecting each batch in random order)
        for ib in np.random.permutation( nbatch ):
            ii = idxB[ib, :]
            mlp.train( XL[ii], tL[ii], eta, mu )

    i = nepoch
    mnLLL, erL = errorrate( mlp, XL, tL, labelL )
    mnLLV, erV = errorrate( mlp, XV, tV, labelV )
    print i, mnLLL, erL * 100, erV * 100

    ##### setting the test data
    #
    XT, labelT, tT = gendat( 'T' )
    NT, D = XT.shape
    print '# NT = ', NT
    mnLLT, erT = errorrate( mlp, XT, tT, labelT )
    print i, mnLLT, erL * 100, erV * 100, erT * 100
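The mini-batch bookkeeping in both training scripts works the same way: a random permutation of the training indices is cut into nbatch rows of batchsize entries, and each row becomes a boolean mask over the whole training set, so every example lands in exactly one batch per epoch. A small worked example (N = 6, batchsize = 3, made-up sizes):

import numpy as np

np.random.seed( 0 )
NL, batchsize = 6, 3
nbatch = NL / batchsize            # integer division; NL must divide evenly
idx = np.random.permutation( NL )
idxB = np.zeros( ( nbatch, NL ), dtype = bool )
for ib in range( nbatch ):
    idxB[ib, idx.reshape( ( nbatch, batchsize ) )[ib, :]] = True
print idxB.astype( int )           # each row selects batchsize examples
print idxB.sum( axis = 0 )         # every example appears exactly once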
##### nnet0207.py

import numpy as np
import theano
import theano.tensor as T


d_afunc = { 'linear':  'linear',
            'sigmoid': T.nnet.sigmoid,
            'softmax': T.nnet.softmax,
            'ReLu':    lambda Y: T.switch( Y > 0, Y, 0 ) }


class Layer():

    def __init__( self, afunc, Din, Nunit, Wini_range ):

        self.Din = Din
        self.Nunit = Nunit

        # theano shared variables for weights & biases
        self.W  = theano.shared( random( ( Nunit, Din ), Wini_range ) )
        self.b  = theano.shared( random( Nunit, Wini_range ) )
        self.dW = theano.shared( np.zeros( ( Nunit, Din ) ) )
        self.db = theano.shared( np.zeros( Nunit ) )

        # activation function of the layer
        self.afunc = d_afunc[afunc]


    def output( self, X ):

        Y = T.dot( X, self.W.T ) + self.b  # Ndat x Nunit
        if self.afunc == 'linear':
            Z = Y
        else:
            Z = self.afunc( Y )

        return Y, Z


### random numbers for weight initialization
#
def random( shape, r ):
    # [ -r/2, r/2 )
    return r * ( np.random.random_sample( shape ) - 0.5 )


### Rectified Linear activation function
#
def relu( Y ):
    return T.switch( Y > 0, Y, 0 )
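The helper random draws uniformly in [ -r/2, r/2 ): np.random.random_sample is uniform on [0, 1), so subtracting 0.5 and scaling by r centers the values on zero. A quick check of the range:

import numpy as np

np.random.seed( 0 )
w = 0.1 * ( np.random.random_sample( ( 1000, ) ) - 0.5 )   # same as random( ( 1000, ), 0.1 )
print w.min() >= -0.05, w.max() < 0.05   # => True True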