Gist takatakamanbou/e1ca2b82632d0f7877e6: Theano implementations of a simple CNN and MLP for MNIST (Python 2).

########## convnet0211.py : convolution / pooling / full-connection layers and the CNN class ##########

import numpy as np
import theano
import theano.tensor as T
import theano.tensor.signal.downsample as Tsd

import nnet0211 as nnet

########## Convolution Layer ##########

class ConvLayer( object ):

    def __init__( self, Xdim, Wdim, afunc, withBias, Wini = 0.01 ):

        # dimension of the input
        Xnch, Xrow, Xcol = Xdim
        self.Xshape = Xdim

        # dimension of the convolution filters
        Wnch, Wrow, Wcol = Wdim
        self.Wshape = ( Wnch, Xnch, Wrow, Wcol )

        # dimension of the output
        Yrow, Ycol = Xrow - Wrow + 1, Xcol - Wcol + 1
        self.Yshape = ( Wnch, Yrow, Ycol )
        self.Dout = Wnch * Yrow * Ycol

        # activation function of the layer
        self.afunc = nnet.d_afunc[afunc]
        self.withBias = withBias

        # theano shared variables
        self.W = theano.shared( nnet.randomN( self.Wshape, Wini ) )
        self.dW = theano.shared( np.zeros( self.Wshape ) )
        if withBias:
            self.b = theano.shared( np.zeros( Wnch ) )
            self.db = theano.shared( np.zeros( Wnch ) )

    def output( self, X ):

        # X: Ndat x Xshape,  Y: Ndat x Yshape
        Xs = ( None, self.Xshape[0], self.Xshape[1], self.Xshape[2] )
        Ws = self.Wshape
        Y = T.nnet.conv.conv2d( X, self.W, image_shape = Xs, filter_shape = Ws )
        if self.withBias:
            b = self.b.dimshuffle( 'x', 0, 'x', 'x' )  # 1 x nch x 1 x 1
            Y += b
        Z = self.afunc( Y )

        return Y, Z  # Ndat x Yshape
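
# a worked shape example (illustration, assuming the MNIST-sized 1 x 28 x 28
# input used by the accompanying training script): ConvLayer does a "valid"
# convolution, so Yrow = Xrow - Wrow + 1 and Ycol = Xcol - Wcol + 1.
#
#   conv = ConvLayer( ( 1, 28, 28 ), ( 16, 5, 5 ), 'linear', withBias = False )
#   conv.Wshape   # ( 16, 1, 5, 5 )
#   conv.Yshape   # ( 16, 24, 24 )
#   conv.Dout     # 16 * 24 * 24 = 9216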

########## Pooling Layer ##########

class PoolLayer( object ):

    def __init__( self, Xdim, ds, afunc, withBias ):

        # dimension of the input
        Xnch, Xrow, Xcol = Xdim
        self.Xshape = Xdim

        # parameters of the pooling layer
        self.ds = ds

        # assuming ignore_border = False
        Yrow = int( np.ceil( float( Xrow ) / ds[0] ) )
        Ycol = int( np.ceil( float( Xcol ) / ds[1] ) )
        self.Yshape = ( Xnch, Yrow, Ycol )
        self.Dout = Xnch * Yrow * Ycol

        # activation function of the layer
        self.afunc = nnet.d_afunc[afunc]
        self.withBias = withBias

        # theano shared variables
        if withBias:
            self.b = theano.shared( np.zeros( Xnch ) )
            self.db = theano.shared( np.zeros( Xnch ) )

    def output( self, X ):

        # X: Ndat x Xshape
        Y = Tsd.max_pool_2d( X, self.ds )  # Ndat x Yshape
        if self.withBias:
            b = self.b.dimshuffle( 'x', 0, 'x', 'x' )  # 1 x nch x 1 x 1
            Y += b
        Z = self.afunc( Y )

        return Y, Z
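
# a worked shape example (illustration): with ignore_border = False the pooled
# size is rounded up, so a 16 x 24 x 24 input with ds = ( 4, 4 ) gives
# Yshape = ( 16, 6, 6 ) and Dout = 576, while a 16 x 2 x 2 input with the same
# ds still gives Yshape = ( 16, 1, 1 ).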

########## Full-Connection Layer ##########

class FullLayer( nnet.Layer ):

    def __init__( self, Din, Nunit, afunc, withBias = True, Wini = 0.01,
                  T4toMat = False ):

        super( FullLayer, self ).__init__( Din, Nunit, afunc, withBias, Wini )
        self.T4toMat = T4toMat

    def super_output( self, X ):

        return super( FullLayer, self ).output( X )

    def output( self, X ):

        if self.T4toMat:
            return self.super_output( X.reshape( ( X.shape[0], -1 ) ) )
        else:
            return self.super_output( X )
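
# note: with T4toMat = True the 4-d output of a ConvLayer / PoolLayer
# ( Ndat x nch x row x col ) is flattened to ( Ndat, nch * row * col ) before
# the ordinary fully-connected computation, so Din of this layer should equal
# the Dout of the preceding layer.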

########## Convolutional Neural Net ##########

class CNN( object ):

    def __init__( self, Layers ):

        # layers - list of Layer instances
        self.Layers = Layers

        # theano functions
        self.output = self._Tfunc_output()
        self.cost = self._Tfunc_cost()
        self.train = self._Tfunc_train()

    ### theano function for output computation
    #
    def _Tfunc_output( self ):

        X = T.tensor4()  # Ndat x Xnch x Xrow x Xcol
        Y, Z = nnet._T_output( self.Layers, X )

        return theano.function( [ X ], [ Y, Z ] )

    ### theano function for cost computation
    #
    def _Tfunc_cost( self ):

        Z = T.dmatrix()  # N x K
        t = T.dmatrix()  # N x K
        cost = nnet._T_cost( Z, t )

        return theano.function( [ Z, t ], cost )

    ### theano function for gradient descent learning
    #
    def _Tfunc_train( self ):

        X = T.tensor4( 'X' )
        t = T.dmatrix( 't' )
        eta = T.dscalar( 'eta' )
        mu = T.dscalar( 'mu' )
        lam = T.dscalar( 'lambda' )

        Y, Z = nnet._T_output( self.Layers, X )
        cost = T.mean( nnet._T_cost( Z, t ) )

        updatesList = []
        for layer in self.Layers:

            # PoolLayer doesn't have W & dW
            if not isinstance( layer, PoolLayer ):
                gradW = T.grad( cost, layer.W )
                #dWnew = -eta * gradW + mu * layer.dW
                dWnew = -eta * ( gradW + lam * layer.W ) + mu * layer.dW
                Wnew = layer.W + dWnew
                updatesList.append( ( layer.W, Wnew ) )
                updatesList.append( ( layer.dW, dWnew ) )

            if layer.withBias:
                gradb = T.grad( cost, layer.b )
                # no weight decay for bias
                dbnew = -eta * gradb + mu * layer.db
                bnew = layer.b + dbnew
                updatesList.append( ( layer.b, bnew ) )
                updatesList.append( ( layer.db, dbnew ) )

        return theano.function( [ X, t, eta, mu, lam ], cost, updates = updatesList )
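
# a minimal usage sketch (illustration only; it assumes the MNIST-sized
# 1 x 28 x 28 inputs used by the accompanying training script) showing that a
# small Conv-Pool-Softmax net compiles and runs on random data:
if __name__ == '__main__':

    np.random.seed( 0 )

    L1 = ConvLayer( ( 1, 28, 28 ), ( 16, 5, 5 ), 'linear', withBias = False )
    L2 = PoolLayer( L1.Yshape, ( 4, 4 ), 'ReLu', withBias = True )
    L3 = FullLayer( L2.Dout, 10, 'softmax', withBias = True, T4toMat = True )
    cnn = CNN( [ L1, L2, L3 ] )

    X = np.random.random_sample( ( 5, 1, 28, 28 ) )            # 5 random "images"
    t = np.zeros( ( 5, 10 ), dtype = bool )
    t[np.arange( 5 ), np.random.randint( 0, 10, 5 )] = True    # random 1-of-K targets

    Y, Z = cnn.output( X )
    print Z.shape                             # ( 5, 10 )
    print cnn.train( X, t, 0.01, 0.9, 0.0 )   # mean cross-entropy of this batch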

########## training script: CNN on MNIST (uses convnet0211 and mnist0117) ##########

import numpy as np
import scipy as sp

import mnist0117 as mnist
import convnet0211 as convnet

def gendat( LT ):

    mn = mnist.MNIST( LT )
    label = mn.getLabel()
    N = label.shape[0]
    K = 10

    # pixel values scaled to [0,1] (float literal avoids Python 2 integer division)
    tmp = mn.getImage() / 255.0
    X = tmp.reshape( ( tmp.shape[0], 1, tmp.shape[1], tmp.shape[2] ) )

    t = np.zeros( ( N, K ), dtype = bool )
    for ik in range( K ):
        t[label == ik, ik] = True

    return X, label, t
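
# e.g. for MNIST, gendat( 'L' ) returns X of shape ( N, 1, 28, 28 ) with values
# in [0,1], the integer labels, and a boolean 1-of-K target matrix t in which
# a sample with label 3 has t[i] == [ F F F T F F F F F F ].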

def errorrate( mlp, X, t, label ):

    Y, Z = mlp.output( X )
    mnLL = np.mean( mlp.cost( Z, t ) )
    er = np.mean( label != np.argmax( Z, axis = 1 ) )

    return mnLL, er

# Conv-Pool-Softmax
def CPS( Xnch, Xrow, Xcol, K ):

    Xdim = ( Xnch, Xrow, Xcol )
    W1dim = ( 16, 5, 5 )
    ds1 = ( 4, 4 )

    L1conv = convnet.ConvLayer( Xdim, W1dim, 'linear', withBias = False )
    #L1pool = convnet.PoolLayer( L1conv.Yshape, ds1, 'linear', withBias = False )
    L1pool = convnet.PoolLayer( L1conv.Yshape, ds1, 'ReLu', withBias = True )
    H1 = L1pool.Dout
    L2 = convnet.FullLayer( H1, K, 'softmax', withBias = True, T4toMat = True )
    cnn = convnet.CNN( [ L1conv, L1pool, L2 ] )

    print '### Conv-Pool-Softmax Xdim:', Xdim
    print '# W1dim:', W1dim, ' ds1:', ds1, ' H1:', H1

    return cnn
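
# shape trace for CPS with a 1 x 28 x 28 MNIST input (illustration):
#   conv ( 16, 5, 5 ) : 1 x 28 x 28   ->  16 x 24 x 24
#   pool ( 4, 4 )     : 16 x 24 x 24  ->  16 x 6 x 6    ( H1 = 576 )
#   full / softmax    : 576           ->  K = 10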

# Conv-Pool-Conv-Pool-Softmax
def CPCPS( Xnch, Xrow, Xcol, K ):

    Xdim = ( Xnch, Xrow, Xcol )
    W1dim = ( 16, 5, 5 )
    ds1 = ( 4, 4 )
    #ds1 = ( 2, 2 )
    W2dim = ( 16, 5, 5 )
    ds2 = ( 4, 4 )
    #ds2 = ( 2, 2 )

    L1conv = convnet.ConvLayer( Xdim, W1dim, 'linear', withBias = False )
    #L1pool = convnet.PoolLayer( L1conv.Yshape, ds1, 'linear', withBias = False )
    L1pool = convnet.PoolLayer( L1conv.Yshape, ds1, 'ReLu', withBias = True )
    H1 = L1pool.Dout
    L2conv = convnet.ConvLayer( L1pool.Yshape, W2dim, 'linear', withBias = False )
    #L2pool = convnet.PoolLayer( L2conv.Yshape, ds2, 'linear', withBias = False )
    L2pool = convnet.PoolLayer( L2conv.Yshape, ds2, 'ReLu', withBias = True )
    H2 = L2pool.Dout
    L3 = convnet.FullLayer( H2, K, 'softmax', withBias = True, T4toMat = True )
    cnn = convnet.CNN( [ L1conv, L1pool, L2conv, L2pool, L3 ] )

    print '### Conv-Pool-Conv-Pool-Softmax Xdim:', Xdim
    print '# W1dim:', W1dim, ' ds1:', ds1, ' H1:', H1
    print '# W2dim:', W2dim, ' ds2:', ds2, ' H2:', H2

    return cnn
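
# shape trace for CPCPS with a 1 x 28 x 28 MNIST input (illustration):
#   conv ( 16, 5, 5 ) : 1 x 28 x 28   ->  16 x 24 x 24
#   pool ( 4, 4 )     : 16 x 24 x 24  ->  16 x 6 x 6    ( H1 = 576 )
#   conv ( 16, 5, 5 ) : 16 x 6 x 6    ->  16 x 2 x 2
#   pool ( 4, 4 )     : 16 x 2 x 2    ->  16 x 1 x 1    ( H2 = 16, ceil rounding )
#   full / softmax    : 16            ->  K = 10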

# Conv-Pool-ReLu-Softmax
def CPRS( Xnch, Xrow, Xcol, K ):

    Xdim = ( Xnch, Xrow, Xcol )
    W1dim = ( 16, 5, 5 )
    #W1dim = ( 64, 5, 5 )
    ds1 = ( 4, 4 )

    L1conv = convnet.ConvLayer( Xdim, W1dim, 'linear', withBias = False )
    #L1pool = convnet.PoolLayer( L1conv.Yshape, ds1, 'linear', withBias = False )
    L1pool = convnet.PoolLayer( L1conv.Yshape, ds1, 'ReLu', withBias = True )
    H1 = L1pool.Dout
    H2 = 400
    L2 = convnet.FullLayer( H1, H2, 'ReLu', withBias = True, T4toMat = True )
    L3 = convnet.FullLayer( H2, K, 'softmax', withBias = True, T4toMat = False )
    cnn = convnet.CNN( [ L1conv, L1pool, L2, L3 ] )

    print '### Conv-Pool-ReLu-Softmax Xdim:', Xdim
    print '# W1dim:', W1dim, ' ds1:', ds1, ' H1:', H1
    print '# H2:', H2

    return cnn
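
# shape trace for CPRS with a 1 x 28 x 28 MNIST input (illustration):
#   conv ( 16, 5, 5 ) : 1 x 28 x 28   ->  16 x 24 x 24
#   pool ( 4, 4 )     : 16 x 24 x 24  ->  16 x 6 x 6    ( H1 = 576 )
#   full / ReLu       : 576           ->  H2 = 400
#   full / softmax    : 400           ->  K = 10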

if __name__ == "__main__":

    np.random.seed( 0 )

    ##### setting the training data & the validation data
    #
    X, label, t = gendat( 'L' )
    xm = np.mean( X, axis = 0 )
    X -= xm
    XL, labelL, tL = X[:50000], label[:50000], t[:50000]
    XV, labelV, tV = X[50000:], label[50000:], t[50000:]
    NL, Xnch, Xrow, Xcol = XL.shape
    NV, Xnch, Xrow, Xcol = XV.shape
    K = t.shape[1]
    Xdim = ( Xrow, Xcol )

    ##### mini batch indices for stochastic gradient descent
    #
    idx = np.random.permutation( NL )
    batchsize = 100
    nbatch = NL / batchsize
    assert( NL % batchsize == 0 )
    idxB = np.zeros( ( nbatch, NL ), dtype = bool )
    for ib in range( nbatch ):
        idxB[ib, idx.reshape( ( nbatch, batchsize ))[ib, :]] = True
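
    # e.g. with NL = 50000 and batchsize = 100, idxB is a 500 x 50000 boolean
    # matrix; row ib selects the 100 samples of mini-batch ib, so XL[idxB[ib, :]]
    # has shape ( 100, 1, 28, 28 ).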

    ##### training
    #
    #cnn = CPS( Xnch, Xrow, Xcol, K )
    cnn = CPCPS( Xnch, Xrow, Xcol, K )
    #cnn = CPRS( Xnch, Xrow, Xcol, K )

    eta, mu, lam = 0.05, 0.9, 0.0
    nepoch = 50

    print '### training: NL = ', NL, ' NV = ', NV, ' K = ', K, ' batchsize = ', batchsize

    for i in range( nepoch ):

        # printing error rates etc.
        if i % 10 == 0:
            mnLLL, erL = errorrate( cnn, XL, tL, labelL )
            mnLLV, erV = errorrate( cnn, XV, tV, labelV )
            print '%d %.4f %.2f %.4f %.2f' % ( i, mnLLL, erL * 100, mnLLV, erV * 100 )

        # training (selecting each batch in random order)
        for ib in np.random.permutation( nbatch ):
            ii = idxB[ib, :]
            cnn.train( XL[ii], tL[ii], eta, mu, lam )

    i = nepoch
    mnLLL, erL = errorrate( cnn, XL, tL, labelL )
    mnLLV, erV = errorrate( cnn, XV, tV, labelV )
    print '%d %.4f %.2f %.4f %.2f' % ( i, mnLLL, erL * 100, mnLLV, erV * 100 )

    ##### setting the test data
    #
    XT, labelT, tT = gendat( 'T' )
    XT -= xm
    NT, Nstack, Xrow, Xcol = XT.shape
    print '# NT = ', NT

    mnLLT, erT = errorrate( cnn, XT, tT, labelT )
    print '%d %.4f %.2f %.4f %.2f %.4f %.2f' % ( i, mnLLL, erL * 100, mnLLV, erV * 100, mnLLT, erT * 100 )

########## training script: MLP on MNIST (uses nnet0211 and mnist0117) ##########

import numpy as np
import scipy as sp

import mnist0117 as mnist
import nnet0211 as nnet

def gendat( LT ):

    mn = mnist.MNIST( LT )
    label = mn.getLabel()
    N = label.shape[0]
    K = 10

    # pixel values scaled to [0,1] (float literal avoids Python 2 integer division)
    X = mn.getImage().reshape( ( N, -1 ) ) / 255.0

    t = np.zeros( ( N, K ), dtype = bool )
    for ik in range( K ):
        t[label == ik, ik] = True

    return X, label, t

def errorrate( mlp, X, t, label ):

    Y, Z = mlp.output( X )
    mnLL = np.mean( mlp.cost( Z, t ) )
    er = np.mean( label != np.argmax( Z, axis = 1 ) )

    return mnLL, er

def MLP2( D, H1, K ):

    print '### 2-layer MLP: D =', D, ' H =', H1, ' K =', K
    L1 = nnet.Layer( D, H1, 'ReLu', withBias = True, Wini = 0.01 )
    L2 = nnet.Layer( H1, K, 'softmax', withBias = True, Wini = 0.01 )
    mlp = nnet.MLP( [ L1, L2 ] )

    return mlp


def MLP3( D, H1, H2, K ):

    print '### 3-layer MLP: D =', D, ' H1 =', H1, ' H2 =', H2, ' K =', K
    L1 = nnet.Layer( D, H1, 'ReLu', withBias = True, Wini = 0.01 )
    L2 = nnet.Layer( H1, H2, 'ReLu', withBias = True, Wini = 0.01 )
    L3 = nnet.Layer( H2, K, 'softmax', withBias = True, Wini = 0.01 )
    mlp = nnet.MLP( [ L1, L2, L3 ] )

    return mlp

if __name__ == "__main__":

    np.random.seed( 0 )

    ##### setting the training data & the validation data
    #
    X, label, t = gendat( 'L' )
    xm = np.mean( X, axis = 0 )
    X -= xm
    XL, labelL, tL = X[:50000], label[:50000], t[:50000]
    XV, labelV, tV = X[50000:], label[50000:], t[50000:]
    NL, D = XL.shape
    NV, D = XV.shape
    K = t.shape[1]

    ##### mini batch indices for stochastic gradient descent
    #
    idx = np.random.permutation( NL )
    batchsize = 100
    nbatch = NL / batchsize
    assert( NL % batchsize == 0 )
    idxB = np.zeros( ( nbatch, NL ), dtype = bool )
    for ib in range( nbatch ):
        idxB[ib, idx.reshape( ( nbatch, batchsize ))[ib, :]] = True

    ##### training
    #
    #mlp = MLP2( D, 500, K )
    #mlp = MLP3( D, 500, 1000, K )
    mlp = MLP3( D, 1000, 500, K )

    eta = 0.1
    mu = 0.9
    lam = 0.00001
    nepoch = 50

    print '### training: NL = ', NL, 'NV = ', NV, ' D = ', D, ' K = ', K, ' batchsize = ', batchsize

    for i in range( nepoch ):

        # printing error rates etc.
        if i % 10 == 0:
            mnLLL, erL = errorrate( mlp, XL, tL, labelL )
            mnLLV, erV = errorrate( mlp, XV, tV, labelV )
            print '%d | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, erL * 100, mnLLV, erV * 100 )

        # training (selecting each batch in random order)
        for ib in np.random.permutation( nbatch ):
            ii = idxB[ib, :]
            mlp.train( XL[ii], tL[ii], eta, mu, lam )

    i = nepoch
    mnLLL, erL = errorrate( mlp, XL, tL, labelL )
    mnLLV, erV = errorrate( mlp, XV, tV, labelV )
    print '%d | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, erL * 100, mnLLV, erV * 100 )

    ##### setting the test data
    #
    XT, labelT, tT = gendat( 'T' )
    XT -= xm
    NT, D = XT.shape
    print '# NT = ', NT

    mnLLT, erT = errorrate( mlp, XT, tT, labelT )
    print '%d | %.4f %.2f | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, erL * 100, mnLLV, erV * 100, mnLLT, erT * 100 )

########## nnet0211.py : basic Layer / MLP classes used by the scripts above ##########

import numpy as np
import theano
import theano.tensor as T


# activation functions
d_afunc = { 'linear':  lambda Y: Y,
            'sigmoid': T.nnet.sigmoid,
            'softmax': T.nnet.softmax,
            'ReLu':    lambda Y: T.switch( Y > 0, Y, 0 ) }
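
# note: 'ReLu' is the rectified linear unit max( Y, 0 ), written here with
# T.switch; T.maximum( Y, 0 ) would be an equivalent formulation.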

### uniform random numbers for weight initialization
#
def randomU( shape, a ):

    # uniform in [ -a, a )
    return 2 * a * ( np.random.random_sample( shape ) - 0.5 )


### Gaussian random numbers for weight initialization
#
def randomN( shape, sig ):

    # N( 0, sig^2 ), i.e. sig is the standard deviation
    return sig * np.random.standard_normal( shape )

########## Layer ##########

class Layer( object ):

    def __init__( self, Din, Nunit, afunc, withBias = True, Wini = 0.01 ):

        self.Din = Din
        self.Nunit = Nunit
        self.afunc = d_afunc[afunc]
        self.withBias = withBias

        # theano shared variables for weights & biases
        self.W = theano.shared( randomN( ( Nunit, Din ), Wini ) )
        self.dW = theano.shared( np.zeros( ( Nunit, Din ) ) )
        if withBias:
            self.b = theano.shared( np.zeros( Nunit ) )
            self.db = theano.shared( np.zeros( Nunit ) )

    def output( self, X ):

        if self.withBias:
            Y = T.dot( X, self.W.T ) + self.b  # Ndat x Nunit
        else:
            Y = T.dot( X, self.W.T )
        Z = self.afunc( Y )

        return Y, Z
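
# a worked shape example (illustration): Layer( 784, 500, 'ReLu' ) holds W of
# shape ( 500, 784 ) and b of shape ( 500, ); for X of shape ( Ndat, 784 ),
# output() returns Y = X W^T + b and Z = ReLu( Y ), both of shape ( Ndat, 500 ).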

########## MLP ##########

class MLP( object ):

    def __init__( self, Layers ):

        # layers - list of Layer instances
        self.Layers = Layers

        # theano functions
        self.output = self._Tfunc_output()
        self.cost = self._Tfunc_cost()
        self.train = self._Tfunc_train()

    ### theano function for output computation
    #
    def _Tfunc_output( self ):

        X = T.dmatrix()  # N x D
        Y, Z = _T_output( self.Layers, X )

        return theano.function( [ X ], [ Y, Z ] )

    ### theano function for cost computation
    #
    def _Tfunc_cost( self ):

        Z = T.dmatrix()  # N x K
        t = T.dmatrix()  # N x K
        cost = _T_cost( Z, t )

        return theano.function( [ Z, t ], cost )

    ### theano function for gradient descent learning
    #
    def _Tfunc_train( self ):

        X = T.dmatrix( 'X' )  # N x D
        t = T.dmatrix( 't' )  # N x K
        eta = T.dscalar( 'eta' )
        mu = T.dscalar( 'mu' )
        lam = T.dscalar( 'lambda' )

        Y, Z = _T_output( self.Layers, X )
        cost = T.mean( _T_cost( Z, t ) )

        updatesList = []
        for layer in self.Layers:

            gradW = T.grad( cost, layer.W )
            #dWnew = -eta * gradW + mu * layer.dW
            dWnew = -eta * ( gradW + lam * layer.W ) + mu * layer.dW
            Wnew = layer.W + dWnew
            updatesList.append( ( layer.W, Wnew ) )
            updatesList.append( ( layer.dW, dWnew ) )

            if layer.withBias:
                gradb = T.grad( cost, layer.b )
                # no weight decay for bias
                dbnew = -eta * gradb + mu * layer.db
                bnew = layer.b + dbnew
                updatesList.append( ( layer.b, bnew ) )
                updatesList.append( ( layer.db, dbnew ) )

        return theano.function( [ X, t, eta, mu, lam ], cost, updates = updatesList )
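
# the update rule implemented in _Tfunc_train above (and in the CNN version) is
# gradient descent with momentum and weight decay, the decay being applied to
# W only:
#   dW_new = -eta * ( dcost/dW + lam * W ) + mu * dW_old ,  W_new = W + dW_new
#   db_new = -eta *   dcost/db             + mu * db_old ,  b_new = b + db_new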

def _T_output( Layers, X ):

    Zprev = X
    for layer in Layers:
        Y, Z = layer.output( Zprev )
        Zprev = Z

    return Y, Z


def _T_cost( Z, t ):

    return T.nnet.categorical_crossentropy( Z, t )
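
# _T_cost is the per-sample categorical cross-entropy: for a 1-of-K target t
# and softmax output Z it equals -sum_k t[k] * log Z[k], the negative
# log-probability of the correct class.  A NumPy sketch of the same quantity
# (illustration only):
#
#   def xent( Z, t ):
#       return -np.sum( t * np.log( Z ), axis = 1 )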