Gist by @takatakamanbou, created February 17, 2015 05:24
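########## module: presumably convnet0211 (imported as "convnet" by the CNN training script below) ##########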
import numpy as np
import theano
import theano.tensor as T
import theano.tensor.signal.downsample as Tsd
import nnet0211 as nnet
########## Convolution Layer ##########
class ConvLayer( object ):
    def __init__( self, Xdim, Wdim, afunc, withBias, Wini = 0.01 ):

        # dimension of the input
        Xnch, Xrow, Xcol = Xdim
        self.Xshape = Xdim

        # dimension of the convolution filters
        Wnch, Wrow, Wcol = Wdim
        self.Wshape = ( Wnch, Xnch, Wrow, Wcol )

        # dimension of the output
        Yrow, Ycol = Xrow - Wrow + 1, Xcol - Wcol + 1
        self.Yshape = ( Wnch, Yrow, Ycol )
        self.Dout = Wnch * Yrow * Ycol

        # activation function of the layer
        self.afunc = nnet.d_afunc[afunc]
        self.withBias = withBias

        # theano shared variables
        self.W = theano.shared( nnet.randomN( self.Wshape, Wini ) )
        self.dW = theano.shared( np.zeros( self.Wshape ) )
        if withBias:
            self.b = theano.shared( np.zeros( Wnch ) )
            self.db = theano.shared( np.zeros( Wnch ) )

    def output( self, X ):

        # X: Ndat x Xshape, Y: Ndat x Yshape
        Xs = ( None, self.Xshape[0], self.Xshape[1], self.Xshape[2] )
        Ws = self.Wshape
        Y = T.nnet.conv.conv2d( X, self.W, image_shape = Xs, filter_shape = Ws )
        if self.withBias:
            b = self.b.dimshuffle( 'x', 0, 'x', 'x' )  # 1 x nch x 1 x 1
            Y += b
        Z = self.afunc( Y )

        return Y, Z  # Ndat x Yshape
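
# Example (sketch, matching the MNIST scripts below): for a 1-channel 28x28
# input and 16 filters of size 5x5, the "valid" convolution above gives a
# 16 x 24 x 24 output (24 = 28 - 5 + 1):
#   conv = ConvLayer( ( 1, 28, 28 ), ( 16, 5, 5 ), 'linear', withBias = False )
#   conv.Yshape   # ( 16, 24, 24 ),  conv.Dout == 16 * 24 * 24 == 9216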
########## Pooling Layer ##########
class PoolLayer( object ):
    def __init__( self, Xdim, ds, afunc, withBias ):

        # dimension of the input
        Xnch, Xrow, Xcol = Xdim
        self.Xshape = Xdim

        # parameters of the pooling layer
        self.ds = ds

        # assuming ignore_border = False
        Yrow = int( np.ceil( float( Xrow ) / ds[0] ) )
        Ycol = int( np.ceil( float( Xcol ) / ds[1] ) )
        self.Yshape = ( Xnch, Yrow, Ycol )
        self.Dout = Xnch * Yrow * Ycol

        # activation function of the layer
        self.afunc = nnet.d_afunc[afunc]
        self.withBias = withBias

        # theano shared variables
        if withBias:
            self.b = theano.shared( np.zeros( Xnch ) )
            self.db = theano.shared( np.zeros( Xnch ) )

    def output( self, X ):

        # X: Ndat x Xshape
        Y = Tsd.max_pool_2d( X, self.ds )  # Ndat x Yshape
        if self.withBias:
            b = self.b.dimshuffle( 'x', 0, 'x', 'x' )  # 1 x nch x 1 x 1
            Y += b
        Z = self.afunc( Y )

        return Y, Z
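
# Example (sketch, continuing the one above): 4x4 max pooling of the
# 16 x 24 x 24 feature maps with ignore_border = False gives 16 x 6 x 6
# (6 = ceil( 24 / 4 )):
#   pool = PoolLayer( ( 16, 24, 24 ), ( 4, 4 ), 'ReLu', withBias = True )
#   pool.Yshape   # ( 16, 6, 6 ),  pool.Dout == 16 * 6 * 6 == 576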
########## Full-Connection Layer ##########
class FullLayer( nnet.Layer ):
    def __init__( self, Din, Nunit, afunc, withBias = True, Wini = 0.01,
                  T4toMat = False ):
        super( FullLayer, self ).__init__( Din, Nunit, afunc, withBias, Wini )
        self.T4toMat = T4toMat

    def super_output( self, X ):
        return super( FullLayer, self ).output( X )

    def output( self, X ):
        if self.T4toMat:
            return self.super_output( X.reshape( ( X.shape[0], -1 ) ) )
        else:
            return self.super_output( X )
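
# Example (sketch): with T4toMat = True the 4-d input ( Ndat, 16, 6, 6 ) coming
# from a pooling layer is flattened to an Ndat x 576 matrix before the usual
# fully-connected computation, so the layer can sit directly on top of a
# ConvLayer / PoolLayer:
#   L2 = FullLayer( 576, 10, 'softmax', withBias = True, T4toMat = True )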
########## Convolutional Neural Net ##########
class CNN( object ):
    def __init__( self, Layers ):

        # layers - list of Layer instances
        self.Layers = Layers

        # theano functions
        self.output = self._Tfunc_output()
        self.cost = self._Tfunc_cost()
        self.train = self._Tfunc_train()

    ### theano function for output computation
    #
    def _Tfunc_output( self ):
        X = T.tensor4()  # Ndat x Xnch x Xrow x Xcol
        Y, Z = nnet._T_output( self.Layers, X )
        return theano.function( [ X ], [ Y, Z ] )

    ### theano function for cost computation
    #
    def _Tfunc_cost( self ):
        Z = T.dmatrix()  # N x K
        t = T.dmatrix()  # N x K
        cost = nnet._T_cost( Z, t )
        return theano.function( [ Z, t ], cost )

    ### theano function for gradient descent learning
    #
    def _Tfunc_train( self ):
        X = T.tensor4( 'X' )
        t = T.dmatrix( 't' )
        eta = T.dscalar( 'eta' )
        mu = T.dscalar( 'mu' )
        lam = T.dscalar( 'lambda' )

        Y, Z = nnet._T_output( self.Layers, X )
        cost = T.mean( nnet._T_cost( Z, t ) )

        updatesList = []
        for layer in self.Layers:

            # PoolLayer doesn't have W & dW
            if not isinstance( layer, PoolLayer ):
                gradW = T.grad( cost, layer.W )
                #dWnew = -eta * gradW + mu * layer.dW
                dWnew = -eta * ( gradW + lam * layer.W ) + mu * layer.dW
                Wnew = layer.W + dWnew
                updatesList.append( ( layer.W, Wnew ) )
                updatesList.append( ( layer.dW, dWnew ) )

            if layer.withBias:
                gradb = T.grad( cost, layer.b )
                # no weight decay for bias
                dbnew = -eta * gradb + mu * layer.db
                bnew = layer.b + dbnew
                updatesList.append( ( layer.b, bnew ) )
                updatesList.append( ( layer.db, dbnew ) )

        return theano.function( [ X, t, eta, mu, lam ], cost, updates = updatesList )
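
# Usage sketch (illustrative; mirrors the MNIST training script below): the
# three theano functions built above are called as
#   Y, Z = cnn.output( X )            # X: Ndat x nch x row x col
#   cost = cnn.cost( Z, t )           # t: Ndat x K, 1-of-K coded
#   cnn.train( X, t, eta, mu, lam )   # one mini-batch gradient-descent update


########## MNIST training script for the CNN (uses convnet0211 and mnist0117) ##########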
import numpy as np
import scipy as sp
import mnist0117 as mnist
import convnet0211 as convnet
def gendat( LT ):

    mn = mnist.MNIST( LT )
    label = mn.getLabel()
    N = label.shape[0]
    K = 10
    tmp = mn.getImage() / 255.0  # float division => pixel values in [0,1]
    X = tmp.reshape( ( tmp.shape[0], 1, tmp.shape[1], tmp.shape[2] ) )
    t = np.zeros( ( N, K ), dtype = bool )
    for ik in range( K ):
        t[label == ik, ik] = True

    return X, label, t
def errorrate( mlp, X, t, label ):

    Y, Z = mlp.output( X )
    mnLL = np.mean( mlp.cost( Z, t ) )
    er = np.mean( label != np.argmax( Z, axis = 1 ) )

    return mnLL, er
# Conv-Pool-Softmax
def CPS( Xnch, Xrow, Xcol, K ):

    Xdim = ( Xnch, Xrow, Xcol )
    W1dim = ( 16, 5, 5 )
    ds1 = ( 4, 4 )

    L1conv = convnet.ConvLayer( Xdim, W1dim, 'linear', withBias = False )
    #L1pool = convnet.PoolLayer( L1conv.Yshape, ds1, 'linear', withBias = False )
    L1pool = convnet.PoolLayer( L1conv.Yshape, ds1, 'ReLu', withBias = True )
    H1 = L1pool.Dout
    L2 = convnet.FullLayer( H1, K, 'softmax', withBias = True, T4toMat = True )

    cnn = convnet.CNN( [ L1conv, L1pool, L2 ] )

    print '### Conv-Pool-Softmax Xdim:', Xdim
    print '# W1dim:', W1dim, ' ds1:', ds1, ' H1:', H1

    return cnn
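
# CPS shapes for 28x28 MNIST input: conv 5x5 => 16 x 24 x 24, pool 4x4 =>
# 16 x 6 x 6, so H1 = 16 * 6 * 6 = 576 inputs to the softmax layer.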
# Conv-Pool-Conv-Pool-Softmax
def CPCPS( Xnch, Xrow, Xcol, K ):

    Xdim = ( Xnch, Xrow, Xcol )
    W1dim = ( 16, 5, 5 )
    ds1 = ( 4, 4 )
    #ds1 = ( 2, 2 )
    W2dim = ( 16, 5, 5 )
    ds2 = ( 4, 4 )
    #ds2 = ( 2, 2 )

    L1conv = convnet.ConvLayer( Xdim, W1dim, 'linear', withBias = False )
    #L1pool = convnet.PoolLayer( L1conv.Yshape, ds1, 'linear', withBias = False )
    L1pool = convnet.PoolLayer( L1conv.Yshape, ds1, 'ReLu', withBias = True )
    H1 = L1pool.Dout
    L2conv = convnet.ConvLayer( L1pool.Yshape, W2dim, 'linear', withBias = False )
    #L2pool = convnet.PoolLayer( L2conv.Yshape, ds2, 'linear', withBias = False )
    L2pool = convnet.PoolLayer( L2conv.Yshape, ds2, 'ReLu', withBias = True )
    H2 = L2pool.Dout
    L3 = convnet.FullLayer( H2, K, 'softmax', withBias = True, T4toMat = True )

    cnn = convnet.CNN( [ L1conv, L1pool, L2conv, L2pool, L3 ] )

    print '### Conv-Pool-Conv-Pool-Softmax Xdim:', Xdim
    print '# W1dim:', W1dim, ' ds1:', ds1, ' H1:', H1
    print '# W2dim:', W2dim, ' ds2:', ds2, ' H2:', H2

    return cnn
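
# CPCPS shapes for 28x28 MNIST input: 16 x 24 x 24 -> 16 x 6 x 6 after the
# first conv/pool pair (H1 = 576), then 16 x 2 x 2 -> 16 x 1 x 1 after the
# second (H2 = 16).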
# Conv-Pool-ReLu-Softmax
def CPRS( Xnch, Xrow, Xcol, K ):

    Xdim = ( Xnch, Xrow, Xcol )
    W1dim = ( 16, 5, 5 )
    #W1dim = ( 64, 5, 5 )
    ds1 = ( 4, 4 )

    L1conv = convnet.ConvLayer( Xdim, W1dim, 'linear', withBias = False )
    #L1pool = convnet.PoolLayer( L1conv.Yshape, ds1, 'linear', withBias = False )
    L1pool = convnet.PoolLayer( L1conv.Yshape, ds1, 'ReLu', withBias = True )
    H1 = L1pool.Dout
    H2 = 400
    L2 = convnet.FullLayer( H1, H2, 'ReLu', withBias = True, T4toMat = True )
    L3 = convnet.FullLayer( H2, K, 'softmax', withBias = True, T4toMat = False )

    cnn = convnet.CNN( [ L1conv, L1pool, L2, L3 ] )

    print '### Conv-Pool-ReLu-Softmax Xdim:', Xdim
    print '# W1dim:', W1dim, ' ds1:', ds1, ' H1:', H1
    print '# H2:', H2

    return cnn
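
# CPRS shapes for 28x28 MNIST input: H1 = 16 * 6 * 6 = 576 as in CPS, followed
# by a 400-unit ReLU layer (H2) and the K-way softmax output.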
if __name__ == "__main__":
    np.random.seed( 0 )

    ##### setting the training data & the validation data
    #
    X, label, t = gendat( 'L' )
    xm = np.mean( X, axis = 0 )
    X -= xm
    XL, labelL, tL = X[:50000], label[:50000], t[:50000]
    XV, labelV, tV = X[50000:], label[50000:], t[50000:]
    NL, Xnch, Xrow, Xcol = XL.shape
    NV, Xnch, Xrow, Xcol = XV.shape
    K = t.shape[1]
    Xdim = ( Xrow, Xcol )

    ##### mini-batch indices for stochastic gradient descent
    #
    idx = np.random.permutation( NL )
    batchsize = 100
    nbatch = NL / batchsize
    assert( NL % batchsize == 0 )
    idxB = np.zeros( ( nbatch, NL ), dtype = bool )
    for ib in range( nbatch ):
        idxB[ib, idx.reshape( ( nbatch, batchsize ) )[ib, :]] = True
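
    # e.g. with NL = 50000 and batchsize = 100 this builds nbatch = 500 boolean
    # masks over the training set; row ib of idxB selects the 100 samples of
    # mini batch ib, so XL[idxB[ib, :]] is one shuffled batch.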
    ##### training
    #
    #cnn = CPS( Xnch, Xrow, Xcol, K )
    cnn = CPCPS( Xnch, Xrow, Xcol, K )
    #cnn = CPRS( Xnch, Xrow, Xcol, K )

    eta, mu, lam = 0.05, 0.9, 0.0
    nepoch = 50

    print '### training: NL = ', NL, ' NV = ', NV, ' K = ', K, ' batchsize = ', batchsize

    for i in range( nepoch ):

        # printing error rates etc.
        if i % 10 == 0:
            mnLLL, erL = errorrate( cnn, XL, tL, labelL )
            mnLLV, erV = errorrate( cnn, XV, tV, labelV )
            print '%d %.4f %.2f %.4f %.2f' % ( i, mnLLL, erL * 100, mnLLV, erV * 100 )

        # training (selecting each batch in random order)
        for ib in np.random.permutation( nbatch ):
            ii = idxB[ib, :]
            cnn.train( XL[ii], tL[ii], eta, mu, lam )

    i = nepoch
    mnLLL, erL = errorrate( cnn, XL, tL, labelL )
    mnLLV, erV = errorrate( cnn, XV, tV, labelV )
    print '%d %.4f %.2f %.4f %.2f' % ( i, mnLLL, erL * 100, mnLLV, erV * 100 )

    ##### setting the test data
    #
    XT, labelT, tT = gendat( 'T' )
    XT -= xm
    NT, Nstack, Xrow, Xcol = XT.shape
    print '# NT = ', NT

    mnLLT, erT = errorrate( cnn, XT, tT, labelT )
    print '%d %.4f %.2f %.4f %.2f %.4f %.2f' % ( i, mnLLL, erL * 100, mnLLV, erV * 100, mnLLT, erT * 100 )
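
########## MNIST training script for the MLP (uses nnet0211 and mnist0117) ##########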
import numpy as np
import scipy as sp
import mnist0117 as mnist
import nnet0211 as nnet
def gendat( LT ):

    mn = mnist.MNIST( LT )
    label = mn.getLabel()
    N = label.shape[0]
    K = 10
    X = mn.getImage().reshape( ( N, -1 ) ) / 255.0  # float division => pixel values in [0,1]
    t = np.zeros( ( N, K ), dtype = bool )
    for ik in range( K ):
        t[label == ik, ik] = True

    return X, label, t
def errorrate( mlp, X, t, label ):

    Y, Z = mlp.output( X )
    mnLL = np.mean( mlp.cost( Z, t ) )
    er = np.mean( label != np.argmax( Z, axis = 1 ) )

    return mnLL, er
def MLP2( D, H1, K ):

    print '### 2-layer MLP: D =', D, ' H =', H1, ' K =', K
    L1 = nnet.Layer( D, H1, 'ReLu', withBias = True, Wini = 0.01 )
    L2 = nnet.Layer( H1, K, 'softmax', withBias = True, Wini = 0.01 )
    mlp = nnet.MLP( [ L1, L2 ] )

    return mlp


def MLP3( D, H1, H2, K ):

    print '### 3-layer MLP: D =', D, ' H1 =', H1, ' H2 =', H2, ' K =', K
    L1 = nnet.Layer( D, H1, 'ReLu', withBias = True, Wini = 0.01 )
    L2 = nnet.Layer( H1, H2, 'ReLu', withBias = True, Wini = 0.01 )
    L3 = nnet.Layer( H2, K, 'softmax', withBias = True, Wini = 0.01 )
    mlp = nnet.MLP( [ L1, L2, L3 ] )

    return mlp
if __name__ == "__main__":
    np.random.seed( 0 )

    ##### setting the training data & the validation data
    #
    X, label, t = gendat( 'L' )
    xm = np.mean( X, axis = 0 )
    X -= xm
    XL, labelL, tL = X[:50000], label[:50000], t[:50000]
    XV, labelV, tV = X[50000:], label[50000:], t[50000:]
    NL, D = XL.shape
    NV, D = XV.shape
    K = t.shape[1]

    ##### mini-batch indices for stochastic gradient descent
    #
    idx = np.random.permutation( NL )
    batchsize = 100
    nbatch = NL / batchsize
    assert( NL % batchsize == 0 )
    idxB = np.zeros( ( nbatch, NL ), dtype = bool )
    for ib in range( nbatch ):
        idxB[ib, idx.reshape( ( nbatch, batchsize ) )[ib, :]] = True

    ##### training
    #
    #mlp = MLP2( D, 500, K )
    #mlp = MLP3( D, 500, 1000, K )
    mlp = MLP3( D, 1000, 500, K )

    eta = 0.1
    mu = 0.9
    lam = 0.00001
    nepoch = 50

    print '### training: NL = ', NL, 'NV = ', NV, ' D = ', D, ' K = ', K, ' batchsize = ', batchsize

    for i in range( nepoch ):

        # printing error rates etc.
        if i % 10 == 0:
            mnLLL, erL = errorrate( mlp, XL, tL, labelL )
            mnLLV, erV = errorrate( mlp, XV, tV, labelV )
            print '%d | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, erL * 100, mnLLV, erV * 100 )

        # training (selecting each batch in random order)
        for ib in np.random.permutation( nbatch ):
            ii = idxB[ib, :]
            mlp.train( XL[ii], tL[ii], eta, mu, lam )

    i = nepoch
    mnLLL, erL = errorrate( mlp, XL, tL, labelL )
    mnLLV, erV = errorrate( mlp, XV, tV, labelV )
    print '%d | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, erL * 100, mnLLV, erV * 100 )

    ##### setting the test data
    #
    XT, labelT, tT = gendat( 'T' )
    XT -= xm
    NT, D = XT.shape
    print '# NT = ', NT

    mnLLT, erT = errorrate( mlp, XT, tT, labelT )
    print '%d | %.4f %.2f | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, erL * 100, mnLLV, erV * 100, mnLLT, erT * 100 )
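
########## module: presumably nnet0211 (imported as "nnet" by the scripts above) ##########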
import numpy as np
import theano
import theano.tensor as T
# activation functions
d_afunc = { 'linear':  lambda Y: Y,
            'sigmoid': T.nnet.sigmoid,
            'softmax': T.nnet.softmax,
            'ReLu':    lambda Y: T.switch( Y > 0, Y, 0 ) }
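
# note: 'ReLu' is an elementwise max( 0, Y ): T.switch keeps Y where Y > 0 and
# returns 0 elsewhere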
### uniform random numbers for weight initialization
#
def randomU( shape, a ):
    # values in [ -a, a )
    return 2 * a * ( np.random.random_sample( shape ) - 0.5 )


### Gaussian random numbers for weight initialization
#
def randomN( shape, sig ):
    # N( 0, sig^2 ), i.e. sig is the standard deviation
    return sig * np.random.standard_normal( shape )
########## Layer ##########
class Layer( object ):
    def __init__( self, Din, Nunit, afunc, withBias = True, Wini = 0.01 ):

        self.Din = Din
        self.Nunit = Nunit
        self.afunc = d_afunc[afunc]
        self.withBias = withBias

        # theano shared variables for weights & biases
        self.W = theano.shared( randomN( ( Nunit, Din ), Wini ) )
        self.dW = theano.shared( np.zeros( ( Nunit, Din ) ) )
        if withBias:
            self.b = theano.shared( np.zeros( Nunit ) )
            self.db = theano.shared( np.zeros( Nunit ) )

    def output( self, X ):

        if self.withBias:
            Y = T.dot( X, self.W.T ) + self.b  # Ndat x Nunit
        else:
            Y = T.dot( X, self.W.T )
        Z = self.afunc( Y )

        return Y, Z
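
# Example (sketch, matching MLP2 in the training script above, where D = 784
# for flattened 28x28 MNIST images):
#   L1 = Layer( 784, 500, 'ReLu', withBias = True, Wini = 0.01 )
# has W of shape ( 500, 784 ) and maps an N x 784 input to an N x 500 output.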
########## MLP ##########
class MLP( object ):
    def __init__( self, Layers ):

        # layers - list of Layer instances
        self.Layers = Layers

        # theano functions
        self.output = self._Tfunc_output()
        self.cost = self._Tfunc_cost()
        self.train = self._Tfunc_train()

    ### theano function for output computation
    #
    def _Tfunc_output( self ):
        X = T.dmatrix()  # N x D
        Y, Z = _T_output( self.Layers, X )
        return theano.function( [ X ], [ Y, Z ] )

    ### theano function for cost computation
    #
    def _Tfunc_cost( self ):
        Z = T.dmatrix()  # N x K
        t = T.dmatrix()  # N x K
        cost = _T_cost( Z, t )
        return theano.function( [ Z, t ], cost )

    ### theano function for gradient descent learning
    #
    def _Tfunc_train( self ):
        X = T.dmatrix( 'X' )  # N x D
        t = T.dmatrix( 't' )  # N x K
        eta = T.dscalar( 'eta' )
        mu = T.dscalar( 'mu' )
        lam = T.dscalar( 'lambda' )

        Y, Z = _T_output( self.Layers, X )
        cost = T.mean( _T_cost( Z, t ) )

        updatesList = []
        for layer in self.Layers:

            gradW = T.grad( cost, layer.W )
            #dWnew = -eta * gradW + mu * layer.dW
            dWnew = -eta * ( gradW + lam * layer.W ) + mu * layer.dW
            Wnew = layer.W + dWnew
            updatesList.append( ( layer.W, Wnew ) )
            updatesList.append( ( layer.dW, dWnew ) )

            if layer.withBias:
                gradb = T.grad( cost, layer.b )
                # no weight decay for bias
                dbnew = -eta * gradb + mu * layer.db
                bnew = layer.b + dbnew
                updatesList.append( ( layer.b, bnew ) )
                updatesList.append( ( layer.db, dbnew ) )

        return theano.function( [ X, t, eta, mu, lam ], cost, updates = updatesList )
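
    # The update rule implemented above is gradient descent with momentum and
    # weight decay (no decay on the biases):
    #   dW <- -eta * ( dE/dW + lam * W ) + mu * dW,   W <- W + dW
    #   db <- -eta *   dE/db             + mu * db,   b <- b + db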
def _T_output( Layers, X ):
    Zprev = X
    for layer in Layers:
        Y, Z = layer.output( Zprev )
        Zprev = Z

    return Y, Z


def _T_cost( Z, t ):

    return T.nnet.categorical_crossentropy( Z, t )
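
# _T_cost gives the per-sample cross-entropy  E_n = -sum_k t[n,k] * log Z[n,k];
# the callers average it over the mini batch with T.mean / np.mean.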