@takatakamanbou
Last active August 29, 2015 14:14
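### ------------------------------------------------------------------
### convnet0207.py -- convolution + max-pooling layer built on Theano
### (filename inferred from "import convnet0207 as convnet" below)
### ------------------------------------------------------------------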
import numpy as np
import theano
import theano.tensor as T
import theano.tensor.signal.downsample as Tsd

import nnet0207 as nnet


class Layer():

    # afunc: activation function (see nnet)
    # Xdim:  dimension of the input image ( Xrow, Xcol )
    # Xnch:  number of the input channels
    # Wdim:  dimension of the convolution filters ( Wrow, Wcol )
    # Wnch:  number of the filter channels
    # ds:    downsampling scale for max-pooling ( ds_vertical, ds_horizontal )
    # Wini_range: parameter for weight initialization (see nnet)
    #
    def __init__( self, afunc, Xdim, Xnch, Wdim, Wnch, ds, Wini_range ):

        # parameters of the input
        Xrow, Xcol = Xdim
        Xshape = ( Xnch, Xrow, Xcol )
        self.Xshape = Xshape

        # parameters of the convolution layer
        Wrow, Wcol = Wdim
        Wshape = ( Wnch, Xnch, Wrow, Wcol )
        self.Wshape = Wshape
        self.ds = ds
        Yrow, Ycol = Xrow - Wrow + 1, Xcol - Wcol + 1
        Yshape = ( Wnch, Yrow, Ycol )
        self.Yshape = Yshape

        # parameters of the pooling layer
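        # max_pool_2d is called below with its default ignore_border
        # (False in this Theano version, presumably), so the pooled map
        # has ceil( Yrow / ds[0] ) x ceil( Ycol / ds[1] ) cells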
        Zrow = int( np.ceil( float( Yrow ) / ds[0] ) )
        Zcol = int( np.ceil( float( Ycol ) / ds[1] ) )
        Zshape = ( Wnch, Zrow, Zcol )
        self.Zshape = Zshape
        self.Dout = Wnch * Zrow * Zcol

        # theano shared variables
        self.W = theano.shared( nnet.random( Wshape, Wini_range ) )
        self.dW = theano.shared( np.zeros( Wshape ) )

        # activation function of the layer
        self.afunc = nnet.d_afunc[afunc]

    def output( self, X ):

        # X: Ndat x Xnch x Xrow x Xcol
        Xshape = ( None, self.Xshape[0], self.Xshape[1], self.Xshape[2] )
        Wshape = self.Wshape
        Yconv = T.nnet.conv.conv2d( X, self.W, image_shape = Xshape, filter_shape = Wshape )  # Ndat x Wnch x Yrow x Ycol
        Ypool = Tsd.max_pool_2d( Yconv, self.ds )  # Ndat x Wnch x Zrow x Zcol
        if self.afunc == 'linear':
            Z = Ypool
        else:
            Z = self.afunc( Ypool )

        return Z
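### ------------------------------------------------------------------
### convnet_2layer0207.py -- one conv/pool layer + softmax output layer
### (filename inferred from "import convnet_2layer0207" in the MNIST
###  script below)
### ------------------------------------------------------------------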
import numpy as np
import theano
import theano.tensor as T

import nnet0207 as nnet
import convnet0207 as convnet


class MLP():

    # Xdim: ( Xrow, Xcol )   W1dim: ( W1row, W1col )
    # ds1:  ( ds_v, ds_h ) downscale factor
    #
    def __init__( self, Xdim, Xnch, W1dim, W1nch, ds1, K ):

        # layers
        self.L1 = convnet.Layer( 'linear', Xdim, Xnch, W1dim, W1nch, ds1, 0.1 )
        #self.L1 = convnet.Layer( 'ReLu', Xdim, Xnch, W1dim, W1nch, ds1, 0.1 )
        self.H = self.L1.Dout
        self.L2 = nnet.Layer( 'softmax', self.H, K, 0.1 )

        # theano functions
        self.output = self._Tfunc_output()
        self.cost = self._Tfunc_cost()
        self.train = self._Tfunc_train()

    ### theano function for output computation
    #
    def _Tfunc_output( self ):
        X = T.tensor4()  # Ndat x Xnch x Xrow x Xcol
        Y, Z = _T_output( self.L1, self.L2, X )
        return theano.function( [ X ], [ Y, Z ] )

    ### theano function for cost computation
    #
    def _Tfunc_cost( self ):
        Z = T.dmatrix()  # N x K
        t = T.dmatrix()  # N x K
        cost = _T_cost( Z, t )
        return theano.function( [ Z, t ], cost )

    ### theano function for gradient descent learning
    #
    def _Tfunc_train( self ):
        W1, dW1 = self.L1.W, self.L1.dW
        W2, dW2, b2, db2 = self.L2.W, self.L2.dW, self.L2.b, self.L2.db
        X = T.tensor4( 'X' )
        t = T.dmatrix( 't' )  # N x K
        eta = T.dscalar( 'eta' )
        mu = T.dscalar( 'mu' )
        Y2, Z2 = _T_output( self.L1, self.L2, X )
        cost = T.mean( _T_cost( Z2, t ) )
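        # gradient descent with momentum:
        #   dW_new = -eta * dcost/dW + mu * dW_old,   W_new = W + dW_new
        # (the other *_Tfunc_train methods in this gist use the same update)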
        gradW1, gradW2, gradb2 = T.grad( cost, [ W1, W2, b2 ] )
        dW1_new = -eta * gradW1 + mu * dW1
        dW2_new = -eta * gradW2 + mu * dW2
        db2_new = -eta * gradb2 + mu * db2
        W1_new = W1 + dW1_new
        W2_new = W2 + dW2_new
        b2_new = b2 + db2_new
        updatesList = [
            ( W1, W1_new ), ( dW1, dW1_new ),
            ( W2, W2_new ), ( b2, b2_new ), ( dW2, dW2_new ), ( db2, db2_new ) ]
        return theano.function( [ X, t, eta, mu ], cost, updates = updatesList )


def _T_output( L1, L2, X ):
    Z1 = L1.output( X )
    Y2, Z2 = L2.output( Z1.reshape( ( Z1.shape[0], -1 ) ) )
    return Y2, Z2


def _T_cost( Z, t ):
    return T.nnet.categorical_crossentropy( Z, t )
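### ------------------------------------------------------------------
### convnet_3layer0207.py -- two conv/pool layers + softmax output layer
### (filename inferred from "import convnet_3layer0207" in the MNIST
###  script below)
### ------------------------------------------------------------------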
import numpy as np
import theano
import theano.tensor as T

import nnet0207 as nnet
import convnet0207 as convnet


class MLP():

    # Xdim:  ( Xrow, Xcol )
    # W1dim: ( Wrow, Wcol )   ds1: ( ds_v, ds_h ) downscale factor
    # W2dim: ( Wrow, Wcol )   ds2: ( ds_v, ds_h ) downscale factor
    #
    def __init__( self, Xdim, Xnch, W1dim, W1nch, ds1, W2dim, W2nch, ds2, K ):

        # layers
        self.L1 = convnet.Layer( 'linear', Xdim, Xnch, W1dim, W1nch, ds1, 0.1 )
        Z1nch = self.L1.Zshape[0]
        Z1dim = self.L1.Zshape[1:]
        self.H1 = self.L1.Dout
        self.L2 = convnet.Layer( 'linear', Z1dim, Z1nch, W2dim, W2nch, ds2, 0.1 )
        self.H2 = self.L2.Dout
        self.L3 = nnet.Layer( 'softmax', self.H2, K, 0.1 )

        # theano functions
        self.output = self._Tfunc_output()
        self.cost = self._Tfunc_cost()
        self.train = self._Tfunc_train()

    ### theano function for output computation
    #
    def _Tfunc_output( self ):
        X = T.tensor4()  # Ndat x Xnch x Xrow x Xcol
        Y, Z = _T_output( self.L1, self.L2, self.L3, X )
        return theano.function( [ X ], [ Y, Z ] )

    ### theano function for cost computation
    #
    def _Tfunc_cost( self ):
        Z = T.dmatrix()  # N x K
        t = T.dmatrix()  # N x K
        cost = _T_cost( Z, t )
        return theano.function( [ Z, t ], cost )

    ### theano function for gradient descent learning
    #
    def _Tfunc_train( self ):
        W1, dW1 = self.L1.W, self.L1.dW
        W2, dW2 = self.L2.W, self.L2.dW
        W3, dW3, b3, db3 = self.L3.W, self.L3.dW, self.L3.b, self.L3.db
        X = T.tensor4( 'X' )
        t = T.dmatrix( 't' )  # N x K
        eta = T.dscalar( 'eta' )
        mu = T.dscalar( 'mu' )
        Y3, Z3 = _T_output( self.L1, self.L2, self.L3, X )
        cost = T.mean( _T_cost( Z3, t ) )
        gradW1, gradW2, gradW3, gradb3 = T.grad( cost, [ W1, W2, W3, b3 ] )
        dW1_new = -eta * gradW1 + mu * dW1
        dW2_new = -eta * gradW2 + mu * dW2
        dW3_new = -eta * gradW3 + mu * dW3
        db3_new = -eta * gradb3 + mu * db3
        W1_new = W1 + dW1_new
        W2_new = W2 + dW2_new
        W3_new = W3 + dW3_new
        b3_new = b3 + db3_new
        updatesList = [
            ( W1, W1_new ), ( dW1, dW1_new ),
            ( W2, W2_new ), ( dW2, dW2_new ),
            ( W3, W3_new ), ( b3, b3_new ), ( dW3, dW3_new ), ( db3, db3_new ) ]
        return theano.function( [ X, t, eta, mu ], cost, updates = updatesList )


def _T_output( L1, L2, L3, X ):
    Z1 = L1.output( X )
    Z2 = L2.output( Z1 )
    Y3, Z3 = L3.output( Z2.reshape( ( Z2.shape[0], -1 ) ) )
    return Y3, Z3


def _T_cost( Z, t ):
    return T.nnet.categorical_crossentropy( Z, t )
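### ------------------------------------------------------------------
### mlp_2layer0207.py -- fully-connected ReLu hidden layer + softmax
### (filename inferred from "import mlp_2layer0207" in the MNIST
###  script below)
### ------------------------------------------------------------------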
import numpy as np
import theano
import theano.tensor as T

import nnet0207 as nnet


class MLP():

    def __init__( self, D, H, K ):

        # layers
        self.L1 = nnet.Layer( 'ReLu', D, H, 0.1 )
        self.L2 = nnet.Layer( 'softmax', H, K, 0.1 )

        # theano functions
        self.output = self._Tfunc_output()
        self.cost = self._Tfunc_cost()
        self.train = self._Tfunc_train()

    ### theano function for output computation
    #
    def _Tfunc_output( self ):
        X = T.dmatrix()  # N x D
        Y, Z = _T_output( self.L1, self.L2, X )
        return theano.function( [ X ], [ Y, Z ] )

    ### theano function for cost computation
    #
    def _Tfunc_cost( self ):
        Z = T.dmatrix()  # N x K
        t = T.dmatrix()  # N x K
        cost = _T_cost( Z, t )
        return theano.function( [ Z, t ], cost )

    ### theano function for gradient descent learning
    #
    def _Tfunc_train( self ):
        W1, dW1, b1, db1 = self.L1.W, self.L1.dW, self.L1.b, self.L1.db
        W2, dW2, b2, db2 = self.L2.W, self.L2.dW, self.L2.b, self.L2.db
        X = T.dmatrix( 'X' )  # N x D
        t = T.dmatrix( 't' )  # N x K
        eta = T.dscalar( 'eta' )
        mu = T.dscalar( 'mu' )
        Y2, Z2 = _T_output( self.L1, self.L2, X )
        cost = T.mean( _T_cost( Z2, t ) )
        gradW1, gradb1, gradW2, gradb2 = T.grad( cost, [ W1, b1, W2, b2 ] )
        dW1_new = -eta * gradW1 + mu * dW1
        db1_new = -eta * gradb1 + mu * db1
        dW2_new = -eta * gradW2 + mu * dW2
        db2_new = -eta * gradb2 + mu * db2
        W1_new = W1 + dW1_new
        b1_new = b1 + db1_new
        W2_new = W2 + dW2_new
        b2_new = b2 + db2_new
        updatesList = [
            ( W1, W1_new ), ( b1, b1_new ), ( dW1, dW1_new ), ( db1, db1_new ),
            ( W2, W2_new ), ( b2, b2_new ), ( dW2, dW2_new ), ( db2, db2_new ) ]
        return theano.function( [ X, t, eta, mu ], cost, updates = updatesList )


def _T_output( L1, L2, X ):
    Y1, Z1 = L1.output( X )
    Y2, Z2 = L2.output( Z1 )
    return Y2, Z2


def _T_cost( Z, t ):
    return T.nnet.categorical_crossentropy( Z, t )
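### ------------------------------------------------------------------
### mlp_3layer0207.py -- two fully-connected ReLu hidden layers + softmax
### (filename inferred from "import mlp_3layer0207" in the MNIST
###  script below)
### ------------------------------------------------------------------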
import numpy as np
import theano
import theano.tensor as T

import nnet0207 as nnet


class MLP():

    def __init__( self, D, H1, H2, K ):

        # layers
        self.L1 = nnet.Layer( 'ReLu', D, H1, 0.1 )
        self.L2 = nnet.Layer( 'ReLu', H1, H2, 0.1 )
        self.L3 = nnet.Layer( 'softmax', H2, K, 0.1 )

        # theano functions
        self.output = self._Tfunc_output()
        self.cost = self._Tfunc_cost()
        self.train = self._Tfunc_train()

    ### theano function for output computation
    #
    def _Tfunc_output( self ):
        X = T.dmatrix()  # N x D
        Y, Z = _T_output( self.L1, self.L2, self.L3, X )
        return theano.function( [ X ], [ Y, Z ] )

    ### theano function for cost computation
    #
    def _Tfunc_cost( self ):
        Z = T.dmatrix()  # N x K
        t = T.dmatrix()  # N x K
        cost = _T_cost( Z, t )
        return theano.function( [ Z, t ], cost )

    ### theano function for gradient descent learning
    #
    def _Tfunc_train( self ):
        W1, dW1, b1, db1 = self.L1.W, self.L1.dW, self.L1.b, self.L1.db
        W2, dW2, b2, db2 = self.L2.W, self.L2.dW, self.L2.b, self.L2.db
        W3, dW3, b3, db3 = self.L3.W, self.L3.dW, self.L3.b, self.L3.db
        X = T.dmatrix( 'X' )  # N x D
        t = T.dmatrix( 't' )  # N x K
        eta = T.dscalar( 'eta' )
        mu = T.dscalar( 'mu' )
        Y3, Z3 = _T_output( self.L1, self.L2, self.L3, X )
        cost = T.mean( _T_cost( Z3, t ) )
        grad = T.grad( cost, [ W1, b1, W2, b2, W3, b3 ] )
        gradW1, gradb1, gradW2, gradb2, gradW3, gradb3 = grad
        dW1_new = -eta * gradW1 + mu * dW1
        db1_new = -eta * gradb1 + mu * db1
        dW2_new = -eta * gradW2 + mu * dW2
        db2_new = -eta * gradb2 + mu * db2
        dW3_new = -eta * gradW3 + mu * dW3
        db3_new = -eta * gradb3 + mu * db3
        W1_new = W1 + dW1_new
        b1_new = b1 + db1_new
        W2_new = W2 + dW2_new
        b2_new = b2 + db2_new
        W3_new = W3 + dW3_new
        b3_new = b3 + db3_new
        updatesList = [
            ( W1, W1_new ), ( b1, b1_new ), ( dW1, dW1_new ), ( db1, db1_new ),
            ( W2, W2_new ), ( b2, b2_new ), ( dW2, dW2_new ), ( db2, db2_new ),
            ( W3, W3_new ), ( b3, b3_new ), ( dW3, dW3_new ), ( db3, db3_new ),
        ]
        return theano.function( [ X, t, eta, mu ], cost, updates = updatesList )


def _T_output( L1, L2, L3, X ):
    Y1, Z1 = L1.output( X )
    Y2, Z2 = L2.output( Z1 )
    Y3, Z3 = L3.output( Z2 )
    return Y3, Z3


def _T_cost( Z, t ):
    return T.nnet.categorical_crossentropy( Z, t )
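### ------------------------------------------------------------------
### MNIST training script for the convnets above (original filename
### not shown in this gist view)
### ------------------------------------------------------------------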
import numpy as np
import scipy as sp

import mnist0117 as mnist
import convnet_2layer0207 as convnet_2layer
import convnet_3layer0207 as convnet_3layer


def gendat( LT ):

    mn = mnist.MNIST( LT )
    label = mn.getLabel()
    N = label.shape[0]
    K = 10
    tmp = mn.getImage() / 255.0  # => in [0,1]
    X = tmp.reshape( ( tmp.shape[0], 1, tmp.shape[1], tmp.shape[2] ) )
    t = np.zeros( ( N, K ), dtype = bool )
    for ik in range( K ):
        t[label == ik, ik] = True

    return X, label, t


def errorrate( mlp, X, t, label ):

    Y, Z = mlp.output( X )
    mnLL = np.mean( mlp.cost( Z, t ) )
    er = np.mean( label != np.argmax( Z, axis = 1 ) )

    return mnLL, er


if __name__ == "__main__":

    np.random.seed( 0 )

    ##### setting the training data & the validation data
    #
    X, label, t = gendat( 'L' )
    XL, labelL, tL = X[:50000], label[:50000], t[:50000]
    XV, labelV, tV = X[50000:], label[50000:], t[50000:]
    NL, Xnch, Xrow, Xcol = XL.shape
    NV, Xnch, Xrow, Xcol = XV.shape
    K = t.shape[1]
    Xdim = ( Xrow, Xcol )

    ##### mini-batch indices for stochastic gradient descent
    #
    idx = np.random.permutation( NL )
    batchsize = 100
    nbatch = NL / batchsize
    assert( NL % batchsize == 0 )
    idxB = np.zeros( ( nbatch, NL ), dtype = bool )
    for ib in range( nbatch ):
        idxB[ib, idx.reshape( ( nbatch, batchsize ) )[ib, :]] = True

    ##### training
    #
    W1dim, W1nch, ds1 = ( 5, 5 ), 16, ( 4, 4 )
    W2dim, W2nch, ds2 = None, None, None
    #W2dim, W2nch, ds2 = ( 5, 5 ), 16, ( 4, 4 )
    eta, mu = 0.05, 0.8
    nepoch = 50

    if W2dim is None:
        mlp = convnet_2layer.MLP( Xdim, Xnch, W1dim, W1nch, ds1, K )
        print '### 2-layer convnet'
        print '# Xdim:', Xdim, ' Xnch:', Xnch, ' W1dim:', W1dim, ' W1nch:', W1nch, ' ds1:', ds1, ' H:', mlp.H
    else:
        mlp = convnet_3layer.MLP( Xdim, Xnch, W1dim, W1nch, ds1, W2dim, W2nch, ds2, K )
        print '### 3-layer convnet'
        print '# Xdim:', Xdim, ' Xnch:', Xnch, ' W1dim:', W1dim, ' W1nch:', W1nch, ' ds1:', ds1, ' H1:', mlp.H1
        print '# W2dim:', W2dim, ' W2nch:', W2nch, ' ds2:', ds2, ' H2:', mlp.H2

    print '### training: NL = ', NL, ' NV = ', NV, ' K = ', K, ' batchsize = ', batchsize

    for i in range( nepoch ):
        # printing error rates etc.
        if i % 10 == 0:
            mnLLL, erL = errorrate( mlp, XL, tL, labelL )
            mnLLV, erV = errorrate( mlp, XV, tV, labelV )
            print i, mnLLL, erL * 100, erV * 100
        # training (selecting each batch in random order)
        for ib in np.random.permutation( nbatch ):
            ii = idxB[ib, :]
            mlp.train( XL[ii], tL[ii], eta, mu )

    i = nepoch
    mnLLL, erL = errorrate( mlp, XL, tL, labelL )
    mnLLV, erV = errorrate( mlp, XV, tV, labelV )
    print i, mnLLL, erL * 100, erV * 100

    ##### setting the test data
    #
    XT, labelT, tT = gendat( 'T' )
    NT, Nstack, Xrow, Xcol = XT.shape
    print '# NT = ', NT
    mnLLT, erT = errorrate( mlp, XT, tT, labelT )
    print i, mnLLT, erL * 100, erV * 100, erT * 100
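### ------------------------------------------------------------------
### MNIST training script for the fully-connected MLPs above (original
### filename not shown in this gist view)
### ------------------------------------------------------------------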
import numpy as np
import scipy as sp

import mnist0117 as mnist


def gendat( LT ):

    mn = mnist.MNIST( LT )
    label = mn.getLabel()
    N = label.shape[0]
    K = 10
    X = mn.getImage().reshape( ( N, -1 ) ) / 255.0  # => in [0,1]
    t = np.zeros( ( N, K ), dtype = bool )
    for ik in range( K ):
        t[label == ik, ik] = True

    return X, label, t


def errorrate( mlp, X, t, label ):

    Y, Z = mlp.output( X )
    mnLL = np.mean( mlp.cost( Z, t ) )
    er = np.mean( label != np.argmax( Z, axis = 1 ) )

    return mnLL, er


if __name__ == "__main__":

    import mlp_2layer0207 as mlp_2layer
    import mlp_3layer0207 as mlp_3layer

    np.random.seed( 0 )

    ##### setting the training data & the validation data
    #
    X, label, t = gendat( 'L' )
    XL, labelL, tL = X[:50000], label[:50000], t[:50000]
    XV, labelV, tV = X[50000:], label[50000:], t[50000:]
    NL, D = XL.shape
    NV, D = XV.shape
    K = t.shape[1]

    ##### mini-batch indices for stochastic gradient descent
    #
    idx = np.random.permutation( NL )
    batchsize = 1000
    nbatch = NL / batchsize
    assert( NL % batchsize == 0 )
    idxB = np.zeros( ( nbatch, NL ), dtype = bool )
    for ib in range( nbatch ):
        idxB[ib, idx.reshape( ( nbatch, batchsize ) )[ib, :]] = True

    ##### training
    #
    #H1, H2 = 500, 0
    H1, H2 = 500, 1000
    #H1, H2 = 1000, 500
    eta = 0.5
    mu = 0.8
    nepoch = 20

    if H2 <= 0:
        mlp = mlp_2layer.MLP( D, H1, K )
        print '### 2-layer MLP: D = ', D, ' H = ', H1, ' K = ', K
    else:
        mlp = mlp_3layer.MLP( D, H1, H2, K )
        print '### 3-layer MLP: D = ', D, ' H1 = ', H1, ' H2 = ', H2, ' K = ', K

    print '### training: NL = ', NL, ' NV = ', NV, ' D = ', D, ' K = ', K, ' batchsize = ', batchsize

    for i in range( nepoch ):
        # printing error rates etc.
        if i % 10 == 0:
            mnLLL, erL = errorrate( mlp, XL, tL, labelL )
            mnLLV, erV = errorrate( mlp, XV, tV, labelV )
            print i, mnLLL, erL * 100, erV * 100
        # training (selecting each batch in random order)
        for ib in np.random.permutation( nbatch ):
            ii = idxB[ib, :]
            mlp.train( XL[ii], tL[ii], eta, mu )

    i = nepoch
    mnLLL, erL = errorrate( mlp, XL, tL, labelL )
    mnLLV, erV = errorrate( mlp, XV, tV, labelV )
    print i, mnLLL, erL * 100, erV * 100

    ##### setting the test data
    #
    XT, labelT, tT = gendat( 'T' )
    NT, D = XT.shape
    print '# NT = ', NT
    mnLLT, erT = errorrate( mlp, XT, tT, labelT )
    print i, mnLLT, erL * 100, erV * 100, erT * 100
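### ------------------------------------------------------------------
### nnet0207.py -- fully-connected layer, activation functions, and
### weight initialization
### (filename inferred from "import nnet0207 as nnet" above)
### ------------------------------------------------------------------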
import numpy as np
import theano
import theano.tensor as T


d_afunc = { 'linear':  'linear',
            'sigmoid': T.nnet.sigmoid,
            'softmax': T.nnet.softmax,
            'ReLu':    lambda Y: T.switch( Y > 0, Y, 0 ) }


class Layer():

    def __init__( self, afunc, Din, Nunit, Wini_range ):

        self.Din = Din
        self.Nunit = Nunit

        # theano shared variables for weights & biases
        self.W = theano.shared( random( ( Nunit, Din ), Wini_range ) )
        self.b = theano.shared( random( Nunit, Wini_range ) )
        self.dW = theano.shared( np.zeros( ( Nunit, Din ) ) )
        self.db = theano.shared( np.zeros( Nunit ) )

        # activation function of the layer
        self.afunc = d_afunc[afunc]

    def output( self, X ):

        Y = T.dot( X, self.W.T ) + self.b  # Ndat x Nunit
        if self.afunc == 'linear':
            Z = Y
        else:
            Z = self.afunc( Y )

        return Y, Z


### random numbers for weight initialization
#
def random( shape, r ):
    # [ -r/2, r/2 )
    return r * ( np.random.random_sample( shape ) - 0.5 )


### Rectified Linear activation function
#
def relu( Y ):
    return T.switch( Y > 0, Y, 0 )