takatakamanbou / 01ed577cd35a9890dbe9 (created July 12, 2015)
import numpy as np
import scipy as sp
import datetime

import cifar10
import convnet150712 as convnet


# ZCA whitening
def ZCAtrans( Xraw, Uzca = None ):

    Xraw2 = Xraw.reshape( ( Xraw.shape[0], -1 ) )

    if Uzca is None:
        # Xraw is assumed to be zero-mean
        C = np.dot( Xraw2.T, Xraw2 ) / Xraw2.shape[0]
        U, eva, V = np.linalg.svd( C )   # U[:, i] is the i-th eigenvector
        sqeva = np.sqrt( eva + 0.001 )   # regularized square roots of the eigenvalues
        Uzca = np.dot( U / sqeva[np.newaxis, :], U.T )   # U diag( 1/sqeva ) U^T
        X = np.dot( Xraw2, Uzca ).reshape( Xraw.shape )
        return X, Uzca
    else:
        X = np.dot( Xraw2, Uzca ).reshape( Xraw.shape )
        return X
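
# --- Added sketch (not part of the original gist): a small self-check for ZCAtrans.
# After whitening with the fitted Uzca, the feature covariance should be close to
# the identity, up to the 0.001 regularizer added to the eigenvalues above.
# The function is never called; run it by hand if you want the check.
def _check_ZCAtrans( N = 1000, shape = ( 3, 8, 8 ) ):
    Xraw = np.random.standard_normal( ( N, ) + shape )
    Xraw -= np.mean( Xraw, axis = 0 )   # ZCAtrans assumes zero-mean input
    Xw, Uzca = ZCAtrans( Xraw )
    Xw2 = Xw.reshape( ( N, -1 ) )
    C = np.dot( Xw2.T, Xw2 ) / N
    print '# max |C - I| =', np.max( np.abs( C - np.eye( C.shape[0] ) ) )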
# mini batch indices for stochastic gradient descent
def makebatchindex( N, batchsize ):

    idx = np.random.permutation( N )

    nbatch = int( np.ceil( float( N ) / batchsize ) )
    idxB = np.zeros( ( nbatch, N ), dtype = bool )   # one boolean mask per mini batch
    for ib in range( nbatch - 1 ):
        idxB[ib, idx[ib*batchsize:(ib+1)*batchsize]] = True
    ib = nbatch - 1
    idxB[ib, idx[ib*batchsize:]] = True   # the last batch takes the remainder

    return idxB
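
# --- Added usage note (not part of the original gist): makebatchindex( 10, 4 )
# returns a 3 x 10 boolean matrix in which each row selects one mini batch.
# The first two rows pick 4 samples each and the last row picks the remaining 2,
# so every sample is used exactly once per epoch, in the random order fixed by
# the permutation.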
# computing the recognition rate
def recograte( cnn, X, label, batchsize ):

    N = X.shape[0]
    nbatch = int( np.ceil( float( N ) / batchsize ) )

    LL = 0.0   # accumulated cross-entropy
    cnt = 0    # number of correctly classified samples
    for ib in range( nbatch - 1 ):
        ii = np.arange( ib*batchsize, (ib+1)*batchsize )
        Y, Z = cnn.output( X[ii] )
        LL += np.sum( cnn.cost( Z, label[ii] ) )
        cnt += np.sum( label[ii] == np.argmax( Z, axis = 1 ) )
    ib = nbatch - 1
    ii = np.arange( ib*batchsize, N )
    Y, Z = cnn.output( X[ii] )
    LL += np.sum( cnn.cost( Z, label[ii] ) )
    cnt += np.sum( label[ii] == np.argmax( Z, axis = 1 ) )

    return LL / N, float( cnt ) / N
# Conv-Pool-ReLu-Softmax
def CPRS( Xnch, Xrow, Xcol, K ):

    Xdim = ( Xnch, Xrow, Xcol )

    W1dim = ( 16, 5, 5 )
    #W1dim = ( 64, 5, 5 )
    #W1dim = ( 256, 5, 5 )
    ds1 = ( 4, 4 )
    #ds1 = ( 2, 2 )
    #st1 = None
    #st1 = ( 4, 4 )
    st1 = ( 2, 2 )

    L1conv = convnet.ConvLayer( Xdim, W1dim, 'linear', withBias = False )
    L1pool = convnet.PoolLayer( L1conv.Yshape, ds1, 'ReLu', withBias = True, st = st1 )
    H1 = L1pool.Dout
    H2 = 1000
    L2 = convnet.FullLayer( H1, H2, 'ReLu', withBias = True, T4toMat = True )
    L3 = convnet.FullLayer( H2, K, 'softmax', withBias = True, T4toMat = False )
    cnn = convnet.CNN( [ L1conv, L1pool, L2, L3 ] )

    print '### Conv-Pool-ReLu-Softmax Xdim:', Xdim
    print '# W1dim:', W1dim, ' ds1:', ds1, ' st1:', st1, ' H1:', H1
    print '# H2:', H2

    return cnn
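
# --- Added note (not part of the original gist): with CIFAR-10 inputs of shape
# ( 3, 32, 32 ) and W1dim = ( 16, 5, 5 ), the valid convolution produces a
# ( 16, 28, 28 ) feature map, which the pooling layer (ds1, st1 as set above)
# reduces before the H1 -> H2 ReLu layer and the final K-way softmax;
# H1 = L1pool.Dout is the flattened size of the pooled output.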
# Conv-Pool-ReLu-ReLu-Softmax
def CPRRS( Xnch, Xrow, Xcol, K ):

    Xdim = ( Xnch, Xrow, Xcol )

    W1dim = ( 16, 5, 5 )
    #W1dim = ( 64, 5, 5 )
    ds1 = ( 4, 4 )
    #ds1 = ( 2, 2 )
    st1 = None
    #st1 = ( 4, 4 )
    #st1 = ( 2, 2 )

    L1conv = convnet.ConvLayer( Xdim, W1dim, 'linear', withBias = False )
    L1pool = convnet.PoolLayer( L1conv.Yshape, ds1, 'ReLu', withBias = True, st = st1 )
    H1 = L1pool.Dout
    H2 = 1000
    H3 = 1000
    L2 = convnet.FullLayer( H1, H2, 'ReLu', withBias = True, T4toMat = True )
    L3 = convnet.FullLayer( H2, H3, 'ReLu', withBias = True, T4toMat = False )
    L4 = convnet.FullLayer( H3, K, 'softmax', withBias = True, T4toMat = False )
    cnn = convnet.CNN( [ L1conv, L1pool, L2, L3, L4 ] )

    print '### Conv-Pool-ReLu-ReLu-Softmax Xdim:', Xdim
    print '# W1dim:', W1dim, ' ds1:', ds1, ' st1:', st1, ' H1:', H1
    print '# H2:', H2, ' H3:', H3

    return cnn
if __name__ == "__main__":

    idstr = datetime.datetime.now().strftime( '%Y%m%d-%H%M%S' )
    print '### ID: ', idstr

    dirCIFAR10 = '../140823-pylearn2/data/cifar10/cifar-10-batches-py'
    cifar = cifar10.CIFAR10( dirCIFAR10 )
    ZCAwhitening = True

    ##### setting the training data & the validation data
    #
    Xraw, label, t = cifar.loadData( 'L' )
    Xraw /= 255
    xm = np.mean( Xraw, axis = 0 )
    Xraw -= xm
    if ZCAwhitening:
        X, Uzca = ZCAtrans( Xraw, Uzca = None )
    else:
        X = Xraw
    X = np.asarray( X, dtype = np.float32 )
    label = np.asarray( label, dtype = np.int32 )

    idxL, idxV = cifar.genIndexLV( label )
    XL, labelL = X[idxL], label[idxL]
    XV, labelV = X[idxV], label[idxV]
    NL, Xnch, Xrow, Xcol = XL.shape
    NV, Xnch, Xrow, Xcol = XV.shape
    K = cifar.nclass
    Xdim = ( Xrow, Xcol )

    np.random.seed( 0 )
    batchsize = 100
    idxB = makebatchindex( NL, batchsize )
    nbatch = idxB.shape[0]

    ##### training
    #
    cnn = CPRS( Xnch, Xrow, Xcol, K )
    #cnn = CPRRS( Xnch, Xrow, Xcol, K )
    eta, mu, lam = 0.01, 0.9, 0.0001
    nepoch = 50

    print '# eta = ', eta, ' mu = ', mu, ' lam = ', lam
    print '# ZCAwhitening = ', ZCAwhitening
    print '### training: NL = ', NL, ' NV = ', NV, ' K = ', K, ' batchsize = ', batchsize

    for i in range( nepoch ):

        # printing error rates etc.
        if ( i <= 5 ) or ( i % 10 == 0 ):
            mnLLL, rrL = recograte( cnn, XL, labelL, batchsize )
            mnLLV, rrV = recograte( cnn, XV, labelV, batchsize )
            print '%d | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, rrL * 100, mnLLV, rrV * 100 )

        # training (selecting each batch in random order)
        for ib in np.random.permutation( nbatch ):
            ii = idxB[ib, :]
            cnn.train( XL[ii], labelL[ii], eta, mu, lam )

    i = nepoch
    mnLLL, rrL = recograte( cnn, XL, labelL, batchsize )
    mnLLV, rrV = recograte( cnn, XV, labelV, batchsize )
    print '%d | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, rrL * 100, mnLLV, rrV * 100 )

    ##### setting the test data
    #
    XTraw, labelT, tT = cifar.loadData( 'T' )
    XTraw /= 255
    XTraw -= xm
    if ZCAwhitening:
        XT = ZCAtrans( XTraw, Uzca = Uzca )
    else:
        XT = XTraw
    XT = np.asarray( XT, dtype = np.float32 )
    labelT = np.asarray( labelT, dtype = np.int32 )
    NT, Nstack, Xrow, Xcol = XT.shape
    print '# NT = ', NT

    mnLLT, rrT = recograte( cnn, XT, labelT, batchsize )
    print '%d | %.4f %.2f | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, rrL * 100, mnLLV, rrV * 100, mnLLT, rrT * 100 )
########## convnet150712 (imported above as convnet) ##########

import numpy as np
import theano
import theano.tensor as T
import theano.tensor.signal.downsample as Tsd

import nnet150712 as nnet
########## Convolution Layer ##########
class ConvLayer( object ):

    def __init__( self, Xdim, Wdim, afunc, withBias, Wini = 0.01, floatX = theano.config.floatX ):

        # dimension of the input
        Xnch, Xrow, Xcol = Xdim
        self.Xshape = Xdim

        # dimension of the convolution filters
        Wnch, Wrow, Wcol = Wdim
        self.Wshape = ( Wnch, Xnch, Wrow, Wcol )

        # dimension of the output
        Yrow, Ycol = Xrow - Wrow + 1, Xcol - Wcol + 1
        self.Yshape = ( Wnch, Yrow, Ycol )
        self.Dout = Wnch * Yrow * Ycol

        # activation function of the layer
        self.afunc = nnet.d_afunc[afunc]
        self.withBias = withBias

        # theano shared variables
        self.W = theano.shared( np.array( nnet.randomN( self.Wshape, Wini ), dtype = floatX ) )
        self.dW = theano.shared( np.zeros( self.Wshape, dtype = floatX ) )
        if withBias:
            self.b = theano.shared( np.zeros( Wnch, dtype = floatX ) )
            self.db = theano.shared( np.zeros( Wnch, dtype = floatX ) )

    def output( self, X ):

        # X: Ndat x Xshape, Y: Ndat x Yshape
        Xs = ( None, self.Xshape[0], self.Xshape[1], self.Xshape[2] )
        Ws = self.Wshape
        Y = T.nnet.conv.conv2d( X, self.W, image_shape = Xs, filter_shape = Ws )
        if self.withBias:
            b = self.b.dimshuffle( 'x', 0, 'x', 'x' )   # 1 x nch x 1 x 1
            Y += b
        Z = self.afunc( Y )

        return Y, Z   # Ndat x Yshape
########## Pooling Layer ##########
class PoolLayer( object ):

    def __init__( self, Xdim, ds, afunc, withBias, st = None, floatX = theano.config.floatX ):

        # dimension of the input
        Xnch, Xrow, Xcol = Xdim
        self.Xshape = Xdim

        # parameters of the pooling layer
        self.ds = ds
        self.st = st
        self.ignore_border = False
        rv = Tsd.DownsampleFactorMax.out_shape( self.Xshape, ds, ignore_border = self.ignore_border, st = st )
        #self.Yshape = ( Xnch, rv[1], rv[2] )
        self.Yshape = tuple( rv )
        self.Dout = np.prod( self.Yshape )

        # activation function of the layer
        self.afunc = nnet.d_afunc[afunc]
        self.withBias = withBias

        # theano shared variables
        if withBias:
            self.b = theano.shared( np.zeros( Xnch, dtype = floatX ) )
            self.db = theano.shared( np.zeros( Xnch, dtype = floatX ) )

    def output( self, X ):

        # X: Ndat x Xshape
        Y = Tsd.max_pool_2d( X, self.ds, ignore_border = self.ignore_border, st = self.st )   # Ndat x Yshape
        if self.withBias:
            b = self.b.dimshuffle( 'x', 0, 'x', 'x' )   # 1 x nch x 1 x 1
            Y += b
        Z = self.afunc( Y )

        return Y, Z
########## Full-Connection Layer ##########
class FullLayer( nnet.Layer ):

    def __init__( self, Din, Nunit, afunc, withBias = True, Wini = 0.01, floatX = theano.config.floatX, T4toMat = False ):

        super( FullLayer, self ).__init__( Din, Nunit, afunc, withBias, Wini, floatX )
        self.T4toMat = T4toMat

    def super_output( self, X ):

        return super( FullLayer, self ).output( X )

    def output( self, X ):

        if self.T4toMat:
            return self.super_output( X.reshape( ( X.shape[0], -1 ) ) )
        else:
            return self.super_output( X )
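
# --- Added note (not part of the original gist): T4toMat = True makes a FullLayer
# accept the 4-d ( Ndat, nch, row, col ) output of a ConvLayer / PoolLayer and
# flatten it to ( Ndat, Dout ) before the usual dense computation; layers that
# already receive a 2-d input keep T4toMat = False.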
########## Convolutional Neural Net ##########
class CNN( object ):

    def __init__( self, Layers ):

        # layers - list of Layer instances
        self.Layers = Layers

        # theano functions
        self.output = self._Tfunc_output()
        self.cost = self._Tfunc_cost()
        self.train = self._Tfunc_train()

    ### theano function for output computation
    #
    def _Tfunc_output( self ):

        X = T.tensor4()   # Ndat x Xnch x Xrow x Xcol
        Y, Z = nnet._T_output( self.Layers, X )

        return theano.function( [ X ], [ Y, Z ] )

    ### theano function for cost computation
    #
    def _Tfunc_cost( self ):

        Z = T.matrix()      # N x K
        lab = T.ivector()   # N-dim

        cost = nnet._T_cost( Z, lab )

        return theano.function( [ Z, lab ], cost )

    ### theano function for gradient descent learning
    #
    def _Tfunc_train( self ):

        X = T.tensor4( 'X' )
        lab = T.ivector( 'lab' )
        eta = T.scalar( 'eta' )
        mu = T.scalar( 'mu' )
        lam = T.scalar( 'lambda' )

        Y, Z = nnet._T_output( self.Layers, X )
        cost = T.mean( nnet._T_cost( Z, lab ) )

        updatesList = []
        for il, layer in enumerate( self.Layers ):

            # PoolLayer doesn't have W & dW
            if not isinstance( layer, PoolLayer ):
                gradW = T.grad( cost, layer.W )
                #dWnew = -eta * gradW + mu * layer.dW
                dWnew = -eta * ( gradW + lam * layer.W ) + mu * layer.dW
                Wnew = layer.W + dWnew
                updatesList.append( ( layer.W, Wnew ) )
                updatesList.append( ( layer.dW, dWnew ) )

            if layer.withBias:
                gradb = T.grad( cost, layer.b )
                # no weight decay for bias
                dbnew = -eta * gradb + mu * layer.db
                bnew = layer.b + dbnew
                updatesList.append( ( layer.b, bnew ) )
                updatesList.append( ( layer.db, dbnew ) )

        return theano.function( [ X, lab, eta, mu, lam ], cost, updates = updatesList )
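
# --- Added note (not part of the original gist): the update built above is momentum
# SGD with weight decay applied to the weights only,
#     dW_new = -eta * ( dcost/dW + lam * W ) + mu * dW_old,   W_new = W + dW_new
#     db_new = -eta *   dcost/db             + mu * db_old,   b_new = b + db_new
# PoolLayer instances are skipped for the W update because they carry no weights,
# but their bias (when withBias is set) is trained like any other.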
########## nnet150712 (imported above as nnet) ##########

import numpy as np
import theano
import theano.tensor as T
# activation functions
d_afunc = { 'linear':  lambda Y: Y,
            'sigmoid': T.nnet.sigmoid,
            'softmax': T.nnet.softmax,
            'ReLu':    lambda Y: T.switch( Y > 0, Y, 0 ) }

### uniform random numbers for weight initialization
#
def randomU( shape, a ):
    # uniform on [ -a, a )
    return 2 * a * ( np.random.random_sample( shape ) - 0.5 )

### Gaussian random numbers for weight initialization
#
def randomN( shape, sig ):
    # N( 0, sig^2 ), i.e. standard deviation sig
    return sig * np.random.standard_normal( shape )
########## Layer ##########
class Layer( object ):

    def __init__( self, Din, Nunit, afunc, withBias = True, Wini = 0.01, floatX = theano.config.floatX ):

        self.Din = Din
        self.Nunit = Nunit
        self.afunc = d_afunc[afunc]
        self.withBias = withBias

        # theano shared variables for weights & biases
        self.W = theano.shared( np.array( randomN( ( Nunit, Din ), Wini ), dtype = floatX ) )
        self.dW = theano.shared( np.zeros( ( Nunit, Din ), dtype = floatX ) )
        if withBias:
            self.b = theano.shared( np.zeros( Nunit, dtype = floatX ) )
            self.db = theano.shared( np.zeros( Nunit, dtype = floatX ) )

    def output( self, X ):

        if self.withBias:
            Y = T.dot( X, self.W.T ) + self.b   # Ndat x Nunit
        else:
            Y = T.dot( X, self.W.T )
        Z = self.afunc( Y )

        return Y, Z
########## MLP ##########
class MLP( object ):

    def __init__( self, Layers ):

        # layers - list of Layer instances
        self.Layers = Layers

        # theano functions
        self.output = self._Tfunc_output()
        self.cost = self._Tfunc_cost()
        self.train = self._Tfunc_train()

    ### theano function for output computation
    #
    def _Tfunc_output( self ):

        X = T.matrix()   # N x D
        Y, Z = _T_output( self.Layers, X )

        return theano.function( [ X ], [ Y, Z ] )

    ### theano function for cost computation
    #
    def _Tfunc_cost( self ):

        Z = T.matrix()      # N x K
        lab = T.ivector()   # N-dim

        cost = _T_cost( Z, lab )

        return theano.function( [ Z, lab ], cost )

    ### theano function for gradient descent learning
    #
    def _Tfunc_train( self ):

        X = T.matrix( 'X' )        # N x D
        lab = T.ivector( 'lab' )   # N-dim
        eta = T.scalar( 'eta' )
        mu = T.scalar( 'mu' )
        lam = T.scalar( 'lambda' )

        Y, Z = _T_output( self.Layers, X )
        cost = T.mean( _T_cost( Z, lab ) )

        updatesList = []
        for layer in self.Layers:

            gradW = T.grad( cost, layer.W )
            #dWnew = -eta * gradW + mu * layer.dW
            dWnew = -eta * ( gradW + lam * layer.W ) + mu * layer.dW
            Wnew = layer.W + dWnew
            updatesList.append( ( layer.W, Wnew ) )
            updatesList.append( ( layer.dW, dWnew ) )

            if layer.withBias:
                gradb = T.grad( cost, layer.b )
                # no weight decay for bias
                dbnew = -eta * gradb + mu * layer.db
                bnew = layer.b + dbnew
                updatesList.append( ( layer.b, bnew ) )
                updatesList.append( ( layer.db, dbnew ) )

        return theano.function( [ X, lab, eta, mu, lam ], cost, updates = updatesList )
def _T_output( Layers, X ):

    Zprev = X
    for layer in Layers:
        Y, Z = layer.output( Zprev )
        Zprev = Z

    return Y, Z


def _T_cost( Z, lab ):

    return T.nnet.categorical_crossentropy( Z, lab )
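
# --- Added note (not part of the original gist): with Z the softmax output
# ( N x K ) and lab the integer labels, categorical_crossentropy returns the
# per-sample negative log-likelihood, cost[n] = -log Z[n, lab[n]]; MLP and CNN
# take the mean over the mini batch before differentiating it for the updates.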