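########## CIFAR-10 CNN training script (Theano) ##########
# This gist appears to bundle three files: this main script plus the two
# modules it imports, convnet150712.py and nnet150712.py (reproduced below
# under matching banner comments).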
import numpy as np
import scipy as sp
import datetime
import cifar10
import convnet150712 as convnet
# ZCA whitening
def ZCAtrans( Xraw, Uzca = None ):
    Xraw2 = Xraw.reshape( ( Xraw.shape[0], -1 ) )
    if Uzca is None:
        # Xraw is assumed to be zero-mean
        C = np.dot( Xraw2.T, Xraw2 ) / Xraw2.shape[0]
        U, eva, V = np.linalg.svd( C ) # U[:, i] is the i-th eigenvector
        sqeva = np.sqrt( eva + 0.001 )
        Uzca = np.dot( U / sqeva[np.newaxis, :], U.T )
        X = np.dot( Xraw2, Uzca ).reshape( Xraw.shape )
        return X, Uzca
    else:
        X = np.dot( Xraw2, Uzca ).reshape( Xraw.shape )
        return X
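# Usage sketch (mirrors the __main__ block below; Xtrain / Xtest are
# placeholder names): fit the whitening matrix on the zero-mean training
# data once, then reuse it on the test data.
#   Xw, Uzca = ZCAtrans( Xtrain, Uzca = None )   # fit and transform
#   XTw = ZCAtrans( Xtest, Uzca = Uzca )         # transform only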
# mini-batch indices for stochastic gradient descent
def makebatchindex( N, batchsize ):
    idx = np.random.permutation( N )
    nbatch = int( np.ceil( float( N ) / batchsize ) )
    idxB = np.zeros( ( nbatch, N ), dtype = bool )
    for ib in range( nbatch - 1 ):
        idxB[ib, idx[ib*batchsize:(ib+1)*batchsize]] = True
    ib = nbatch - 1
    idxB[ib, idx[ib*batchsize:]] = True
    return idxB
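# Usage sketch (as in __main__ below): each row of idxB is a boolean mask
# over the N samples, so iterating over the rows in random order yields the
# SGD mini-batches:
#   idxB = makebatchindex( NL, batchsize )
#   for ib in np.random.permutation( idxB.shape[0] ):
#       cnn.train( XL[idxB[ib, :]], labelL[idxB[ib, :]], eta, mu, lam )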
# computing the recognition rate
def recograte( cnn, X, label, batchsize ):
    N = X.shape[0]
    nbatch = int( np.ceil( float( N ) / batchsize ) )
    LL = 0.0
    cnt = 0
    for ib in range( nbatch - 1 ):
        ii = np.arange( ib*batchsize, (ib+1)*batchsize )
        Y, Z = cnn.output( X[ii] )
        LL += np.sum( cnn.cost( Z, label[ii] ) )
        cnt += np.sum( label[ii] == np.argmax( Z, axis = 1 ) )
    ib = nbatch - 1
    ii = np.arange( ib*batchsize, N )
    Y, Z = cnn.output( X[ii] )
    LL += np.sum( cnn.cost( Z, label[ii] ) )
    cnt += np.sum( label[ii] == np.argmax( Z, axis = 1 ) )
    return LL / N, float( cnt ) / N
# Conv-Pool-ReLu-Softmax
def CPRS( Xnch, Xrow, Xcol, K ):
    Xdim = ( Xnch, Xrow, Xcol )
    W1dim = ( 16, 5, 5 )
    #W1dim = ( 64, 5, 5 )
    #W1dim = ( 256, 5, 5 )
    ds1 = ( 4, 4 )
    #ds1 = ( 2, 2 )
    #st1 = None
    #st1 = ( 4, 4 )
    st1 = ( 2, 2 )
    L1conv = convnet.ConvLayer( Xdim, W1dim, 'linear', withBias = False )
    L1pool = convnet.PoolLayer( L1conv.Yshape, ds1, 'ReLu', withBias = True, st = st1 )
    H1 = L1pool.Dout
    H2 = 1000
    L2 = convnet.FullLayer( H1, H2, 'ReLu', withBias = True, T4toMat = True )
    L3 = convnet.FullLayer( H2, K, 'softmax', withBias = True, T4toMat = False )
    cnn = convnet.CNN( [ L1conv, L1pool, L2, L3 ] )
    print '### Conv-Pool-ReLu-Softmax Xdim:', Xdim
    print '# W1dim:', W1dim, ' ds1:', ds1, ' st1:', st1, ' H1:', H1
    print '# H2:', H2
    return cnn
# Conv-Pool-ReLu-ReLu-Softmax
def CPRRS( Xnch, Xrow, Xcol, K ):
    Xdim = ( Xnch, Xrow, Xcol )
    W1dim = ( 16, 5, 5 )
    #W1dim = ( 64, 5, 5 )
    ds1 = ( 4, 4 )
    #ds1 = ( 2, 2 )
    st1 = None
    #st1 = ( 4, 4 )
    #st1 = ( 2, 2 )
    L1conv = convnet.ConvLayer( Xdim, W1dim, 'linear', withBias = False )
    L1pool = convnet.PoolLayer( L1conv.Yshape, ds1, 'ReLu', withBias = True, st = st1 )
    H1 = L1pool.Dout
    H2 = 1000
    H3 = 1000
    L2 = convnet.FullLayer( H1, H2, 'ReLu', withBias = True, T4toMat = True )
    L3 = convnet.FullLayer( H2, H3, 'ReLu', withBias = True, T4toMat = False )
    L4 = convnet.FullLayer( H3, K, 'softmax', withBias = True, T4toMat = False )
    cnn = convnet.CNN( [ L1conv, L1pool, L2, L3, L4 ] )
    print '### Conv-Pool-ReLu-ReLu-Softmax Xdim:', Xdim
    print '# W1dim:', W1dim, ' ds1:', ds1, ' st1:', st1, ' H1:', H1
    print '# H2:', H2, ' H3:', H3
    return cnn
if __name__ == "__main__":
    idstr = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
    print '### ID: ', idstr
    dirCIFAR10 = '../140823-pylearn2/data/cifar10/cifar-10-batches-py'
    cifar = cifar10.CIFAR10( dirCIFAR10 )
    ZCAwhitening = True
    ##### setting the training data & the validation data
    #
    Xraw, label, t = cifar.loadData( 'L' )
    Xraw /= 255
    xm = np.mean( Xraw, axis = 0 )
    Xraw -= xm
    if ZCAwhitening:
        X, Uzca = ZCAtrans( Xraw, Uzca = None )
    else:
        X = Xraw
    X = np.asarray( X, dtype = np.float32 )
    label = np.asarray( label, dtype = np.int32 )
    idxL, idxV = cifar.genIndexLV( label )
    XL, labelL = X[idxL], label[idxL]
    XV, labelV = X[idxV], label[idxV]
    NL, Xnch, Xrow, Xcol = XL.shape
    NV, Xnch, Xrow, Xcol = XV.shape
    K = cifar.nclass
    Xdim = ( Xrow, Xcol )
    np.random.seed( 0 )
    batchsize = 100
    idxB = makebatchindex( NL, batchsize )
    nbatch = idxB.shape[0]
    ##### training
    #
    cnn = CPRS( Xnch, Xrow, Xcol, K )
    #cnn = CPRRS( Xnch, Xrow, Xcol, K )
    eta, mu, lam = 0.01, 0.9, 0.0001
    nepoch = 50
    print '# eta = ', eta, ' mu = ', mu, ' lam = ', lam
    print '# ZCAwhitening = ', ZCAwhitening
    print '### training: NL = ', NL, ' NV = ', NV, ' K = ', K, ' batchsize = ', batchsize
    for i in range( nepoch ):
        # printing error rates etc.
        if ( i <= 5 ) or ( i % 10 == 0 ):
            mnLLL, rrL = recograte( cnn, XL, labelL, batchsize )
            mnLLV, rrV = recograte( cnn, XV, labelV, batchsize )
            print '%d | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, rrL * 100, mnLLV, rrV * 100 )
        # training (selecting each batch in random order)
        for ib in np.random.permutation( nbatch ):
            ii = idxB[ib, :]
            cnn.train( XL[ii], labelL[ii], eta, mu, lam )
    i = nepoch
    mnLLL, rrL = recograte( cnn, XL, labelL, batchsize )
    mnLLV, rrV = recograte( cnn, XV, labelV, batchsize )
    print '%d | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, rrL * 100, mnLLV, rrV * 100 )
    ##### setting the test data
    #
    XTraw, labelT, tT = cifar.loadData( 'T' )
    XTraw /= 255
    XTraw -= xm
    if ZCAwhitening:
        XT = ZCAtrans( XTraw, Uzca = Uzca )
    else:
        XT = XTraw
    XT = np.asarray( XT, dtype = np.float32 )
    labelT = np.asarray( labelT, dtype = np.int32 )
    NT, Nstack, Xrow, Xcol = XT.shape
    print '# NT = ', NT
    mnLLT, rrT = recograte( cnn, XT, labelT, batchsize )
    print '%d | %.4f %.2f | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, rrL * 100, mnLLV, rrV * 100, mnLLT, rrT * 100 )
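########## convnet150712.py (presumably the module imported above as convnet) ##########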
import numpy as np
import theano
import theano.tensor as T
import theano.tensor.signal.downsample as Tsd
import nnet150712 as nnet
########## Convolution Layer ##########
class ConvLayer( object ):
    def __init__( self, Xdim, Wdim, afunc, withBias, Wini = 0.01, floatX = theano.config.floatX ):
        # dimension of the input
        Xnch, Xrow, Xcol = Xdim
        self.Xshape = Xdim
        # dimension of the convolution filters
        Wnch, Wrow, Wcol = Wdim
        self.Wshape = ( Wnch, Xnch, Wrow, Wcol )
        # dimension of the output
        Yrow, Ycol = Xrow - Wrow + 1, Xcol - Wcol + 1
        self.Yshape = ( Wnch, Yrow, Ycol )
        self.Dout = Wnch * Yrow * Ycol
        # activation function of the layer
        self.afunc = nnet.d_afunc[afunc]
        self.withBias = withBias
        # theano shared variables
        self.W = theano.shared( np.array( nnet.randomN( self.Wshape, Wini ), dtype = floatX ) )
        self.dW = theano.shared( np.zeros( self.Wshape, dtype = floatX ) )
        if withBias:
            self.b = theano.shared( np.zeros( Wnch, dtype = floatX ) )
            self.db = theano.shared( np.zeros( Wnch, dtype = floatX ) )
    def output( self, X ):
        # X: Ndat x Xshape, Y: Ndat x Yshape
        Xs = ( None, self.Xshape[0], self.Xshape[1], self.Xshape[2] )
        Ws = self.Wshape
        Y = T.nnet.conv.conv2d( X, self.W, image_shape = Xs, filter_shape = Ws )
        if self.withBias:
            b = self.b.dimshuffle( 'x', 0, 'x', 'x' ) # 1 x nch x 1 x 1
            Y += b
        Z = self.afunc( Y )
        return Y, Z # Ndat x Yshape
########## Pooling Layer ##########
class PoolLayer( object ):
    def __init__( self, Xdim, ds, afunc, withBias, st = None, floatX = theano.config.floatX ):
        # dimension of the input
        Xnch, Xrow, Xcol = Xdim
        self.Xshape = Xdim
        # parameters of the pooling layer
        self.ds = ds
        self.st = st
        self.ignore_border = False
        rv = Tsd.DownsampleFactorMax.out_shape( self.Xshape, ds, ignore_border = self.ignore_border, st = st )
        #self.Yshape = ( Xnch, rv[1], rv[2] )
        self.Yshape = tuple( rv )
        self.Dout = np.prod( self.Yshape )
        # activation function of the layer
        self.afunc = nnet.d_afunc[afunc]
        self.withBias = withBias
        # theano shared variables
        if withBias:
            self.b = theano.shared( np.zeros( Xnch, dtype = floatX ) )
            self.db = theano.shared( np.zeros( Xnch, dtype = floatX ) )
    def output( self, X ):
        # X: Ndat x Xshape
        Y = Tsd.max_pool_2d( X, self.ds, ignore_border = self.ignore_border, st = self.st ) # Ndat x Yshape
        if self.withBias:
            b = self.b.dimshuffle( 'x', 0, 'x', 'x' ) # 1 x nch x 1 x 1
            Y += b
        Z = self.afunc( Y )
        return Y, Z
########## Full-Connection Layer ##########
class FullLayer( nnet.Layer ):
    def __init__( self, Din, Nunit, afunc, withBias = True, Wini = 0.01, floatX = theano.config.floatX, T4toMat = False ):
        super( FullLayer, self ).__init__( Din, Nunit, afunc, withBias, Wini, floatX )
        self.T4toMat = T4toMat
    def super_output( self, X ):
        return super( FullLayer, self ).output( X )
    def output( self, X ):
        if self.T4toMat:
            return self.super_output( X.reshape( ( X.shape[0], -1 ) ) )
        else:
            return self.super_output( X )
########## Convolutional Neural Net ##########
class CNN( object ):
    def __init__( self, Layers ):
        # layers - list of Layer instances
        self.Layers = Layers
        # theano functions
        self.output = self._Tfunc_output()
        self.cost = self._Tfunc_cost()
        self.train = self._Tfunc_train()
    ### theano function for output computation
    #
    def _Tfunc_output( self ):
        X = T.tensor4() # Ndat x Xnch x Xrow x Xcol
        Y, Z = nnet._T_output( self.Layers, X )
        return theano.function( [ X ], [ Y, Z ] )
    ### theano function for cost computation
    #
    def _Tfunc_cost( self ):
        Z = T.matrix() # N x K
        lab = T.ivector() # N-dim
        cost = nnet._T_cost( Z, lab )
        return theano.function( [ Z, lab ], cost )
    ### theano function for gradient descent learning
    #
    def _Tfunc_train( self ):
        X = T.tensor4( 'X' )
        lab = T.ivector( 'lab' )
        eta = T.scalar( 'eta' )
        mu = T.scalar( 'mu' )
        lam = T.scalar( 'lambda' )
        Y, Z = nnet._T_output( self.Layers, X )
        cost = T.mean( nnet._T_cost( Z, lab ) )
        updatesList = []
        for il, layer in enumerate( self.Layers ):
            # PoolLayer doesn't have W & dW
            if not isinstance( layer, PoolLayer ):
                gradW = T.grad( cost, layer.W )
                #dWnew = -eta * gradW + mu * layer.dW
                dWnew = -eta * ( gradW + lam * layer.W ) + mu * layer.dW
                Wnew = layer.W + dWnew
                updatesList.append( ( layer.W, Wnew ) )
                updatesList.append( ( layer.dW, dWnew ) )
            if layer.withBias:
                gradb = T.grad( cost, layer.b )
                # no weight decay for bias
                dbnew = -eta * gradb + mu * layer.db
                bnew = layer.b + dbnew
                updatesList.append( ( layer.b, bnew ) )
                updatesList.append( ( layer.db, dbnew ) )
        return theano.function( [ X, lab, eta, mu, lam ], cost, updates = updatesList )
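########## nnet150712.py (presumably the module imported above as nnet) ##########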
import numpy as np
import theano
import theano.tensor as T
# activation functions
d_afunc = { 'linear': lambda Y: Y,
            'sigmoid': T.nnet.sigmoid,
            'softmax': T.nnet.softmax,
            'ReLu': lambda Y: T.switch( Y > 0, Y, 0 ) }
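# Illustrative sketch (not part of the original file): the layer classes look
# activations up by name, e.g. d_afunc['ReLu'] is an elementwise max(0, y)
# built from T.switch:
#   y = T.matrix()
#   relu = theano.function( [ y ], d_afunc['ReLu']( y ) )
#   relu( np.array( [[ -1.0, 2.0 ]], dtype = theano.config.floatX ) )  # -> [[ 0., 2. ]]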
### uniform random numbers for weight initialization
#
def randomU( shape, a ):
    # [ -a, a )
    return 2 * a * ( np.random.random_sample( shape ) - 0.5 )
### Gaussian random numbers for weight initialization
#
def randomN( shape, sig ):
    # N(0,sig)
    return sig * np.random.standard_normal( shape )
########## Layer ##########
class Layer( object ):
    def __init__( self, Din, Nunit, afunc, withBias = True, Wini = 0.01, floatX = theano.config.floatX ):
        self.Din = Din
        self.Nunit = Nunit
        self.afunc = d_afunc[afunc]
        self.withBias = withBias
        # theano shared variables for weights & biases
        self.W = theano.shared( np.array( randomN( ( Nunit, Din ), Wini ), dtype = floatX ) )
        self.dW = theano.shared( np.zeros( ( Nunit, Din ), dtype = floatX ) )
        if withBias:
            self.b = theano.shared( np.zeros( Nunit, dtype = floatX ) )
            self.db = theano.shared( np.zeros( Nunit, dtype = floatX ) )
    def output( self, X ):
        if self.withBias:
            Y = T.dot( X, self.W.T ) + self.b # Ndat x Nunit
        else:
            Y = T.dot( X, self.W.T )
        Z = self.afunc( Y )
        return Y, Z
########## MLP ##########
class MLP( object ):
    def __init__( self, Layers ):
        # layers - list of Layer instances
        self.Layers = Layers
        # theano functions
        self.output = self._Tfunc_output()
        self.cost = self._Tfunc_cost()
        self.train = self._Tfunc_train()
    ### theano function for output computation
    #
    def _Tfunc_output( self ):
        X = T.matrix() # N x D
        Y, Z = _T_output( self.Layers, X )
        return theano.function( [ X ], [ Y, Z ] )
    ### theano function for cost computation
    #
    def _Tfunc_cost( self ):
        Z = T.matrix() # N x K
        lab = T.ivector() # N-dim
        cost = _T_cost( Z, lab )
        return theano.function( [ Z, lab ], cost )
    ### theano function for gradient descent learning
    #
    def _Tfunc_train( self ):
        X = T.matrix( 'X' ) # N x D
        lab = T.ivector( 'lab' ) # N-dim
        eta = T.scalar( 'eta' )
        mu = T.scalar( 'mu' )
        lam = T.scalar( 'lambda' )
        Y, Z = _T_output( self.Layers, X )
        cost = T.mean( _T_cost( Z, lab ) )
        updatesList = []
        for layer in self.Layers:
            gradW = T.grad( cost, layer.W )
            #dWnew = -eta * gradW + mu * layer.dW
            dWnew = -eta * ( gradW + lam * layer.W ) + mu * layer.dW
            Wnew = layer.W + dWnew
            updatesList.append( ( layer.W, Wnew ) )
            updatesList.append( ( layer.dW, dWnew ) )
            if layer.withBias:
                gradb = T.grad( cost, layer.b )
                # no weight decay for bias
                dbnew = -eta * gradb + mu * layer.db
                bnew = layer.b + dbnew
                updatesList.append( ( layer.b, bnew ) )
                updatesList.append( ( layer.db, dbnew ) )
        return theano.function( [ X, lab, eta, mu, lam ], cost, updates = updatesList )
def _T_output( Layers, X ):
    Zprev = X
    for layer in Layers:
        Y, Z = layer.output( Zprev )
        Zprev = Z
    return Y, Z
def _T_cost( Z, lab ):
    return T.nnet.categorical_crossentropy( Z, lab )