# Training a small convolutional neural network on CIFAR-10 with Theano:
# ZCA-whitened inputs, minibatch SGD with momentum and L2 weight decay.
import numpy as np
import scipy as sp
import datetime

import cifar10
import convnet150712 as convnet


# ZCA whitening
def ZCAtrans( Xraw, Uzca = None ):
    Xraw2 = Xraw.reshape( ( Xraw.shape[0], -1 ) )
    if Uzca is None:
        # Xraw is assumed to be zero-mean
        C = np.dot( Xraw2.T, Xraw2 ) / Xraw2.shape[0]
        U, eva, V = np.linalg.svd( C )  # U[:, i] is the i-th eigenvector
        sqeva = np.sqrt( eva + 0.001 )
        Uzca = np.dot( U / sqeva[np.newaxis, :], U.T )
        X = np.dot( Xraw2, Uzca ).reshape( Xraw.shape )
        return X, Uzca
    else:
        X = np.dot( Xraw2, Uzca ).reshape( Xraw.shape )
        return X
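# Usage sketch (hypothetical names; Xtrain / Xtest are zero-mean arrays of shape ( N, nch, nrow, ncol )):
#   Xw_train, Uzca = ZCAtrans( Xtrain )          # estimate the whitening matrix on the training data
#   Xw_test = ZCAtrans( Xtest, Uzca = Uzca )     # reuse the same Uzca for the test data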
# mini-batch indices for stochastic gradient descent
def makebatchindex( N, batchsize ):
    idx = np.random.permutation( N )
    nbatch = int( np.ceil( float( N ) / batchsize ) )
    idxB = np.zeros( ( nbatch, N ), dtype = bool )
    for ib in range( nbatch - 1 ):
        idxB[ib, idx[ib*batchsize:(ib+1)*batchsize]] = True
    ib = nbatch - 1
    idxB[ib, idx[ib*batchsize:]] = True  # the last batch takes the remaining samples
    return idxB
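# Usage sketch: each row of idxB is a boolean mask over the N samples, so a minibatch is
# selected as X[idxB[ib, :]] (this is how the training loop in __main__ uses it), e.g.
#   idxB = makebatchindex( 1000, 128 )   # 8 rows; rows 0-6 hold 128 samples each, row 7 holds 104
#   Xbatch = X[idxB[0, :]]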
# computing the mean cost and the recognition rate over a dataset, in minibatches
def recograte( cnn, X, label, batchsize ):
    N = X.shape[0]
    nbatch = int( np.ceil( float( N ) / batchsize ) )
    LL = 0.0
    cnt = 0
    for ib in range( nbatch - 1 ):
        ii = np.arange( ib*batchsize, (ib+1)*batchsize )
        Y, Z = cnn.output( X[ii] )
        LL += np.sum( cnn.cost( Z, label[ii] ) )
        cnt += np.sum( label[ii] == np.argmax( Z, axis = 1 ) )
    ib = nbatch - 1
    ii = np.arange( ib*batchsize, N )
    Y, Z = cnn.output( X[ii] )
    LL += np.sum( cnn.cost( Z, label[ii] ) )
    cnt += np.sum( label[ii] == np.argmax( Z, axis = 1 ) )
    return LL / N, float( cnt ) / N
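# recograte returns ( mean cross-entropy, accuracy in [0, 1] );
# e.g. mnLL, rr = recograte( cnn, XV, labelV, 100 ) gives the validation cost and accuracy.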
# Conv-Pool-ReLu-Softmax
def CPRS( Xnch, Xrow, Xcol, K ):
    Xdim = ( Xnch, Xrow, Xcol )
    W1dim = ( 16, 5, 5 )
    #W1dim = ( 64, 5, 5 )
    #W1dim = ( 256, 5, 5 )
    ds1 = ( 4, 4 )
    #ds1 = ( 2, 2 )
    #st1 = None
    #st1 = ( 4, 4 )
    st1 = ( 2, 2 )
    L1conv = convnet.ConvLayer( Xdim, W1dim, 'linear', withBias = False )
    L1pool = convnet.PoolLayer( L1conv.Yshape, ds1, 'ReLu', withBias = True, st = st1 )
    H1 = L1pool.Dout
    H2 = 1000
    L2 = convnet.FullLayer( H1, H2, 'ReLu', withBias = True, T4toMat = True )
    L3 = convnet.FullLayer( H2, K, 'softmax', withBias = True, T4toMat = False )
    cnn = convnet.CNN( [ L1conv, L1pool, L2, L3 ] )
    print '### Conv-Pool-ReLu-Softmax Xdim:', Xdim
    print '# W1dim:', W1dim, ' ds1:', ds1, ' st1:', st1, ' H1:', H1
    print '# H2:', H2
    return cnn
# Conv-Pool-ReLu-ReLu-Softmax
def CPRRS( Xnch, Xrow, Xcol, K ):
    Xdim = ( Xnch, Xrow, Xcol )
    W1dim = ( 16, 5, 5 )
    #W1dim = ( 64, 5, 5 )
    ds1 = ( 4, 4 )
    #ds1 = ( 2, 2 )
    st1 = None
    #st1 = ( 4, 4 )
    #st1 = ( 2, 2 )
    L1conv = convnet.ConvLayer( Xdim, W1dim, 'linear', withBias = False )
    L1pool = convnet.PoolLayer( L1conv.Yshape, ds1, 'ReLu', withBias = True, st = st1 )
    H1 = L1pool.Dout
    H2 = 1000
    H3 = 1000
    L2 = convnet.FullLayer( H1, H2, 'ReLu', withBias = True, T4toMat = True )
    L3 = convnet.FullLayer( H2, H3, 'ReLu', withBias = True, T4toMat = False )
    L4 = convnet.FullLayer( H3, K, 'softmax', withBias = True, T4toMat = False )
    cnn = convnet.CNN( [ L1conv, L1pool, L2, L3, L4 ] )
    print '### Conv-Pool-ReLu-ReLu-Softmax Xdim:', Xdim
    print '# W1dim:', W1dim, ' ds1:', ds1, ' st1:', st1, ' H1:', H1
    print '# H2:', H2, ' H3:', H3
    return cnn
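# Shape sketch for CIFAR-10 inputs ( 3, 32, 32 ) with the CPRRS defaults above:
#   conv with 16 filters of 5x5 (valid mode)   ->  ( 16, 28, 28 )
#   max-pooling with ds = ( 4, 4 ), st = None  ->  ( 16, 7, 7 ),  so H1 = 16 * 7 * 7 = 784
# (with the strided pooling used in CPRS, the output size is whatever
#  DownsampleFactorMax.out_shape reports; PoolLayer.Dout is computed from it.)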
if __name__ == "__main__":

    idstr = datetime.datetime.now().strftime( '%Y%m%d-%H%M%S' )
    print '### ID: ', idstr

    dirCIFAR10 = '../140823-pylearn2/data/cifar10/cifar-10-batches-py'
    cifar = cifar10.CIFAR10( dirCIFAR10 )
    ZCAwhitening = True

    ##### setting the training data & the validation data
    #
    Xraw, label, t = cifar.loadData( 'L' )
    Xraw /= 255
    xm = np.mean( Xraw, axis = 0 )
    Xraw -= xm
    if ZCAwhitening:
        X, Uzca = ZCAtrans( Xraw, Uzca = None )
    else:
        X = Xraw
    X = np.asarray( X, dtype = np.float32 )
    label = np.asarray( label, dtype = np.int32 )

    idxL, idxV = cifar.genIndexLV( label )
    XL, labelL = X[idxL], label[idxL]
    XV, labelV = X[idxV], label[idxV]
    NL, Xnch, Xrow, Xcol = XL.shape
    NV, Xnch, Xrow, Xcol = XV.shape
    K = cifar.nclass

    np.random.seed( 0 )
    batchsize = 100
    idxB = makebatchindex( NL, batchsize )
    nbatch = idxB.shape[0]

    ##### training
    #
    cnn = CPRS( Xnch, Xrow, Xcol, K )
    #cnn = CPRRS( Xnch, Xrow, Xcol, K )
    eta, mu, lam = 0.01, 0.9, 0.0001
    nepoch = 50

    print '# eta = ', eta, ' mu = ', mu, ' lam = ', lam
    print '# ZCAwhitening = ', ZCAwhitening
    print '### training: NL = ', NL, ' NV = ', NV, ' K = ', K, ' batchsize = ', batchsize

    for i in range( nepoch ):
        # printing error rates etc.
        if ( i <= 5 ) or ( i % 10 == 0 ):
            mnLLL, rrL = recograte( cnn, XL, labelL, batchsize )
            mnLLV, rrV = recograte( cnn, XV, labelV, batchsize )
            print '%d | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, rrL * 100, mnLLV, rrV * 100 )
        # training (selecting each batch in random order)
        for ib in np.random.permutation( nbatch ):
            ii = idxB[ib, :]
            cnn.train( XL[ii], labelL[ii], eta, mu, lam )

    i = nepoch
    mnLLL, rrL = recograte( cnn, XL, labelL, batchsize )
    mnLLV, rrV = recograte( cnn, XV, labelV, batchsize )
    print '%d | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, rrL * 100, mnLLV, rrV * 100 )

    ##### setting the test data
    #
    XTraw, labelT, tT = cifar.loadData( 'T' )
    XTraw /= 255
    XTraw -= xm
    if ZCAwhitening:
        XT = ZCAtrans( XTraw, Uzca = Uzca )
    else:
        XT = XTraw
    XT = np.asarray( XT, dtype = np.float32 )
    labelT = np.asarray( labelT, dtype = np.int32 )
    NT, Xnch, Xrow, Xcol = XT.shape
    print '# NT = ', NT

    mnLLT, rrT = recograte( cnn, XT, labelT, batchsize )
    print '%d | %.4f %.2f | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, rrL * 100, mnLLV, rrV * 100, mnLLT, rrT * 100 )
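# The final line above reports, in order:
#   epoch | training mean cross-entropy, accuracy[%] | validation ditto | test ditto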
########## convnet150712.py (the module imported above as convnet) ##########
import numpy as np
import theano
import theano.tensor as T
import theano.tensor.signal.downsample as Tsd

import nnet150712 as nnet


########## Convolution Layer ##########
class ConvLayer( object ):

    def __init__( self, Xdim, Wdim, afunc, withBias, Wini = 0.01, floatX = theano.config.floatX ):

        # dimension of the input
        Xnch, Xrow, Xcol = Xdim
        self.Xshape = Xdim

        # dimension of the convolution filters
        Wnch, Wrow, Wcol = Wdim
        self.Wshape = ( Wnch, Xnch, Wrow, Wcol )

        # dimension of the output
        Yrow, Ycol = Xrow - Wrow + 1, Xcol - Wcol + 1
        self.Yshape = ( Wnch, Yrow, Ycol )
        self.Dout = Wnch * Yrow * Ycol

        # activation function of the layer
        self.afunc = nnet.d_afunc[afunc]
        self.withBias = withBias

        # theano shared variables
        self.W = theano.shared( np.array( nnet.randomN( self.Wshape, Wini ), dtype = floatX ) )
        self.dW = theano.shared( np.zeros( self.Wshape, dtype = floatX ) )
        if withBias:
            self.b = theano.shared( np.zeros( Wnch, dtype = floatX ) )
            self.db = theano.shared( np.zeros( Wnch, dtype = floatX ) )

    def output( self, X ):
        # X: Ndat x Xshape, Y: Ndat x Yshape
        Xs = ( None, self.Xshape[0], self.Xshape[1], self.Xshape[2] )
        Ws = self.Wshape
        Y = T.nnet.conv.conv2d( X, self.W, image_shape = Xs, filter_shape = Ws )
        if self.withBias:
            b = self.b.dimshuffle( 'x', 0, 'x', 'x' )  # 1 x nch x 1 x 1
            Y += b
        Z = self.afunc( Y )
        return Y, Z  # Ndat x Yshape
########## Pooling Layer ##########
class PoolLayer( object ):

    def __init__( self, Xdim, ds, afunc, withBias, st = None, floatX = theano.config.floatX ):

        # dimension of the input
        Xnch, Xrow, Xcol = Xdim
        self.Xshape = Xdim

        # parameters of the pooling layer
        self.ds = ds
        self.st = st
        self.ignore_border = False
        rv = Tsd.DownsampleFactorMax.out_shape( self.Xshape, ds, ignore_border = self.ignore_border, st = st )
        #self.Yshape = ( Xnch, rv[1], rv[2] )
        self.Yshape = tuple( rv )
        self.Dout = np.prod( self.Yshape )

        # activation function of the layer
        self.afunc = nnet.d_afunc[afunc]
        self.withBias = withBias

        # theano shared variables
        if withBias:
            self.b = theano.shared( np.zeros( Xnch, dtype = floatX ) )
            self.db = theano.shared( np.zeros( Xnch, dtype = floatX ) )

    def output( self, X ):
        # X: Ndat x Xshape
        Y = Tsd.max_pool_2d( X, self.ds, ignore_border = self.ignore_border, st = self.st )  # Ndat x Yshape
        if self.withBias:
            b = self.b.dimshuffle( 'x', 0, 'x', 'x' )  # 1 x nch x 1 x 1
            Y += b
        Z = self.afunc( Y )
        return Y, Z
########## Full-Connection Layer ##########
class FullLayer( nnet.Layer ):

    def __init__( self, Din, Nunit, afunc, withBias = True, Wini = 0.01, floatX = theano.config.floatX, T4toMat = False ):
        super( FullLayer, self ).__init__( Din, Nunit, afunc, withBias, Wini, floatX )
        # T4toMat: flatten a 4D input ( Ndat x nch x row x col ) into a matrix ( Ndat x D )
        self.T4toMat = T4toMat

    def super_output( self, X ):
        return super( FullLayer, self ).output( X )

    def output( self, X ):
        if self.T4toMat:
            return self.super_output( X.reshape( ( X.shape[0], -1 ) ) )
        else:
            return self.super_output( X )
########## Convolutional Neural Net ##########
class CNN( object ):

    def __init__( self, Layers ):
        # layers - list of Layer instances
        self.Layers = Layers
        # theano functions
        self.output = self._Tfunc_output()
        self.cost = self._Tfunc_cost()
        self.train = self._Tfunc_train()

    ### theano function for output computation
    #
    def _Tfunc_output( self ):
        X = T.tensor4()  # Ndat x Xnch x Xrow x Xcol
        Y, Z = nnet._T_output( self.Layers, X )
        return theano.function( [ X ], [ Y, Z ] )

    ### theano function for cost computation
    #
    def _Tfunc_cost( self ):
        Z = T.matrix()     # N x K
        lab = T.ivector()  # N-dim
        cost = nnet._T_cost( Z, lab )
        return theano.function( [ Z, lab ], cost )

    ### theano function for gradient descent learning
    #
    def _Tfunc_train( self ):
        X = T.tensor4( 'X' )
        lab = T.ivector( 'lab' )
        eta = T.scalar( 'eta' )
        mu = T.scalar( 'mu' )
        lam = T.scalar( 'lambda' )
        Y, Z = nnet._T_output( self.Layers, X )
        cost = T.mean( nnet._T_cost( Z, lab ) )
        updatesList = []
        for il, layer in enumerate( self.Layers ):
            # PoolLayer doesn't have W & dW
            if not isinstance( layer, PoolLayer ):
                gradW = T.grad( cost, layer.W )
                #dWnew = -eta * gradW + mu * layer.dW
                dWnew = -eta * ( gradW + lam * layer.W ) + mu * layer.dW
                Wnew = layer.W + dWnew
                updatesList.append( ( layer.W, Wnew ) )
                updatesList.append( ( layer.dW, dWnew ) )
            if layer.withBias:
                gradb = T.grad( cost, layer.b )
                # no weight decay for bias
                dbnew = -eta * gradb + mu * layer.db
                bnew = layer.b + dbnew
                updatesList.append( ( layer.b, bnew ) )
                updatesList.append( ( layer.db, dbnew ) )
        return theano.function( [ X, lab, eta, mu, lam ], cost, updates = updatesList )
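# Update rule used in _Tfunc_train (momentum SGD with L2 weight decay on the weights only):
#   dW <- -eta * ( dE/dW + lam * W ) + mu * dW ,   W <- W + dW
#   db <- -eta *   dE/db             + mu * db ,   b <- b + db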
########## nnet150712.py (the module imported above as nnet) ##########
import numpy as np
import theano
import theano.tensor as T


# activation functions
d_afunc = { 'linear':  lambda Y: Y,
            'sigmoid': T.nnet.sigmoid,
            'softmax': T.nnet.softmax,
            'ReLu':    lambda Y: T.switch( Y > 0, Y, 0 ) }


### uniform random numbers for weight initialization
#
def randomU( shape, a ):
    # [ -a, a )
    return 2 * a * ( np.random.random_sample( shape ) - 0.5 )


### Gaussian random numbers for weight initialization
#
def randomN( shape, sig ):
    # N( 0, sig^2 ), i.e. standard deviation sig
    return sig * np.random.standard_normal( shape )
########## Layer ##########
class Layer( object ):

    def __init__( self, Din, Nunit, afunc, withBias = True, Wini = 0.01, floatX = theano.config.floatX ):
        self.Din = Din
        self.Nunit = Nunit
        self.afunc = d_afunc[afunc]
        self.withBias = withBias
        # theano shared variables for weights & biases
        self.W = theano.shared( np.array( randomN( ( Nunit, Din ), Wini ), dtype = floatX ) )
        self.dW = theano.shared( np.zeros( ( Nunit, Din ), dtype = floatX ) )
        if withBias:
            self.b = theano.shared( np.zeros( Nunit, dtype = floatX ) )
            self.db = theano.shared( np.zeros( Nunit, dtype = floatX ) )

    def output( self, X ):
        if self.withBias:
            Y = T.dot( X, self.W.T ) + self.b  # Ndat x Nunit
        else:
            Y = T.dot( X, self.W.T )
        Z = self.afunc( Y )
        return Y, Z
########## MLP ##########
class MLP( object ):

    def __init__( self, Layers ):
        # layers - list of Layer instances
        self.Layers = Layers
        # theano functions
        self.output = self._Tfunc_output()
        self.cost = self._Tfunc_cost()
        self.train = self._Tfunc_train()

    ### theano function for output computation
    #
    def _Tfunc_output( self ):
        X = T.matrix()  # N x D
        Y, Z = _T_output( self.Layers, X )
        return theano.function( [ X ], [ Y, Z ] )

    ### theano function for cost computation
    #
    def _Tfunc_cost( self ):
        Z = T.matrix()     # N x K
        lab = T.ivector()  # N-dim
        cost = _T_cost( Z, lab )
        return theano.function( [ Z, lab ], cost )

    ### theano function for gradient descent learning
    #
    def _Tfunc_train( self ):
        X = T.matrix( 'X' )       # N x D
        lab = T.ivector( 'lab' )  # N-dim
        eta = T.scalar( 'eta' )
        mu = T.scalar( 'mu' )
        lam = T.scalar( 'lambda' )
        Y, Z = _T_output( self.Layers, X )
        cost = T.mean( _T_cost( Z, lab ) )
        updatesList = []
        for layer in self.Layers:
            gradW = T.grad( cost, layer.W )
            #dWnew = -eta * gradW + mu * layer.dW
            dWnew = -eta * ( gradW + lam * layer.W ) + mu * layer.dW
            Wnew = layer.W + dWnew
            updatesList.append( ( layer.W, Wnew ) )
            updatesList.append( ( layer.dW, dWnew ) )
            if layer.withBias:
                gradb = T.grad( cost, layer.b )
                # no weight decay for bias
                dbnew = -eta * gradb + mu * layer.db
                bnew = layer.b + dbnew
                updatesList.append( ( layer.b, bnew ) )
                updatesList.append( ( layer.db, dbnew ) )
        return theano.function( [ X, lab, eta, mu, lam ], cost, updates = updatesList )
# forward pass through a list of layers; returns the last layer's pre-activation Y and activation Z
def _T_output( Layers, X ):
    Zprev = X
    for layer in Layers:
        Y, Z = layer.output( Zprev )
        Zprev = Z
    return Y, Z


# per-sample cross-entropy between softmax outputs Z ( N x K ) and integer labels lab ( N-dim )
def _T_cost( Z, lab ):
    return T.nnet.categorical_crossentropy( Z, lab )
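# Usage sketch for the MLP class (hypothetical shapes; X is an N x D matrix with dtype
# theano.config.floatX, lab an int32 vector of labels in 0..K-1):
#   L1 = Layer( D, 100, 'ReLu' )
#   L2 = Layer( 100, K, 'softmax' )
#   mlp = MLP( [ L1, L2 ] )
#   for epoch in range( nepoch ):
#       mlp.train( X, lab, 0.01, 0.9, 0.0001 )   # eta, mu, lam
#   Y, Z = mlp.output( X )                       # Z: N x K class probabilities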