# Training a small convolutional neural network on CIFAR-10 with Theano:
# ZCA-whitened inputs, minibatch SGD with momentum and L2 weight decay.
import numpy as np
import scipy as sp
import datetime

import cifar10
import convnet150712 as convnet


# ZCA whitening
def ZCAtrans( Xraw, Uzca = None ):
    Xraw2 = Xraw.reshape( ( Xraw.shape[0], -1 ) )
    if Uzca is None:
        # Xraw is assumed to be zero-mean
        C = np.dot( Xraw2.T, Xraw2 ) / Xraw2.shape[0]
        U, eva, V = np.linalg.svd( C )  # U[:, i] is the i-th eigenvector
        sqeva = np.sqrt( eva + 0.001 )
        Uzca = np.dot( U / sqeva[np.newaxis, :], U.T )
        X = np.dot( Xraw2, Uzca ).reshape( Xraw.shape )
        return X, Uzca
    else:
        X = np.dot( Xraw2, Uzca ).reshape( Xraw.shape )
        return X
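# Usage sketch (hypothetical names; Xtrain / Xtest are zero-mean arrays of shape ( N, nch, nrow, ncol )):
#   Xw_train, Uzca = ZCAtrans( Xtrain )          # estimate the whitening matrix on the training data
#   Xw_test = ZCAtrans( Xtest, Uzca = Uzca )     # reuse the same Uzca for the test data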
# mini-batch indices for stochastic gradient descent
def makebatchindex( N, batchsize ):
    idx = np.random.permutation( N )
    nbatch = int( np.ceil( float( N ) / batchsize ) )
    idxB = np.zeros( ( nbatch, N ), dtype = bool )
    for ib in range( nbatch - 1 ):
        idxB[ib, idx[ib*batchsize:(ib+1)*batchsize]] = True
    ib = nbatch - 1
    idxB[ib, idx[ib*batchsize:]] = True  # the last batch takes the remaining samples
    return idxB
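# Usage sketch: each row of idxB is a boolean mask over the N samples, so a minibatch is
# selected as X[idxB[ib, :]] (this is how the training loop in __main__ uses it), e.g.
#   idxB = makebatchindex( 1000, 128 )   # 8 rows; rows 0-6 hold 128 samples each, row 7 holds 104
#   Xbatch = X[idxB[0, :]]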
# computing the mean cost and the recognition rate over a dataset, in minibatches
def recograte( cnn, X, label, batchsize ):
    N = X.shape[0]
    nbatch = int( np.ceil( float( N ) / batchsize ) )
    LL = 0.0
    cnt = 0
    for ib in range( nbatch - 1 ):
        ii = np.arange( ib*batchsize, (ib+1)*batchsize )
        Y, Z = cnn.output( X[ii] )
        LL += np.sum( cnn.cost( Z, label[ii] ) )
        cnt += np.sum( label[ii] == np.argmax( Z, axis = 1 ) )
    ib = nbatch - 1
    ii = np.arange( ib*batchsize, N )
    Y, Z = cnn.output( X[ii] )
    LL += np.sum( cnn.cost( Z, label[ii] ) )
    cnt += np.sum( label[ii] == np.argmax( Z, axis = 1 ) )
    return LL / N, float( cnt ) / N
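# recograte returns ( mean cross-entropy, accuracy in [0, 1] );
# e.g. mnLL, rr = recograte( cnn, XV, labelV, 100 ) gives the validation cost and accuracy.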
# Conv-Pool-ReLu-Softmax
def CPRS( Xnch, Xrow, Xcol, K ):
    Xdim = ( Xnch, Xrow, Xcol )
    W1dim = ( 16, 5, 5 )
    #W1dim = ( 64, 5, 5 )
    #W1dim = ( 256, 5, 5 )
    ds1 = ( 4, 4 )
    #ds1 = ( 2, 2 )
    #st1 = None
    #st1 = ( 4, 4 )
    st1 = ( 2, 2 )
    L1conv = convnet.ConvLayer( Xdim, W1dim, 'linear', withBias = False )
    L1pool = convnet.PoolLayer( L1conv.Yshape, ds1, 'ReLu', withBias = True, st = st1 )
    H1 = L1pool.Dout
    H2 = 1000
    L2 = convnet.FullLayer( H1, H2, 'ReLu', withBias = True, T4toMat = True )
    L3 = convnet.FullLayer( H2, K, 'softmax', withBias = True, T4toMat = False )
    cnn = convnet.CNN( [ L1conv, L1pool, L2, L3 ] )
    print '### Conv-Pool-ReLu-Softmax Xdim:', Xdim
    print '# W1dim:', W1dim, ' ds1:', ds1, ' st1:', st1, ' H1:', H1
    print '# H2:', H2
    return cnn
# Conv-Pool-ReLu-ReLu-Softmax
def CPRRS( Xnch, Xrow, Xcol, K ):
    Xdim = ( Xnch, Xrow, Xcol )
    W1dim = ( 16, 5, 5 )
    #W1dim = ( 64, 5, 5 )
    ds1 = ( 4, 4 )
    #ds1 = ( 2, 2 )
    st1 = None
    #st1 = ( 4, 4 )
    #st1 = ( 2, 2 )
    L1conv = convnet.ConvLayer( Xdim, W1dim, 'linear', withBias = False )
    L1pool = convnet.PoolLayer( L1conv.Yshape, ds1, 'ReLu', withBias = True, st = st1 )
    H1 = L1pool.Dout
    H2 = 1000
    H3 = 1000
    L2 = convnet.FullLayer( H1, H2, 'ReLu', withBias = True, T4toMat = True )
    L3 = convnet.FullLayer( H2, H3, 'ReLu', withBias = True, T4toMat = False )
    L4 = convnet.FullLayer( H3, K, 'softmax', withBias = True, T4toMat = False )
    cnn = convnet.CNN( [ L1conv, L1pool, L2, L3, L4 ] )
    print '### Conv-Pool-ReLu-ReLu-Softmax Xdim:', Xdim
    print '# W1dim:', W1dim, ' ds1:', ds1, ' st1:', st1, ' H1:', H1
    print '# H2:', H2, ' H3:', H3
    return cnn
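# Shape sketch for CIFAR-10 inputs ( 3, 32, 32 ) with the CPRRS defaults above:
#   conv with 16 filters of 5x5 (valid mode)   ->  ( 16, 28, 28 )
#   max-pooling with ds = ( 4, 4 ), st = None  ->  ( 16, 7, 7 ),  so H1 = 16 * 7 * 7 = 784
# (with the strided pooling used in CPRS, the output size is whatever
#  DownsampleFactorMax.out_shape reports; PoolLayer.Dout is computed from it.)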
if __name__ == "__main__":

    idstr = datetime.datetime.now().strftime( '%Y%m%d-%H%M%S' )
    print '### ID: ', idstr

    dirCIFAR10 = '../140823-pylearn2/data/cifar10/cifar-10-batches-py'
    cifar = cifar10.CIFAR10( dirCIFAR10 )
    ZCAwhitening = True

    ##### setting the training data & the validation data
    #
    Xraw, label, t = cifar.loadData( 'L' )
    Xraw /= 255
    xm = np.mean( Xraw, axis = 0 )
    Xraw -= xm
    if ZCAwhitening:
        X, Uzca = ZCAtrans( Xraw, Uzca = None )
    else:
        X = Xraw
    X = np.asarray( X, dtype = np.float32 )
    label = np.asarray( label, dtype = np.int32 )

    idxL, idxV = cifar.genIndexLV( label )
    XL, labelL = X[idxL], label[idxL]
    XV, labelV = X[idxV], label[idxV]
    NL, Xnch, Xrow, Xcol = XL.shape
    NV, Xnch, Xrow, Xcol = XV.shape
    K = cifar.nclass

    np.random.seed( 0 )
    batchsize = 100
    idxB = makebatchindex( NL, batchsize )
    nbatch = idxB.shape[0]

    ##### training
    #
    cnn = CPRS( Xnch, Xrow, Xcol, K )
    #cnn = CPRRS( Xnch, Xrow, Xcol, K )
    eta, mu, lam = 0.01, 0.9, 0.0001
    nepoch = 50

    print '# eta = ', eta, ' mu = ', mu, ' lam = ', lam
    print '# ZCAwhitening = ', ZCAwhitening
    print '### training: NL = ', NL, ' NV = ', NV, ' K = ', K, ' batchsize = ', batchsize

    for i in range( nepoch ):
        # printing error rates etc.
        if ( i <= 5 ) or ( i % 10 == 0 ):
            mnLLL, rrL = recograte( cnn, XL, labelL, batchsize )
            mnLLV, rrV = recograte( cnn, XV, labelV, batchsize )
            print '%d | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, rrL * 100, mnLLV, rrV * 100 )
        # training (selecting each batch in random order)
        for ib in np.random.permutation( nbatch ):
            ii = idxB[ib, :]
            cnn.train( XL[ii], labelL[ii], eta, mu, lam )

    i = nepoch
    mnLLL, rrL = recograte( cnn, XL, labelL, batchsize )
    mnLLV, rrV = recograte( cnn, XV, labelV, batchsize )
    print '%d | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, rrL * 100, mnLLV, rrV * 100 )

    ##### setting the test data
    #
    XTraw, labelT, tT = cifar.loadData( 'T' )
    XTraw /= 255
    XTraw -= xm
    if ZCAwhitening:
        XT = ZCAtrans( XTraw, Uzca = Uzca )
    else:
        XT = XTraw
    XT = np.asarray( XT, dtype = np.float32 )
    labelT = np.asarray( labelT, dtype = np.int32 )
    NT, Xnch, Xrow, Xcol = XT.shape
    print '# NT = ', NT

    mnLLT, rrT = recograte( cnn, XT, labelT, batchsize )
    print '%d | %.4f %.2f | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, rrL * 100, mnLLV, rrV * 100, mnLLT, rrT * 100 )
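# The final line above reports, in order:
#   epoch | training mean cross-entropy, accuracy[%] | validation ditto | test ditto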
########## convnet150712.py (the module imported above as convnet) ##########
import numpy as np
import theano
import theano.tensor as T
import theano.tensor.signal.downsample as Tsd

import nnet150712 as nnet


########## Convolution Layer ##########
class ConvLayer( object ):

    def __init__( self, Xdim, Wdim, afunc, withBias, Wini = 0.01, floatX = theano.config.floatX ):

        # dimension of the input
        Xnch, Xrow, Xcol = Xdim
        self.Xshape = Xdim

        # dimension of the convolution filters
        Wnch, Wrow, Wcol = Wdim
        self.Wshape = ( Wnch, Xnch, Wrow, Wcol )

        # dimension of the output
        Yrow, Ycol = Xrow - Wrow + 1, Xcol - Wcol + 1
        self.Yshape = ( Wnch, Yrow, Ycol )
        self.Dout = Wnch * Yrow * Ycol

        # activation function of the layer
        self.afunc = nnet.d_afunc[afunc]
        self.withBias = withBias

        # theano shared variables
        self.W = theano.shared( np.array( nnet.randomN( self.Wshape, Wini ), dtype = floatX ) )
        self.dW = theano.shared( np.zeros( self.Wshape, dtype = floatX ) )
        if withBias:
            self.b = theano.shared( np.zeros( Wnch, dtype = floatX ) )
            self.db = theano.shared( np.zeros( Wnch, dtype = floatX ) )

    def output( self, X ):
        # X: Ndat x Xshape, Y: Ndat x Yshape
        Xs = ( None, self.Xshape[0], self.Xshape[1], self.Xshape[2] )
        Ws = self.Wshape
        Y = T.nnet.conv.conv2d( X, self.W, image_shape = Xs, filter_shape = Ws )
        if self.withBias:
            b = self.b.dimshuffle( 'x', 0, 'x', 'x' )  # 1 x nch x 1 x 1
            Y += b
        Z = self.afunc( Y )
        return Y, Z  # Ndat x Yshape
########## Pooling Layer ##########
class PoolLayer( object ):

    def __init__( self, Xdim, ds, afunc, withBias, st = None, floatX = theano.config.floatX ):

        # dimension of the input
        Xnch, Xrow, Xcol = Xdim
        self.Xshape = Xdim

        # parameters of the pooling layer
        self.ds = ds
        self.st = st
        self.ignore_border = False
        rv = Tsd.DownsampleFactorMax.out_shape( self.Xshape, ds, ignore_border = self.ignore_border, st = st )
        #self.Yshape = ( Xnch, rv[1], rv[2] )
        self.Yshape = tuple( rv )
        self.Dout = np.prod( self.Yshape )

        # activation function of the layer
        self.afunc = nnet.d_afunc[afunc]
        self.withBias = withBias

        # theano shared variables
        if withBias:
            self.b = theano.shared( np.zeros( Xnch, dtype = floatX ) )
            self.db = theano.shared( np.zeros( Xnch, dtype = floatX ) )

    def output( self, X ):
        # X: Ndat x Xshape
        Y = Tsd.max_pool_2d( X, self.ds, ignore_border = self.ignore_border, st = self.st )  # Ndat x Yshape
        if self.withBias:
            b = self.b.dimshuffle( 'x', 0, 'x', 'x' )  # 1 x nch x 1 x 1
            Y += b
        Z = self.afunc( Y )
        return Y, Z
########## Full-Connection Layer ##########
class FullLayer( nnet.Layer ):

    def __init__( self, Din, Nunit, afunc, withBias = True, Wini = 0.01, floatX = theano.config.floatX, T4toMat = False ):
        super( FullLayer, self ).__init__( Din, Nunit, afunc, withBias, Wini, floatX )
        # T4toMat: flatten a 4D input ( Ndat x nch x row x col ) into a matrix ( Ndat x D )
        self.T4toMat = T4toMat

    def super_output( self, X ):
        return super( FullLayer, self ).output( X )

    def output( self, X ):
        if self.T4toMat:
            return self.super_output( X.reshape( ( X.shape[0], -1 ) ) )
        else:
            return self.super_output( X )
########## Convolutional Neural Net ##########
class CNN( object ):

    def __init__( self, Layers ):
        # layers - list of Layer instances
        self.Layers = Layers
        # theano functions
        self.output = self._Tfunc_output()
        self.cost = self._Tfunc_cost()
        self.train = self._Tfunc_train()

    ### theano function for output computation
    #
    def _Tfunc_output( self ):
        X = T.tensor4()  # Ndat x Xnch x Xrow x Xcol
        Y, Z = nnet._T_output( self.Layers, X )
        return theano.function( [ X ], [ Y, Z ] )

    ### theano function for cost computation
    #
    def _Tfunc_cost( self ):
        Z = T.matrix()     # N x K
        lab = T.ivector()  # N-dim
        cost = nnet._T_cost( Z, lab )
        return theano.function( [ Z, lab ], cost )

    ### theano function for gradient descent learning
    #
    def _Tfunc_train( self ):
        X = T.tensor4( 'X' )
        lab = T.ivector( 'lab' )
        eta = T.scalar( 'eta' )
        mu = T.scalar( 'mu' )
        lam = T.scalar( 'lambda' )
        Y, Z = nnet._T_output( self.Layers, X )
        cost = T.mean( nnet._T_cost( Z, lab ) )
        updatesList = []
        for il, layer in enumerate( self.Layers ):
            # PoolLayer doesn't have W & dW
            if not isinstance( layer, PoolLayer ):
                gradW = T.grad( cost, layer.W )
                #dWnew = -eta * gradW + mu * layer.dW
                dWnew = -eta * ( gradW + lam * layer.W ) + mu * layer.dW
                Wnew = layer.W + dWnew
                updatesList.append( ( layer.W, Wnew ) )
                updatesList.append( ( layer.dW, dWnew ) )
            if layer.withBias:
                gradb = T.grad( cost, layer.b )
                # no weight decay for bias
                dbnew = -eta * gradb + mu * layer.db
                bnew = layer.b + dbnew
                updatesList.append( ( layer.b, bnew ) )
                updatesList.append( ( layer.db, dbnew ) )
        return theano.function( [ X, lab, eta, mu, lam ], cost, updates = updatesList )
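# Update rule used in _Tfunc_train (momentum SGD with L2 weight decay on the weights only):
#   dW <- -eta * ( dE/dW + lam * W ) + mu * dW ,   W <- W + dW
#   db <- -eta *   dE/db             + mu * db ,   b <- b + db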
########## nnet150712.py (the module imported above as nnet) ##########
import numpy as np
import theano
import theano.tensor as T


# activation functions
d_afunc = { 'linear':  lambda Y: Y,
            'sigmoid': T.nnet.sigmoid,
            'softmax': T.nnet.softmax,
            'ReLu':    lambda Y: T.switch( Y > 0, Y, 0 ) }


### uniform random numbers for weight initialization
#
def randomU( shape, a ):
    # [ -a, a )
    return 2 * a * ( np.random.random_sample( shape ) - 0.5 )


### Gaussian random numbers for weight initialization
#
def randomN( shape, sig ):
    # N( 0, sig^2 ), i.e. standard deviation sig
    return sig * np.random.standard_normal( shape )
########## Layer ##########
class Layer( object ):

    def __init__( self, Din, Nunit, afunc, withBias = True, Wini = 0.01, floatX = theano.config.floatX ):
        self.Din = Din
        self.Nunit = Nunit
        self.afunc = d_afunc[afunc]
        self.withBias = withBias
        # theano shared variables for weights & biases
        self.W = theano.shared( np.array( randomN( ( Nunit, Din ), Wini ), dtype = floatX ) )
        self.dW = theano.shared( np.zeros( ( Nunit, Din ), dtype = floatX ) )
        if withBias:
            self.b = theano.shared( np.zeros( Nunit, dtype = floatX ) )
            self.db = theano.shared( np.zeros( Nunit, dtype = floatX ) )

    def output( self, X ):
        if self.withBias:
            Y = T.dot( X, self.W.T ) + self.b  # Ndat x Nunit
        else:
            Y = T.dot( X, self.W.T )
        Z = self.afunc( Y )
        return Y, Z
########## MLP ##########
class MLP( object ):

    def __init__( self, Layers ):
        # layers - list of Layer instances
        self.Layers = Layers
        # theano functions
        self.output = self._Tfunc_output()
        self.cost = self._Tfunc_cost()
        self.train = self._Tfunc_train()

    ### theano function for output computation
    #
    def _Tfunc_output( self ):
        X = T.matrix()  # N x D
        Y, Z = _T_output( self.Layers, X )
        return theano.function( [ X ], [ Y, Z ] )

    ### theano function for cost computation
    #
    def _Tfunc_cost( self ):
        Z = T.matrix()     # N x K
        lab = T.ivector()  # N-dim
        cost = _T_cost( Z, lab )
        return theano.function( [ Z, lab ], cost )

    ### theano function for gradient descent learning
    #
    def _Tfunc_train( self ):
        X = T.matrix( 'X' )       # N x D
        lab = T.ivector( 'lab' )  # N-dim
        eta = T.scalar( 'eta' )
        mu = T.scalar( 'mu' )
        lam = T.scalar( 'lambda' )
        Y, Z = _T_output( self.Layers, X )
        cost = T.mean( _T_cost( Z, lab ) )
        updatesList = []
        for layer in self.Layers:
            gradW = T.grad( cost, layer.W )
            #dWnew = -eta * gradW + mu * layer.dW
            dWnew = -eta * ( gradW + lam * layer.W ) + mu * layer.dW
            Wnew = layer.W + dWnew
            updatesList.append( ( layer.W, Wnew ) )
            updatesList.append( ( layer.dW, dWnew ) )
            if layer.withBias:
                gradb = T.grad( cost, layer.b )
                # no weight decay for bias
                dbnew = -eta * gradb + mu * layer.db
                bnew = layer.b + dbnew
                updatesList.append( ( layer.b, bnew ) )
                updatesList.append( ( layer.db, dbnew ) )
        return theano.function( [ X, lab, eta, mu, lam ], cost, updates = updatesList )
# forward pass through a list of layers; returns the last layer's pre-activation Y and activation Z
def _T_output( Layers, X ):
    Zprev = X
    for layer in Layers:
        Y, Z = layer.output( Zprev )
        Zprev = Z
    return Y, Z


# per-sample cross-entropy between softmax outputs Z ( N x K ) and integer labels lab ( N-dim )
def _T_cost( Z, lab ):
    return T.nnet.categorical_crossentropy( Z, lab )
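# Usage sketch for the MLP class (hypothetical shapes; X is an N x D matrix with dtype
# theano.config.floatX, lab an int32 vector of labels in 0..K-1):
#   L1 = Layer( D, 100, 'ReLu' )
#   L2 = Layer( 100, K, 'softmax' )
#   mlp = MLP( [ L1, L2 ] )
#   for epoch in range( nepoch ):
#       mlp.train( X, lab, 0.01, 0.9, 0.0001 )   # eta, mu, lam
#   Y, Z = mlp.output( X )                       # Z: N x K class probabilities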