# takatakamanbou/00_ex160323.md

Last active Mar 26, 2016

## ex160323
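
The script below trains a LeNet-style convolutional network (Conv-Pool-Conv-Pool-ReLu-Softmax, assembled from the author's `convnet160323` module) on MNIST. It draws random mini batches of 100 samples, updates the weights with learning rate `eta = 0.01`, momentum `mu = 0.9`, and weight decay `lam = 0.0005`, and prints the mean cost and error rate (in %) on the training and test sets every 500 iterations.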

```python
from __future__ import print_function
import numpy as np
import scipy as sp
import datetime
import mnist
import convnet160323 as convnet


def gendat( mnist, LT ):
    label = np.asarray( mnist.getLabel( LT ), dtype = np.int32 )
    X = np.array( mnist.getImage( LT ) / 256, dtype = np.float32 )  # => in [0,1]
    return X, label

# mini batch indices for stochastic gradient ascent
def makebatchindex( N, batchsize ):
    idx = np.random.permutation( N )
    nbatch = int( np.ceil( float( N ) / batchsize ) )
    idxB = np.zeros( ( nbatch, N ), dtype = bool )
    for ib in range( nbatch - 1 ):
        idxB[ib, idx[ib*batchsize:(ib+1)*batchsize]] = True
    # the last batch takes the remaining samples (it may be smaller)
    ib = nbatch - 1
    idxB[ib, idx[ib*batchsize:]] = True
    return idxB

# computing the mean cost and the error rate
def errorrate( cnn, X, label, batchsize ):
    N = X.shape[0]
    nbatch = int( np.ceil( float( N ) / batchsize ) )
    LL = 0.0
    cnt = 0
    for ib in range( nbatch - 1 ):
        ii = np.arange( ib*batchsize, (ib+1)*batchsize )
        Z = cnn.output( X[ii] )
        LL += np.sum( cnn.cost( Z, label[ii] ) )
        cnt += np.sum( label[ii] != np.argmax( Z, axis = 1 ) )
    # the last (possibly smaller) batch
    ib = nbatch - 1
    ii = np.arange( ib*batchsize, N )
    Z = cnn.output( X[ii] )
    LL += np.sum( cnn.cost( Z, label[ii] ) )
    cnt += np.sum( label[ii] != np.argmax( Z, axis = 1 ) )
    return LL / N, float( cnt ) / N

# Conv-Pool-Conv-Pool-ReLu-Softmax
def CPCPRS( Xnch, Xrow, Xcol, K, dropout = False ):

    if dropout:
        do = [ 0.8, 0.8, 0.5, 1.0 ]
    else:
        do = [ 1.0, 1.0, 1.0, 1.0 ]

    # 0th layer (input)
    Xdim = ( Xnch, Xrow, Xcol )
    L0 = convnet.T4InputLayer( Xdim, dropout = do[0] )

    # 1st layer (Conv-Pool)
    W1dim = ( 20, 5, 5 )
    b1 = 'valid'
    L1conv = convnet.ConvLayer( Xdim, W1dim, 'linear', withBias = True, border_mode = b1 )
    ds1 = ( 2, 2 )
    st1 = None
    L1pool = convnet.PoolLayer( L1conv.Yshape, ds1, st = st1, dropout = do[1] )
    H1 = L1pool.Dout

    # 2nd layer (Conv-Pool)
    W2dim = ( 50, 5, 5 )
    b2 = 'valid'
    L2conv = convnet.ConvLayer( L1pool.Yshape, W2dim, 'linear', withBias = True, border_mode = b2 )
    ds2 = ( 2, 2 )
    st2 = None
    L2pool = convnet.PoolLayer( L2conv.Yshape, ds2, st = st2, dropout = do[2] )
    H2 = L2pool.Dout

    # 3rd layer (fully connected, ReLu)
    H3 = 500
    L3 = convnet.FullLayer( H2, H3, 'ReLu', withBias = True, T4toMat = True )

    # 4th layer (softmax)
    L4 = convnet.FullLayer( H3, K, 'linear', withBias = True, T4toMat = False )

    cnn = convnet.CNN( [ L0, L1conv, L1pool, L2conv, L2pool, L3, L4 ] )

    print( '### Conv-Pool-Conv-Pool-ReLu-Softmax' )
    print( '# T4InputLayer: ', L0.Xshape, ' dropout = ', L0.dropout )
    layer = L1conv
    print( '# ConvLayer: ', layer.Wshape, layer.Yshape, ' bmode = ', layer.border_mode )
    layer = L1pool
    print( '# PoolLayer: ', ' ds = ', layer.ds, ' st = ', layer.st, ' dropout = ', layer.dropout, layer.Yshape, layer.Dout )
    layer = L2conv
    print( '# ConvLayer: ', layer.Wshape, layer.Yshape, ' bmode = ', layer.border_mode )
    layer = L2pool
    print( '# PoolLayer: ', ' ds = ', layer.ds, ' st = ', layer.st, ' dropout = ', layer.dropout, layer.Yshape, layer.Dout )
    layer = L3
    print( '# FullLayer: ', layer.Din, layer.Nunit, layer.afunc, layer.dropout )
    layer = L4
    print( '# FullLayer: ', layer.Din, layer.Nunit, layer.afunc, layer.dropout )

    return cnn


if __name__ == "__main__":

    idstr = datetime.datetime.now().strftime( '%Y%m%d-%H%M%S' )
    print( '### ID: ', idstr )

    mn = mnist.MNIST( pathMNIST = '../150117-mnist/' )

    ##### setting the training data
    #
    K = mn.nclass
    Xnch = 1
    Xrow, Xcol = 28, 28
    Xdim = ( Xrow, Xcol )
    Xraw, label = gendat( mn, 'L' )
    X = np.reshape( Xraw, ( -1, Xnch, Xcol, Xrow ) )  # Xrow == Xcol here, so this order is harmless
    XL, labelL = X, label
    NL = XL.shape[0]

    ##### setting the test data
    #
    XrawT, labelT = gendat( mn, 'T' )
    XT = np.reshape( XrawT, ( -1, Xnch, Xcol, Xrow ) )
    NT = XT.shape[0]

    ##### mini batch indices for stochastic gradient ascent
    #
    batchsize = 100
    idxB = makebatchindex( NL, batchsize )
    nbatch = idxB.shape[0]

    ##### training
    #
    cnn = CPCPRS( Xnch, Xrow, Xcol, K )
    eta = 0.01     # learning rate
    mu = 0.9       # momentum
    lam = 0.0005   # weight decay
    nt = 10000     # number of iterations

    print( '### training: NL = ', NL, ' batchsize = ', batchsize )
    print( '# eta = ', eta, 'mu = ', mu, 'lam = ', lam )
    for i in range( nt ):
        if i % 500 == 0:
            mnLLL, erL = errorrate( cnn, XL, labelL, batchsize )
            mnLLT, erT = errorrate( cnn, XT, labelT, batchsize )
            print( '%d | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, erL * 100, mnLLT, erT * 100 ) )
        # one update on a randomly chosen mini batch
        ib = np.random.randint( 0, nbatch )
        ii = idxB[ib, :]
        cnn.train( XL[ii], labelL[ii], eta, mu, lam )

    i = nt
    mnLLL, erL = errorrate( cnn, XL, labelL, batchsize )
    mnLLT, erT = errorrate( cnn, XT, labelT, batchsize )
    print( '%d | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, erL * 100, mnLLT, erT * 100 ) )

    print( '### ID: ', idstr )
```
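
As a sanity check on the architecture, the feature-map sizes can be worked out by hand: a `'valid'` 5×5 convolution shrinks each side of a 28×28 MNIST image by 4, and each non-overlapping 2×2 pooling halves it. A minimal sketch of that arithmetic, independent of `convnet160323`:

```python
# feature-map side length through CPCPRS for a 1 x 28 x 28 input
s = 28
s = s - 5 + 1   # 24: first 5x5 conv, border_mode = 'valid'
s = s // 2      # 12: first 2x2 pooling
s = s - 5 + 1   #  8: second 5x5 conv
s = s // 2      #  4: second 2x2 pooling
print( 50 * s * s )   # 800: flattened input size H2 of the 500-unit ReLu layer
```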
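
`makebatchindex` returns a boolean matrix with one row per mini batch; row `ib` marks the samples of batch `ib`, so `XL[idxB[ib, :]]` selects that batch. Its behaviour can be checked in isolation with toy numbers (the values below are illustrative only):

```python
import numpy as np

np.random.seed( 0 )   # fixed seed, for a reproducible illustration only

N, batchsize = 10, 4
idx = np.random.permutation( N )
nbatch = int( np.ceil( float( N ) / batchsize ) )   # 3 batches: 4 + 4 + 2
idxB = np.zeros( ( nbatch, N ), dtype = bool )
for ib in range( nbatch - 1 ):
    idxB[ib, idx[ib*batchsize:(ib+1)*batchsize]] = True
idxB[nbatch-1, idx[(nbatch-1)*batchsize:]] = True   # last, possibly smaller, batch

print( idxB.sum( axis = 1 ) )   # [4 4 2]: the batch sizes
print( idxB.sum( axis = 0 ) )   # all ones: each sample is in exactly one batch
```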
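
`convnet160323` itself is not part of this gist, so the exact definitions of `cnn.output` and `cnn.cost` are not visible here. Given the softmax output layer, `cnn.cost` presumably returns the per-sample cross-entropy of the class scores; the sketch below spells out that assumption in plain NumPy (the function name and its role are guesses, not the module's actual API):

```python
import numpy as np

def cross_entropy( Z, label ):
    # ASSUMPTION: a guess at what convnet160323's cnn.cost computes for
    # raw class scores Z of shape (N, K); the real module is not shown here.
    Zs = Z - Z.max( axis = 1, keepdims = True )    # shift for numerical stability
    logp = Zs - np.log( np.sum( np.exp( Zs ), axis = 1, keepdims = True ) )
    return -logp[np.arange( Z.shape[0] ), label]

# errorrate() above would then average these values and count argmax mismatches
Z = np.array( [[2.0, 0.5, -1.0], [0.1, 0.2, 0.3]] )
label = np.array( [0, 2] )
print( cross_entropy( Z, label ) )           # per-sample costs
print( np.argmax( Z, axis = 1 ) != label )   # [False False]: both correct
```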