Create a gist now

Instantly share code, notes, and snippets.

@takatakamanbou /00readme.md Secret
Last active Sep 7, 2015

Embed
import numpy as np
import scipy as sp
import mnist0118 as mnist
import nnet150903 as nnet
def gendat( mnist ):
    """Extract the dataset from a MNIST loader as (X, label) arrays.

    Parameters
    ----------
    mnist : object exposing getImage() and getLabel() (see mnist0118).

    Returns
    -------
    X : float32 array of pixel values scaled into [0, 1].
    label : int32 array of class labels.
    """
    label = np.asarray( mnist.getLabel(), dtype = np.int32 )
    # NOTE: divide by the float 255.0, not the int 255.  Under Python 2,
    # an integer-typed image array (e.g. uint8) divided by an int is
    # floor-divided, collapsing pixels to {0, 1} instead of [0, 1].
    X = np.array( mnist.getImage() / 255.0, dtype = np.float32 )  # => in [0,1]
    return X, label
def MLP2( D, H1, K, dropout = False ):
    """Build a one-hidden-layer MLP: D inputs -> H1 ReLu -> K linear outputs.

    When dropout is True, the input layer keeps units with probability 0.8
    and the hidden layer with 0.5; the output layer never drops units.
    """
    keep = [0.8, 0.5, 1.0] if dropout else [1.0, 1.0, 1.0]
    layers = [
        nnet.InputLayer( D, dropout = keep[0] ),
        nnet.Layer( D, H1, 'ReLu', withBias = True, Wini = 0.01, dropout = keep[1] ),
        nnet.Layer( H1, K, 'linear', withBias = True, Wini = 0.01, dropout = keep[2] ),
    ]
    return nnet.MLP( layers )
def MLP3( D, H1, H2, K, dropout = False ):
    """Build a two-hidden-layer MLP: D inputs -> H1 ReLu -> H2 ReLu -> K linear.

    When dropout is True, the input layer keeps units with probability 0.8
    and each hidden layer with 0.5; the output layer never drops units.
    """
    keep = [0.8, 0.5, 0.5, 1.0] if dropout else [1.0, 1.0, 1.0, 1.0]
    layers = [
        nnet.InputLayer( D, dropout = keep[0] ),
        nnet.Layer( D, H1, 'ReLu', withBias = True, Wini = 0.01, dropout = keep[1] ),
        nnet.Layer( H1, H2, 'ReLu', withBias = True, Wini = 0.01, dropout = keep[2] ),
        nnet.Layer( H2, K, 'linear', withBias = True, Wini = 0.01, dropout = keep[3] ),
    ]
    return nnet.MLP( layers )
# mini-batch index masks for stochastic gradient training
def makebatchindex( N, batchsize ):
    """Randomly partition N sample indices into mini-batch boolean masks.

    Returns a (nbatch, N) boolean array whose row ib selects the samples
    of mini-batch ib; the final row may select fewer than batchsize samples.
    """
    perm = np.random.permutation( N )
    nbatch = int( np.ceil( float( N ) / batchsize ) )
    masks = np.zeros( ( nbatch, N ), dtype = bool )
    for ib in range( nbatch ):
        # slicing past the end of perm clamps, so the (possibly short)
        # last batch needs no special case
        masks[ib, perm[ib * batchsize:(ib + 1) * batchsize]] = True
    return masks
def errorrate( mlp, X, label, batchsize ):
    """Evaluate mlp on (X, label) in mini-batches of batchsize rows.

    Returns (mean cost per sample, misclassification rate), where a sample
    counts as wrong when its label differs from argmax of the net output.
    """
    N = X.shape[0]
    nbatch = int( np.ceil( float( N ) / batchsize ) )
    total_cost = 0.0
    nwrong = 0
    for ib in range( nbatch ):
        # min(...) clamps the final batch, which may be short
        ii = np.arange( ib * batchsize, min( ( ib + 1 ) * batchsize, N ) )
        Z = mlp.output( X[ii] )
        # mlp.cost returns a per-sample mean; re-weight by batch size
        total_cost += mlp.cost( Z, label[ii] ) * ii.shape[0]
        nwrong += np.sum( label[ii] != np.argmax( Z, axis = 1 ) )
    return total_cost / N, float( nwrong ) / N
def weightnorm( mlp ):
    """Return the mean squared weight of each non-input layer of mlp.

    One entry per layer in mlp.Layers[1:].  For layers with a bias,
    getWeight() packs (W, b) and only the weight matrix W is measured.
    """
    norms = []
    for layer in mlp.Layers[1:]:
        Wb = layer.getWeight()
        W = Wb[0] if layer.withBias else Wb
        norms.append( np.mean( np.square( W ) ) )
    return np.asarray( norms )
if __name__ == "__main__":

    # Fixed seed so a run is reproducible (the gist's logs quote seeds 1-3).
    np.random.seed( 1 )

    ##### setting the training data & the validation data
    #
    # NOTE(review): 'L' presumably selects the learning (training) split of
    # mnist0118.MNIST, by contrast with 'T' used for the test split below
    # -- confirm against mnist0118.
    mn = mnist.MNIST( 'L' )
    K = mn.nclass
    X, label = gendat( mn )
    # center the data with the per-pixel mean; xm is reused on the test set
    xm = np.mean( X, axis = 0 )
    X -= xm
    # first 50000 samples for training, the remainder for validation
    XL, labelL = X[:50000], label[:50000]
    XV, labelV = X[50000:], label[50000:]
    NL, D = XL.shape
    NV, D = XV.shape

    ##### mini batch indices for stochastic gradient ascent
    #
    batchsize = 100
    idxB = makebatchindex( NL, batchsize )
    nbatch = idxB.shape[0]

    ##### initializing
    #
    #mlp = MLP2( D, 1000, K )
    mlp = MLP3( D, 1000, 1000, K, dropout = True )
    #mlp = MLP3( D, 2000, 2000, K )

    # print the architecture, one line per layer
    for i, layer in enumerate( mlp.Layers ):
        if i == 0:
            print '# Layer 0 : Input (', layer.Din, ') dropout = ', layer.dropout
        else:
            print '# Layer', i, ':', layer.afunc, '(', layer.Din, 'x', layer.Nunit, ') dropout = ', layer.dropout

    ##### training
    #
    # eta: learning rate, mu: momentum, lam: regularization coefficient
    # (passed straight to mlp.train)
    eta, mu, lam = 0.1, 0.9, 0.0
    nepoch = 100

    print '### training: NL = ', NL, 'NV = ', NV, ' batchsize = ', batchsize
    print '# eta = ', eta, 'mu = ', mu, 'lam = ', lam

    # epoch 0: error rates of the untrained net
    i = 0
    mnLLL, erL = errorrate( mlp, XL, labelL, batchsize )
    mnLLV, erV = errorrate( mlp, XV, labelV, batchsize )
    # trailing comma keeps the weight norms on the same output line
    print '%d | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, erL * 100, mnLLV, erV * 100 ),
    w2 = weightnorm( mlp )
    print ' | ', w2

    for i in range( 1, nepoch ):

        # training (selecting each batch in random order)
        for ib in np.random.permutation( nbatch ):
            ii = idxB[ib, :]
            mlp.train( XL[ii], labelL[ii], eta, mu, lam )

        # printing error rates etc. (every epoch for the first 10, then
        # every 10th epoch)
        if ( i < 10 ) or ( i % 10 == 0 ):
        #if i % 10 == 0:
            mnLLL, erL = errorrate( mlp, XL, labelL, batchsize )
            mnLLV, erV = errorrate( mlp, XV, labelV, batchsize )
            print '%d | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, erL * 100, mnLLV, erV * 100 ),
            w2 = weightnorm( mlp )
            print ' | ', w2

    # the loop above stops at nepoch - 1; report the final figures as nepoch
    i = nepoch

    ##### setting the test data
    #
    mn = mnist.MNIST( 'T' )
    XT, labelT = gendat( mn )
    # center with the TRAINING mean, not the test set's own mean
    XT -= xm
    NT, D = XT.shape
    print '# NT = ', NT
    mnLLT, erT = errorrate( mlp, XT, labelT, batchsize )
    print '%d | %.4f %.2f | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, erL * 100, mnLLV, erV * 100, mnLLT, erT * 100 )
##### result obtained with dropout (1) #####
# using nnet.T_update()
$ time THEANO_FLAGS='floatX=float32,device=gpu0' python ex150904.py
Using gpu device 0: Tesla K20c
# Layer 0 : Input ( 784 ) dropout = 0.8
# Layer 1 : ReLu ( 784 x 1000 ) dropout = 0.5
# Layer 2 : ReLu ( 1000 x 1000 ) dropout = 0.5
# Layer 3 : linear ( 1000 x 10 ) dropout = 1.0
### training: NL = 50000 NV = 10000 batchsize = 100
# eta = 0.1 mu = 0.9 lam = 0.0
0 | 2.3026 92.95 | 2.3027 93.31 | [ 9.98441756e-05 9.97784300e-05 9.98655232e-05]
1 | 0.1889 5.82 | 0.1785 5.55 | [ 0.00031995 0.00023307 0.01017031]
2 | 0.1050 3.28 | 0.1119 3.28 | [ 0.00050071 0.00033251 0.01328769]
3 | 0.0726 2.20 | 0.0860 2.78 | [ 0.00064703 0.00040984 0.01534916]
4 | 0.0556 1.74 | 0.0764 2.25 | [ 0.00077867 0.00048094 0.01686583]
5 | 0.0506 1.54 | 0.0750 2.21 | [ 0.00089908 0.00054449 0.01814652]
6 | 0.0390 1.19 | 0.0724 1.98 | [ 0.00100988 0.00060491 0.01935988]
7 | 0.0318 0.92 | 0.0671 1.94 | [ 0.00110785 0.0006594 0.02031165]
8 | 0.0264 0.78 | 0.0620 1.77 | [ 0.00120683 0.00071318 0.02122617]
9 | 0.0225 0.67 | 0.0609 1.82 | [ 0.00129922 0.0007626 0.02220863]
10 | 0.0228 0.68 | 0.0603 1.66 | [ 0.00138951 0.00081236 0.02310262]
20 | 0.0068 0.18 | 0.0550 1.43 | [ 0.00214829 0.00123889 0.02996662]
30 | 0.0035 0.10 | 0.0579 1.31 | [ 0.00275098 0.00158488 0.03502906]
40 | 0.0015 0.03 | 0.0523 1.17 | [ 0.00325752 0.0018803 0.03880851]
50 | 0.0010 0.02 | 0.0563 1.22 | [ 0.00370175 0.00214782 0.04204497]
60 | 0.0006 0.01 | 0.0581 1.30 | [ 0.00409431 0.0023857 0.04454408]
70 | 0.0004 0.01 | 0.0576 1.22 | [ 0.00444975 0.00260766 0.04707579]
80 | 0.0005 0.01 | 0.0546 1.17 | [ 0.00479328 0.00281817 0.04927089]
90 | 0.0003 0.01 | 0.0594 1.18 | [ 0.0051061 0.00301716 0.05107509]
# NT = 10000
100 | 0.0003 0.01 | 0.0594 1.18 | 0.0513 1.12
real 4m26.345s
user 3m14.279s
sys 1m11.698s
seed = 1 100 | 0.0003 0.01 | 0.0594 1.18 | 0.0513 1.12 real 4m13.466s
seed = 2 100 | 0.0003 0.01 | 0.0588 1.10 | 0.0507 1.21 real 4m14.241s
seed = 3 100 | 0.0002 0.00 | 0.0642 1.23 | 0.0471 1.16 real 4m15.080s
##### result obtained with dropout (2) #####
# using nnet.T_updateMasked()
$ time THEANO_FLAGS='floatX=float32,device=gpu0' python ex150904.py
Using gpu device 0: Tesla K20c
# Layer 0 : Input ( 784 ) dropout = 0.8
# Layer 1 : ReLu ( 784 x 1000 ) dropout = 0.5
# Layer 2 : ReLu ( 1000 x 1000 ) dropout = 0.5
# Layer 3 : linear ( 1000 x 10 ) dropout = 1.0
### training: NL = 50000 NV = 10000 batchsize = 100
# eta = 0.1 mu = 0.9 lam = 0.0
0 | 2.3026 92.95 | 2.3027 93.31 | [ 9.98441756e-05 9.97784300e-05 9.98655232e-05]
1 | 0.1887 5.77 | 0.1760 5.37 | [ 0.00031389 0.00022934 0.01001292]
2 | 0.1034 3.18 | 0.1099 3.24 | [ 0.00049193 0.0003279 0.01334954]
3 | 0.0735 2.21 | 0.0865 2.66 | [ 0.00063866 0.00040622 0.01552608]
4 | 0.0554 1.68 | 0.0779 2.42 | [ 0.0007723 0.00047857 0.0171594 ]
5 | 0.0457 1.41 | 0.0744 2.18 | [ 0.00089417 0.00054387 0.01857153]
6 | 0.0383 1.14 | 0.0716 1.95 | [ 0.00100706 0.00060475 0.01974435]
7 | 0.0303 0.92 | 0.0684 1.88 | [ 0.00110892 0.0006606 0.02077041]
8 | 0.0252 0.71 | 0.0613 1.75 | [ 0.00120774 0.00071534 0.02166135]
9 | 0.0210 0.65 | 0.0616 1.75 | [ 0.00130167 0.00076647 0.02272711]
10 | 0.0209 0.62 | 0.0619 1.81 | [ 0.0013926 0.00081686 0.02351622]
20 | 0.0066 0.17 | 0.0549 1.44 | [ 0.0021621 0.0012522 0.0304906]
30 | 0.0029 0.07 | 0.0577 1.37 | [ 0.00277483 0.00160787 0.03551942]
40 | 0.0016 0.04 | 0.0560 1.31 | [ 0.00327066 0.00190542 0.03951812]
50 | 0.0010 0.02 | 0.0597 1.23 | [ 0.00370507 0.00217238 0.0427468 ]
60 | 0.0006 0.00 | 0.0568 1.23 | [ 0.00409685 0.00240636 0.04532112]
70 | 0.0003 0.00 | 0.0588 1.24 | [ 0.00445958 0.00262472 0.0475995 ]
80 | 0.0003 0.00 | 0.0575 1.19 | [ 0.00479193 0.00283209 0.04952905]
90 | 0.0003 0.00 | 0.0636 1.25 | [ 0.00509129 0.00302375 0.05154153]
# NT = 10000
100 | 0.0003 0.00 | 0.0636 1.25 | 0.0510 1.13
real 4m41.910s
user 3m29.095s
sys 1m12.510s
##### result obtained with dropout (3) #####
# using nnet.T_updateMasked2()
$ time THEANO_FLAGS='floatX=float32,device=gpu0' python ex150904.py
Using gpu device 0: Tesla K20c
# Layer 0 : Input ( 784 ) dropout = 0.8
# Layer 1 : ReLu ( 784 x 1000 ) dropout = 0.5
# Layer 2 : ReLu ( 1000 x 1000 ) dropout = 0.5
# Layer 3 : linear ( 1000 x 10 ) dropout = 1.0
### training: NL = 50000 NV = 10000 batchsize = 100
# eta = 0.1 mu = 0.9 lam = 0.0
0 | 2.3026 92.95 | 2.3027 93.31 | [ 9.98441756e-05 9.97784300e-05 9.98655232e-05]
1 | 0.1917 5.88 | 0.1790 5.46 | [ 0.00030902 0.00022561 0.0098097 ]
2 | 0.1047 3.22 | 0.1111 3.20 | [ 0.00048939 0.00032688 0.0132591 ]
3 | 0.0725 2.18 | 0.0857 2.61 | [ 0.00063783 0.0004066 0.01556546]
4 | 0.0568 1.78 | 0.0790 2.36 | [ 0.00077272 0.00047931 0.01716605]
5 | 0.0457 1.44 | 0.0717 2.08 | [ 0.00089393 0.00054522 0.0186321 ]
6 | 0.0377 1.10 | 0.0682 1.90 | [ 0.00100142 0.00060648 0.01988898]
7 | 0.0305 0.93 | 0.0654 1.85 | [ 0.00110259 0.00066296 0.02094646]
8 | 0.0256 0.74 | 0.0613 1.73 | [ 0.00120326 0.00071845 0.02190425]
9 | 0.0213 0.64 | 0.0577 1.61 | [ 0.00129752 0.00077054 0.0229095 ]
10 | 0.0196 0.56 | 0.0569 1.68 | [ 0.00138678 0.00082089 0.02370521]
20 | 0.0064 0.17 | 0.0547 1.36 | [ 0.00213722 0.00125381 0.03101237]
30 | 0.0028 0.05 | 0.0535 1.31 | [ 0.00274978 0.00161166 0.03611655]
40 | 0.0015 0.02 | 0.0529 1.26 | [ 0.00325037 0.00191021 0.04013891]
50 | 0.0009 0.02 | 0.0552 1.27 | [ 0.00368539 0.00217701 0.04348322]
60 | 0.0005 0.01 | 0.0559 1.17 | [ 0.00408166 0.00241876 0.0460985 ]
70 | 0.0004 0.00 | 0.0575 1.28 | [ 0.0044459 0.00264775 0.04849945]
80 | 0.0004 0.01 | 0.0542 1.16 | [ 0.00477751 0.00285784 0.05047518]
90 | 0.0002 0.00 | 0.0619 1.28 | [ 0.00508282 0.00305433 0.05214791]
# NT = 10000
100 | 0.0002 0.00 | 0.0619 1.28 | 0.0521 1.24
real 12m16.551s
user 10m29.532s
sys 1m45.592s
##### result obtained without dropout #####
$ time THEANO_FLAGS='floatX=float32,device=gpu0' python ex150904.py
Using gpu device 0: Tesla K20c
# Layer 0 : Input ( 784 ) dropout = 1.0
# Layer 1 : ReLu ( 784 x 1000 ) dropout = 1.0
# Layer 2 : ReLu ( 1000 x 1000 ) dropout = 1.0
# Layer 3 : linear ( 1000 x 10 ) dropout = 1.0
### training: NL = 50000 NV = 10000 batchsize = 100
# eta = 0.1 mu = 0.9 lam = 0.0
0 | 2.3028 92.95 | 2.3029 93.31 | [ 9.98441756e-05 9.97784300e-05 9.98655232e-05]
1 | 0.0933 2.85 | 0.1146 3.38 | [ 0.00026057 0.00015942 0.00438416]
2 | 0.0518 1.67 | 0.0894 2.44 | [ 0.00035918 0.00019641 0.00536437]
3 | 0.0351 1.15 | 0.0822 2.33 | [ 0.00043118 0.00022391 0.00620047]
4 | 0.0250 0.81 | 0.0894 2.47 | [ 0.00049831 0.00024913 0.00690446]
5 | 0.0203 0.70 | 0.0845 2.19 | [ 0.00055695 0.00027102 0.00753349]
6 | 0.0265 0.88 | 0.1023 2.47 | [ 0.00060401 0.00028876 0.00813732]
7 | 0.0162 0.58 | 0.1086 2.43 | [ 0.00065404 0.00030656 0.00857737]
8 | 0.0059 0.19 | 0.0948 2.01 | [ 0.00069271 0.00032126 0.00907713]
9 | 0.0032 0.08 | 0.0832 1.85 | [ 0.00071972 0.00033147 0.00940243]
10 | 0.0028 0.06 | 0.0886 1.74 | [ 0.00074086 0.00033896 0.00970113]
20 | 0.0000 0.00 | 0.0870 1.51 | [ 0.00076904 0.0003519 0.01059568]
30 | 0.0000 0.00 | 0.0902 1.45 | [ 0.00077228 0.00035419 0.01083231]
40 | 0.0000 0.00 | 0.0925 1.45 | [ 0.00077451 0.00035576 0.01099472]
# NT = 10000
50 | 0.0000 0.00 | 0.0925 1.45 | 0.0857 1.46
real 2m14.636s
user 1m35.877s
sys 0m38.879s
seed = 1 50 | 0.0000 0.00 | 0.0925 1.45 | 0.0857 1.46 real 2m2.908s
seed = 2 50 | 0.0000 0.00 | 0.0949 1.45 | 0.0834 1.41 real 2m2.661s
seed = 3 50 | 0.0000 0.00 | 0.1022 1.55 | 0.0882 1.47 real 2m2.639s
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment