@takatakamanbou / 00readme.md
Last active Sep 7, 2015
##### ex150904.py #####

import numpy as np
import scipy as sp
import mnist0118 as mnist
import nnet150903 as nnet


# loading the MNIST images & labels; the pixel values are scaled to [0, 1]
def gendat( mnist ):

    label = np.asarray( mnist.getLabel(), dtype = np.int32 )
    X = np.array( mnist.getImage() / 255, dtype = np.float32 )   # => in [0,1]

    return X, label
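# ( illustrative shape check, not part of the script; the sizes are the usual MNIST
#   ones implied by the logs below: 60000 images of 28 x 28 = 784 pixels in the 'L' set )
#
#   >>> X, label = gendat( mnist.MNIST( 'L' ) )
#   >>> X.shape, X.dtype           # ( (60000, 784), dtype('float32') ), values in [0, 1]
#   >>> label.shape, label.dtype   # ( (60000,), dtype('int32') )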

# MLP with one hidden layer: D - H1 - K
def MLP2( D, H1, K, dropout = False ):

    # dropout parameter of each layer ( 1.0 = no dropout )
    if dropout:
        do = [ 0.8, 0.5, 1.0 ]
    else:
        do = [ 1.0, 1.0, 1.0 ]

    L0 = nnet.InputLayer( D, dropout = do[0] )
    L1 = nnet.Layer( D, H1, 'ReLu', withBias = True, Wini = 0.01, dropout = do[1] )
    L2 = nnet.Layer( H1, K, 'linear', withBias = True, Wini = 0.01, dropout = do[2] )
    mlp = nnet.MLP( [ L0, L1, L2 ] )

    return mlp


# MLP with two hidden layers: D - H1 - H2 - K
def MLP3( D, H1, H2, K, dropout = False ):

    # dropout parameter of each layer ( 1.0 = no dropout )
    if dropout:
        do = [ 0.8, 0.5, 0.5, 1.0 ]
    else:
        do = [ 1.0, 1.0, 1.0, 1.0 ]

    L0 = nnet.InputLayer( D, dropout = do[0] )
    L1 = nnet.Layer( D, H1, 'ReLu', withBias = True, Wini = 0.01, dropout = do[1] )
    L2 = nnet.Layer( H1, H2, 'ReLu', withBias = True, Wini = 0.01, dropout = do[2] )
    L3 = nnet.Layer( H2, K, 'linear', withBias = True, Wini = 0.01, dropout = do[3] )
    mlp = nnet.MLP( [ L0, L1, L2, L3 ] )

    return mlp

# making mini batch indices for stochastic gradient ascent
def makebatchindex( N, batchsize ):

    idx = np.random.permutation( N )
    nbatch = int( np.ceil( float( N ) / batchsize ) )

    # boolean matrix of shape ( nbatch, N ); each row selects one mini batch
    idxB = np.zeros( ( nbatch, N ), dtype = bool )
    for ib in range( nbatch - 1 ):
        idxB[ib, idx[ib*batchsize:(ib+1)*batchsize]] = True
    ib = nbatch - 1
    idxB[ib, idx[ib*batchsize:]] = True   # the last batch takes the remaining examples

    return idxB
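
# ( illustrative check of makebatchindex, not part of the script )
#
#   >>> idxB = makebatchindex( 5, 2 )
#   >>> idxB.shape
#   (3, 5)
#   >>> idxB.sum( axis = 1 )                  # batch sizes: two full batches & the remainder
#   array([2, 2, 1])
#   >>> np.all( idxB.sum( axis = 0 ) == 1 )   # every example belongs to exactly one batch
#   True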

# computing the mean cost & the error rate of mlp on ( X, label ), batch by batch
def errorrate( mlp, X, label, batchsize ):

    N = X.shape[0]
    nbatch = int( np.ceil( float( N ) / batchsize ) )

    LL = 0.0   # accumulated cost
    cnt = 0    # number of misclassified examples
    for ib in range( nbatch - 1 ):
        ii = np.arange( ib*batchsize, (ib+1)*batchsize )
        Z = mlp.output( X[ii] )
        LL += mlp.cost( Z, label[ii] ) * ii.shape[0]
        cnt += np.sum( label[ii] != np.argmax( Z, axis = 1 ) )
    ib = nbatch - 1
    ii = np.arange( ib*batchsize, N )
    Z = mlp.output( X[ii] )
    LL += mlp.cost( Z, label[ii] ) * ii.shape[0]
    cnt += np.sum( label[ii] != np.argmax( Z, axis = 1 ) )

    return LL / N, float( cnt ) / N

# mean of the squared weights of each layer ( the bracketed triples in the logs below )
def weightnorm( mlp ):

    W2 = np.empty( len( mlp.Layers ) - 1 )
    for i, layer in enumerate( mlp.Layers[1:] ):
        Wb = layer.getWeight()
        if layer.withBias:
            W = Wb[0]   # with a bias, getWeight() presumably returns the pair ( W, b )
        else:
            W = Wb
        W2[i] = np.mean( np.square( W ) )

    return W2

if __name__ == "__main__":

    np.random.seed( 1 )

    ##### setting the training data & the validation data
    #
    mn = mnist.MNIST( 'L' )
    K = mn.nclass
    X, label = gendat( mn )
    xm = np.mean( X, axis = 0 )
    X -= xm                                   # centring with the mean of the 'L' set
    XL, labelL = X[:50000], label[:50000]     # training set
    XV, labelV = X[50000:], label[50000:]     # validation set
    NL, D = XL.shape
    NV, D = XV.shape

    ##### mini batch indices for stochastic gradient ascent
    #
    batchsize = 100
    idxB = makebatchindex( NL, batchsize )
    nbatch = idxB.shape[0]

    ##### initializing
    #
    #mlp = MLP2( D, 1000, K )
    mlp = MLP3( D, 1000, 1000, K, dropout = True )
    #mlp = MLP3( D, 2000, 2000, K )
    for i, layer in enumerate( mlp.Layers ):
        if i == 0:
            print '# Layer 0 : Input (', layer.Din, ') dropout = ', layer.dropout
        else:
            print '# Layer', i, ':', layer.afunc, '(', layer.Din, 'x', layer.Nunit, ') dropout = ', layer.dropout

    ##### training
    #
    eta, mu, lam = 0.1, 0.9, 0.0   # hyperparameters passed to mlp.train()
    nepoch = 100

    print '### training: NL = ', NL, 'NV = ', NV, ' batchsize = ', batchsize
    print '# eta = ', eta, 'mu = ', mu, 'lam = ', lam

    # error rates etc. before training ( epoch 0 )
    i = 0
    mnLLL, erL = errorrate( mlp, XL, labelL, batchsize )
    mnLLV, erV = errorrate( mlp, XV, labelV, batchsize )
    print '%d | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, erL * 100, mnLLV, erV * 100 ),
    w2 = weightnorm( mlp )
    print ' | ', w2

    for i in range( 1, nepoch ):

        # training ( selecting each batch in random order )
        for ib in np.random.permutation( nbatch ):
            ii = idxB[ib, :]
            mlp.train( XL[ii], labelL[ii], eta, mu, lam )

        # printing error rates etc.
        if ( i < 10 ) or ( i % 10 == 0 ):
        #if i % 10 == 0:
            mnLLL, erL = errorrate( mlp, XL, labelL, batchsize )
            mnLLV, erV = errorrate( mlp, XV, labelV, batchsize )
            print '%d | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, erL * 100, mnLLV, erV * 100 ),
            w2 = weightnorm( mlp )
            print ' | ', w2

    i = nepoch

    ##### setting the test data
    #
    mn = mnist.MNIST( 'T' )
    XT, labelT = gendat( mn )
    XT -= xm   # centred with the mean of the training data
    NT, D = XT.shape
    print '# NT = ', NT

    # the training & validation figures printed here are those of the last
    # evaluation inside the loop above
    mnLLT, erT = errorrate( mlp, XT, labelT, batchsize )
    print '%d | %.4f %.2f | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, erL * 100, mnLLV, erV * 100, mnLLT, erT * 100 )
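
##### usage sketch ( illustrative, not part of ex150904.py ) #####
#
# If run after the __main__ block above, the trained network can be used to
# predict labels for a few test images with the calls already used in errorrate()
# ( mlp.output & np.argmax ); the images must be centred with the same mean xm.

Z5 = mlp.output( XT[:5] )           # class scores of the first 5 test images, shape ( 5, K )
pred = np.argmax( Z5, axis = 1 )    # predicted digit labels
print pred, labelT[:5]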
##### result obtained with dropout (1) #####
# using nnet.T_update()
$ time THEANO_FLAGS='floatX=float32,device=gpu0' python ex150904.py
Using gpu device 0: Tesla K20c
# Layer 0 : Input ( 784 ) dropout = 0.8
# Layer 1 : ReLu ( 784 x 1000 ) dropout = 0.5
# Layer 2 : ReLu ( 1000 x 1000 ) dropout = 0.5
# Layer 3 : linear ( 1000 x 10 ) dropout = 1.0
### training: NL = 50000 NV = 10000 batchsize = 100
# eta = 0.1 mu = 0.9 lam = 0.0
0 | 2.3026 92.95 | 2.3027 93.31 | [ 9.98441756e-05 9.97784300e-05 9.98655232e-05]
1 | 0.1889 5.82 | 0.1785 5.55 | [ 0.00031995 0.00023307 0.01017031]
2 | 0.1050 3.28 | 0.1119 3.28 | [ 0.00050071 0.00033251 0.01328769]
3 | 0.0726 2.20 | 0.0860 2.78 | [ 0.00064703 0.00040984 0.01534916]
4 | 0.0556 1.74 | 0.0764 2.25 | [ 0.00077867 0.00048094 0.01686583]
5 | 0.0506 1.54 | 0.0750 2.21 | [ 0.00089908 0.00054449 0.01814652]
6 | 0.0390 1.19 | 0.0724 1.98 | [ 0.00100988 0.00060491 0.01935988]
7 | 0.0318 0.92 | 0.0671 1.94 | [ 0.00110785 0.0006594 0.02031165]
8 | 0.0264 0.78 | 0.0620 1.77 | [ 0.00120683 0.00071318 0.02122617]
9 | 0.0225 0.67 | 0.0609 1.82 | [ 0.00129922 0.0007626 0.02220863]
10 | 0.0228 0.68 | 0.0603 1.66 | [ 0.00138951 0.00081236 0.02310262]
20 | 0.0068 0.18 | 0.0550 1.43 | [ 0.00214829 0.00123889 0.02996662]
30 | 0.0035 0.10 | 0.0579 1.31 | [ 0.00275098 0.00158488 0.03502906]
40 | 0.0015 0.03 | 0.0523 1.17 | [ 0.00325752 0.0018803 0.03880851]
50 | 0.0010 0.02 | 0.0563 1.22 | [ 0.00370175 0.00214782 0.04204497]
60 | 0.0006 0.01 | 0.0581 1.30 | [ 0.00409431 0.0023857 0.04454408]
70 | 0.0004 0.01 | 0.0576 1.22 | [ 0.00444975 0.00260766 0.04707579]
80 | 0.0005 0.01 | 0.0546 1.17 | [ 0.00479328 0.00281817 0.04927089]
90 | 0.0003 0.01 | 0.0594 1.18 | [ 0.0051061 0.00301716 0.05107509]
# NT = 10000
100 | 0.0003 0.01 | 0.0594 1.18 | 0.0513 1.12
real 4m26.345s
user 3m14.279s
sys 1m11.698s
seed = 1 100 | 0.0003 0.01 | 0.0594 1.18 | 0.0513 1.12 real 4m13.466s
seed = 2 100 | 0.0003 0.01 | 0.0588 1.10 | 0.0507 1.21 real 4m14.241s
seed = 3 100 | 0.0002 0.00 | 0.0642 1.23 | 0.0471 1.16 real 4m15.080s
##### result obtained with dropout (2) #####
# using nnet.T_updateMasked()
$ time THEANO_FLAGS='floatX=float32,device=gpu0' python ex150904.py
Using gpu device 0: Tesla K20c
# Layer 0 : Input ( 784 ) dropout = 0.8
# Layer 1 : ReLu ( 784 x 1000 ) dropout = 0.5
# Layer 2 : ReLu ( 1000 x 1000 ) dropout = 0.5
# Layer 3 : linear ( 1000 x 10 ) dropout = 1.0
### training: NL = 50000 NV = 10000 batchsize = 100
# eta = 0.1 mu = 0.9 lam = 0.0
0 | 2.3026 92.95 | 2.3027 93.31 | [ 9.98441756e-05 9.97784300e-05 9.98655232e-05]
1 | 0.1887 5.77 | 0.1760 5.37 | [ 0.00031389 0.00022934 0.01001292]
2 | 0.1034 3.18 | 0.1099 3.24 | [ 0.00049193 0.0003279 0.01334954]
3 | 0.0735 2.21 | 0.0865 2.66 | [ 0.00063866 0.00040622 0.01552608]
4 | 0.0554 1.68 | 0.0779 2.42 | [ 0.0007723 0.00047857 0.0171594 ]
5 | 0.0457 1.41 | 0.0744 2.18 | [ 0.00089417 0.00054387 0.01857153]
6 | 0.0383 1.14 | 0.0716 1.95 | [ 0.00100706 0.00060475 0.01974435]
7 | 0.0303 0.92 | 0.0684 1.88 | [ 0.00110892 0.0006606 0.02077041]
8 | 0.0252 0.71 | 0.0613 1.75 | [ 0.00120774 0.00071534 0.02166135]
9 | 0.0210 0.65 | 0.0616 1.75 | [ 0.00130167 0.00076647 0.02272711]
10 | 0.0209 0.62 | 0.0619 1.81 | [ 0.0013926 0.00081686 0.02351622]
20 | 0.0066 0.17 | 0.0549 1.44 | [ 0.0021621 0.0012522 0.0304906]
30 | 0.0029 0.07 | 0.0577 1.37 | [ 0.00277483 0.00160787 0.03551942]
40 | 0.0016 0.04 | 0.0560 1.31 | [ 0.00327066 0.00190542 0.03951812]
50 | 0.0010 0.02 | 0.0597 1.23 | [ 0.00370507 0.00217238 0.0427468 ]
60 | 0.0006 0.00 | 0.0568 1.23 | [ 0.00409685 0.00240636 0.04532112]
70 | 0.0003 0.00 | 0.0588 1.24 | [ 0.00445958 0.00262472 0.0475995 ]
80 | 0.0003 0.00 | 0.0575 1.19 | [ 0.00479193 0.00283209 0.04952905]
90 | 0.0003 0.00 | 0.0636 1.25 | [ 0.00509129 0.00302375 0.05154153]
# NT = 10000
100 | 0.0003 0.00 | 0.0636 1.25 | 0.0510 1.13
real 4m41.910s
user 3m29.095s
sys 1m12.510s
##### result obtained with dropout (3) #####
# using nnet.T_updateMasked2()
$ time THEANO_FLAGS='floatX=float32,device=gpu0' python ex150904.py
Using gpu device 0: Tesla K20c
# Layer 0 : Input ( 784 ) dropout = 0.8
# Layer 1 : ReLu ( 784 x 1000 ) dropout = 0.5
# Layer 2 : ReLu ( 1000 x 1000 ) dropout = 0.5
# Layer 3 : linear ( 1000 x 10 ) dropout = 1.0
### training: NL = 50000 NV = 10000 batchsize = 100
# eta = 0.1 mu = 0.9 lam = 0.0
0 | 2.3026 92.95 | 2.3027 93.31 | [ 9.98441756e-05 9.97784300e-05 9.98655232e-05]
1 | 0.1917 5.88 | 0.1790 5.46 | [ 0.00030902 0.00022561 0.0098097 ]
2 | 0.1047 3.22 | 0.1111 3.20 | [ 0.00048939 0.00032688 0.0132591 ]
3 | 0.0725 2.18 | 0.0857 2.61 | [ 0.00063783 0.0004066 0.01556546]
4 | 0.0568 1.78 | 0.0790 2.36 | [ 0.00077272 0.00047931 0.01716605]
5 | 0.0457 1.44 | 0.0717 2.08 | [ 0.00089393 0.00054522 0.0186321 ]
6 | 0.0377 1.10 | 0.0682 1.90 | [ 0.00100142 0.00060648 0.01988898]
7 | 0.0305 0.93 | 0.0654 1.85 | [ 0.00110259 0.00066296 0.02094646]
8 | 0.0256 0.74 | 0.0613 1.73 | [ 0.00120326 0.00071845 0.02190425]
9 | 0.0213 0.64 | 0.0577 1.61 | [ 0.00129752 0.00077054 0.0229095 ]
10 | 0.0196 0.56 | 0.0569 1.68 | [ 0.00138678 0.00082089 0.02370521]
20 | 0.0064 0.17 | 0.0547 1.36 | [ 0.00213722 0.00125381 0.03101237]
30 | 0.0028 0.05 | 0.0535 1.31 | [ 0.00274978 0.00161166 0.03611655]
40 | 0.0015 0.02 | 0.0529 1.26 | [ 0.00325037 0.00191021 0.04013891]
50 | 0.0009 0.02 | 0.0552 1.27 | [ 0.00368539 0.00217701 0.04348322]
60 | 0.0005 0.01 | 0.0559 1.17 | [ 0.00408166 0.00241876 0.0460985 ]
70 | 0.0004 0.00 | 0.0575 1.28 | [ 0.0044459 0.00264775 0.04849945]
80 | 0.0004 0.01 | 0.0542 1.16 | [ 0.00477751 0.00285784 0.05047518]
90 | 0.0002 0.00 | 0.0619 1.28 | [ 0.00508282 0.00305433 0.05214791]
# NT = 10000
100 | 0.0002 0.00 | 0.0619 1.28 | 0.0521 1.24
real 12m16.551s
user 10m29.532s
sys 1m45.592s
##### result obtained without dropout #####
$ time THEANO_FLAGS='floatX=float32,device=gpu0' python ex150904.py
Using gpu device 0: Tesla K20c
# Layer 0 : Input ( 784 ) dropout = 1.0
# Layer 1 : ReLu ( 784 x 1000 ) dropout = 1.0
# Layer 2 : ReLu ( 1000 x 1000 ) dropout = 1.0
# Layer 3 : linear ( 1000 x 10 ) dropout = 1.0
### training: NL = 50000 NV = 10000 batchsize = 100
# eta = 0.1 mu = 0.9 lam = 0.0
0 | 2.3028 92.95 | 2.3029 93.31 | [ 9.98441756e-05 9.97784300e-05 9.98655232e-05]
1 | 0.0933 2.85 | 0.1146 3.38 | [ 0.00026057 0.00015942 0.00438416]
2 | 0.0518 1.67 | 0.0894 2.44 | [ 0.00035918 0.00019641 0.00536437]
3 | 0.0351 1.15 | 0.0822 2.33 | [ 0.00043118 0.00022391 0.00620047]
4 | 0.0250 0.81 | 0.0894 2.47 | [ 0.00049831 0.00024913 0.00690446]
5 | 0.0203 0.70 | 0.0845 2.19 | [ 0.00055695 0.00027102 0.00753349]
6 | 0.0265 0.88 | 0.1023 2.47 | [ 0.00060401 0.00028876 0.00813732]
7 | 0.0162 0.58 | 0.1086 2.43 | [ 0.00065404 0.00030656 0.00857737]
8 | 0.0059 0.19 | 0.0948 2.01 | [ 0.00069271 0.00032126 0.00907713]
9 | 0.0032 0.08 | 0.0832 1.85 | [ 0.00071972 0.00033147 0.00940243]
10 | 0.0028 0.06 | 0.0886 1.74 | [ 0.00074086 0.00033896 0.00970113]
20 | 0.0000 0.00 | 0.0870 1.51 | [ 0.00076904 0.0003519 0.01059568]
30 | 0.0000 0.00 | 0.0902 1.45 | [ 0.00077228 0.00035419 0.01083231]
40 | 0.0000 0.00 | 0.0925 1.45 | [ 0.00077451 0.00035576 0.01099472]
# NT = 10000
50 | 0.0000 0.00 | 0.0925 1.45 | 0.0857 1.46
real 2m14.636s
user 1m35.877s
sys 0m38.879s
seed = 1 50 | 0.0000 0.00 | 0.0925 1.45 | 0.0857 1.46 real 2m2.908s
seed = 2 50 | 0.0000 0.00 | 0.0949 1.45 | 0.0834 1.41 real 2m2.661s
seed = 3 50 | 0.0000 0.00 | 0.1022 1.55 | 0.0882 1.47 real 2m2.639s
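
##### summary #####
# Reading off the final lines above: with dropout, T_update() and T_updateMasked()
# reach a test error rate of about 1.1 - 1.2 % after 100 epochs in roughly 4 - 5 min
# on the K20c, and T_updateMasked2() reaches a similar 1.24 % but takes about 12 min.
# Without dropout the training error falls to 0 % while the test error stays around
# 1.4 - 1.5 % after 50 epochs ( about 2 min ).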