Create a gist now

Instantly share code, notes, and snippets.

@takatakamanbou /00readme.md Secret
Last active Sep 7, 2015

Embed
import numpy as np
import scipy as sp
import mnist0118 as mnist
import nnet150903 as nnet
def gendat( mnist ):
    """Extract the dataset from a MNIST loader as (X, label) arrays.

    Parameters
    ----------
    mnist : object exposing getImage() and getLabel() (see mnist0118).

    Returns
    -------
    X : float32 array of pixel values scaled into [0, 1].
    label : int32 array of class labels.
    """
    label = np.asarray( mnist.getLabel(), dtype = np.int32 )
    # NOTE: divide by the float 255.0, not the int 255.  Under Python 2,
    # an integer-typed image array (e.g. uint8) divided by an int is
    # floor-divided, collapsing pixels to {0, 1} instead of [0, 1].
    X = np.array( mnist.getImage() / 255.0, dtype = np.float32 )  # => in [0,1]
    return X, label
def MLP2( D, H1, K, dropout = False ):
    """Build a one-hidden-layer MLP: D inputs -> H1 ReLu -> K linear outputs.

    When dropout is True, the input layer keeps units with probability 0.8
    and the hidden layer with 0.5; the output layer never drops units.
    """
    keep = [0.8, 0.5, 1.0] if dropout else [1.0, 1.0, 1.0]
    layers = [
        nnet.InputLayer( D, dropout = keep[0] ),
        nnet.Layer( D, H1, 'ReLu', withBias = True, Wini = 0.01, dropout = keep[1] ),
        nnet.Layer( H1, K, 'linear', withBias = True, Wini = 0.01, dropout = keep[2] ),
    ]
    return nnet.MLP( layers )
def MLP3( D, H1, H2, K, dropout = False ):
    """Build a two-hidden-layer MLP: D inputs -> H1 ReLu -> H2 ReLu -> K linear.

    When dropout is True, the input layer keeps units with probability 0.8
    and each hidden layer with 0.5; the output layer never drops units.
    """
    keep = [0.8, 0.5, 0.5, 1.0] if dropout else [1.0, 1.0, 1.0, 1.0]
    layers = [
        nnet.InputLayer( D, dropout = keep[0] ),
        nnet.Layer( D, H1, 'ReLu', withBias = True, Wini = 0.01, dropout = keep[1] ),
        nnet.Layer( H1, H2, 'ReLu', withBias = True, Wini = 0.01, dropout = keep[2] ),
        nnet.Layer( H2, K, 'linear', withBias = True, Wini = 0.01, dropout = keep[3] ),
    ]
    return nnet.MLP( layers )
# mini-batch index masks for stochastic gradient training
def makebatchindex( N, batchsize ):
    """Randomly partition N sample indices into mini-batch boolean masks.

    Returns a (nbatch, N) boolean array whose row ib selects the samples
    of mini-batch ib; the final row may select fewer than batchsize samples.
    """
    perm = np.random.permutation( N )
    nbatch = int( np.ceil( float( N ) / batchsize ) )
    masks = np.zeros( ( nbatch, N ), dtype = bool )
    for ib in range( nbatch ):
        # slicing past the end of perm clamps, so the (possibly short)
        # last batch needs no special case
        masks[ib, perm[ib * batchsize:(ib + 1) * batchsize]] = True
    return masks
def errorrate( mlp, X, label, batchsize ):
    """Evaluate mlp on (X, label) in mini-batches of batchsize rows.

    Returns (mean cost per sample, misclassification rate), where a sample
    counts as wrong when its label differs from argmax of the net output.
    """
    N = X.shape[0]
    nbatch = int( np.ceil( float( N ) / batchsize ) )
    total_cost = 0.0
    nwrong = 0
    for ib in range( nbatch ):
        # min(...) clamps the final batch, which may be short
        ii = np.arange( ib * batchsize, min( ( ib + 1 ) * batchsize, N ) )
        Z = mlp.output( X[ii] )
        # mlp.cost returns a per-sample mean; re-weight by batch size
        total_cost += mlp.cost( Z, label[ii] ) * ii.shape[0]
        nwrong += np.sum( label[ii] != np.argmax( Z, axis = 1 ) )
    return total_cost / N, float( nwrong ) / N
def weightnorm( mlp ):
    """Return the mean squared weight of each non-input layer of mlp.

    One entry per layer in mlp.Layers[1:].  For layers with a bias,
    getWeight() packs (W, b) and only the weight matrix W is measured.
    """
    norms = []
    for layer in mlp.Layers[1:]:
        Wb = layer.getWeight()
        W = Wb[0] if layer.withBias else Wb
        norms.append( np.mean( np.square( W ) ) )
    return np.asarray( norms )
if __name__ == "__main__":

    # Fixed seed so a run is reproducible (the gist's logs quote seeds 1-3).
    np.random.seed( 1 )

    ##### setting the training data & the validation data
    #
    # NOTE(review): 'L' presumably selects the learning (training) split of
    # mnist0118.MNIST, by contrast with 'T' used for the test split below
    # -- confirm against mnist0118.
    mn = mnist.MNIST( 'L' )
    K = mn.nclass
    X, label = gendat( mn )
    # center the data with the per-pixel mean; xm is reused on the test set
    xm = np.mean( X, axis = 0 )
    X -= xm
    # first 50000 samples for training, the remainder for validation
    XL, labelL = X[:50000], label[:50000]
    XV, labelV = X[50000:], label[50000:]
    NL, D = XL.shape
    NV, D = XV.shape

    ##### mini batch indices for stochastic gradient ascent
    #
    batchsize = 100
    idxB = makebatchindex( NL, batchsize )
    nbatch = idxB.shape[0]

    ##### initializing
    #
    #mlp = MLP2( D, 1000, K )
    mlp = MLP3( D, 1000, 1000, K, dropout = True )
    #mlp = MLP3( D, 2000, 2000, K )

    # print the architecture, one line per layer
    for i, layer in enumerate( mlp.Layers ):
        if i == 0:
            print '# Layer 0 : Input (', layer.Din, ') dropout = ', layer.dropout
        else:
            print '# Layer', i, ':', layer.afunc, '(', layer.Din, 'x', layer.Nunit, ') dropout = ', layer.dropout

    ##### training
    #
    # eta: learning rate, mu: momentum, lam: regularization coefficient
    # (passed straight to mlp.train)
    eta, mu, lam = 0.1, 0.9, 0.0
    nepoch = 100

    print '### training: NL = ', NL, 'NV = ', NV, ' batchsize = ', batchsize
    print '# eta = ', eta, 'mu = ', mu, 'lam = ', lam

    # epoch 0: error rates of the untrained net
    i = 0
    mnLLL, erL = errorrate( mlp, XL, labelL, batchsize )
    mnLLV, erV = errorrate( mlp, XV, labelV, batchsize )
    # trailing comma keeps the weight norms on the same output line
    print '%d | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, erL * 100, mnLLV, erV * 100 ),
    w2 = weightnorm( mlp )
    print ' | ', w2

    for i in range( 1, nepoch ):

        # training (selecting each batch in random order)
        for ib in np.random.permutation( nbatch ):
            ii = idxB[ib, :]
            mlp.train( XL[ii], labelL[ii], eta, mu, lam )

        # printing error rates etc. (every epoch for the first 10, then
        # every 10th epoch)
        if ( i < 10 ) or ( i % 10 == 0 ):
        #if i % 10 == 0:
            mnLLL, erL = errorrate( mlp, XL, labelL, batchsize )
            mnLLV, erV = errorrate( mlp, XV, labelV, batchsize )
            print '%d | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, erL * 100, mnLLV, erV * 100 ),
            w2 = weightnorm( mlp )
            print ' | ', w2

    # the loop above stops at nepoch - 1; report the final figures as nepoch
    i = nepoch

    ##### setting the test data
    #
    mn = mnist.MNIST( 'T' )
    XT, labelT = gendat( mn )
    # center with the TRAINING mean, not the test set's own mean
    XT -= xm
    NT, D = XT.shape
    print '# NT = ', NT
    mnLLT, erT = errorrate( mlp, XT, labelT, batchsize )
    print '%d | %.4f %.2f | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, erL * 100, mnLLV, erV * 100, mnLLT, erT * 100 )
##### result obtained with dropout (1) #####
# using nnet.T_update()
$ time THEANO_FLAGS='floatX=float32,device=gpu0' python ex150904.py
Using gpu device 0: Tesla K20c
# Layer 0 : Input ( 784 ) dropout = 0.8
# Layer 1 : ReLu ( 784 x 1000 ) dropout = 0.5
# Layer 2 : ReLu ( 1000 x 1000 ) dropout = 0.5
# Layer 3 : linear ( 1000 x 10 ) dropout = 1.0
### training: NL = 50000 NV = 10000 batchsize = 100
# eta = 0.1 mu = 0.9 lam = 0.0
0 | 2.3026 92.95 | 2.3027 93.31 | [ 9.98441756e-05 9.97784300e-05 9.98655232e-05]
1 | 0.1889 5.82 | 0.1785 5.55 | [ 0.00031995 0.00023307 0.01017031]
2 | 0.1050 3.28 | 0.1119 3.28 | [ 0.00050071 0.00033251 0.01328769]
3 | 0.0726 2.20 | 0.0860 2.78 | [ 0.00064703 0.00040984 0.01534916]
4 | 0.0556 1.74 | 0.0764 2.25 | [ 0.00077867 0.00048094 0.01686583]
5 | 0.0506 1.54 | 0.0750 2.21 | [ 0.00089908 0.00054449 0.01814652]
6 | 0.0390 1.19 | 0.0724 1.98 | [ 0.00100988 0.00060491 0.01935988]
7 | 0.0318 0.92 | 0.0671 1.94 | [ 0.00110785 0.0006594 0.02031165]
8 | 0.0264 0.78 | 0.0620 1.77 | [ 0.00120683 0.00071318 0.02122617]
9 | 0.0225 0.67 | 0.0609 1.82 | [ 0.00129922 0.0007626 0.02220863]
10 | 0.0228 0.68 | 0.0603 1.66 | [ 0.00138951 0.00081236 0.02310262]
20 | 0.0068 0.18 | 0.0550 1.43 | [ 0.00214829 0.00123889 0.02996662]
30 | 0.0035 0.10 | 0.0579 1.31 | [ 0.00275098 0.00158488 0.03502906]
40 | 0.0015 0.03 | 0.0523 1.17 | [ 0.00325752 0.0018803 0.03880851]
50 | 0.0010 0.02 | 0.0563 1.22 | [ 0.00370175 0.00214782 0.04204497]
60 | 0.0006 0.01 | 0.0581 1.30 | [ 0.00409431 0.0023857 0.04454408]
70 | 0.0004 0.01 | 0.0576 1.22 | [ 0.00444975 0.00260766 0.04707579]
80 | 0.0005 0.01 | 0.0546 1.17 | [ 0.00479328 0.00281817 0.04927089]
90 | 0.0003 0.01 | 0.0594 1.18 | [ 0.0051061 0.00301716 0.05107509]
# NT = 10000
100 | 0.0003 0.01 | 0.0594 1.18 | 0.0513 1.12
real 4m26.345s
user 3m14.279s
sys 1m11.698s
seed = 1 100 | 0.0003 0.01 | 0.0594 1.18 | 0.0513 1.12 real 4m13.466s
seed = 2 100 | 0.0003 0.01 | 0.0588 1.10 | 0.0507 1.21 real 4m14.241s
seed = 3 100 | 0.0002 0.00 | 0.0642 1.23 | 0.0471 1.16 real 4m15.080s
##### result obtained with dropout (2) #####
# using nnet.T_updateMasked()
$ time THEANO_FLAGS='floatX=float32,device=gpu0' python ex150904.py
Using gpu device 0: Tesla K20c
# Layer 0 : Input ( 784 ) dropout = 0.8
# Layer 1 : ReLu ( 784 x 1000 ) dropout = 0.5
# Layer 2 : ReLu ( 1000 x 1000 ) dropout = 0.5
# Layer 3 : linear ( 1000 x 10 ) dropout = 1.0
### training: NL = 50000 NV = 10000 batchsize = 100
# eta = 0.1 mu = 0.9 lam = 0.0
0 | 2.3026 92.95 | 2.3027 93.31 | [ 9.98441756e-05 9.97784300e-05 9.98655232e-05]
1 | 0.1887 5.77 | 0.1760 5.37 | [ 0.00031389 0.00022934 0.01001292]
2 | 0.1034 3.18 | 0.1099 3.24 | [ 0.00049193 0.0003279 0.01334954]
3 | 0.0735 2.21 | 0.0865 2.66 | [ 0.00063866 0.00040622 0.01552608]
4 | 0.0554 1.68 | 0.0779 2.42 | [ 0.0007723 0.00047857 0.0171594 ]
5 | 0.0457 1.41 | 0.0744 2.18 | [ 0.00089417 0.00054387 0.01857153]
6 | 0.0383 1.14 | 0.0716 1.95 | [ 0.00100706 0.00060475 0.01974435]
7 | 0.0303 0.92 | 0.0684 1.88 | [ 0.00110892 0.0006606 0.02077041]
8 | 0.0252 0.71 | 0.0613 1.75 | [ 0.00120774 0.00071534 0.02166135]
9 | 0.0210 0.65 | 0.0616 1.75 | [ 0.00130167 0.00076647 0.02272711]
10 | 0.0209 0.62 | 0.0619 1.81 | [ 0.0013926 0.00081686 0.02351622]
20 | 0.0066 0.17 | 0.0549 1.44 | [ 0.0021621 0.0012522 0.0304906]
30 | 0.0029 0.07 | 0.0577 1.37 | [ 0.00277483 0.00160787 0.03551942]
40 | 0.0016 0.04 | 0.0560 1.31 | [ 0.00327066 0.00190542 0.03951812]
50 | 0.0010 0.02 | 0.0597 1.23 | [ 0.00370507 0.00217238 0.0427468 ]
60 | 0.0006 0.00 | 0.0568 1.23 | [ 0.00409685 0.00240636 0.04532112]
70 | 0.0003 0.00 | 0.0588 1.24 | [ 0.00445958 0.00262472 0.0475995 ]
80 | 0.0003 0.00 | 0.0575 1.19 | [ 0.00479193 0.00283209 0.04952905]
90 | 0.0003 0.00 | 0.0636 1.25 | [ 0.00509129 0.00302375 0.05154153]
# NT = 10000
100 | 0.0003 0.00 | 0.0636 1.25 | 0.0510 1.13
real 4m41.910s
user 3m29.095s
sys 1m12.510s
##### result obtained with dropout (3) #####
# using nnet.T_updateMasked2()
$ time THEANO_FLAGS='floatX=float32,device=gpu0' python ex150904.py
Using gpu device 0: Tesla K20c
# Layer 0 : Input ( 784 ) dropout = 0.8
# Layer 1 : ReLu ( 784 x 1000 ) dropout = 0.5
# Layer 2 : ReLu ( 1000 x 1000 ) dropout = 0.5
# Layer 3 : linear ( 1000 x 10 ) dropout = 1.0
### training: NL = 50000 NV = 10000 batchsize = 100
# eta = 0.1 mu = 0.9 lam = 0.0
0 | 2.3026 92.95 | 2.3027 93.31 | [ 9.98441756e-05 9.97784300e-05 9.98655232e-05]
1 | 0.1917 5.88 | 0.1790 5.46 | [ 0.00030902 0.00022561 0.0098097 ]
2 | 0.1047 3.22 | 0.1111 3.20 | [ 0.00048939 0.00032688 0.0132591 ]
3 | 0.0725 2.18 | 0.0857 2.61 | [ 0.00063783 0.0004066 0.01556546]
4 | 0.0568 1.78 | 0.0790 2.36 | [ 0.00077272 0.00047931 0.01716605]
5 | 0.0457 1.44 | 0.0717 2.08 | [ 0.00089393 0.00054522 0.0186321 ]
6 | 0.0377 1.10 | 0.0682 1.90 | [ 0.00100142 0.00060648 0.01988898]
7 | 0.0305 0.93 | 0.0654 1.85 | [ 0.00110259 0.00066296 0.02094646]
8 | 0.0256 0.74 | 0.0613 1.73 | [ 0.00120326 0.00071845 0.02190425]
9 | 0.0213 0.64 | 0.0577 1.61 | [ 0.00129752 0.00077054 0.0229095 ]
10 | 0.0196 0.56 | 0.0569 1.68 | [ 0.00138678 0.00082089 0.02370521]
20 | 0.0064 0.17 | 0.0547 1.36 | [ 0.00213722 0.00125381 0.03101237]
30 | 0.0028 0.05 | 0.0535 1.31 | [ 0.00274978 0.00161166 0.03611655]
40 | 0.0015 0.02 | 0.0529 1.26 | [ 0.00325037 0.00191021 0.04013891]
50 | 0.0009 0.02 | 0.0552 1.27 | [ 0.00368539 0.00217701 0.04348322]
60 | 0.0005 0.01 | 0.0559 1.17 | [ 0.00408166 0.00241876 0.0460985 ]
70 | 0.0004 0.00 | 0.0575 1.28 | [ 0.0044459 0.00264775 0.04849945]
80 | 0.0004 0.01 | 0.0542 1.16 | [ 0.00477751 0.00285784 0.05047518]
90 | 0.0002 0.00 | 0.0619 1.28 | [ 0.00508282 0.00305433 0.05214791]
# NT = 10000
100 | 0.0002 0.00 | 0.0619 1.28 | 0.0521 1.24
real 12m16.551s
user 10m29.532s
sys 1m45.592s
##### result obtained without dropout #####
$ time THEANO_FLAGS='floatX=float32,device=gpu0' python ex150904.py
Using gpu device 0: Tesla K20c
# Layer 0 : Input ( 784 ) dropout = 1.0
# Layer 1 : ReLu ( 784 x 1000 ) dropout = 1.0
# Layer 2 : ReLu ( 1000 x 1000 ) dropout = 1.0
# Layer 3 : linear ( 1000 x 10 ) dropout = 1.0
### training: NL = 50000 NV = 10000 batchsize = 100
# eta = 0.1 mu = 0.9 lam = 0.0
0 | 2.3028 92.95 | 2.3029 93.31 | [ 9.98441756e-05 9.97784300e-05 9.98655232e-05]
1 | 0.0933 2.85 | 0.1146 3.38 | [ 0.00026057 0.00015942 0.00438416]
2 | 0.0518 1.67 | 0.0894 2.44 | [ 0.00035918 0.00019641 0.00536437]
3 | 0.0351 1.15 | 0.0822 2.33 | [ 0.00043118 0.00022391 0.00620047]
4 | 0.0250 0.81 | 0.0894 2.47 | [ 0.00049831 0.00024913 0.00690446]
5 | 0.0203 0.70 | 0.0845 2.19 | [ 0.00055695 0.00027102 0.00753349]
6 | 0.0265 0.88 | 0.1023 2.47 | [ 0.00060401 0.00028876 0.00813732]
7 | 0.0162 0.58 | 0.1086 2.43 | [ 0.00065404 0.00030656 0.00857737]
8 | 0.0059 0.19 | 0.0948 2.01 | [ 0.00069271 0.00032126 0.00907713]
9 | 0.0032 0.08 | 0.0832 1.85 | [ 0.00071972 0.00033147 0.00940243]
10 | 0.0028 0.06 | 0.0886 1.74 | [ 0.00074086 0.00033896 0.00970113]
20 | 0.0000 0.00 | 0.0870 1.51 | [ 0.00076904 0.0003519 0.01059568]
30 | 0.0000 0.00 | 0.0902 1.45 | [ 0.00077228 0.00035419 0.01083231]
40 | 0.0000 0.00 | 0.0925 1.45 | [ 0.00077451 0.00035576 0.01099472]
# NT = 10000
50 | 0.0000 0.00 | 0.0925 1.45 | 0.0857 1.46
real 2m14.636s
user 1m35.877s
sys 0m38.879s
seed = 1 50 | 0.0000 0.00 | 0.0925 1.45 | 0.0857 1.46 real 2m2.908s
seed = 2 50 | 0.0000 0.00 | 0.0949 1.45 | 0.0834 1.41 real 2m2.661s
seed = 3 50 | 0.0000 0.00 | 0.1022 1.55 | 0.0882 1.47 real 2m2.639s
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment