@takatakamanbou
Last active December 20, 2015 14:34
$ time THEANO_FLAGS='device=gpu1' python ex151220bn.py
Using gpu device 1: Tesla K20c
# Layer 0 : Input ( 784 ) dropout = 1.0
# Layer 1 : ReLu ( 784 x 1000 ) dropout = 1.0
# Layer 2 : ReLu ( 1000 x 1000 ) dropout = 1.0
# Layer 3 : ReLu ( 1000 x 1000 ) dropout = 1.0
# Layer 4 : ReLu ( 1000 x 1000 ) dropout = 1.0
# Layer 5 : ReLu ( 1000 x 1000 ) dropout = 1.0
# Layer 6 : ReLu ( 1000 x 1000 ) dropout = 1.0
# Layer 7 : ReLu ( 1000 x 1000 ) dropout = 1.0
# Layer 8 : ReLu ( 1000 x 1000 ) dropout = 1.0
# Layer 9 : ReLu ( 1000 x 1000 ) dropout = 1.0
# Layer 10 : ReLu ( 1000 x 1000 ) dropout = 1.0
# Layer 11 : ReLu ( 1000 x 1000 ) dropout = 1.0
# Layer 12 : ReLu ( 1000 x 1000 ) dropout = 1.0
# Layer 13 : ReLu ( 1000 x 1000 ) dropout = 1.0
# Layer 14 : ReLu ( 1000 x 1000 ) dropout = 1.0
# Layer 15 : linear ( 1000 x 10 ) dropout = 1.0
### training: NL = 50000 NV = 10000 batchsize = 128
# eta = 0.1 mu = 0.9 lam = 0.0
0 | 2.3026 90.14 | 2.3026 90.09
1 | 0.3051 5.57 | 0.3116 5.70 | eta = 0.1
2 | 0.0900 2.54 | 0.1193 3.34 | eta = 0.1
3 | 0.0621 1.75 | 0.0958 2.75 | eta = 0.1
4 | 0.0403 1.16 | 0.0829 2.15 | eta = 0.1
5 | 0.0429 1.12 | 0.0882 2.48 | eta = 0.1
6 | 0.0324 0.90 | 0.0865 2.34 | eta = 0.1
7 | 0.0283 0.72 | 0.0778 2.26 | eta = 0.1
8 | 0.0249 0.65 | 0.0811 2.09 | eta = 0.1
9 | 0.0164 0.42 | 0.0690 1.83 | eta = 0.1
10 | 0.0185 0.47 | 0.0785 2.16 | eta = 0.1
20 | 0.0046 0.07 | 0.0683 1.66 | eta = 0.1
30 | 0.0030 0.04 | 0.0626 1.45 | eta = 0.1
40 | 0.0016 0.03 | 0.0715 1.61 | eta = 0.1
# NT = 10000
50 | 0.0016 0.03 | 0.0715 1.61 | 0.0690 1.58
real 19m1.669s
user 11m13.904s
sys 7m23.208s
$ time THEANO_FLAGS='device=gpu1' python ex151220bn.py
Using gpu device 1: Tesla K20c
# Layer 0 : Input ( 784 ) dropout = 1.0
# Layer 1 : ReLu ( 784 x 1000 ) dropout = 1.0
# Layer 2 : ReLu ( 1000 x 1000 ) dropout = 1.0
# Layer 3 : linear ( 1000 x 10 ) dropout = 1.0
### training: NL = 50000 NV = 10000 batchsize = 128
# eta = 0.1 mu = 0.9 lam = 0.0
0 | 2.3023 90.29 | 2.3023 89.84
1 | 0.1206 3.75 | 0.1412 4.27 | eta = 0.1
2 | 0.0515 1.47 | 0.0815 2.47 | eta = 0.1
3 | 0.0348 0.98 | 0.0727 2.17 | eta = 0.1
4 | 0.0280 0.81 | 0.0748 2.25 | eta = 0.1
5 | 0.0190 0.54 | 0.0693 1.91 | eta = 0.1
6 | 0.0118 0.28 | 0.0591 1.65 | eta = 0.1
7 | 0.0097 0.26 | 0.0584 1.58 | eta = 0.1
8 | 0.0102 0.27 | 0.0640 1.89 | eta = 0.1
9 | 0.0050 0.10 | 0.0562 1.55 | eta = 0.1
10 | 0.0043 0.08 | 0.0568 1.55 | eta = 0.1
20 | 0.0009 0.02 | 0.0632 1.47 | eta = 0.1
30 | 0.0001 0.00 | 0.0510 1.20 | eta = 0.1
40 | 0.0001 0.00 | 0.0537 1.19 | eta = 0.1
# NT = 10000
50 | 0.0001 0.00 | 0.0537 1.19 | 0.0520 1.31
real 3m9.765s
user 2m9.004s
sys 0m57.476s
$ time THEANO_FLAGS='device=gpu1' python ex151220.py
Using gpu device 1: Tesla K20c
# Layer 0 : Input ( 784 ) dropout = 1.0
# Layer 1 : ReLu ( 784 x 1000 ) dropout = 1.0
# Layer 2 : ReLu ( 1000 x 1000 ) dropout = 1.0
# Layer 3 : ReLu ( 1000 x 1000 ) dropout = 1.0
# Layer 4 : ReLu ( 1000 x 1000 ) dropout = 1.0
# Layer 5 : ReLu ( 1000 x 1000 ) dropout = 1.0
# Layer 6 : ReLu ( 1000 x 1000 ) dropout = 1.0
# Layer 7 : ReLu ( 1000 x 1000 ) dropout = 1.0
# Layer 8 : ReLu ( 1000 x 1000 ) dropout = 1.0
# Layer 9 : ReLu ( 1000 x 1000 ) dropout = 1.0
# Layer 10 : ReLu ( 1000 x 1000 ) dropout = 1.0
# Layer 11 : ReLu ( 1000 x 1000 ) dropout = 1.0
# Layer 12 : ReLu ( 1000 x 1000 ) dropout = 1.0
# Layer 13 : ReLu ( 1000 x 1000 ) dropout = 1.0
# Layer 14 : ReLu ( 1000 x 1000 ) dropout = 1.0
# Layer 15 : linear ( 1000 x 10 ) dropout = 1.0
### training: NL = 50000 NV = 10000 batchsize = 128
# eta = 0.1 mu = 0.9 lam = 0.0
0 | 2.3026 90.14 | 2.3026 90.09
1 | 2.3030 88.64 | 2.3046 89.36 | eta = 0.1
2 | 2.3028 90.06 | 2.3039 90.10 | eta = 0.1
3 | 2.3028 90.32 | 2.3032 89.91 | eta = 0.1
4 | 2.3044 88.64 | 2.3062 89.36 | eta = 0.1
5 | 2.3020 88.64 | 2.3035 89.36 | eta = 0.1
6 | 2.3029 88.64 | 2.3034 89.36 | eta = 0.1
7 | 2.3031 88.64 | 2.3039 89.36 | eta = 0.1
8 | 2.3021 88.64 | 2.3033 89.36 | eta = 0.1
9 | 2.3035 88.64 | 2.3060 89.36 | eta = 0.1
10 | 2.3031 88.64 | 2.3051 89.36 | eta = 0.1
20 | 2.3016 88.64 | 2.3031 89.36 | eta = 0.1
30 | 2.3021 88.64 | 2.3034 89.36 | eta = 0.1
40 | 2.3032 88.64 | 2.3038 89.36 | eta = 0.1
# NT = 10000
50 | 2.3032 88.64 | 2.3038 89.36 | 2.3029 88.65
real 9m6.693s
user 5m45.972s
sys 3m10.120s
$ time THEANO_FLAGS='device=gpu1' python ex151220.py
Using gpu device 1: Tesla K20c
# Layer 0 : Input ( 784 ) dropout = 1.0
# Layer 1 : ReLu ( 784 x 1000 ) dropout = 1.0
# Layer 2 : ReLu ( 1000 x 1000 ) dropout = 1.0
# Layer 3 : linear ( 1000 x 10 ) dropout = 1.0
### training: NL = 50000 NV = 10000 batchsize = 128
# eta = 0.1 mu = 0.9 lam = 0.0
0 | 2.3023 90.29 | 2.3023 89.84
1 | 0.1007 3.06 | 0.1128 3.31 | eta = 0.1
2 | 0.0617 1.83 | 0.0901 2.75 | eta = 0.1
3 | 0.0428 1.29 | 0.0842 2.49 | eta = 0.1
4 | 0.0302 0.88 | 0.0861 2.19 | eta = 0.1
5 | 0.0237 0.68 | 0.0834 2.13 | eta = 0.1
6 | 0.0140 0.42 | 0.0775 1.95 | eta = 0.1
7 | 0.0137 0.43 | 0.0866 2.03 | eta = 0.1
8 | 0.0099 0.32 | 0.0869 2.11 | eta = 0.1
9 | 0.0049 0.15 | 0.0806 1.78 | eta = 0.1
10 | 0.0024 0.05 | 0.0798 1.73 | eta = 0.1
20 | 0.0001 0.00 | 0.0869 1.62 | eta = 0.1
30 | 0.0000 0.00 | 0.0905 1.61 | eta = 0.1
40 | 0.0000 0.00 | 0.0929 1.62 | eta = 0.1
# NT = 10000
50 | 0.0000 0.00 | 0.0929 1.62 | 0.0862 1.56
real 2m16.864s
user 1m25.864s
sys 0m48.664s
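
For reference, the four runs above are, in order: ex151220bn.py (with batch normalization) with 14 hidden layers, ex151220bn.py with 2 hidden layers, ex151220.py (without batch normalization) with 14 hidden layers, and ex151220.py with 2 hidden layers. Each log row reads: epoch | training loss, training error rate (%) | validation loss, validation error rate (%) | eta; the final row (epoch 50) appends the test-set loss and error rate. The 14-hidden-layer network only moves off chance level (~90% error) when batch normalization is used.
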
# ex151220.py : the version without batch normalization (imports nnet151219)

import numpy as np
import scipy as sp
import mnist
import nnet151219 as nnet
import theano


def gendat( mnist, LT ):

    label = np.asarray( mnist.getLabel( LT ), dtype = np.int32 )
    X = np.array( mnist.getImage( LT ) / 255, dtype = np.float32 )   # => in [0,1]

    return X, label


def MLP( D, H, K, nHiddenLayer, dropout = [ 1.0, 1.0, 1.0 ] ):

    rng = nnet.randomstreams( 0 )
    Layers = []
    Layers.append( nnet.InputLayer( D, rng = rng, dropout = dropout[0] ) )
    Layers.append( nnet.Layer( D, H, 'ReLu', rng = rng, withBias = True, Wini = 0.01, dropout = dropout[1] ) )
    for il in range( 1, nHiddenLayer ):
        Layers.append( nnet.Layer( H, H, 'ReLu', rng = rng, withBias = True, Wini = 0.01, dropout = dropout[1] ) )
    Layers.append( nnet.Layer( H, K, 'linear', rng = rng, withBias = True, Wini = 0.01, dropout = dropout[2] ) )
    mlp = nnet.MLP( Layers )

    return mlp


def errorrate( mlp, X, label, batchsize ):

    N = X.shape[0]
    nbatch = int( np.ceil( float( N ) / batchsize ) )
    LL = 0.0
    cnt = 0

    # all minibatches except the last one
    for ib in range( nbatch - 1 ):
        ii = np.arange( ib*batchsize, (ib+1)*batchsize )
        Z = mlp.output( X[ii] )
        LL += mlp.cost( Z, label[ii] ) * ii.shape[0]
        cnt += np.sum( label[ii] != np.argmax( Z, axis = 1 ) )

    # the last (possibly smaller) minibatch
    ib = nbatch - 1
    ii = np.arange( ib*batchsize, N )
    Z = mlp.output( X[ii] )
    LL += mlp.cost( Z, label[ii] ) * ii.shape[0]
    cnt += np.sum( label[ii] != np.argmax( Z, axis = 1 ) )

    return LL / N, float( cnt ) / N


if __name__ == "__main__":

    theano.config.floatX = 'float32'
    theano.config.fastmath = True
    np.random.seed( 1 )

    ##### setting the training data & the validation data
    #
    mn = mnist.MNIST( pathMNIST = '../150117-mnist' )
    K = mn.nclass
    X, label = gendat( mn, 'L' )
    xm = np.mean( X, axis = 0 )
    X -= xm
    XL, labelL = X[:50000], label[:50000]
    XV, labelV = X[50000:], label[50000:]
    NL, D = XL.shape
    NV, D = XV.shape

    ##### initializing
    #
    mlp = MLP( D, 1000, K, 14, dropout = [ 1.0, 1.0, 1.0 ] )
    for i, layer in enumerate( mlp.Layers ):
        if i == 0:
            print '# Layer 0 : Input (', layer.Din, ') dropout = ', layer.dropout
        else:
            print '# Layer', i, ':', layer.afunc, '(', layer.Din, 'x', layer.Nunit, ') dropout = ', layer.dropout

    ##### training
    #
    batchsize = 128
    nbatch = NL / batchsize
    eta, mu, lam = 0.1, 0.9, 0.0
    etaR = 1.0
    nepoch = 50

    print '### training: NL = ', NL, 'NV = ', NV, ' batchsize = ', batchsize
    print '# eta = ', eta, 'mu = ', mu, 'lam = ', lam

    i = 0
    mnLLL, erL = errorrate( mlp, XL, labelL, batchsize )
    mnLLV, erV = errorrate( mlp, XV, labelV, batchsize )
    print '%d | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, erL * 100, mnLLV, erV * 100 )

    for i in range( 1, nepoch ):

        # training
        for ib in range( nbatch ):
            ii = np.random.randint( 0, NL, batchsize )
            mlp.train( XL[ii], labelL[ii], eta, mu, lam )

        # inference & printing error rates
        if ( i < 10 ) or ( i % 10 == 0 ):
            mnLLL, erL = errorrate( mlp, XL, labelL, batchsize )
            mnLLV, erV = errorrate( mlp, XV, labelV, batchsize )
            print '%d | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, erL * 100, mnLLV, erV * 100 ),
            print ' | eta =', eta

        eta *= etaR

    i = nepoch

    ##### setting the test data
    #
    XT, labelT = gendat( mn, 'T' )
    XT -= xm
    NT, D = XT.shape
    print '# NT = ', NT
    mnLLT, erT = errorrate( mlp, XT, labelT, batchsize )
    print '%d | %.4f %.2f | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, erL * 100, mnLLV, erV * 100, mnLLT, erT * 100 )
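
The layer implementation itself lives in nnet151219, which is not part of this gist. As a rough, hypothetical sketch of what each fully connected layer above is assumed to compute (an affine map followed by ReLU for the hidden layers, identity for the 'linear' output layer, with the softmax cross-entropy presumably applied inside mlp.cost), here is a minimal NumPy version; the names relu_layer and softmax are not from the module:

import numpy as np

def relu_layer( X, W, b ):
    # hypothetical forward pass of one 'ReLu' layer:
    # X is ( batchsize x Din ), W is ( Din x Nunit ), b is ( Nunit, )
    return np.maximum( np.dot( X, W ) + b, 0.0 )

def softmax( Y ):
    # numerically stable softmax over the K output units
    Y = Y - Y.max( axis = 1, keepdims = True )
    expY = np.exp( Y )
    return expY / expY.sum( axis = 1, keepdims = True )
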
# ex151220bn.py : the version with batch normalization (imports nnet151219bn)

import numpy as np
import scipy as sp
import mnist
import nnet151219bn as nnet
import theano


def gendat( mnist, LT ):

    label = np.asarray( mnist.getLabel( LT ), dtype = np.int32 )
    X = np.array( mnist.getImage( LT ) / 255, dtype = np.float32 )   # => in [0,1]

    return X, label


def MLP( D, H, K, nHiddenLayer, dropout = [ 1.0, 1.0, 1.0 ] ):

    rng = nnet.randomstreams( 0 )
    Layers = []
    Layers.append( nnet.InputLayer( D, rng = rng, dropout = dropout[0] ) )
    Layers.append( nnet.Layer( D, H, 'ReLu', rng = rng, Wini = 0.01, dropout = dropout[1] ) )
    for il in range( 1, nHiddenLayer ):
        Layers.append( nnet.Layer( H, H, 'ReLu', rng = rng, Wini = 0.01, dropout = dropout[1] ) )
    Layers.append( nnet.Layer( H, K, 'linear', rng = rng, Wini = 0.01, dropout = dropout[2] ) )
    mlp = nnet.MLP( Layers )

    return mlp


def errorrate( mlp, X, label, batchsize ):

    N = X.shape[0]
    nbatch = int( np.ceil( float( N ) / batchsize ) )
    LL = 0.0
    cnt = 0

    # all minibatches except the last one
    for ib in range( nbatch - 1 ):
        ii = np.arange( ib*batchsize, (ib+1)*batchsize )
        Z = mlp.output( X[ii] )
        LL += mlp.cost( Z, label[ii] ) * ii.shape[0]
        cnt += np.sum( label[ii] != np.argmax( Z, axis = 1 ) )

    # the last (possibly smaller) minibatch
    ib = nbatch - 1
    ii = np.arange( ib*batchsize, N )
    Z = mlp.output( X[ii] )
    LL += mlp.cost( Z, label[ii] ) * ii.shape[0]
    cnt += np.sum( label[ii] != np.argmax( Z, axis = 1 ) )

    return LL / N, float( cnt ) / N


if __name__ == "__main__":

    theano.config.floatX = 'float32'
    theano.config.fastmath = True
    np.random.seed( 1 )

    ##### setting the training data & the validation data
    #
    mn = mnist.MNIST( pathMNIST = '../150117-mnist' )
    K = mn.nclass
    X, label = gendat( mn, 'L' )
    xm = np.mean( X, axis = 0 )
    X -= xm
    XL, labelL = X[:50000], label[:50000]
    XV, labelV = X[50000:], label[50000:]
    NL, D = XL.shape
    NV, D = XV.shape

    ##### initializing
    #
    mlp = MLP( D, 1000, K, 14, dropout = [ 1.0, 1.0, 1.0 ] )
    for i, layer in enumerate( mlp.Layers ):
        if i == 0:
            print '# Layer 0 : Input (', layer.Din, ') dropout = ', layer.dropout
        else:
            print '# Layer', i, ':', layer.afunc, '(', layer.Din, 'x', layer.Nunit, ') dropout = ', layer.dropout

    ##### training
    #
    batchsize = 128
    nbatch = NL / batchsize
    eta, mu, lam = 0.1, 0.9, 0.0
    etaR = 1.0
    nepoch = 50

    print '### training: NL = ', NL, 'NV = ', NV, ' batchsize = ', batchsize
    print '# eta = ', eta, 'mu = ', mu, 'lam = ', lam

    i = 0
    mnLLL, erL = errorrate( mlp, XL, labelL, batchsize )
    mnLLV, erV = errorrate( mlp, XV, labelV, batchsize )
    print '%d | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, erL * 100, mnLLV, erV * 100 )

    # buffers for accumulating the per-minibatch batch-normalization statistics
    BNmu = np.empty( mlp.nlayer - 1, dtype = object )
    BNsig2 = np.empty( mlp.nlayer - 1, dtype = object )
    for i, layer in enumerate( mlp.Layers[1:] ):
        BNmu[i] = np.zeros( layer.Nunit, dtype = theano.config.floatX )
        BNsig2[i] = np.zeros( layer.Nunit, dtype = theano.config.floatX )

    for i in range( 1, nepoch ):

        for il in range( mlp.nlayer - 1 ):
            BNmu[il][:] = 0.0
            BNsig2[il][:] = 0.0

        # training
        for ib in range( nbatch ):
            ii = np.random.randint( 0, NL, batchsize )
            mlp.train( XL[ii], labelL[ii], eta, mu, lam )
            for il, layer in enumerate( mlp.Layers[1:] ):
                BNmu[il] += layer.BNmu.get_value()
                BNsig2[il] += layer.BNsig2.get_value()

        # inference-time statistics: mean of the minibatch means and the
        # unbiased estimate of the variance
        for il, layer in enumerate( mlp.Layers[1:] ):
            tmpBNmu = BNmu[il] / nbatch
            tmpBNsig2 = BNsig2[il] / nbatch * batchsize / ( batchsize - 1 ) + layer.BNeps
            layer.BNmu.set_value( tmpBNmu )
            layer.BNsig2.set_value( tmpBNsig2 )

        # inference & printing error rates
        if ( i < 10 ) or ( i % 10 == 0 ):
            mnLLL, erL = errorrate( mlp, XL, labelL, batchsize )
            mnLLV, erV = errorrate( mlp, XV, labelV, batchsize )
            print '%d | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, erL * 100, mnLLV, erV * 100 ),
            print ' | eta =', eta

        eta *= etaR

    i = nepoch

    ##### setting the test data
    #
    XT, labelT = gendat( mn, 'T' )
    XT -= xm
    NT, D = XT.shape
    print '# NT = ', NT
    mnLLT, erT = errorrate( mlp, XT, labelT, batchsize )
    print '%d | %.4f %.2f | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, erL * 100, mnLLV, erV * 100, mnLLT, erT * 100 )
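
nnet151219bn is not included here either. Judging from the way the script above averages layer.BNmu / layer.BNsig2 over the minibatches of an epoch and writes the unbiased variance (plus layer.BNeps) back with set_value, each hidden layer presumably applies the standard batch-normalization transform to its pre-activations: minibatch statistics during training, the stored BNmu / BNsig2 at inference. A minimal NumPy sketch under that assumption; gamma, beta and the function names are hypothetical, not from the module:

import numpy as np

def bn_train( Y, gamma, beta, eps = 1e-5 ):
    # training time: normalize the pre-activations Y ( batchsize x Nunit )
    # with the statistics of the current minibatch, then scale and shift
    mu = Y.mean( axis = 0 )
    sig2 = Y.var( axis = 0 )
    Yhat = ( Y - mu ) / np.sqrt( sig2 + eps )
    return gamma * Yhat + beta, mu, sig2   # mu, sig2 correspond to BNmu, BNsig2

def bn_inference( Y, gamma, beta, BNmu, BNsig2 ):
    # inference time: use the statistics accumulated over the last epoch
    # (in ex151220bn.py the stored BNsig2 already includes the unbiased
    #  correction and BNeps, hence no extra eps here)
    Yhat = ( Y - BNmu ) / np.sqrt( BNsig2 )
    return gamma * Yhat + beta
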