import numpy as np
import scipy as sp

import mnist
import nnet151219bn as nnet
import theano


def gendat( mnist, LT ):
    # LT = 'L' for the learning set, 'T' for the test set
    label = np.asarray( mnist.getLabel( LT ), dtype = np.int32 )
    # cast to float32 before dividing so that integer pixel values map into [0,1]
    X = np.asarray( mnist.getImage( LT ), dtype = np.float32 ) / 255   # => in [0,1]

    return X, label
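
# Hypothetical usage ( assuming the mnist module maps 'L' to the 60,000
# learning samples and 'T' to the 10,000 test samples, as the main routine
# below does ):
#
#   mn = mnist.MNIST( pathMNIST = '../150117-mnist' )
#   X, label = gendat( mn, 'L' )   # X: ( 60000, 784 ) float32, label: ( 60000, ) int32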


def MLP( D, H, K, nHiddenLayer, dropout = [ 1.0, 1.0, 1.0 ] ):
    # dropout[0], dropout[1], dropout[2]: dropout parameters of the input,
    # hidden, and output layers ( 1.0 = no dropout )
    rng = nnet.randomstreams( 0 )

    Layers = []
    Layers.append( nnet.InputLayer( D, rng = rng, dropout = dropout[0] ) )

    # first hidden layer ( D -> H ), then nHiddenLayer - 1 layers of H -> H
    Layers.append( nnet.Layer( D, H, 'ReLu', rng = rng, Wini = 0.01, dropout = dropout[1] ) )
    for il in range( 1, nHiddenLayer ):
        Layers.append( nnet.Layer( H, H, 'ReLu', rng = rng, Wini = 0.01, dropout = dropout[1] ) )

    # linear output layer ( H -> K )
    Layers.append( nnet.Layer( H, K, 'linear', rng = rng, Wini = 0.01, dropout = dropout[2] ) )

    mlp = nnet.MLP( Layers )

    return mlp
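
# A rough sketch of the network MLP() builds ( assuming, from the module name
# nnet151219bn, that each nnet.Layer applies batch normalization to its
# pre-activations ):
#
#   InputLayer( D ) -> Layer( D, H, ReLu ) -> Layer( H, H, ReLu ) * ( nHiddenLayer - 1 )
#                   -> Layer( H, K, linear )
#
# e.g. MLP( 784, 1000, 10, 14 ) gives 14 hidden layers of 1000 ReLU units for
# the 10 MNIST classes, with all dropout parameters at 1.0 ( no dropout ).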


def errorrate( mlp, X, label, batchsize ):
    # mean cost and classification error rate over X, computed in minibatches
    N = X.shape[0]
    nbatch = int( np.ceil( float( N ) / batchsize ) )

    LL = 0.0
    cnt = 0
    for ib in range( nbatch ):
        # the last batch may be smaller than batchsize
        ii = np.arange( ib * batchsize, min( ( ib + 1 ) * batchsize, N ) )
        Z = mlp.output( X[ii] )
        LL += mlp.cost( Z, label[ii] ) * ii.shape[0]
        cnt += np.sum( label[ii] != np.argmax( Z, axis = 1 ) )

    return LL / N, float( cnt ) / N
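
# Note: mlp.cost is assumed to return the minibatch-mean cost, so each batch
# cost is reweighted by its size above; LL / N is then the exact mean over all
# N samples even when N is not a multiple of batchsize.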


if __name__ == "__main__":

    theano.config.floatX = 'float32'
    theano.config.fastmath = True

    np.random.seed( 1 )

    ##### setting the training data & the validation data
    #
    mn = mnist.MNIST( pathMNIST = '../150117-mnist' )
    K = mn.nclass
    X, label = gendat( mn, 'L' )

    # center the data with the mean image
    xm = np.mean( X, axis = 0 )
    X -= xm

    # first 50,000 samples for learning, the last 10,000 for validation
    XL, labelL = X[:50000], label[:50000]
    XV, labelV = X[50000:], label[50000:]
    NL, D = XL.shape
    NV, D = XV.shape

    ##### initializing
    #
    # 14 hidden layers of 1000 ReLU units each; dropout disabled
    mlp = MLP( D, 1000, K, 14, dropout = [ 1.0, 1.0, 1.0 ] )
    for i, layer in enumerate( mlp.Layers ):
        if i == 0:
            print '# Layer 0 : Input (', layer.Din, ') dropout = ', layer.dropout
        else:
            print '# Layer', i, ':', layer.afunc, '(', layer.Din, 'x', layer.Nunit, ') dropout = ', layer.dropout
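
    # Hypothetical output of the loop above for MNIST ( D = 784, K = 10 ):
    #   # Layer 0 : Input ( 784 ) dropout =  1.0
    #   # Layer 1 : ReLu ( 784 x 1000 ) dropout =  1.0
    #   ...
    #   # Layer 14 : ReLu ( 1000 x 1000 ) dropout =  1.0
    #   # Layer 15 : linear ( 1000 x 10 ) dropout =  1.0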

    ##### training
    #
    batchsize = 128
    nbatch = NL // batchsize             # minibatches per epoch
    eta, mu, lam = 0.1, 0.9, 0.0         # learning rate, momentum, regularization coefficient
    etaR = 1.0                           # per-epoch learning-rate decay factor
    nepoch = 50

    print '### training: NL = ', NL, 'NV = ', NV, ' batchsize = ', batchsize
    print '# eta = ', eta, 'mu = ', mu, 'lam = ', lam

    # error rates before training ( epoch 0 )
    # log line: epoch | cost  error% ( learning ) | cost  error% ( validation )
    i = 0
    mnLLL, erL = errorrate( mlp, XL, labelL, batchsize )
    mnLLV, erV = errorrate( mlp, XV, labelV, batchsize )
    print '%d | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, erL * 100, mnLLV, erV * 100 )
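
    # The line above is the epoch-0 baseline; with randomly initialized
    # weights the error rate should be near chance ( about 90 % for the
    # 10 MNIST classes ).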

    # per-layer accumulators for the batch-normalization statistics
    # ( mlp.Layers[0] is the input layer and has none )
    BNmu = np.empty( mlp.nlayer - 1, dtype = object )
    BNsig2 = np.empty( mlp.nlayer - 1, dtype = object )
    for i, layer in enumerate( mlp.Layers[1:] ):
        BNmu[i] = np.zeros( layer.Nunit, dtype = theano.config.floatX )
        BNsig2[i] = np.zeros( layer.Nunit, dtype = theano.config.floatX )

    for i in range( 1, nepoch ):

        # reset the batch-normalization accumulators for this epoch
        for il in range( mlp.nlayer - 1 ):
            BNmu[il][:] = 0.0
            BNsig2[il][:] = 0.0

        # training
        for ib in range( nbatch ):
            ii = np.random.randint( 0, NL, batchsize )
            mlp.train( XL[ii], labelL[ii], eta, mu, lam )
            # accumulate the minibatch statistics computed by each layer
            for il, layer in enumerate( mlp.Layers[1:] ):
                BNmu[il] += layer.BNmu.get_value()
                BNsig2[il] += layer.BNsig2.get_value()
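
        # Batch normalization ( Ioffe & Szegedy, 2015 ) normalizes each unit
        # with minibatch statistics during training but with fixed population
        # statistics at inference:
        #
        #   y = gamma * ( x - E[x] ) / sqrt( Var[x] + eps ) + beta
        #
        # Below, E[x] is estimated by averaging the per-minibatch means
        # accumulated above, and Var[x] by averaging the minibatch variances
        # with the unbiased correction batchsize / ( batchsize - 1 ); adding
        # layer.BNeps is assumed to play the role of eps in the formula.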

        # set the population statistics used by the layers at inference
        for il, layer in enumerate( mlp.Layers[1:] ):
            tmpBNmu = BNmu[il] / nbatch
            tmpBNsig2 = BNsig2[il] / nbatch * batchsize / ( batchsize - 1 ) + layer.BNeps
            layer.BNmu.set_value( tmpBNmu )
            layer.BNsig2.set_value( tmpBNsig2 )

        # inference & printing error rates
        if ( i < 10 ) or ( i % 10 == 0 ):
            mnLLL, erL = errorrate( mlp, XL, labelL, batchsize )
            mnLLV, erV = errorrate( mlp, XV, labelV, batchsize )
            print '%d | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, erL * 100, mnLLV, erV * 100 ),
            print ' | eta =', eta

        # decay the learning rate ( etaR = 1.0 keeps it constant )
        eta *= etaR

    i = nepoch

    ##### setting the test data
    #
    XT, labelT = gendat( mn, 'T' )
    XT -= xm    # center with the same mean as the training data
    NT, D = XT.shape
    print '# NT = ', NT

    # final log line; the learning / validation numbers are those of the
    # last evaluation above
    mnLLT, erT = errorrate( mlp, XT, labelT, batchsize )
    print '%d | %.4f %.2f | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, erL * 100, mnLLV, erV * 100, mnLLT, erT * 100 )