# Training multi-layer perceptrons on MNIST with dropout (Python 2)

import numpy as np
import scipy as sp

import mnist0118 as mnist
import nnet150903 as nnet


def gendat( mnist ):

    label = np.asarray( mnist.getLabel(), dtype = np.int32 )
    # divide by 255.0, not 255: an integer division would floor the pixel values
    X = np.asarray( mnist.getImage(), dtype = np.float32 ) / 255.0   # => in [0, 1]

    return X, label
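
# For reference: with the 'L' (learning) set loaded below, X has shape
# (60000, 784) -- 28x28 images flattened -- and label takes values in
# {0, ..., 9} (assuming standard MNIST data behind the mnist0118 wrapper).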


def MLP2( D, H1, K, dropout = False ):

    # the per-layer dropout values are keep-probabilities: keep 0.8 of the
    # inputs and 0.5 of the hidden units; 1.0 = no dropout on the output
    if dropout:
        do = [ 0.8, 0.5, 1.0 ]
    else:
        do = [ 1.0, 1.0, 1.0 ]

    L0 = nnet.InputLayer( D, dropout = do[0] )
    L1 = nnet.Layer( D, H1, 'ReLu', withBias = True, Wini = 0.01, dropout = do[1] )
    L2 = nnet.Layer( H1, K, 'linear', withBias = True, Wini = 0.01, dropout = do[2] )
    mlp = nnet.MLP( [ L0, L1, L2 ] )

    return mlp


def MLP3( D, H1, H2, K, dropout = False ):

    if dropout:
        do = [ 0.8, 0.5, 0.5, 1.0 ]
    else:
        do = [ 1.0, 1.0, 1.0, 1.0 ]

    L0 = nnet.InputLayer( D, dropout = do[0] )
    L1 = nnet.Layer( D, H1, 'ReLu', withBias = True, Wini = 0.01, dropout = do[1] )
    L2 = nnet.Layer( H1, H2, 'ReLu', withBias = True, Wini = 0.01, dropout = do[2] )
    L3 = nnet.Layer( H2, K, 'linear', withBias = True, Wini = 0.01, dropout = do[3] )
    mlp = nnet.MLP( [ L0, L1, L2, L3 ] )

    return mlp
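
# Illustration only (not called anywhere): a minimal sketch of how a dropout
# layer with keep-probability p is commonly realized; the actual mechanism
# inside nnet150903 may differ.
def _dropout_sketch( A, p ):
    # A : (N, d) activations, p : probability of keeping each unit
    mask = np.random.rand( *A.shape ) < p
    return A * mask / p   # "inverted dropout": rescaling keeps E[output] unchanged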


# mini batch indices for stochastic gradient ascent
def makebatchindex( N, batchsize ):

    # shuffle 0 .. N-1 and mark each mini batch as one boolean row of idxB
    idx = np.random.permutation( N )
    nbatch = int( np.ceil( float( N ) / batchsize ) )
    idxB = np.zeros( ( nbatch, N ), dtype = bool )
    for ib in range( nbatch - 1 ):
        idxB[ib, idx[ib*batchsize:(ib+1)*batchsize]] = True
    ib = nbatch - 1
    idxB[ib, idx[ib*batchsize:]] = True   # the last batch takes the remainder

    return idxB
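
# Example: makebatchindex( 5, 2 ) returns a (3, 5) boolean array whose rows
# select the mini batches, e.g. (membership is random):
#   [[False  True False  True False]
#    [ True False False False  True]
#    [False False  True False False]]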


def errorrate( mlp, X, label, batchsize ):

    N = X.shape[0]
    nbatch = int( np.ceil( float( N ) / batchsize ) )

    # accumulate the cost and the misclassification count batch by batch
    LL = 0.0
    cnt = 0
    for ib in range( nbatch - 1 ):
        ii = np.arange( ib*batchsize, (ib+1)*batchsize )
        Z = mlp.output( X[ii] )
        LL += mlp.cost( Z, label[ii] ) * ii.shape[0]
        cnt += np.sum( label[ii] != np.argmax( Z, axis = 1 ) )
    ib = nbatch - 1
    ii = np.arange( ib*batchsize, N )   # the remainder batch
    Z = mlp.output( X[ii] )
    LL += mlp.cost( Z, label[ii] ) * ii.shape[0]
    cnt += np.sum( label[ii] != np.argmax( Z, axis = 1 ) )

    return LL / N, float( cnt ) / N
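
# For reference (an assumption, not taken from nnet150903): if mlp.cost is the
# mean softmax cross entropy of the batch, a plain numpy equivalent would be
def _xent_sketch( Z, label ):
    Z = Z - Z.max( axis = 1, keepdims = True )   # subtract the max for stability
    logp = Z - np.log( np.sum( np.exp( Z ), axis = 1, keepdims = True ) )
    return -np.mean( logp[np.arange( label.shape[0] ), label] )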


def weightnorm( mlp ):

    # mean squared weight of each layer (bias excluded), used below to
    # monitor how the weights grow during training
    W2 = np.empty( len( mlp.Layers ) - 1 )
    for i, layer in enumerate( mlp.Layers[1:] ):
        Wb = layer.getWeight()
        if layer.withBias:
            W = Wb[0]   # getWeight() apparently returns ( W, b ) when withBias
        else:
            W = Wb
        W2[i] = np.mean( np.square( W ) )

    return W2


if __name__ == "__main__":

    np.random.seed( 1 )

    ##### setting the training data & the validation data
    #
    mn = mnist.MNIST( 'L' )   # 'L' = the learning (training) set
    K = mn.nclass
    X, label = gendat( mn )
    xm = np.mean( X, axis = 0 )
    X -= xm   # center the data; the same mean is subtracted from the test set
    XL, labelL = X[:50000], label[:50000]
    XV, labelV = X[50000:], label[50000:]
    NL, D = XL.shape
    NV, D = XV.shape

    ##### mini batch indices for stochastic gradient ascent
    #
    batchsize = 100
    idxB = makebatchindex( NL, batchsize )
    nbatch = idxB.shape[0]

    ##### initializing
    #
    #mlp = MLP2( D, 1000, K )
    mlp = MLP3( D, 1000, 1000, K, dropout = True )
    #mlp = MLP3( D, 2000, 2000, K )
    for i, layer in enumerate( mlp.Layers ):
        if i == 0:
            print '# Layer 0 : Input (', layer.Din, ') dropout = ', layer.dropout
        else:
            print '# Layer', i, ':', layer.afunc, '(', layer.Din, 'x', layer.Nunit, ') dropout = ', layer.dropout

    ##### training
    #
    # eta : learning rate, mu : momentum, lam : (presumably) L2 weight decay
    eta, mu, lam = 0.1, 0.9, 0.0
    nepoch = 100

    print '### training: NL = ', NL, 'NV = ', NV, ' batchsize = ', batchsize
    print '# eta = ', eta, 'mu = ', mu, 'lam = ', lam
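
    # What mlp.train is assumed to do (standard momentum SGD with weight
    # decay; nnet150903 may differ in detail):
    #   dW <- mu * dW - eta * ( grad + lam * W );   W <- W + dW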

    # evaluation before training (epoch 0)
    i = 0
    mnLLL, erL = errorrate( mlp, XL, labelL, batchsize )
    mnLLV, erV = errorrate( mlp, XV, labelV, batchsize )
    print '%d | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, erL * 100, mnLLV, erV * 100 ),
    w2 = weightnorm( mlp )
    print ' | ', w2

    for i in range( 1, nepoch + 1 ):   # run exactly nepoch epochs

        # training (selecting each batch in random order)
        for ib in np.random.permutation( nbatch ):
            ii = idxB[ib, :]
            mlp.train( XL[ii], labelL[ii], eta, mu, lam )

        # printing error rates etc.
        if ( i < 10 ) or ( i % 10 == 0 ):
        #if i % 10 == 0:
            mnLLL, erL = errorrate( mlp, XL, labelL, batchsize )
            mnLLV, erV = errorrate( mlp, XV, labelV, batchsize )
            print '%d | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, erL * 100, mnLLV, erV * 100 ),
            w2 = weightnorm( mlp )
            print ' | ', w2

    ##### setting the test data
    #
    mn = mnist.MNIST( 'T' )   # 'T' = the test set
    XT, labelT = gendat( mn )
    XT -= xm   # subtract the training-set mean
    NT, D = XT.shape
    print '# NT = ', NT
    mnLLT, erT = errorrate( mlp, XT, labelT, batchsize )
    print '%d | %.4f %.2f | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, erL * 100, mnLLV, erV * 100, mnLLT, erT * 100 )