# mnist0118.py -- reader for the raw (uncompressed) MNIST IDX files
import struct

import numpy as np


class MNIST:

    def __init__( self, LT ):
        if LT == 'L':   # learning (training) set
            self.fnLabel = 'train-labels-idx1-ubyte'
            self.fnImage = 'train-images-idx3-ubyte'
        else:           # test set
            self.fnLabel = 't10k-labels-idx1-ubyte'
            self.fnImage = 't10k-images-idx3-ubyte'

    def getLabel( self ):
        return _readLabel( self.fnLabel )

    def getImage( self ):
        return _readImage( self.fnImage )
##### reading the label file
#
def _readLabel( fnLabel ):

    f = open( fnLabel, 'rb' )   # binary mode; 'r' would corrupt the data on some platforms

    ### header (two 4B integers: magic number (2049) & number of items)
    #
    header = f.read( 8 )
    mn, num = struct.unpack( '>2i', header )  # MSB first (big endian)
    assert( mn == 2049 )
    #print mn, num

    ### labels (unsigned byte)
    #
    label = np.array( struct.unpack( '>%dB' % num, f.read() ), dtype = int )
    f.close()

    return label
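# (worked example of the header layout, not in the original: the training
#  label file begins with the bytes 00 00 08 01 00 00 ea 60, and
#      struct.unpack( '>2i', '\x00\x00\x08\x01\x00\x00\xea\x60' )
#  yields ( 2049, 60000 ): the magic number and the item count)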
##### reading the image file
#
def _readImage( fnImage ):

    f = open( fnImage, 'rb' )   # binary mode

    ### header (four 4B integers: magic number (2051), #images, #rows & #cols)
    #
    header = f.read( 16 )
    mn, num, nrow, ncol = struct.unpack( '>4i', header )  # MSB first (big endian)
    assert( mn == 2051 )
    #print mn, num, nrow, ncol

    ### pixels (unsigned byte)
    #
    npixel = ncol * nrow
    #pixel = np.empty( ( num, npixel ), dtype = int )
    #pixel = np.empty( ( num, npixel ), dtype = np.int32 )
    pixel = np.empty( ( num, npixel ) )
    for i in range( num ):
        buf = struct.unpack( '>%dB' % npixel, f.read( npixel ) )
        pixel[i, :] = np.asarray( buf )
    f.close()

    return pixel
if __name__ == '__main__':

    print '# MNIST training data'
    mnist = MNIST( 'L' )
    lab = mnist.getLabel()
    dat = mnist.getImage()
    print lab.shape, dat.shape

    print '# MNIST test data'
    mnist = MNIST( 'T' )
    lab = mnist.getLabel()
    dat = mnist.getImage()
    print lab.shape, dat.shape
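A minimal usage sketch (not part of the gist), assuming the four uncompressed MNIST files sit in the working directory; each image comes back as a flat 784-vector of raw 0..255 pixel values, stored row-major as 28 x 28:

import mnist0118 as mnist

mn = mnist.MNIST( 'L' )
dat = mn.getImage()              # shape ( 60000, 784 ), values 0..255
img = dat[0].reshape( 28, 28 )   # one digit as a 28 x 28 array

for row in img:                  # crude ASCII rendering of the first digit
    print ''.join( '#' if p > 127 else '.' for p in row )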
# ---- training script: MLP on MNIST (uses mnist0118 & nnet150612) ----
import numpy as np
import scipy as sp

import mnist0118 as mnist
import nnet150612 as nnet


def gendat( LT ):

    mn = mnist.MNIST( LT )
    label = mn.getLabel()
    N = label.shape[0]
    K = 10
    X = np.array( mn.getImage() / 255, dtype = np.float32 )  # => in [0,1]
    t = np.zeros( ( N, K ), dtype = np.float32 )
    for ik in range( K ):   # one-hot encoding of the labels
        t[label == ik, ik] = 1.0

    return X, label, t
def errorrate( mlp, X, t, label ):
    # mean cross-entropy (= negative log-likelihood) & classification error rate
    Y, Z = mlp.output( X )
    mnLL = np.mean( mlp.cost( Z, t ) )
    er = np.mean( label != np.argmax( Z, axis = 1 ) )
    return mnLL, er


def MLP2( D, H1, K ):
    print '### 2-layer MLP: D =', D, ' H (ReLu) =', H1, ' K (softmax) =', K
    L1 = nnet.Layer( D, H1, 'ReLu', withBias = True, Wini = 0.01 )
    L2 = nnet.Layer( H1, K, 'softmax', withBias = True, Wini = 0.01 )
    mlp = nnet.MLP( [ L1, L2 ] )
    return mlp


def MLP3( D, H1, H2, K ):
    print '### 3-layer MLP: D =', D, ' H1 (ReLu) =', H1, ' H2 (ReLu) =', H2, ' K (softmax) =', K
    L1 = nnet.Layer( D, H1, 'ReLu', withBias = True, Wini = 0.01 )
    L2 = nnet.Layer( H1, H2, 'ReLu', withBias = True, Wini = 0.01 )
    L3 = nnet.Layer( H2, K, 'softmax', withBias = True, Wini = 0.01 )
    mlp = nnet.MLP( [ L1, L2, L3 ] )
    return mlp
if __name__ == "__main__":

    np.random.seed( 0 )

    ##### setting the training data & the validation data
    #
    X, label, t = gendat( 'L' )
    xm = np.mean( X, axis = 0 )
    X -= xm   # mean subtraction (the same xm is reused for the test data below)
    XL, labelL, tL = X[:50000], label[:50000], t[:50000]
    XV, labelV, tV = X[50000:], label[50000:], t[50000:]
    NL, D = XL.shape
    NV, D = XV.shape
    K = t.shape[1]

    ##### mini-batch indices for stochastic gradient descent
    #
    idx = np.random.permutation( NL )
    batchsize = 100
    nbatch = NL / batchsize   # integer division (Python 2)
    assert( NL % batchsize == 0 )
    idxB = np.zeros( ( nbatch, NL ), dtype = bool )   # one boolean mask per batch
    for ib in range( nbatch ):
        idxB[ib, idx.reshape( ( nbatch, batchsize ) )[ib, :]] = True
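    # (aside, not in the original: each row of idxB is a length-NL boolean
    #  mask selecting batchsize random examples; an equivalent and more
    #  memory-frugal alternative is integer indexing, e.g.
    #      idxB = idx.reshape( nbatch, batchsize )
    #  and then XL[idxB[ib]] inside the training loop)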
    ##### training
    #
    #mlp = MLP2( D, 500, K )
    mlp = MLP3( D, 1000, 500, K )
    eta = 0.01       # learning rate
    mu = 0.9         # momentum coefficient
    lam = 0.00001    # weight-decay coefficient
    nepoch = 50

    print '### training: NL = ', NL, 'NV = ', NV, ' batchsize = ', batchsize
    print '# eta = ', eta, 'mu = ', mu, 'lam = ', lam

    for i in range( nepoch ):
        # printing error rates etc. every 10 epochs
        if i % 10 == 0:
            mnLLL, erL = errorrate( mlp, XL, tL, labelL )
            mnLLV, erV = errorrate( mlp, XV, tV, labelV )
            print '%d | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, erL * 100, mnLLV, erV * 100 )
        # training (selecting each batch in random order)
        for ib in np.random.permutation( nbatch ):
            ii = idxB[ib, :]
            mlp.train( XL[ii], tL[ii], eta, mu, lam )

    i = nepoch
    mnLLL, erL = errorrate( mlp, XL, tL, labelL )
    mnLLV, erV = errorrate( mlp, XV, tV, labelV )
    print '%d | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, erL * 100, mnLLV, erV * 100 )
    ##### setting the test data
    #
    XT, labelT, tT = gendat( 'T' )
    XT -= xm   # subtract the training-set mean
    NT, D = XT.shape
    print '# NT = ', NT
    mnLLT, erT = errorrate( mlp, XT, tT, labelT )
    print '%d | %.4f %.2f | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, erL * 100, mnLLV, erV * 100, mnLLT, erT * 100 )
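The script does not persist the trained network. Since each layer keeps its parameters in Theano shared variables, they can be pulled out as NumPy arrays with get_value(); a minimal sketch, not part of the gist (the file name mlp_weights.npz is arbitrary):

# sketch: saving the trained weights (assumes the mlp trained above)
params = {}
for i, layer in enumerate( mlp.Layers ):
    params['W%d' % i] = layer.W.get_value()
    if layer.withBias:
        params['b%d' % i] = layer.b.get_value()
np.savez( 'mlp_weights.npz', **params )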
# nnet150612.py -- a simple MLP built on Theano
import numpy as np
import theano
import theano.tensor as T

# activation functions
d_afunc = { 'linear':  lambda Y: Y,
            'sigmoid': T.nnet.sigmoid,
            'softmax': T.nnet.softmax,
            'ReLu':    lambda Y: T.switch( Y > 0, Y, 0 ) }


### uniform random numbers for weight initialization
#
def randomU( shape, a ):
    # uniform in [ -a, a )
    return 2 * a * ( np.random.random_sample( shape ) - 0.5 )


### Gaussian random numbers for weight initialization
#
def randomN( shape, sig ):
    # N( 0, sig^2 ), i.e. sig is the standard deviation
    return sig * np.random.standard_normal( shape )
########## Layer ##########
class Layer( object ):

    def __init__( self, Din, Nunit, afunc, withBias = True, Wini = 0.01, floatX = theano.config.floatX ):
        self.Din = Din
        self.Nunit = Nunit
        self.afunc = d_afunc[afunc]
        self.withBias = withBias
        # theano shared variables for weights & biases (dW/db hold the
        # previous updates, used for the momentum term)
        self.W = theano.shared( np.array( randomN( ( Nunit, Din ), Wini ), dtype = floatX ) )
        self.dW = theano.shared( np.zeros( ( Nunit, Din ), dtype = floatX ) )
        if withBias:
            self.b = theano.shared( np.zeros( Nunit, dtype = floatX ) )
            self.db = theano.shared( np.zeros( Nunit, dtype = floatX ) )

    def output( self, X ):
        if self.withBias:
            Y = T.dot( X, self.W.T ) + self.b   # Ndat x Nunit
        else:
            Y = T.dot( X, self.W.T )
        Z = self.afunc( Y )
        return Y, Z
########## MLP ##########
class MLP( object ):

    def __init__( self, Layers ):
        # Layers - list of Layer instances
        self.Layers = Layers
        # theano functions
        self.output = self._Tfunc_output()
        self.cost = self._Tfunc_cost()
        self.train = self._Tfunc_train()

    ### theano function for output computation
    #
    def _Tfunc_output( self ):
        X = T.matrix()   # N x D
        Y, Z = _T_output( self.Layers, X )
        return theano.function( [ X ], [ Y, Z ] )

    ### theano function for cost computation
    #
    def _Tfunc_cost( self ):
        Z = T.matrix()   # N x K
        t = T.matrix()   # N x K
        cost = _T_cost( Z, t )
        return theano.function( [ Z, t ], cost )

    ### theano function for gradient descent learning
    #
    def _Tfunc_train( self ):
        X = T.matrix( 'X' )         # N x D
        t = T.matrix( 't' )         # N x K
        eta = T.scalar( 'eta' )     # learning rate
        mu = T.scalar( 'mu' )       # momentum coefficient
        lam = T.scalar( 'lambda' )  # weight-decay coefficient
        Y, Z = _T_output( self.Layers, X )
        cost = T.mean( _T_cost( Z, t ) )
        updatesList = []
        for layer in self.Layers:
            gradW = T.grad( cost, layer.W )
            # momentum SGD with L2 weight decay
            #dWnew = -eta * gradW + mu * layer.dW
            dWnew = -eta * ( gradW + lam * layer.W ) + mu * layer.dW
            Wnew = layer.W + dWnew
            updatesList.append( ( layer.W, Wnew ) )
            updatesList.append( ( layer.dW, dWnew ) )
            if layer.withBias:
                gradb = T.grad( cost, layer.b )
                # no weight decay for the biases
                dbnew = -eta * gradb + mu * layer.db
                bnew = layer.b + dbnew
                updatesList.append( ( layer.b, bnew ) )
                updatesList.append( ( layer.db, dbnew ) )
        return theano.function( [ X, t, eta, mu, lam ], cost, updates = updatesList )
def _T_output( Layers, X ):
    Zprev = X
    for layer in Layers:
        Y, Z = layer.output( Zprev )
        Zprev = Z
    return Y, Z


def _T_cost( Z, t ):
    # per-example cross-entropy; Z must be softmax outputs, t one-hot targets
    return T.nnet.categorical_crossentropy( Z, t )
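For readers not fluent in Theano's updates mechanism, here is the parameter update that _Tfunc_train compiles, written as plain NumPy; the function and variable names are illustrative only, not part of the module:

def sgd_step( W, dW_prev, gradW, eta, mu, lam ):
    # momentum SGD with L2 weight decay, as in _Tfunc_train:
    #   dW_new = -eta * ( grad + lam * W ) + mu * dW_prev
    dW_new = -eta * ( gradW + lam * W ) + mu * dW_prev
    return W + dW_new, dW_new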