@takatakamanbou
Last active August 29, 2015
import struct
import numpy as np


class MNIST:

    def __init__( self, LT ):
        if LT == 'L':
            self.fnLabel = 'train-labels-idx1-ubyte'
            self.fnImage = 'train-images-idx3-ubyte'
        else:
            self.fnLabel = 't10k-labels-idx1-ubyte'
            self.fnImage = 't10k-images-idx3-ubyte'

    def getLabel( self ):
        return _readLabel( self.fnLabel )

    def getImage( self ):
        return _readImage( self.fnImage )


##### reading the label file
#
def _readLabel( fnLabel ):

    f = open( fnLabel, 'rb' )   # binary mode so struct.unpack sees raw bytes

    ### header (two 4B integers, magic number (2049) & number of items)
    #
    header = f.read( 8 )
    mn, num = struct.unpack( '>2i', header )   # MSB first (big endian)
    assert( mn == 2049 )
    #print mn, num

    ### labels (unsigned byte)
    #
    label = np.array( struct.unpack( '>%dB' % num, f.read() ), dtype = int )
    f.close()

    return label


##### reading the image file
#
def _readImage( fnImage ):

    f = open( fnImage, 'rb' )   # binary mode so struct.unpack sees raw bytes

    ### header (four 4B integers, magic number (2051), #images, #rows, and #cols)
    #
    header = f.read( 16 )
    mn, num, nrow, ncol = struct.unpack( '>4i', header )   # MSB first (big endian)
    assert( mn == 2051 )
    #print mn, num, nrow, ncol

    ### pixels (unsigned byte)
    #
    npixel = ncol * nrow
    #pixel = np.empty( ( num, npixel ), dtype = int )
    #pixel = np.empty( ( num, npixel ), dtype = np.int32 )
    pixel = np.empty( ( num, npixel ) )
    for i in range( num ):
        buf = struct.unpack( '>%dB' % npixel, f.read( npixel ) )
        pixel[i, :] = np.asarray( buf )
    f.close()

    return pixel


if __name__ == '__main__':

    print '# MNIST training data'
    mnist = MNIST( 'L' )
    lab = mnist.getLabel()
    dat = mnist.getImage()
    print lab.shape, dat.shape

    print '# MNIST test data'
    mnist = MNIST( 'T' )
    lab = mnist.getLabel()
    dat = mnist.getImage()
    print lab.shape, dat.shape
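The class above is the reader imported as mnist0118 by the training script that follows; it expects the four MNIST idx files (distributed gzipped at yann.lecun.com/exdb/mnist) to sit uncompressed in the working directory. Assuming the standard 60000-image training set and 10000-image test set (28 x 28 = 784 pixels per image), running this file as a script should print shapes along these lines:

# MNIST training data
(60000,) (60000, 784)
# MNIST test data
(10000,) (10000, 784)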
import numpy as np
import scipy as sp
import mnist0118 as mnist
import nnet150612 as nnet


def gendat( LT ):

    mn = mnist.MNIST( LT )
    label = mn.getLabel()
    N = label.shape[0]
    K = 10
    X = np.array( mn.getImage() / 255, dtype = np.float32 )   # => in [0,1]
    t = np.zeros( ( N, K ), dtype = np.float32 )
    for ik in range( K ):
        t[label == ik, ik] = 1.0

    return X, label, t


def errorrate( mlp, X, t, label ):

    Y, Z = mlp.output( X )
    mnLL = np.mean( mlp.cost( Z, t ) )
    er = np.mean( label != np.argmax( Z, axis = 1 ) )

    return mnLL, er


def MLP2( D, H1, K ):

    print '### 2-layer MLP: D =', D, ' H (ReLu) =', H1, ' K (softmax) =', K
    L1 = nnet.Layer( D, H1, 'ReLu', withBias = True, Wini = 0.01 )
    L2 = nnet.Layer( H1, K, 'softmax', withBias = True, Wini = 0.01 )
    mlp = nnet.MLP( [ L1, L2 ] )

    return mlp


def MLP3( D, H1, H2, K ):

    print '### 3-layer MLP: D =', D, ' H1 (ReLu) =', H1, ' H2 (ReLu) =', H2, ' K (softmax) =', K
    L1 = nnet.Layer( D, H1, 'ReLu', withBias = True, Wini = 0.01 )
    L2 = nnet.Layer( H1, H2, 'ReLu', withBias = True, Wini = 0.01 )
    L3 = nnet.Layer( H2, K, 'softmax', withBias = True, Wini = 0.01 )
    mlp = nnet.MLP( [ L1, L2, L3 ] )

    return mlp


if __name__ == "__main__":

    np.random.seed( 0 )

    ##### setting the training data & the validation data
    #
    X, label, t = gendat( 'L' )
    xm = np.mean( X, axis = 0 )
    X -= xm
    XL, labelL, tL = X[:50000], label[:50000], t[:50000]
    XV, labelV, tV = X[50000:], label[50000:], t[50000:]
    NL, D = XL.shape
    NV, D = XV.shape
    K = t.shape[1]

    ##### mini batch indices for stochastic gradient descent
    #
    idx = np.random.permutation( NL )
    batchsize = 100
    nbatch = NL / batchsize
    assert( NL % batchsize == 0 )
    idxB = np.zeros( ( nbatch, NL ), dtype = bool )
    for ib in range( nbatch ):
        idxB[ib, idx.reshape( ( nbatch, batchsize ) )[ib, :]] = True

    ##### training
    #
    #mlp = MLP2( D, 500, K )
    mlp = MLP3( D, 1000, 500, K )
    eta = 0.01
    mu = 0.9
    lam = 0.00001
    nepoch = 50
    print '### training: NL = ', NL, 'NV = ', NV, ' batchsize = ', batchsize
    print '# eta = ', eta, 'mu = ', mu, 'lam = ', lam

    for i in range( nepoch ):

        # printing error rates etc.
        if i % 10 == 0:
            mnLLL, erL = errorrate( mlp, XL, tL, labelL )
            mnLLV, erV = errorrate( mlp, XV, tV, labelV )
            print '%d | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, erL * 100, mnLLV, erV * 100 )

        # training (selecting each batch in random order)
        for ib in np.random.permutation( nbatch ):
            ii = idxB[ib, :]
            mlp.train( XL[ii], tL[ii], eta, mu, lam )

    i = nepoch
    mnLLL, erL = errorrate( mlp, XL, tL, labelL )
    mnLLV, erV = errorrate( mlp, XV, tV, labelV )
    print '%d | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, erL * 100, mnLLV, erV * 100 )

    ##### setting the test data
    #
    XT, labelT, tT = gendat( 'T' )
    XT -= xm
    NT, D = XT.shape
    print '# NT = ', NT
    mnLLT, erT = errorrate( mlp, XT, tT, labelT )
    print '%d | %.4f %.2f | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, erL * 100, mnLLV, erV * 100, mnLLT, erT * 100 )
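Each status line printed by the script shows the epoch number, then the mean cross-entropy and the error rate in percent on the training set, the same pair for the validation set, and, on the final line, a third pair for the test set. The module imported above as nnet150612 is the small Theano layer/MLP library below; its train function performs one momentum-SGD step per call, applying L2 weight decay to the weights but not to the biases.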
import numpy as np
import theano
import theano.tensor as T


# activation functions
d_afunc = { 'linear': lambda Y: Y,
            'sigmoid': T.nnet.sigmoid,
            'softmax': T.nnet.softmax,
            'ReLu': lambda Y: T.switch( Y > 0, Y, 0 ) }


### uniform random numbers for weight initialization
#
def randomU( shape, a ):
    # [ -a, a )
    return 2 * a * ( np.random.random_sample( shape ) - 0.5 )


### Gaussian random numbers for weight initialization
#
def randomN( shape, sig ):
    # N(0,sig)
    return sig * np.random.standard_normal( shape )


########## Layer ##########

class Layer( object ):

    def __init__( self, Din, Nunit, afunc, withBias = True, Wini = 0.01, floatX = theano.config.floatX ):

        self.Din = Din
        self.Nunit = Nunit
        self.afunc = d_afunc[afunc]
        self.withBias = withBias

        # theano shared variables for weights & biases
        self.W = theano.shared( np.array( randomN( ( Nunit, Din ), Wini ), dtype = floatX ) )
        self.dW = theano.shared( np.zeros( ( Nunit, Din ), dtype = floatX ) )
        if withBias:
            self.b = theano.shared( np.zeros( Nunit, dtype = floatX ) )
            self.db = theano.shared( np.zeros( Nunit, dtype = floatX ) )

    def output( self, X ):

        if self.withBias:
            Y = T.dot( X, self.W.T ) + self.b   # Ndat x Nunit
        else:
            Y = T.dot( X, self.W.T )
        Z = self.afunc( Y )

        return Y, Z


########## MLP ##########

class MLP( object ):

    def __init__( self, Layers ):

        # layers - list of Layer instances
        self.Layers = Layers

        # theano functions
        self.output = self._Tfunc_output()
        self.cost = self._Tfunc_cost()
        self.train = self._Tfunc_train()

    ### theano function for output computation
    #
    def _Tfunc_output( self ):

        X = T.matrix()   # N x D
        Y, Z = _T_output( self.Layers, X )

        return theano.function( [ X ], [ Y, Z ] )

    ### theano function for cost computation
    #
    def _Tfunc_cost( self ):

        Z = T.matrix()   # N x K
        t = T.matrix()   # N x K
        cost = _T_cost( Z, t )

        return theano.function( [ Z, t ], cost )

    ### theano function for gradient descent learning
    #
    def _Tfunc_train( self ):

        X = T.matrix( 'X' )   # N x D
        t = T.matrix( 't' )   # N x K
        eta = T.scalar( 'eta' )
        mu = T.scalar( 'mu' )
        lam = T.scalar( 'lambda' )

        Y, Z = _T_output( self.Layers, X )
        cost = T.mean( _T_cost( Z, t ) )

        updatesList = []
        for layer in self.Layers:

            gradW = T.grad( cost, layer.W )
            #dWnew = -eta * gradW + mu * layer.dW
            dWnew = -eta * ( gradW + lam * layer.W ) + mu * layer.dW
            Wnew = layer.W + dWnew
            updatesList.append( ( layer.W, Wnew ) )
            updatesList.append( ( layer.dW, dWnew ) )

            if layer.withBias:
                gradb = T.grad( cost, layer.b )
                # no weight decay for bias
                dbnew = -eta * gradb + mu * layer.db
                bnew = layer.b + dbnew
                updatesList.append( ( layer.b, bnew ) )
                updatesList.append( ( layer.db, dbnew ) )

        return theano.function( [ X, t, eta, mu, lam ], cost, updates = updatesList )


def _T_output( Layers, X ):

    Zprev = X
    for layer in Layers:
        Y, Z = layer.output( Zprev )
        Zprev = Z

    return Y, Z


def _T_cost( Z, t ):

    return T.nnet.categorical_crossentropy( Z, t )
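A minimal usage sketch of the module, assuming it is saved as nnet150612.py and Theano is installed; the layer sizes and the random data are placeholders, chosen only to exercise the train/output/cost functions rather than to reproduce the MNIST experiment above:

import numpy as np
import nnet150612 as nnet

D, H, K, N = 784, 100, 10, 32                       # input dim, hidden units, classes, batch size (placeholders)
L1 = nnet.Layer( D, H, 'ReLu', withBias = True, Wini = 0.01 )
L2 = nnet.Layer( H, K, 'softmax', withBias = True, Wini = 0.01 )
mlp = nnet.MLP( [ L1, L2 ] )

X = np.random.rand( N, D ).astype( np.float32 )     # fake inputs in [0,1)
t = np.zeros( ( N, K ), dtype = np.float32 )        # random 1-of-K targets
t[np.arange( N ), np.random.randint( 0, K, N )] = 1.0

print mlp.train( X, t, 0.01, 0.9, 0.0 )             # one SGD step: eta = 0.01, mu = 0.9, lam = 0 (no weight decay)
Y, Z = mlp.output( X )                              # Z: softmax outputs, shape N x K
print np.mean( mlp.cost( Z, t ) )                   # mean cross-entropy after the update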