# mnist0118.py -- reader for the raw (uncompressed) MNIST IDX files
import struct

import numpy as np


class MNIST:

    def __init__( self, LT ):
        if LT == 'L':   # learning (training) set
            self.fnLabel = 'train-labels-idx1-ubyte'
            self.fnImage = 'train-images-idx3-ubyte'
        else:           # test set
            self.fnLabel = 't10k-labels-idx1-ubyte'
            self.fnImage = 't10k-images-idx3-ubyte'

    def getLabel( self ):
        return _readLabel( self.fnLabel )

    def getImage( self ):
        return _readImage( self.fnImage )
##### reading the label file
#
def _readLabel( fnLabel ):

    f = open( fnLabel, 'rb' )   # binary mode; 'r' would corrupt the data on some platforms

    ### header (two 4B integers: magic number (2049) & number of items)
    #
    header = f.read( 8 )
    mn, num = struct.unpack( '>2i', header )  # MSB first (big endian)
    assert( mn == 2049 )
    #print mn, num

    ### labels (unsigned byte)
    #
    label = np.array( struct.unpack( '>%dB' % num, f.read() ), dtype = int )
    f.close()

    return label
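# (worked example of the header layout, not in the original: the training
#  label file begins with the bytes 00 00 08 01 00 00 ea 60, and
#      struct.unpack( '>2i', '\x00\x00\x08\x01\x00\x00\xea\x60' )
#  yields ( 2049, 60000 ): the magic number and the item count)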
##### reading the image file
#
def _readImage( fnImage ):

    f = open( fnImage, 'rb' )   # binary mode

    ### header (four 4B integers: magic number (2051), #images, #rows & #cols)
    #
    header = f.read( 16 )
    mn, num, nrow, ncol = struct.unpack( '>4i', header )  # MSB first (big endian)
    assert( mn == 2051 )
    #print mn, num, nrow, ncol

    ### pixels (unsigned byte)
    #
    npixel = ncol * nrow
    #pixel = np.empty( ( num, npixel ), dtype = int )
    #pixel = np.empty( ( num, npixel ), dtype = np.int32 )
    pixel = np.empty( ( num, npixel ) )
    for i in range( num ):
        buf = struct.unpack( '>%dB' % npixel, f.read( npixel ) )
        pixel[i, :] = np.asarray( buf )
    f.close()

    return pixel
if __name__ == '__main__':

    print '# MNIST training data'
    mnist = MNIST( 'L' )
    lab = mnist.getLabel()
    dat = mnist.getImage()
    print lab.shape, dat.shape

    print '# MNIST test data'
    mnist = MNIST( 'T' )
    lab = mnist.getLabel()
    dat = mnist.getImage()
    print lab.shape, dat.shape
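A minimal usage sketch (not part of the gist), assuming the four uncompressed MNIST files sit in the working directory; each image comes back as a flat 784-vector of raw 0..255 pixel values, stored row-major as 28 x 28:

import mnist0118 as mnist

mn = mnist.MNIST( 'L' )
dat = mn.getImage()              # shape ( 60000, 784 ), values 0..255
img = dat[0].reshape( 28, 28 )   # one digit as a 28 x 28 array

for row in img:                  # crude ASCII rendering of the first digit
    print ''.join( '#' if p > 127 else '.' for p in row )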
# ---- training script: MLP on MNIST (uses mnist0118 & nnet150612) ----
import numpy as np
import scipy as sp

import mnist0118 as mnist
import nnet150612 as nnet


def gendat( LT ):

    mn = mnist.MNIST( LT )
    label = mn.getLabel()
    N = label.shape[0]
    K = 10
    X = np.array( mn.getImage() / 255, dtype = np.float32 )  # => in [0,1]
    t = np.zeros( ( N, K ), dtype = np.float32 )
    for ik in range( K ):   # one-hot encoding of the labels
        t[label == ik, ik] = 1.0

    return X, label, t
def errorrate( mlp, X, t, label ):
    # mean cross-entropy (= negative log-likelihood) & classification error rate
    Y, Z = mlp.output( X )
    mnLL = np.mean( mlp.cost( Z, t ) )
    er = np.mean( label != np.argmax( Z, axis = 1 ) )
    return mnLL, er


def MLP2( D, H1, K ):
    print '### 2-layer MLP: D =', D, ' H (ReLu) =', H1, ' K (softmax) =', K
    L1 = nnet.Layer( D, H1, 'ReLu', withBias = True, Wini = 0.01 )
    L2 = nnet.Layer( H1, K, 'softmax', withBias = True, Wini = 0.01 )
    mlp = nnet.MLP( [ L1, L2 ] )
    return mlp


def MLP3( D, H1, H2, K ):
    print '### 3-layer MLP: D =', D, ' H1 (ReLu) =', H1, ' H2 (ReLu) =', H2, ' K (softmax) =', K
    L1 = nnet.Layer( D, H1, 'ReLu', withBias = True, Wini = 0.01 )
    L2 = nnet.Layer( H1, H2, 'ReLu', withBias = True, Wini = 0.01 )
    L3 = nnet.Layer( H2, K, 'softmax', withBias = True, Wini = 0.01 )
    mlp = nnet.MLP( [ L1, L2, L3 ] )
    return mlp
if __name__ == "__main__":

    np.random.seed( 0 )

    ##### setting the training data & the validation data
    #
    X, label, t = gendat( 'L' )
    xm = np.mean( X, axis = 0 )
    X -= xm   # mean subtraction (the same xm is reused for the test data below)
    XL, labelL, tL = X[:50000], label[:50000], t[:50000]
    XV, labelV, tV = X[50000:], label[50000:], t[50000:]
    NL, D = XL.shape
    NV, D = XV.shape
    K = t.shape[1]

    ##### mini-batch indices for stochastic gradient descent
    #
    idx = np.random.permutation( NL )
    batchsize = 100
    nbatch = NL / batchsize   # integer division (Python 2)
    assert( NL % batchsize == 0 )
    idxB = np.zeros( ( nbatch, NL ), dtype = bool )   # one boolean mask per batch
    for ib in range( nbatch ):
        idxB[ib, idx.reshape( ( nbatch, batchsize ) )[ib, :]] = True
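    # (aside, not in the original: each row of idxB is a length-NL boolean
    #  mask selecting batchsize random examples; an equivalent and more
    #  memory-frugal alternative is integer indexing, e.g.
    #      idxB = idx.reshape( nbatch, batchsize )
    #  and then XL[idxB[ib]] inside the training loop)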
    ##### training
    #
    #mlp = MLP2( D, 500, K )
    mlp = MLP3( D, 1000, 500, K )
    eta = 0.01       # learning rate
    mu = 0.9         # momentum coefficient
    lam = 0.00001    # weight-decay coefficient
    nepoch = 50

    print '### training: NL = ', NL, 'NV = ', NV, ' batchsize = ', batchsize
    print '# eta = ', eta, 'mu = ', mu, 'lam = ', lam

    for i in range( nepoch ):
        # printing error rates etc. every 10 epochs
        if i % 10 == 0:
            mnLLL, erL = errorrate( mlp, XL, tL, labelL )
            mnLLV, erV = errorrate( mlp, XV, tV, labelV )
            print '%d | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, erL * 100, mnLLV, erV * 100 )
        # training (selecting each batch in random order)
        for ib in np.random.permutation( nbatch ):
            ii = idxB[ib, :]
            mlp.train( XL[ii], tL[ii], eta, mu, lam )

    i = nepoch
    mnLLL, erL = errorrate( mlp, XL, tL, labelL )
    mnLLV, erV = errorrate( mlp, XV, tV, labelV )
    print '%d | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, erL * 100, mnLLV, erV * 100 )
    ##### setting the test data
    #
    XT, labelT, tT = gendat( 'T' )
    XT -= xm   # subtract the training-set mean
    NT, D = XT.shape
    print '# NT = ', NT
    mnLLT, erT = errorrate( mlp, XT, tT, labelT )
    print '%d | %.4f %.2f | %.4f %.2f | %.4f %.2f' % ( i, mnLLL, erL * 100, mnLLV, erV * 100, mnLLT, erT * 100 )
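The script does not persist the trained network. Since each layer keeps its parameters in Theano shared variables, they can be pulled out as NumPy arrays with get_value(); a minimal sketch, not part of the gist (the file name mlp_weights.npz is arbitrary):

# sketch: saving the trained weights (assumes the mlp trained above)
params = {}
for i, layer in enumerate( mlp.Layers ):
    params['W%d' % i] = layer.W.get_value()
    if layer.withBias:
        params['b%d' % i] = layer.b.get_value()
np.savez( 'mlp_weights.npz', **params )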
# nnet150612.py -- a simple MLP built on Theano
import numpy as np
import theano
import theano.tensor as T

# activation functions
d_afunc = { 'linear':  lambda Y: Y,
            'sigmoid': T.nnet.sigmoid,
            'softmax': T.nnet.softmax,
            'ReLu':    lambda Y: T.switch( Y > 0, Y, 0 ) }


### uniform random numbers for weight initialization
#
def randomU( shape, a ):
    # uniform in [ -a, a )
    return 2 * a * ( np.random.random_sample( shape ) - 0.5 )


### Gaussian random numbers for weight initialization
#
def randomN( shape, sig ):
    # N( 0, sig^2 ), i.e. sig is the standard deviation
    return sig * np.random.standard_normal( shape )
########## Layer ##########
class Layer( object ):

    def __init__( self, Din, Nunit, afunc, withBias = True, Wini = 0.01, floatX = theano.config.floatX ):
        self.Din = Din
        self.Nunit = Nunit
        self.afunc = d_afunc[afunc]
        self.withBias = withBias
        # theano shared variables for weights & biases (dW/db hold the
        # previous updates, used for the momentum term)
        self.W = theano.shared( np.array( randomN( ( Nunit, Din ), Wini ), dtype = floatX ) )
        self.dW = theano.shared( np.zeros( ( Nunit, Din ), dtype = floatX ) )
        if withBias:
            self.b = theano.shared( np.zeros( Nunit, dtype = floatX ) )
            self.db = theano.shared( np.zeros( Nunit, dtype = floatX ) )

    def output( self, X ):
        if self.withBias:
            Y = T.dot( X, self.W.T ) + self.b   # Ndat x Nunit
        else:
            Y = T.dot( X, self.W.T )
        Z = self.afunc( Y )
        return Y, Z
########## MLP ##########
class MLP( object ):

    def __init__( self, Layers ):
        # Layers - list of Layer instances
        self.Layers = Layers
        # theano functions
        self.output = self._Tfunc_output()
        self.cost = self._Tfunc_cost()
        self.train = self._Tfunc_train()

    ### theano function for output computation
    #
    def _Tfunc_output( self ):
        X = T.matrix()   # N x D
        Y, Z = _T_output( self.Layers, X )
        return theano.function( [ X ], [ Y, Z ] )

    ### theano function for cost computation
    #
    def _Tfunc_cost( self ):
        Z = T.matrix()   # N x K
        t = T.matrix()   # N x K
        cost = _T_cost( Z, t )
        return theano.function( [ Z, t ], cost )

    ### theano function for gradient descent learning
    #
    def _Tfunc_train( self ):
        X = T.matrix( 'X' )         # N x D
        t = T.matrix( 't' )         # N x K
        eta = T.scalar( 'eta' )     # learning rate
        mu = T.scalar( 'mu' )       # momentum coefficient
        lam = T.scalar( 'lambda' )  # weight-decay coefficient
        Y, Z = _T_output( self.Layers, X )
        cost = T.mean( _T_cost( Z, t ) )
        updatesList = []
        for layer in self.Layers:
            gradW = T.grad( cost, layer.W )
            # momentum SGD with L2 weight decay
            #dWnew = -eta * gradW + mu * layer.dW
            dWnew = -eta * ( gradW + lam * layer.W ) + mu * layer.dW
            Wnew = layer.W + dWnew
            updatesList.append( ( layer.W, Wnew ) )
            updatesList.append( ( layer.dW, dWnew ) )
            if layer.withBias:
                gradb = T.grad( cost, layer.b )
                # no weight decay for the biases
                dbnew = -eta * gradb + mu * layer.db
                bnew = layer.b + dbnew
                updatesList.append( ( layer.b, bnew ) )
                updatesList.append( ( layer.db, dbnew ) )
        return theano.function( [ X, t, eta, mu, lam ], cost, updates = updatesList )
def _T_output( Layers, X ):
    Zprev = X
    for layer in Layers:
        Y, Z = layer.output( Zprev )
        Zprev = Z
    return Y, Z


def _T_cost( Z, t ):
    # per-example cross-entropy; Z must be softmax outputs, t one-hot targets
    return T.nnet.categorical_crossentropy( Z, t )
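For readers not fluent in Theano's updates mechanism, here is the parameter update that _Tfunc_train compiles, written as plain NumPy; the function and variable names are illustrative only, not part of the module:

def sgd_step( W, dW_prev, gradW, eta, mu, lam ):
    # momentum SGD with L2 weight decay, as in _Tfunc_train:
    #   dW_new = -eta * ( grad + lam * W ) + mu * dW_prev
    dW_new = -eta * ( gradW + lam * W ) + mu * dW_prev
    return W + dW_new, dW_new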