@takatakamanbou
Last active August 29, 2015 14:14
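Theano implementations of multinomial logistic regression and of 2-/3-layer MLPs for MNIST digit classification, trained by mini-batch gradient descent with momentum. Six files appear to be included. The first is the training/evaluation script for the logistic regression model; mnist0117, which loads the MNIST images and labels, is an external helper that is not reproduced in this gist.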
import numpy as np
import scipy as sp

import mnist0117 as mnist


# build the data matrix X, the label vector, and the 1-of-K target matrix t
def gendat( LT ):
    mn = mnist.MNIST( LT )
    label = mn.getLabel()
    N = label.shape[0]
    K = 10
    X = mn.getImage().reshape( ( N, -1 ) ) / 255.0   # scale to [0,1] (float division regardless of the image dtype)
    t = np.zeros( ( N, K ), dtype = bool )
    for ik in range( K ):
        t[label == ik, ik] = True
    return X, label, t


# mean negative log-likelihood & error rate of the model lg on ( X, t, label )
def errorrate( lg, X, t, label ):
    Y, Z = lg.output( X )
    mnLL = np.mean( lg.negLL( Z, t ) )
    er = np.mean( label != np.argmax( Z, axis = 1 ) )
    return mnLL, er


if __name__ == "__main__":

    import logreg_theano0130 as logreg

    np.random.seed( 0 )

    ##### setting the training data & the validation data
    #
    X, label, t = gendat( 'L' )
    XL, labelL, tL = X[:50000], label[:50000], t[:50000]
    XV, labelV, tV = X[50000:], label[50000:], t[50000:]
    NL, D = XL.shape
    NV, D = XV.shape
    K = t.shape[1]

    ##### mini-batch indices for stochastic gradient descent
    #
    idx = np.random.permutation( NL )
    batchsize = 1000
    nbatch = NL / batchsize
    assert NL % batchsize == 0
    idxB = np.zeros( ( nbatch, NL ), dtype = bool )
    for ib in range( nbatch ):
        idxB[ib, idx.reshape( ( nbatch, batchsize ) )[ib, :]] = True

    print '### training: NL = ', NL, ' NV = ', NV, ' D = ', D, ' K = ', K, ' batchsize = ', batchsize

    ##### training
    #
    eta = 0.5
    mu = 0.8
    nepoch = 100

    lg = logreg.LogisticRegression( D, K )

    for i in range( nepoch ):
        # printing error rates etc. every 10 epochs
        if i % 10 == 0:
            mnLLL, erL = errorrate( lg, XL, tL, labelL )
            mnLLV, erV = errorrate( lg, XV, tV, labelV )
            print i, mnLLL, erL * 100, erV * 100
        # training (selecting each batch in random order)
        for ib in np.random.permutation( nbatch ):
            ii = idxB[ib, :]
            lg.train( XL[ii], tL[ii], eta, mu )

    i = nepoch
    mnLLL, erL = errorrate( lg, XL, tL, labelL )
    mnLLV, erV = errorrate( lg, XV, tV, labelV )
    print i, mnLLL, erL * 100, erV * 100

    ##### setting the test data
    #
    XT, labelT, tT = gendat( 'T' )
    NT, D = XT.shape
    print '# NT = ', NT

    mnLLT, erT = errorrate( lg, XT, tT, labelT )
    print i, mnLLT, erL * 100, erV * 100, erT * 100
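logreg_theano0130, imported above as logreg: the LogisticRegression class, i.e. a single softmax layer whose parameters live in Theano shared variables and are updated by compiled Theano functions. The update rule implemented below is dW ← -eta * ∂cost/∂W + mu * dW and W ← W + dW (likewise for b), i.e. gradient descent with momentum on the mean negative log-likelihood.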
import numpy as np
import theano
import theano.tensor as T

import nnet0130 as nnet


class LogisticRegression():

    def __init__( self, D, K ):

        # shared variables: weights, biases, and their previous update steps (for momentum)
        self.W  = theano.shared( nnet.random( ( K, D ), 0.1 ) )
        self.b  = theano.shared( nnet.random( K, 0.1 ) )
        self.dW = theano.shared( np.zeros( ( K, D ) ) )
        self.db = theano.shared( np.zeros( K ) )

        # theano functions
        self.output = self._Tfunc_output()
        self.negLL  = self._Tfunc_negLL()
        self.train  = self._Tfunc_train()

    ### output
    #
    def _Tfunc_output( self ):
        X = T.dmatrix( 'X' )   # N x D
        Y, Z = nnet.T_softmax( self.W, self.b, X )
        return theano.function( [ X ], [ Y, Z ] )

    ### negative log-likelihood
    #
    def _Tfunc_negLL( self ):
        Z = T.dmatrix()   # N x K
        t = T.dmatrix()   # N x K
        LL = nnet.T_negLL( Z, t )
        return theano.function( [ Z, t ], LL )

    ### train
    #
    def _Tfunc_train( self ):
        W, b, dW, db = self.W, self.b, self.dW, self.db
        X = T.dmatrix( 'X' )    # N x D
        t = T.dmatrix( 't' )    # N x K
        eta = T.dscalar( 'eta' )
        mu  = T.dscalar( 'mu' )
        Y, Z = nnet.T_softmax( W, b, X )
        cost = T.mean( nnet.T_negLL( Z, t ) )
        gradW, gradb = T.grad( cost, [ W, b ] )
        # gradient descent with momentum: new step = -eta * gradient + mu * previous step
        dW_new = -eta * gradW + mu * dW
        db_new = -eta * gradb + mu * db
        W_new = W + dW_new
        b_new = b + db_new
        updatesList = [ ( W, W_new ), ( b, b_new ), ( dW, dW_new ), ( db, db_new ) ]
        return theano.function( [ X, t, eta, mu ], cost, updates = updatesList )
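mlp_2layer0130: a 2-layer MLP with one hidden layer (ReLU, with a sigmoid variant commented out) followed by a softmax output layer, trained the same way.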
import numpy as np
import theano
import theano.tensor as T

import nnet0130 as nnet


class MLP():

    def __init__( self, D, H, K ):

        # shared variables for the 1st layer (sigmoid or relu)
        self.W1  = theano.shared( nnet.random( ( H, D ), 0.1 ) )
        self.b1  = theano.shared( nnet.random( H, 0.1 ) )
        self.dW1 = theano.shared( np.zeros( ( H, D ) ) )
        self.db1 = theano.shared( np.zeros( H ) )

        # shared variables for the 2nd layer (softmax)
        self.W2  = theano.shared( nnet.random( ( K, H ), 0.1 ) )
        self.b2  = theano.shared( nnet.random( K, 0.1 ) )
        self.dW2 = theano.shared( np.zeros( ( K, H ) ) )
        self.db2 = theano.shared( np.zeros( K ) )

        # theano functions
        self.output = self._Tfunc_output()
        self.negLL  = self._Tfunc_negLL()
        self.train  = self._Tfunc_train()

    ### output
    #
    def _Tfunc_output( self ):
        X = T.dmatrix()   # N x D
        Y, Z = _T_output( self.W1, self.b1, self.W2, self.b2, X )
        return theano.function( [ X ], [ Y, Z ] )

    ### negative log-likelihood
    #
    def _Tfunc_negLL( self ):
        Z = T.dmatrix()   # N x K
        t = T.dmatrix()   # N x K
        LL = nnet.T_negLL( Z, t )
        return theano.function( [ Z, t ], LL )

    ### train
    #
    def _Tfunc_train( self ):
        W1, dW1, b1, db1 = self.W1, self.dW1, self.b1, self.db1
        W2, dW2, b2, db2 = self.W2, self.dW2, self.b2, self.db2
        X = T.dmatrix( 'X' )    # N x D
        t = T.dmatrix( 't' )    # N x K
        eta = T.dscalar( 'eta' )
        mu  = T.dscalar( 'mu' )
        Y2, Z2 = _T_output( W1, b1, W2, b2, X )
        cost = T.mean( nnet.T_negLL( Z2, t ) )
        gradW1, gradb1, gradW2, gradb2 = T.grad( cost, [ W1, b1, W2, b2 ] )
        dW1_new = -eta * gradW1 + mu * dW1
        db1_new = -eta * gradb1 + mu * db1
        dW2_new = -eta * gradW2 + mu * dW2
        db2_new = -eta * gradb2 + mu * db2
        W1_new = W1 + dW1_new
        b1_new = b1 + db1_new
        W2_new = W2 + dW2_new
        b2_new = b2 + db2_new
        updatesList = [
            ( W1, W1_new ), ( b1, b1_new ), ( dW1, dW1_new ), ( db1, db1_new ),
            ( W2, W2_new ), ( b2, b2_new ), ( dW2, dW2_new ), ( db2, db2_new ) ]
        return theano.function( [ X, t, eta, mu ], cost, updates = updatesList )


def _T_output( W1, b1, W2, b2, X ):
    #Y1, Z1 = nnet.T_sigmoid( W1, b1, X )
    Y1, Z1 = nnet.T_relu( W1, b1, X )
    Y2, Z2 = nnet.T_softmax( W2, b2, Z1 )
    return Y2, Z2
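mlp_3layer0130: a 3-layer MLP with two ReLU hidden layers followed by a softmax output layer.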
import numpy as np
import theano
import theano.tensor as T

import nnet0130 as nnet


class MLP():

    def __init__( self, D, H1, H2, K ):

        # shared variables for the 1st layer (sigmoid or relu)
        self.W1  = theano.shared( nnet.random( ( H1, D ), 0.1 ) )
        self.b1  = theano.shared( nnet.random( H1, 0.1 ) )
        self.dW1 = theano.shared( np.zeros( ( H1, D ) ) )
        self.db1 = theano.shared( np.zeros( H1 ) )

        # shared variables for the 2nd layer (sigmoid or relu)
        self.W2  = theano.shared( nnet.random( ( H2, H1 ), 0.1 ) )
        self.b2  = theano.shared( nnet.random( H2, 0.1 ) )
        self.dW2 = theano.shared( np.zeros( ( H2, H1 ) ) )
        self.db2 = theano.shared( np.zeros( H2 ) )

        # shared variables for the 3rd layer (softmax)
        self.W3  = theano.shared( nnet.random( ( K, H2 ), 0.1 ) )
        self.b3  = theano.shared( nnet.random( K, 0.1 ) )
        self.dW3 = theano.shared( np.zeros( ( K, H2 ) ) )
        self.db3 = theano.shared( np.zeros( K ) )

        # theano functions
        self.output = self._Tfunc_output()
        self.negLL  = self._Tfunc_negLL()
        self.train  = self._Tfunc_train()

    ### output
    #
    def _Tfunc_output( self ):
        X = T.dmatrix()   # N x D
        Y, Z = _T_output( self.W1, self.b1, self.W2, self.b2, self.W3, self.b3, X )
        return theano.function( [ X ], [ Y, Z ] )

    ### negative log-likelihood
    #
    def _Tfunc_negLL( self ):
        Z = T.dmatrix()   # N x K
        t = T.dmatrix()   # N x K
        LL = nnet.T_negLL( Z, t )
        return theano.function( [ Z, t ], LL )

    ### train
    #
    def _Tfunc_train( self ):
        W1, dW1, b1, db1 = self.W1, self.dW1, self.b1, self.db1
        W2, dW2, b2, db2 = self.W2, self.dW2, self.b2, self.db2
        W3, dW3, b3, db3 = self.W3, self.dW3, self.b3, self.db3
        X = T.dmatrix( 'X' )    # N x D
        t = T.dmatrix( 't' )    # N x K
        eta = T.dscalar( 'eta' )
        mu  = T.dscalar( 'mu' )
        Y3, Z3 = _T_output( W1, b1, W2, b2, W3, b3, X )
        cost = T.mean( nnet.T_negLL( Z3, t ) )
        grad = T.grad( cost, [ W1, b1, W2, b2, W3, b3 ] )
        gradW1, gradb1, gradW2, gradb2, gradW3, gradb3 = grad
        dW1_new = -eta * gradW1 + mu * dW1
        db1_new = -eta * gradb1 + mu * db1
        dW2_new = -eta * gradW2 + mu * dW2
        db2_new = -eta * gradb2 + mu * db2
        dW3_new = -eta * gradW3 + mu * dW3
        db3_new = -eta * gradb3 + mu * db3
        W1_new = W1 + dW1_new
        b1_new = b1 + db1_new
        W2_new = W2 + dW2_new
        b2_new = b2 + db2_new
        W3_new = W3 + dW3_new
        b3_new = b3 + db3_new
        updatesList = [
            ( W1, W1_new ), ( b1, b1_new ), ( dW1, dW1_new ), ( db1, db1_new ),
            ( W2, W2_new ), ( b2, b2_new ), ( dW2, dW2_new ), ( db2, db2_new ),
            ( W3, W3_new ), ( b3, b3_new ), ( dW3, dW3_new ), ( db3, db3_new ),
        ]
        return theano.function( [ X, t, eta, mu ], cost, updates = updatesList )


def _T_output( W1, b1, W2, b2, W3, b3, X ):
    #Y1, Z1 = nnet.T_sigmoid( W1, b1, X )
    Y1, Z1 = nnet.T_relu( W1, b1, X )
    Y2, Z2 = nnet.T_relu( W2, b2, Z1 )
    Y3, Z3 = nnet.T_softmax( W3, b3, Z2 )
    return Y3, Z3
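The training/evaluation script for the MLPs; setting H2 <= 0 selects the 2-layer model, otherwise the 3-layer one is used.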
import numpy as np
import scipy as sp

import mnist0117 as mnist


# build the data matrix X, the label vector, and the 1-of-K target matrix t
def gendat( LT ):
    mn = mnist.MNIST( LT )
    label = mn.getLabel()
    N = label.shape[0]
    K = 10
    X = mn.getImage().reshape( ( N, -1 ) ) / 255.0   # scale to [0,1] (float division regardless of the image dtype)
    t = np.zeros( ( N, K ), dtype = bool )
    for ik in range( K ):
        t[label == ik, ik] = True
    return X, label, t


# mean negative log-likelihood & error rate of the model mlp on ( X, t, label )
def errorrate( mlp, X, t, label ):
    Y, Z = mlp.output( X )
    mnLL = np.mean( mlp.negLL( Z, t ) )
    er = np.mean( label != np.argmax( Z, axis = 1 ) )
    return mnLL, er


if __name__ == "__main__":

    import mlp_2layer0130 as mlp_2layer
    import mlp_3layer0130 as mlp_3layer

    np.random.seed( 0 )

    ##### setting the training data & the validation data
    #
    X, label, t = gendat( 'L' )
    XL, labelL, tL = X[:50000], label[:50000], t[:50000]
    XV, labelV, tV = X[50000:], label[50000:], t[50000:]
    NL, D = XL.shape
    NV, D = XV.shape
    K = t.shape[1]

    ##### mini-batch indices for stochastic gradient descent
    #
    idx = np.random.permutation( NL )
    batchsize = 1000
    nbatch = NL / batchsize
    assert NL % batchsize == 0
    idxB = np.zeros( ( nbatch, NL ), dtype = bool )
    for ib in range( nbatch ):
        idxB[ib, idx.reshape( ( nbatch, batchsize ) )[ib, :]] = True

    ##### training
    #
    #H1, H2 = 500, 0
    H1, H2 = 500, 1000
    eta = 0.5
    mu = 0.8
    nepoch = 20

    if H2 <= 0:
        mlp = mlp_2layer.MLP( D, H1, K )
        print '### 2-layer MLP: D = ', D, ' H = ', H1, ' K = ', K
    else:
        mlp = mlp_3layer.MLP( D, H1, H2, K )
        print '### 3-layer MLP: D = ', D, ' H1 = ', H1, ' H2 = ', H2, ' K = ', K

    print '### training: NL = ', NL, ' NV = ', NV, ' D = ', D, ' K = ', K, ' batchsize = ', batchsize

    for i in range( nepoch ):
        # printing error rates etc. every 10 epochs
        if i % 10 == 0:
            mnLLL, erL = errorrate( mlp, XL, tL, labelL )
            mnLLV, erV = errorrate( mlp, XV, tV, labelV )
            print i, mnLLL, erL * 100, erV * 100
        # training (selecting each batch in random order)
        for ib in np.random.permutation( nbatch ):
            ii = idxB[ib, :]
            mlp.train( XL[ii], tL[ii], eta, mu )

    i = nepoch
    mnLLL, erL = errorrate( mlp, XL, tL, labelL )
    mnLLV, erV = errorrate( mlp, XV, tV, labelV )
    print i, mnLLL, erL * 100, erV * 100

    ##### setting the test data
    #
    XT, labelT, tT = gendat( 'T' )
    NT, D = XT.shape
    print '# NT = ', NT

    mnLLT, erT = errorrate( mlp, XT, tT, labelT )
    print i, mnLLT, erL * 100, erV * 100, erT * 100
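nnet0130, the helper module shared by the models above: weight initialization plus the Theano expressions for the softmax, sigmoid, and ReLU layers and for the negative log-likelihood.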
import numpy as np
import theano
import theano.tensor as T


### random numbers for weight initialization
#
def random( shape, r ):
    # [ -r/2, r/2 )
    return r * ( np.random.random_sample( shape ) - 0.5 )

### softmax layer
#
def T_softmax( W, b, X ):
    Y = T.dot( X, W.T ) + b   # Ndat x Dout
    Z = T.nnet.softmax( Y )
    return Y, Z

### sigmoid layer
#
def T_sigmoid( W, b, X ):
    Y = T.dot( X, W.T ) + b   # Ndat x Dout
    Z = T.nnet.sigmoid( Y )
    return Y, Z

### rectified linear layer
#
def T_relu( W, b, X ):
    Y = T.dot( X, W.T ) + b   # Ndat x Dout
    Z = T.switch( Y > 0, Y, 0 )
    return Y, Z

### negative log-likelihood
#
def T_negLL( Z, t ):
    return T.nnet.categorical_crossentropy( Z, t )