Gist takatakamanbou/5d227d3f13edf74bcce9: MNIST classification with Theano (Python 2), covering logistic regression and 2-/3-layer MLPs trained by mini-batch gradient descent with momentum.

########## driver script: logistic regression on MNIST ##########
import numpy as np
import scipy as sp

import mnist0117 as mnist


def gendat( LT ):

    mn = mnist.MNIST( LT )
    label = mn.getLabel()
    N = label.shape[0]
    K = 10
    X = mn.getImage().reshape( ( N, -1 ) ) / 255.0   # scale pixel values to [0, 1] (255.0 forces float division)
    t = np.zeros( ( N, K ), dtype = bool )           # 1-of-K coded targets
    for ik in range( K ):
        t[label == ik, ik] = True

    return X, label, t
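# For example, with labels [2, 0, 1] and K = 10, the loop above sets
#   t[0, 2] = t[1, 0] = t[2, 1] = True
# and leaves everything else False, i.e. each row of t is the 1-of-K
# (one-hot) coding of the corresponding label.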
def errorrate( lg, X, t, label ):

    Y, Z = lg.output( X )
    mnLL = np.mean( lg.negLL( Z, t ) )                   # mean negative log-likelihood
    er = np.mean( label != np.argmax( Z, axis = 1 ) )    # classification error rate
    return mnLL, er


if __name__ == "__main__":

    import logreg_theano0130 as logreg

    np.random.seed( 0 )

    ##### setting the training data & the validation data
    #
    X, label, t = gendat( 'L' )
    XL, labelL, tL = X[:50000], label[:50000], t[:50000]
    XV, labelV, tV = X[50000:], label[50000:], t[50000:]
    NL, D = XL.shape
    NV, D = XV.shape
    K = t.shape[1]

    ##### mini batch indices for stochastic gradient descent
    #
    idx = np.random.permutation( NL )
    batchsize = 1000
    assert NL % batchsize == 0
    nbatch = NL // batchsize
    idxB = np.zeros( ( nbatch, NL ), dtype = bool )
    for ib in range( nbatch ):
        idxB[ib, idx.reshape( ( nbatch, batchsize ) )[ib, :]] = True
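    # Each row of idxB is a boolean mask over all NL samples selecting one
    # shuffled mini batch.  Toy illustration with NL = 6, batchsize = 3 and
    # a hypothetical permutation idx = [5, 2, 1, 3, 0, 4]:
    #   idx.reshape( ( 2, 3 ) ) -> [[5, 2, 1], [3, 0, 4]]
    #   idxB[0] selects samples {1, 2, 5}, idxB[1] selects {0, 3, 4}.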
    print '### training: NL =', NL, ' NV =', NV, ' D =', D, ' K =', K, ' batchsize =', batchsize

    ##### training
    #
    eta = 0.5      # learning rate
    mu = 0.8       # momentum coefficient
    nepoch = 100

    lg = logreg.LogisticRegression( D, K )
    for i in range( nepoch ):
        # printing error rates etc.
        if i % 10 == 0:
            mnLLL, erL = errorrate( lg, XL, tL, labelL )
            mnLLV, erV = errorrate( lg, XV, tV, labelV )
            print i, mnLLL, erL * 100, erV * 100
        # training (selecting each batch in random order)
        for ib in np.random.permutation( nbatch ):
            ii = idxB[ib, :]
            lg.train( XL[ii], tL[ii], eta, mu )

    i = nepoch
    mnLLL, erL = errorrate( lg, XL, tL, labelL )
    mnLLV, erV = errorrate( lg, XV, tV, labelV )
    print i, mnLLL, erL * 100, erV * 100

    ##### setting the test data
    #
    XT, labelT, tT = gendat( 'T' )
    NT, D = XT.shape
    print '# NT =', NT
    mnLLT, erT = errorrate( lg, XT, tT, labelT )
    print i, mnLLT, erL * 100, erV * 100, erT * 100
########## logreg_theano0130.py ##########
import numpy as np

import theano
import theano.tensor as T

import nnet0130 as nnet


class LogisticRegression():

    def __init__( self, D, K ):

        # shared variables (weights, biases, and their previous updates for momentum)
        self.W = theano.shared( nnet.random( ( K, D ), 0.1 ) )
        self.b = theano.shared( nnet.random( K, 0.1 ) )
        self.dW = theano.shared( np.zeros( ( K, D ) ) )
        self.db = theano.shared( np.zeros( K ) )

        # theano functions
        self.output = self._Tfunc_output()
        self.negLL = self._Tfunc_negLL()
        self.train = self._Tfunc_train()

    ### output
    #
    def _Tfunc_output( self ):
        X = T.dmatrix( 'X' )    # N x D
        Y, Z = nnet.T_softmax( self.W, self.b, X )
        return theano.function( [ X ], [ Y, Z ] )

    ### negative log-likelihood
    #
    def _Tfunc_negLL( self ):
        Z = T.dmatrix()    # N x K
        t = T.dmatrix()    # N x K
        LL = nnet.T_negLL( Z, t )
        return theano.function( [ Z, t ], LL )

    ### train
    #
    def _Tfunc_train( self ):
        W, b, dW, db = self.W, self.b, self.dW, self.db
        X = T.dmatrix( 'X' )      # N x D
        t = T.dmatrix( 't' )      # N x K
        eta = T.dscalar( 'eta' )  # learning rate
        mu = T.dscalar( 'mu' )    # momentum coefficient
        Y, Z = nnet.T_softmax( W, b, X )
        cost = T.mean( nnet.T_negLL( Z, t ) )
        gradW, gradb = T.grad( cost, [ W, b ] )
        # gradient descent with momentum:
        #   dW <- -eta * grad(cost, W) + mu * dW,  W <- W + dW  (likewise for b)
        dW_new = -eta * gradW + mu * dW
        db_new = -eta * gradb + mu * db
        W_new = W + dW_new
        b_new = b + db_new
        updatesList = [ ( W, W_new ), ( b, b_new ), ( dW, dW_new ), ( db, db_new ) ]
        return theano.function( [ X, t, eta, mu ], cost, updates = updatesList )
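A minimal usage sketch for this class (not part of the gist; assumes Theano and nnet0130 are importable, with MNIST-like sizes D = 784, K = 10 and dummy data):

    import numpy as np
    import logreg_theano0130 as logreg

    lg = logreg.LogisticRegression( 784, 10 )
    X = np.random.random( ( 100, 784 ) )                  # dummy batch of 100 inputs
    t = np.eye( 10 )[np.random.randint( 0, 10, 100 )]     # dummy one-hot targets
    print lg.train( X, t, 0.5, 0.8 )                      # one momentum-SGD step; prints the batch cost
    Y, Z = lg.output( X )                                 # Z: 100 x 10 softmax outputs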
########## mlp_2layer0130.py ##########
import numpy as np

import theano
import theano.tensor as T

import nnet0130 as nnet


class MLP():

    def __init__( self, D, H, K ):

        # shared variables for the 1st layer (sigmoid or relu)
        self.W1 = theano.shared( nnet.random( ( H, D ), 0.1 ) )
        self.b1 = theano.shared( nnet.random( H, 0.1 ) )
        self.dW1 = theano.shared( np.zeros( ( H, D ) ) )
        self.db1 = theano.shared( np.zeros( H ) )

        # shared variables for the 2nd layer (softmax)
        self.W2 = theano.shared( nnet.random( ( K, H ), 0.1 ) )
        self.b2 = theano.shared( nnet.random( K, 0.1 ) )
        self.dW2 = theano.shared( np.zeros( ( K, H ) ) )
        self.db2 = theano.shared( np.zeros( K ) )

        # theano functions
        self.output = self._Tfunc_output()
        self.negLL = self._Tfunc_negLL()
        self.train = self._Tfunc_train()

    ### output
    #
    def _Tfunc_output( self ):
        X = T.dmatrix()    # N x D
        Y, Z = _T_output( self.W1, self.b1, self.W2, self.b2, X )
        return theano.function( [ X ], [ Y, Z ] )

    ### negative log-likelihood
    #
    def _Tfunc_negLL( self ):
        Z = T.dmatrix()    # N x K
        t = T.dmatrix()    # N x K
        LL = nnet.T_negLL( Z, t )
        return theano.function( [ Z, t ], LL )

    ### train
    #
    def _Tfunc_train( self ):
        W1, dW1, b1, db1 = self.W1, self.dW1, self.b1, self.db1
        W2, dW2, b2, db2 = self.W2, self.dW2, self.b2, self.db2
        X = T.dmatrix( 'X' )      # N x D
        t = T.dmatrix( 't' )      # N x K
        eta = T.dscalar( 'eta' )  # learning rate
        mu = T.dscalar( 'mu' )    # momentum coefficient
        Y2, Z2 = _T_output( W1, b1, W2, b2, X )
        cost = T.mean( nnet.T_negLL( Z2, t ) )
        gradW1, gradb1, gradW2, gradb2 = T.grad( cost, [ W1, b1, W2, b2 ] )
        # the same momentum update as in logreg_theano0130, applied per layer
        dW1_new = -eta * gradW1 + mu * dW1
        db1_new = -eta * gradb1 + mu * db1
        dW2_new = -eta * gradW2 + mu * dW2
        db2_new = -eta * gradb2 + mu * db2
        W1_new = W1 + dW1_new
        b1_new = b1 + db1_new
        W2_new = W2 + dW2_new
        b2_new = b2 + db2_new
        updatesList = [
            ( W1, W1_new ), ( b1, b1_new ), ( dW1, dW1_new ), ( db1, db1_new ),
            ( W2, W2_new ), ( b2, b2_new ), ( dW2, dW2_new ), ( db2, db2_new ) ]
        return theano.function( [ X, t, eta, mu ], cost, updates = updatesList )


def _T_output( W1, b1, W2, b2, X ):
    #Y1, Z1 = nnet.T_sigmoid( W1, b1, X )    # uncomment for a sigmoid hidden layer
    Y1, Z1 = nnet.T_relu( W1, b1, X )
    Y2, Z2 = nnet.T_softmax( W2, b2, Z1 )
    return Y2, Z2
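The forward pass in _T_output composes a ReLU hidden layer with a softmax output; the shapes flow as follows (N is the batch size):

    # X  : N x D  --( W1: H x D, b1: H )-->  Z1 : N x H   (ReLU activations)
    # Z1 : N x H  --( W2: K x H, b2: K )-->  Z2 : N x K   (softmax; each row sums to 1)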
########## mlp_3layer0130.py ##########
import numpy as np

import theano
import theano.tensor as T

import nnet0130 as nnet


class MLP():

    def __init__( self, D, H1, H2, K ):

        # shared variables for the 1st layer (sigmoid or relu)
        self.W1 = theano.shared( nnet.random( ( H1, D ), 0.1 ) )
        self.b1 = theano.shared( nnet.random( H1, 0.1 ) )
        self.dW1 = theano.shared( np.zeros( ( H1, D ) ) )
        self.db1 = theano.shared( np.zeros( H1 ) )

        # shared variables for the 2nd layer (sigmoid or relu)
        self.W2 = theano.shared( nnet.random( ( H2, H1 ), 0.1 ) )
        self.b2 = theano.shared( nnet.random( H2, 0.1 ) )
        self.dW2 = theano.shared( np.zeros( ( H2, H1 ) ) )
        self.db2 = theano.shared( np.zeros( H2 ) )

        # shared variables for the 3rd layer (softmax)
        self.W3 = theano.shared( nnet.random( ( K, H2 ), 0.1 ) )
        self.b3 = theano.shared( nnet.random( K, 0.1 ) )
        self.dW3 = theano.shared( np.zeros( ( K, H2 ) ) )
        self.db3 = theano.shared( np.zeros( K ) )

        # theano functions
        self.output = self._Tfunc_output()
        self.negLL = self._Tfunc_negLL()
        self.train = self._Tfunc_train()

    ### output
    #
    def _Tfunc_output( self ):
        X = T.dmatrix()    # N x D
        Y, Z = _T_output( self.W1, self.b1, self.W2, self.b2, self.W3, self.b3, X )
        return theano.function( [ X ], [ Y, Z ] )

    ### negative log-likelihood
    #
    def _Tfunc_negLL( self ):
        Z = T.dmatrix()    # N x K
        t = T.dmatrix()    # N x K
        LL = nnet.T_negLL( Z, t )
        return theano.function( [ Z, t ], LL )

    ### train
    #
    def _Tfunc_train( self ):
        W1, dW1, b1, db1 = self.W1, self.dW1, self.b1, self.db1
        W2, dW2, b2, db2 = self.W2, self.dW2, self.b2, self.db2
        W3, dW3, b3, db3 = self.W3, self.dW3, self.b3, self.db3
        X = T.dmatrix( 'X' )      # N x D
        t = T.dmatrix( 't' )      # N x K
        eta = T.dscalar( 'eta' )  # learning rate
        mu = T.dscalar( 'mu' )    # momentum coefficient
        Y3, Z3 = _T_output( W1, b1, W2, b2, W3, b3, X )
        cost = T.mean( nnet.T_negLL( Z3, t ) )
        grad = T.grad( cost, [ W1, b1, W2, b2, W3, b3 ] )
        gradW1, gradb1, gradW2, gradb2, gradW3, gradb3 = grad
        # the same momentum update as in logreg_theano0130, applied per layer
        dW1_new = -eta * gradW1 + mu * dW1
        db1_new = -eta * gradb1 + mu * db1
        dW2_new = -eta * gradW2 + mu * dW2
        db2_new = -eta * gradb2 + mu * db2
        dW3_new = -eta * gradW3 + mu * dW3
        db3_new = -eta * gradb3 + mu * db3
        W1_new = W1 + dW1_new
        b1_new = b1 + db1_new
        W2_new = W2 + dW2_new
        b2_new = b2 + db2_new
        W3_new = W3 + dW3_new
        b3_new = b3 + db3_new
        updatesList = [
            ( W1, W1_new ), ( b1, b1_new ), ( dW1, dW1_new ), ( db1, db1_new ),
            ( W2, W2_new ), ( b2, b2_new ), ( dW2, dW2_new ), ( db2, db2_new ),
            ( W3, W3_new ), ( b3, b3_new ), ( dW3, dW3_new ), ( db3, db3_new ),
        ]
        return theano.function( [ X, t, eta, mu ], cost, updates = updatesList )


def _T_output( W1, b1, W2, b2, W3, b3, X ):
    #Y1, Z1 = nnet.T_sigmoid( W1, b1, X )    # uncomment for a sigmoid 1st hidden layer
    Y1, Z1 = nnet.T_relu( W1, b1, X )
    Y2, Z2 = nnet.T_relu( W2, b2, Z1 )
    Y3, Z3 = nnet.T_softmax( W3, b3, Z2 )
    return Y3, Z3
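A construction-and-training sketch with the sizes the driver below uses (dummy data, not part of the gist; assumes Theano and nnet0130 are importable):

    import numpy as np
    import mlp_3layer0130 as mlp_3layer

    mlp = mlp_3layer.MLP( 784, 500, 1000, 10 )            # D, H1, H2, K
    X = np.random.random( ( 1000, 784 ) )                 # one dummy mini batch
    t = np.eye( 10 )[np.random.randint( 0, 10, 1000 )]    # dummy one-hot targets
    print mlp.train( X, t, 0.5, 0.8 )                     # one update; prints the batch cost
    Y3, Z3 = mlp.output( X )
    pred = np.argmax( Z3, axis = 1 )                      # predicted labels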
########## driver script: MLP on MNIST ##########
import numpy as np
import scipy as sp

import mnist0117 as mnist


def gendat( LT ):

    mn = mnist.MNIST( LT )
    label = mn.getLabel()
    N = label.shape[0]
    K = 10
    X = mn.getImage().reshape( ( N, -1 ) ) / 255.0   # scale pixel values to [0, 1] (255.0 forces float division)
    t = np.zeros( ( N, K ), dtype = bool )           # 1-of-K coded targets
    for ik in range( K ):
        t[label == ik, ik] = True

    return X, label, t


def errorrate( mlp, X, t, label ):

    Y, Z = mlp.output( X )
    mnLL = np.mean( mlp.negLL( Z, t ) )                  # mean negative log-likelihood
    er = np.mean( label != np.argmax( Z, axis = 1 ) )    # classification error rate
    return mnLL, er


if __name__ == "__main__":

    import mlp_2layer0130 as mlp_2layer
    import mlp_3layer0130 as mlp_3layer

    np.random.seed( 0 )

    ##### setting the training data & the validation data
    #
    X, label, t = gendat( 'L' )
    XL, labelL, tL = X[:50000], label[:50000], t[:50000]
    XV, labelV, tV = X[50000:], label[50000:], t[50000:]
    NL, D = XL.shape
    NV, D = XV.shape
    K = t.shape[1]

    ##### mini batch indices for stochastic gradient descent
    #
    idx = np.random.permutation( NL )
    batchsize = 1000
    assert NL % batchsize == 0
    nbatch = NL // batchsize
    idxB = np.zeros( ( nbatch, NL ), dtype = bool )
    for ib in range( nbatch ):
        idxB[ib, idx.reshape( ( nbatch, batchsize ) )[ib, :]] = True

    ##### training
    #
    # setting H2 <= 0 selects the 2-layer MLP, otherwise the 3-layer one
    #H1, H2 = 500, 0
    H1, H2 = 500, 1000
    eta = 0.5      # learning rate
    mu = 0.8       # momentum coefficient
    nepoch = 20

    if H2 <= 0:
        mlp = mlp_2layer.MLP( D, H1, K )
        print '### 2-layer MLP: D =', D, ' H =', H1, ' K =', K
    else:
        mlp = mlp_3layer.MLP( D, H1, H2, K )
        print '### 3-layer MLP: D =', D, ' H1 =', H1, ' H2 =', H2, ' K =', K
    print '### training: NL =', NL, ' NV =', NV, ' D =', D, ' K =', K, ' batchsize =', batchsize

    for i in range( nepoch ):
        # printing error rates etc.
        if i % 10 == 0:
            mnLLL, erL = errorrate( mlp, XL, tL, labelL )
            mnLLV, erV = errorrate( mlp, XV, tV, labelV )
            print i, mnLLL, erL * 100, erV * 100
        # training (selecting each batch in random order)
        for ib in np.random.permutation( nbatch ):
            ii = idxB[ib, :]
            mlp.train( XL[ii], tL[ii], eta, mu )

    i = nepoch
    mnLLL, erL = errorrate( mlp, XL, tL, labelL )
    mnLLV, erV = errorrate( mlp, XV, tV, labelV )
    print i, mnLLL, erL * 100, erV * 100

    ##### setting the test data
    #
    XT, labelT, tT = gendat( 'T' )
    NT, D = XT.shape
    print '# NT =', NT
    mnLLT, erT = errorrate( mlp, XT, tT, labelT )
    print i, mnLLT, erL * 100, erV * 100, erT * 100
########## nnet0130.py ##########
import numpy as np

import theano
import theano.tensor as T


### random numbers for weight initialization
#
def random( shape, r ):
    # uniform on [ -r/2, r/2 )
    return r * ( np.random.random_sample( shape ) - 0.5 )


### softmax layer
#
def T_softmax( W, b, X ):
    Y = T.dot( X, W.T ) + b    # Ndat x Dout
    Z = T.nnet.softmax( Y )
    return Y, Z


### sigmoid layer
#
def T_sigmoid( W, b, X ):
    Y = T.dot( X, W.T ) + b    # Ndat x Dout
    Z = T.nnet.sigmoid( Y )
    return Y, Z


### rectified linear layer
#
def T_relu( W, b, X ):
    Y = T.dot( X, W.T ) + b    # Ndat x Dout
    Z = T.switch( Y > 0, Y, 0 )    # elementwise max( Y, 0 )
    return Y, Z


### negative log-likelihood
#
def T_negLL( Z, t ):
    # per-sample cross-entropy between softmax output Z and 1-of-K targets t
    return T.nnet.categorical_crossentropy( Z, t )
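A quick self-check of these helpers (a sketch, not part of the gist; requires Theano):

    import numpy as np
    import theano
    import theano.tensor as T
    import nnet0130 as nnet

    W = theano.shared( nnet.random( ( 10, 784 ), 0.1 ) )
    b = theano.shared( nnet.random( 10, 0.1 ) )
    X = T.dmatrix( 'X' )
    Y, Z = nnet.T_softmax( W, b, X )
    f = theano.function( [ X ], Z )
    print f( np.random.random( ( 5, 784 ) ) ).sum( axis = 1 )   # each row sums to 1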