# gist by @takatakamanbou (logreg_mnist0128v1.py, Secret; last active Aug 29, 2015)
# logistic regression on MNIST with Theano: four driver-script variants followed
# by the modules logreg_theano0128v1 ... v4 they use
### ===== driver script using logreg_theano0128v1 =====

import numpy as np
import scipy as sp
import mnist0117 as mnist


def gendat( LT ):
    mn = mnist.MNIST( LT )
    label = mn.getLabel()
    N = label.shape[0]
    K = 10
    X = mn.getImage().reshape( ( N, -1 ) ) / 255.0   # => in [0,1]
    t = np.zeros( ( N, K ), dtype = bool )
    for ik in range( K ):
        t[label == ik, ik] = True
    return X, label, t


def errorrate( W, b, X, t, label ):
    # softmax and negLL are the compiled Theano functions defined in __main__
    Y, Z = softmax( W, b, X )
    mnLL = np.mean( negLL( Z, t ) )
    er = np.mean( label != np.argmax( Z, axis = 1 ) )
    return mnLL, er


if __name__ == "__main__":

    import logreg_theano0128v1 as logreg

    np.random.seed( 0 )

    ##### setting the training data & the validation data
    #
    X, label, t = gendat( 'L' )
    XL, labelL, tL = X[:50000], label[:50000], t[:50000]
    XV, labelV, tV = X[50000:], label[50000:], t[50000:]
    NL, D = XL.shape
    NV, D = XV.shape
    K = t.shape[1]

    ##### mini-batch indices for stochastic gradient descent
    #
    idx = np.random.permutation( NL )
    batchsize = 1000
    nbatch = NL // batchsize
    assert( NL % batchsize == 0 )
    idxB = np.zeros( ( nbatch, NL ), dtype = bool )
    for ib in range( nbatch ):
        idxB[ib, idx.reshape( ( nbatch, batchsize ) )[ib, :]] = True

    print '### training: NL = ', NL, 'NV = ', NV, ' D = ', D, ' K = ', K, ' batchsize = ', batchsize

    ##### training
    #
    eta = 0.5
    nepoch = 100
    W = 0.1 * ( np.random.random_sample( ( K, D ) ) - 0.5 )
    b = 0.1 * ( np.random.random_sample( K ) - 0.5 )

    # Theano functions
    softmax = logreg.Tfunc_softmax()
    negLL = logreg.Tfunc_negLL()
    grad = logreg.Tfunc_grad()

    for i in range( nepoch ):

        # printing error rates etc.
        if i % 10 == 0:
            mnLLL, erL = errorrate( W, b, XL, tL, labelL )
            mnLLV, erV = errorrate( W, b, XV, tV, labelV )
            print i, mnLLL, erL * 100, erV * 100

        # training (selecting each batch in random order)
        for ib in np.random.permutation( nbatch ):
            ii = idxB[ib, :]
            Y, Z = softmax( W, b, XL[ii] )   # forward pass (not needed for the update below)
            dW, db = grad( W, b, XL[ii], tL[ii] )
            W -= eta * dW
            b -= eta * db

    i = nepoch
    mnLLL, erL = errorrate( W, b, XL, tL, labelL )
    mnLLV, erV = errorrate( W, b, XV, tV, labelV )
    print i, mnLLL, erL * 100, erV * 100

    ##### setting the test data
    #
    XT, labelT, tT = gendat( 'T' )
    NT, D = XT.shape
    print '### test: NT = ', NT
    mnLLT, erT = errorrate( W, b, XT, tT, labelT )
    print i, mnLLT, erL * 100, erV * 100, erT * 100
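
### The driver scripts in this gist import a helper module ``mnist0117`` that is
### not included here.  The stand-in below (mine, not the author's) matches the
### way the module is used above -- MNIST('L'|'T'), getLabel(), getImage() returning
### uint8 arrays -- but the file names, directory layout and IDX parsing are
### assumptions; save it as mnist0117.py only if the real module is unavailable.

import gzip
import os
import numpy as np

class MNIST( object ):

    def __init__( self, LT, datadir = '.' ):
        # 'L' selects the 60000-image learning set, anything else the 10000-image test set
        prefix = 'train' if LT == 'L' else 't10k'
        self._fnImage = os.path.join( datadir, prefix + '-images-idx3-ubyte.gz' )
        self._fnLabel = os.path.join( datadir, prefix + '-labels-idx1-ubyte.gz' )

    def getLabel( self ):
        # IDX label file: 8-byte header followed by one uint8 label per image
        with gzip.open( self._fnLabel, 'rb' ) as f:
            return np.frombuffer( f.read(), dtype = np.uint8, offset = 8 )

    def getImage( self ):
        # IDX image file: 16-byte header followed by 28x28 uint8 pixels per image
        with gzip.open( self._fnImage, 'rb' ) as f:
            data = np.frombuffer( f.read(), dtype = np.uint8, offset = 16 )
        return data.reshape( ( -1, 28, 28 ) )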

### ===== driver script using logreg_theano0128v2 (LogisticRegression class) =====

import numpy as np
import scipy as sp
import mnist0117 as mnist


def gendat( LT ):
    mn = mnist.MNIST( LT )
    label = mn.getLabel()
    N = label.shape[0]
    K = 10
    X = mn.getImage().reshape( ( N, -1 ) ) / 255.0   # => in [0,1]
    t = np.zeros( ( N, K ), dtype = bool )
    for ik in range( K ):
        t[label == ik, ik] = True
    return X, label, t


def errorrate( lg, X, t, label ):
    Y, Z = lg.output( X )
    mnLL = np.mean( lg.negLL( Z, t ) )
    er = np.mean( label != np.argmax( Z, axis = 1 ) )
    return mnLL, er


if __name__ == "__main__":

    import logreg_theano0128v2 as logreg

    np.random.seed( 0 )

    ##### setting the training data & the validation data
    #
    X, label, t = gendat( 'L' )
    XL, labelL, tL = X[:50000], label[:50000], t[:50000]
    XV, labelV, tV = X[50000:], label[50000:], t[50000:]
    NL, D = XL.shape
    NV, D = XV.shape
    K = t.shape[1]

    ##### mini-batch indices for stochastic gradient descent
    #
    idx = np.random.permutation( NL )
    batchsize = 1000
    nbatch = NL // batchsize
    assert( NL % batchsize == 0 )
    idxB = np.zeros( ( nbatch, NL ), dtype = bool )
    for ib in range( nbatch ):
        idxB[ib, idx.reshape( ( nbatch, batchsize ) )[ib, :]] = True

    print '### training: NL = ', NL, 'NV = ', NV, ' D = ', D, ' K = ', K, ' batchsize = ', batchsize

    ##### training
    #
    eta = 0.5
    nepoch = 100
    lg = logreg.LogisticRegression( D, K )

    for i in range( nepoch ):

        # printing error rates etc.
        if i % 10 == 0:
            mnLLL, erL = errorrate( lg, XL, tL, labelL )
            mnLLV, erV = errorrate( lg, XV, tV, labelV )
            print i, mnLLL, erL * 100, erV * 100

        # training (selecting each batch in random order)
        for ib in np.random.permutation( nbatch ):
            ii = idxB[ib, :]
            lg.train( XL[ii], tL[ii], eta )

    i = nepoch
    mnLLL, erL = errorrate( lg, XL, tL, labelL )
    mnLLV, erV = errorrate( lg, XV, tV, labelV )
    print i, mnLLL, erL * 100, erV * 100

    ##### setting the test data
    #
    XT, labelT, tT = gendat( 'T' )
    NT, D = XT.shape
    print '# NT = ', NT
    mnLLT, erT = errorrate( lg, XT, tT, labelT )
    print i, mnLLT, erL * 100, erV * 100, erT * 100

### ===== driver script using logreg_theano0128v3 (shared-variable version) =====

import numpy as np
import scipy as sp
import mnist0117 as mnist


def gendat( LT ):
    mn = mnist.MNIST( LT )
    label = mn.getLabel()
    N = label.shape[0]
    K = 10
    X = mn.getImage().reshape( ( N, -1 ) ) / 255.0   # => in [0,1]
    t = np.zeros( ( N, K ), dtype = bool )
    for ik in range( K ):
        t[label == ik, ik] = True
    return X, label, t


def errorrate( lg, X, t, label ):
    Y, Z = lg.output( X )
    mnLL = np.mean( lg.negLL( Z, t ) )
    er = np.mean( label != np.argmax( Z, axis = 1 ) )
    return mnLL, er


if __name__ == "__main__":

    import logreg_theano0128v3 as logreg

    np.random.seed( 0 )

    ##### setting the training data & the validation data
    #
    X, label, t = gendat( 'L' )
    XL, labelL, tL = X[:50000], label[:50000], t[:50000]
    XV, labelV, tV = X[50000:], label[50000:], t[50000:]
    NL, D = XL.shape
    NV, D = XV.shape
    K = t.shape[1]

    ##### mini-batch indices for stochastic gradient descent
    #
    idx = np.random.permutation( NL )
    batchsize = 1000
    nbatch = NL // batchsize
    assert( NL % batchsize == 0 )
    idxB = np.zeros( ( nbatch, NL ), dtype = bool )
    for ib in range( nbatch ):
        idxB[ib, idx.reshape( ( nbatch, batchsize ) )[ib, :]] = True

    print '### training: NL = ', NL, 'NV = ', NV, ' D = ', D, ' K = ', K, ' batchsize = ', batchsize

    ##### training
    #
    eta = 0.5
    nepoch = 100
    lg = logreg.LogisticRegression( D, K )

    for i in range( nepoch ):

        # printing error rates etc.
        if i % 10 == 0:
            mnLLL, erL = errorrate( lg, XL, tL, labelL )
            mnLLV, erV = errorrate( lg, XV, tV, labelV )
            print i, mnLLL, erL * 100, erV * 100

        # training (selecting each batch in random order)
        for ib in np.random.permutation( nbatch ):
            ii = idxB[ib, :]
            lg.train( XL[ii], tL[ii], eta )

    i = nepoch
    mnLLL, erL = errorrate( lg, XL, tL, labelL )
    mnLLV, erV = errorrate( lg, XV, tV, labelV )
    print i, mnLLL, erL * 100, erV * 100

    ##### setting the test data
    #
    XT, labelT, tT = gendat( 'T' )
    NT, D = XT.shape
    print '# NT = ', NT
    mnLLT, erT = errorrate( lg, XT, tT, labelT )
    print i, mnLLT, erL * 100, erV * 100, erT * 100

### ===== driver script using logreg_theano0128v4 (momentum version) =====

import numpy as np
import scipy as sp
import mnist0117 as mnist


def gendat( LT ):
    mn = mnist.MNIST( LT )
    label = mn.getLabel()
    N = label.shape[0]
    K = 10
    X = mn.getImage().reshape( ( N, -1 ) ) / 255.0   # => in [0,1]
    t = np.zeros( ( N, K ), dtype = bool )
    for ik in range( K ):
        t[label == ik, ik] = True
    return X, label, t


def errorrate( lg, X, t, label ):
    Y, Z = lg.output( X )
    mnLL = np.mean( lg.negLL( Z, t ) )
    er = np.mean( label != np.argmax( Z, axis = 1 ) )
    return mnLL, er


if __name__ == "__main__":

    import logreg_theano0128v4 as logreg

    np.random.seed( 0 )

    ##### setting the training data & the validation data
    #
    X, label, t = gendat( 'L' )
    XL, labelL, tL = X[:50000], label[:50000], t[:50000]
    XV, labelV, tV = X[50000:], label[50000:], t[50000:]
    NL, D = XL.shape
    NV, D = XV.shape
    K = t.shape[1]

    ##### mini-batch indices for stochastic gradient descent
    #
    idx = np.random.permutation( NL )
    batchsize = 1000
    nbatch = NL // batchsize
    assert( NL % batchsize == 0 )
    idxB = np.zeros( ( nbatch, NL ), dtype = bool )
    for ib in range( nbatch ):
        idxB[ib, idx.reshape( ( nbatch, batchsize ) )[ib, :]] = True

    print '### training: NL = ', NL, 'NV = ', NV, ' D = ', D, ' K = ', K, ' batchsize = ', batchsize

    ##### training
    #
    eta = 0.5
    mu = 0.8
    nepoch = 100
    lg = logreg.LogisticRegression( D, K )

    for i in range( nepoch ):

        # printing error rates etc.
        if i % 10 == 0:
            mnLLL, erL = errorrate( lg, XL, tL, labelL )
            mnLLV, erV = errorrate( lg, XV, tV, labelV )
            print i, mnLLL, erL * 100, erV * 100

        # training (selecting each batch in random order)
        for ib in np.random.permutation( nbatch ):
            ii = idxB[ib, :]
            lg.train( XL[ii], tL[ii], eta, mu )

    i = nepoch
    mnLLL, erL = errorrate( lg, XL, tL, labelL )
    mnLLV, erV = errorrate( lg, XV, tV, labelV )
    print i, mnLLL, erL * 100, erV * 100

    ##### setting the test data
    #
    XT, labelT, tT = gendat( 'T' )
    NT, D = XT.shape
    print '# NT = ', NT
    mnLLT, erT = errorrate( lg, XT, tT, labelT )
    print i, mnLLT, erL * 100, erV * 100, erT * 100

### ===== logreg_theano0128v1.py =====

import theano
import theano.tensor as T


### Softmax Units
#
def T_softmax( W, b, X ):
    Y = T.dot( X, W.T ) + b   # N x K
    Z = T.nnet.softmax( Y )
    return Y, Z

def Tfunc_softmax():
    W = T.dmatrix( 'W' )   # K x D
    b = T.dvector( 'b' )   # K-dim
    X = T.dmatrix( 'X' )   # N x D
    Y, Z = T_softmax( W, b, X )
    return theano.function( [ W, b, X ], [ Y, Z ] )


### negative log-likelihood
#
def T_negLL( Z, t ):
    return T.nnet.categorical_crossentropy( Z, t )

def Tfunc_negLL():
    Z = T.dmatrix( 'Z' )   # N x K
    t = T.dmatrix( 't' )   # N x K
    LL = T_negLL( Z, t )
    return theano.function( [ Z, t ], LL )


### gradient
#
def Tfunc_grad():
    W = T.dmatrix( 'W' )   # K x D
    b = T.dvector( 'b' )   # K-dim
    X = T.dmatrix( 'X' )   # N x D
    t = T.dmatrix( 't' )   # N x K
    Y, Z = T_softmax( W, b, X )
    cost = T.mean( T_negLL( Z, t ) )
    dW, db = T.grad( cost, [ W, b ] )
    return theano.function( [ W, b, X, t ], [ dW, db ] )
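
### For reference only (not part of the gist): a plain-NumPy restatement of what
### the three compiled functions above compute, handy for checking results on a
### small batch.  Shapes follow the comments in the code (W: K x D, b: K-dim,
### X: N x D, t: N x K one-hot); the gradient is the standard one for softmax
### regression with mean cross-entropy loss.

import numpy as np

def np_softmax( W, b, X ):
    # row-wise softmax of Y = X W^T + b, as in T_softmax
    Y = np.dot( X, W.T ) + b
    E = np.exp( Y - Y.max( axis = 1, keepdims = True ) )   # shift for numerical stability
    Z = E / E.sum( axis = 1, keepdims = True )
    return Y, Z

def np_negLL( Z, t ):
    # per-sample categorical cross-entropy, as T.nnet.categorical_crossentropy
    return -np.sum( t * np.log( Z ), axis = 1 )

def np_grad( W, b, X, t ):
    # gradient of mean( negLL ) with respect to W and b
    Y, Z = np_softmax( W, b, X )
    G = ( Z - t ) / X.shape[0]   # N x K
    dW = np.dot( G.T, X )        # K x D
    db = G.sum( axis = 0 )       # K-dim
    return dW, db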

### ===== logreg_theano0128v2.py =====

import numpy as np
import theano
import theano.tensor as T


class LogisticRegression():

    def __init__( self, D, K ):
        self.W = 0.1 * ( np.random.random_sample( ( K, D ) ) - 0.5 )
        self.b = 0.1 * ( np.random.random_sample( K ) - 0.5 )
        # compiling theano functions
        self.Tfunc_softmax = _Tfunc_softmax()
        self.Tfunc_negLL = _Tfunc_negLL()
        self.Tfunc_grad = _Tfunc_grad()

    def output( self, X ):
        return self.Tfunc_softmax( self.W, self.b, X )

    def negLL( self, Z, t ):
        return self.Tfunc_negLL( Z, t )

    def train( self, X, t, eta ):
        dW, db = self.Tfunc_grad( self.W, self.b, X, t )
        self.W -= eta * dW
        self.b -= eta * db


### Softmax Units
#
def _T_softmax( W, b, X ):
    Y = T.dot( X, W.T ) + b   # N x K
    Z = T.nnet.softmax( Y )
    return Y, Z

def _Tfunc_softmax():
    W = T.dmatrix( 'W' )   # K x D
    b = T.dvector( 'b' )   # K-dim
    X = T.dmatrix( 'X' )   # N x D
    Y, Z = _T_softmax( W, b, X )
    return theano.function( [ W, b, X ], [ Y, Z ] )


### negative log-likelihood
#
def _T_negLL( Z, t ):
    return T.nnet.categorical_crossentropy( Z, t )

def _Tfunc_negLL():
    Z = T.dmatrix( 'Z' )   # N x K
    t = T.dmatrix( 't' )   # N x K
    LL = _T_negLL( Z, t )
    return theano.function( [ Z, t ], LL )


### gradient
#
def _Tfunc_grad():
    W = T.dmatrix( 'W' )   # K x D
    b = T.dvector( 'b' )   # K-dim
    X = T.dmatrix( 'X' )   # N x D
    t = T.dmatrix( 't' )   # N x K
    Y, Z = _T_softmax( W, b, X )
    cost = T.mean( _T_negLL( Z, t ) )
    dW, db = T.grad( cost, [ W, b ] )
    return theano.function( [ W, b, X, t ], [ dW, db ] )

### ===== logreg_theano0128v3.py =====

import numpy as np
import theano
import theano.tensor as T


class LogisticRegression():

    def __init__( self, D, K ):
        # shared variables
        self.W = theano.shared( 0.1 * ( np.random.random_sample( ( K, D ) ) - 0.5 ) )
        self.b = theano.shared( 0.1 * ( np.random.random_sample( K ) - 0.5 ) )
        # compiling theano functions
        self.Tfunc_softmax = _Tfunc_softmax( self.W, self.b )
        self.Tfunc_negLL = _Tfunc_negLL()
        self.Tfunc_train = _Tfunc_train( self.W, self.b )

    def output( self, X ):
        return self.Tfunc_softmax( X )

    def negLL( self, Z, t ):
        return self.Tfunc_negLL( Z, t )

    def train( self, X, t, eta ):
        return self.Tfunc_train( X, t, eta )


### Softmax Units
#
def _T_softmax( W, b, X ):
    Y = T.dot( X, W.T ) + b   # N x K
    Z = T.nnet.softmax( Y )
    return Y, Z

def _Tfunc_softmax( W_shared, b_shared ):
    W = W_shared
    b = b_shared
    X = T.dmatrix( 'X' )   # N x D
    Y, Z = _T_softmax( W, b, X )
    return theano.function( [ X ], [ Y, Z ] )


### negative log-likelihood
#
def _T_negLL( Z, t ):
    return T.nnet.categorical_crossentropy( Z, t )

def _Tfunc_negLL():
    Z = T.dmatrix( 'Z' )   # N x K
    t = T.dmatrix( 't' )   # N x K
    LL = _T_negLL( Z, t )
    return theano.function( [ Z, t ], LL )


### train
#
def _Tfunc_train( W_shared, b_shared ):
    W = W_shared
    b = b_shared
    X = T.dmatrix( 'X' )   # N x D
    t = T.dmatrix( 't' )   # N x K
    eta = T.dscalar( 'eta' )
    Y, Z = _T_softmax( W, b, X )
    cost = T.mean( _T_negLL( Z, t ) )
    dW, db = T.grad( cost, [ W, b ] )
    return theano.function( [ X, t, eta ], cost,
                            updates = [ ( W, W - eta * dW ), ( b, b - eta * db ) ] )
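
### A minimal illustration (mine, not from the gist) of the shared-variable /
### updates mechanism that distinguishes v3 from v2: the parameters live inside
### the compiled function as shared storage, and each call applies the update
### pairs in place, so W and b no longer have to be passed in and copied back.

import numpy as np
import theano
import theano.tensor as T

w = theano.shared( np.zeros( 3 ) )
x = T.dvector( 'x' )
step = theano.function( [ x ], [], updates = [ ( w, w + x ) ] )

step( np.ones( 3 ) )
print w.get_value()   # the shared value is now [ 1.  1.  1. ]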

### ===== logreg_theano0128v4.py =====

import numpy as np
import theano
import theano.tensor as T


class LogisticRegression():

    def __init__( self, D, K ):
        # shared variables (dW, db hold the previous update for the momentum term)
        self.W = theano.shared( 0.1 * ( np.random.random_sample( ( K, D ) ) - 0.5 ) )
        self.b = theano.shared( 0.1 * ( np.random.random_sample( K ) - 0.5 ) )
        self.dW = theano.shared( np.zeros( ( K, D ) ) )
        self.db = theano.shared( np.zeros( K ) )
        # compiling theano functions
        self.Tfunc_softmax = _Tfunc_softmax( self.W, self.b )
        self.Tfunc_negLL = _Tfunc_negLL()
        self.Tfunc_train = _Tfunc_train( self.W, self.b, self.dW, self.db )

    def output( self, X ):
        return self.Tfunc_softmax( X )

    def negLL( self, Z, t ):
        return self.Tfunc_negLL( Z, t )

    def train( self, X, t, eta, mu ):
        return self.Tfunc_train( X, t, eta, mu )


### Softmax Units
#
def _T_softmax( W, b, X ):
    Y = T.dot( X, W.T ) + b   # N x K
    Z = T.nnet.softmax( Y )
    return Y, Z

def _Tfunc_softmax( W_shared, b_shared ):
    W = W_shared
    b = b_shared
    X = T.dmatrix( 'X' )   # N x D
    Y, Z = _T_softmax( W, b, X )
    return theano.function( [ X ], [ Y, Z ] )


### negative log-likelihood
#
def _T_negLL( Z, t ):
    return T.nnet.categorical_crossentropy( Z, t )

def _Tfunc_negLL():
    Z = T.dmatrix( 'Z' )   # N x K
    t = T.dmatrix( 't' )   # N x K
    LL = _T_negLL( Z, t )
    return theano.function( [ Z, t ], LL )


### train
#
def _Tfunc_train( W_shared, b_shared, dW_shared, db_shared ):
    W, b = W_shared, b_shared
    dW, db = dW_shared, db_shared
    X = T.dmatrix( 'X' )   # N x D
    t = T.dmatrix( 't' )   # N x K
    eta = T.dscalar( 'eta' )
    mu = T.dscalar( 'mu' )
    Y, Z = _T_softmax( W, b, X )
    cost = T.mean( _T_negLL( Z, t ) )
    gradW, gradb = T.grad( cost, [ W, b ] )
    # classical momentum: new step = -eta * gradient + mu * previous step
    dW_new = -eta * gradW + mu * dW
    db_new = -eta * gradb + mu * db
    W_new = W + dW_new
    b_new = b + db_new
    updatesList = [ ( W, W_new ), ( b, b_new ), ( dW, dW_new ), ( db, db_new ) ]
    return theano.function( [ X, t, eta, mu ], cost, updates = updatesList )
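
### For reference (not part of the gist): the update performed by _Tfunc_train
### above, written as a plain-NumPy step.  W and dW stand for the current weights
### and the previous update; the same rule is applied to b and db.

def momentum_step( W, dW, gradW, eta, mu ):
    # classical momentum: reuse the previous step dW, scaled by mu
    dW_new = -eta * gradW + mu * dW
    return W + dW_new, dW_new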