### cnn170415.py
#
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import tensorflow as tf
import tensorflow.contrib as tfc


### definition of the CNN
#
class CNN(object):

    def __init__(self, Xshape, K):

        ### conv-pool-conv-pool-fc-softmax
        #
        params = {
            'conv1':{'filters':32, 'kernel_size':[5, 5], 'strides':[2, 2],
                     'padding':'same', 'activation':tf.nn.relu, 'use_bias':True,
                     'kernel_initializer':tfc.layers.xavier_initializer_conv2d(),
                     'bias_initializer':tf.zeros_initializer()},
            'pool1':{'pool_size':[2, 2], 'strides':[2, 2], 'padding':'same'},
            'conv2':{'filters':64, 'kernel_size':[5, 5], 'strides':[2, 2],
                     'padding':'same', 'activation':tf.nn.relu, 'use_bias':True,
                     'kernel_initializer':tfc.layers.xavier_initializer_conv2d(),
                     'bias_initializer':tf.zeros_initializer()},
            'pool2':{'pool_size':[2, 2], 'strides':[2, 2], 'padding':'same'},
            'fc':{'units':1024, 'activation':tf.nn.relu, 'use_bias':True,
                  'kernel_initializer':tfc.layers.xavier_initializer(),
                  'bias_initializer':tf.zeros_initializer()},
            'logit':{'units':K, 'activation':None, 'use_bias':True,
                     'kernel_initializer':tfc.layers.xavier_initializer(),
                     'bias_initializer':tf.zeros_initializer()},
        }
        self.params = params

        self.inputs = tf.placeholder(tf.float32, shape = [None] + Xshape)
        self.conv1 = tf.layers.conv2d(inputs = self.inputs, **params['conv1'])
        self.pool1 = tf.layers.max_pooling2d(inputs = self.conv1, **params['pool1'])
        self.conv2 = tf.layers.conv2d(inputs = self.pool1, **params['conv2'])
        self.pool2 = tf.layers.max_pooling2d(inputs = self.conv2, **params['pool2'])
        pool2_flat = tfc.layers.flatten(self.pool2)
        self.fc = tf.layers.dense(inputs = pool2_flat, **params['fc'])
        self.logit = tf.layers.dense(inputs = self.fc, **params['logit'])
        self.outputs = tf.nn.softmax(self.logit)
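
# A minimal shape check (a sketch, assuming the MNIST configuration used in the
# scripts below): both conv layers and both pooling layers use stride 2 with
# 'same' padding, so the spatial size is halved four times, 28 -> 14 -> 7 -> 4 -> 2.
if __name__ == '__main__':
    net = CNN([28, 28, 1], 10)
    # expected: conv1 (?, 14, 14, 32), pool1 (?, 7, 7, 32),
    #           conv2 (?, 4, 4, 64),  pool2 (?, 2, 2, 64), logit (?, 10)
    print(net.conv1.shape, net.pool1.shape, net.conv2.shape, net.pool2.shape, net.logit.shape)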

### ex170415.py
#
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import tensorflow as tf
import numpy as np

useCNN = True

import mnist
if useCNN:
    import cnn170415 as cnn
else:
    import mlp170415 as mlp
import nnlearner170415 as nnlearner

### mini-batch indices for stochastic gradient descent
#
def makeBatchIndex(N, batchsize):

    idx = np.random.permutation(N)

    nbatch = int(np.ceil(float(N) / batchsize))
    idxB = np.zeros(( nbatch, N ), dtype = bool)
    for ib in range(nbatch - 1):
        idxB[ib, idx[ib*batchsize:(ib+1)*batchsize]] = True
    ib = nbatch - 1
    idxB[ib, idx[ib*batchsize:]] = True

    return idxB
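
# For example (a small sketch of the returned index matrix): makeBatchIndex(5, 2)
# gives a (3, 5) boolean array; each row marks the samples of one shuffled
# mini-batch, the row sums are (2, 2, 1) since the last row absorbs the
# remainder, and every sample index is True in exactly one row, so iterating
# over the rows once visits every training sample once per epoch.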

if __name__ == '__main__':

    ### reading and preparing the training data
    #
    mn = mnist.MNIST(pathMNIST = '.')
    X = mn.getImage('L') / 255.0
    lab = mn.getLabel('L')
    D = X.shape[1]
    K = mn.nclass
    xm = np.mean(X, axis = 0)
    X -= xm
    if useCNN:
        Xshape = [28, 28, 1]
        X = X.reshape(tuple([-1] + Xshape))
    XL, labL = X[:50000], lab[:50000]
    XV, labV = X[50000:], lab[50000:]
    NL = XL.shape[0]
    NV = XV.shape[0]

    ### initializing the network
    #
    if useCNN:
        nncfg = cnn.CNN(Xshape, K)
    else:
        #nncfg = mlp.LogisticRegression(D, K)
        #nncfg = mlp.MLP2(D, K)
        nncfg = mlp.MLP3(D, K)
    eta = 0.1
    mu = 0.9
    optimizer = tf.train.MomentumOptimizer(eta, mu)
    nn = nnlearner.NNLearner(nncfg, optimizer)
    print('# nncfg:', nncfg.__class__)
    print('# nn:', nn.__class__)

    ### training
    #
    batchsize = 100
    idxB = makeBatchIndex(NL, batchsize)
    nbatch = idxB.shape[0]
    print('# ceL accL ceV accV ceT accT')
    nitr = 20
    for i in range(nitr):
        if i <= 10 or i % 10 == 0:
            ceL, accL = nn.test(XL, labL)
            ceV, accV = nn.test(XV, labV)
            print('%d %f %.2f %f %.2f' % (i, ceL, (1.0 - accL)*100, ceV, (1.0 - accV)*100))
        for ib in np.random.permutation(nbatch):
            ii = idxB[ib, :]
            nn.train(XL[ii], labL[ii])

    ### test
    #
    XT = mn.getImage('T') / 255.0
    labT = mn.getLabel('T')
    NT = XT.shape[0]
    XT -= xm
    if useCNN:
        XT = XT.reshape(tuple([-1] + Xshape))
    ceL, accL = nn.test(XL, labL)
    ceV, accV = nn.test(XV, labV)
    ceT, accT = nn.test(XT, labT)
    print('%d %f %.2f %f %.2f %f %.2f' % (nitr, ceL, (1.0 - accL)*100, ceV, (1.0 - accV)*100, ceT, (1.0 - accT)*100))

### training script using NNClassifier (trains, evaluates and saves the model)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import tensorflow as tf
import numpy as np

useCNN = True

import mnist
if useCNN:
    import cnn170415 as cnn
else:
    import mlp170415 as mlp
import nnclassifier170729 as nnclassifier

### mini-batch indices for stochastic gradient descent
#
def makeBatchIndex(N, batchsize):

    idx = np.random.permutation(N)

    nbatch = int(np.ceil(float(N) / batchsize))
    idxB = np.zeros(( nbatch, N ), dtype = bool)
    for ib in range(nbatch - 1):
        idxB[ib, idx[ib*batchsize:(ib+1)*batchsize]] = True
    ib = nbatch - 1
    idxB[ib, idx[ib*batchsize:]] = True

    return idxB

if __name__ == '__main__':

    ### reading and preparing the training data
    #
    mn = mnist.MNIST(pathMNIST = '.')
    X = mn.getImage('L') / 255.0
    lab = mn.getLabel('L')
    D = X.shape[1]
    K = mn.nclass
    xm = np.mean(X, axis = 0)
    X -= xm
    if useCNN:
        Xshape = [28, 28, 1]
        X = X.reshape(tuple([-1] + Xshape))
    XL, labL = X[:50000], lab[:50000]
    XV, labV = X[50000:], lab[50000:]
    NL = XL.shape[0]
    NV = XV.shape[0]

    ### initializing the network
    #
    if useCNN:
        nncfg = cnn.CNN(Xshape, K)
    else:
        #nncfg = mlp.LogisticRegression(D, K)
        #nncfg = mlp.MLP2(D, K)
        nncfg = mlp.MLP3(D, K)
    eta = 0.1
    mu = 0.9
    optimizer = tf.train.MomentumOptimizer(eta, mu)
    nn = nnclassifier.NNClassifier(nncfg, optimizer)
    nn.init()
    print('# nncfg:', nncfg.__class__)
    print('# nn:', nn.__class__)

    ### training
    #
    batchsize = 100
    idxB = makeBatchIndex(NL, batchsize)
    nbatch = idxB.shape[0]
    print('# ceL accL ceV accV ceT accT')
    nitr = 20
    for i in range(nitr):
        if i <= 10 or i % 10 == 0:
            ceL, accL = nn.test(XL, labL)
            ceV, accV = nn.test(XV, labV)
            print('%d %f %.2f %f %.2f' % (i, ceL, (1.0 - accL)*100, ceV, (1.0 - accV)*100))
        for ib in np.random.permutation(nbatch):
            ii = idxB[ib, :]
            nn.train(XL[ii], labL[ii])

    nn.save('hoge')

    ### test
    #
    XT = mn.getImage('T') / 255.0
    labT = mn.getLabel('T')
    NT = XT.shape[0]
    XT -= xm
    if useCNN:
        XT = XT.reshape(tuple([-1] + Xshape))
    ceL, accL = nn.test(XL, labL)
    ceV, accV = nn.test(XV, labV)
    ceT, accT = nn.test(XT, labT)
    print('%d %f %.2f %f %.2f %f %.2f' % (nitr, ceL, (1.0 - accL)*100, ceV, (1.0 - accV)*100, ceT, (1.0 - accT)*100))
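
# Note (a sketch of standard tf.train.Saver behaviour): nn.save('hoge') writes
# checkpoint files with the prefix 'hoge' (hoge.index, hoge.meta, hoge.data-*)
# plus a 'checkpoint' file into the working directory; the evaluation script
# below rebuilds the same graph and calls nn.restore('hoge') to reload these
# weights before testing, so it has to be run afterwards from the same directory.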

### evaluation script using NNClassifier (restores the saved model and tests it)
#
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import tensorflow as tf
import numpy as np

useCNN = True

import mnist
if useCNN:
    import cnn170415 as cnn
else:
    import mlp170415 as mlp
import nnclassifier170729 as nnclassifier


### mini-batch indices for stochastic gradient descent
#
def makeBatchIndex(N, batchsize):

    idx = np.random.permutation(N)

    nbatch = int(np.ceil(float(N) / batchsize))
    idxB = np.zeros(( nbatch, N ), dtype = bool)
    for ib in range(nbatch - 1):
        idxB[ib, idx[ib*batchsize:(ib+1)*batchsize]] = True
    ib = nbatch - 1
    idxB[ib, idx[ib*batchsize:]] = True

    return idxB

if __name__ == '__main__':

    ### reading and preparing the training data
    #
    mn = mnist.MNIST(pathMNIST = '.')
    X = mn.getImage('L') / 255.0
    lab = mn.getLabel('L')
    D = X.shape[1]
    K = mn.nclass
    xm = np.mean(X, axis = 0)
    X -= xm
    if useCNN:
        Xshape = [28, 28, 1]
        X = X.reshape(tuple([-1] + Xshape))
    XL, labL = X[:50000], lab[:50000]
    XV, labV = X[50000:], lab[50000:]
    NL = XL.shape[0]
    NV = XV.shape[0]

    ### initializing the network
    #
    if useCNN:
        nncfg = cnn.CNN(Xshape, K)
    else:
        #nncfg = mlp.LogisticRegression(D, K)
        #nncfg = mlp.MLP2(D, K)
        nncfg = mlp.MLP3(D, K)
    nn = nnclassifier.NNClassifier(nncfg)
    nn.restore('hoge')
    print('# nncfg:', nncfg.__class__)
    print('# nn:', nn.__class__)

    ### test
    #
    XT = mn.getImage('T') / 255.0
    labT = mn.getLabel('T')
    NT = XT.shape[0]
    XT -= xm
    if useCNN:
        XT = XT.reshape(tuple([-1] + Xshape))
    ceL, accL = nn.test(XL, labL)
    ceV, accV = nn.test(XV, labV)
    ceT, accT = nn.test(XT, labT)
    print('%f %.2f %f %.2f %f %.2f' % (ceL, (1.0 - accL)*100, ceV, (1.0 - accV)*100, ceT, (1.0 - accT)*100))

### mlp170415.py
#
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import tensorflow as tf
import tensorflow.contrib as tfc


### base class holding the common layer parameters
#
class MLP(object):

    def __init__(self, Xdim, K):

        self.params = {
            'fc1':{'units':1024, 'activation':tf.nn.relu, 'use_bias':True,
                   'kernel_initializer':tfc.layers.xavier_initializer(),
                   'bias_initializer':tf.zeros_initializer()},
            'fc2':{'units':1024, 'activation':tf.nn.relu, 'use_bias':True,
                   'kernel_initializer':tfc.layers.xavier_initializer(),
                   'bias_initializer':tf.zeros_initializer()},
            'logit':{'units':K, 'activation':None, 'use_bias':True,
                     'kernel_initializer':tfc.layers.xavier_initializer(),
                     'bias_initializer':tf.zeros_initializer()},
        }

### definition of logistic regression network
#
class LogisticRegression(MLP):

    def __init__(self, Xdim, K):

        super().__init__(Xdim, K)
        params = self.params

        self.inputs = tf.placeholder(tf.float32, shape = [None, Xdim])
        self.logit = tf.layers.dense(inputs = self.inputs, **params['logit'])
        self.outputs = tf.nn.softmax(self.logit)


### definition of MLP with one hidden layer
#
class MLP2(MLP):

    def __init__(self, Xdim, K):

        super().__init__(Xdim, K)
        params = self.params

        self.inputs = tf.placeholder(tf.float32, shape = [None, Xdim])
        self.fc1 = tf.layers.dense(inputs = self.inputs, **params['fc1'])
        self.logit = tf.layers.dense(inputs = self.fc1, **params['logit'])
        self.outputs = tf.nn.softmax(self.logit)

### definition of MLP with two hidden layers
#
class MLP3(MLP):

    def __init__(self, Xdim, K):

        super().__init__(Xdim, K)
        params = self.params

        self.inputs = tf.placeholder(tf.float32, shape = [None, Xdim])
        self.fc1 = tf.layers.dense(inputs = self.inputs, **params['fc1'])
        self.fc2 = tf.layers.dense(inputs = self.fc1, **params['fc2'])
        self.logit = tf.layers.dense(inputs = self.fc2, **params['logit'])
        self.outputs = tf.nn.softmax(self.logit)
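
# A minimal sketch of how the three classes differ (assuming MNIST inputs,
# i.e. Xdim = 784 and K = 10):
#   LogisticRegression : inputs -> logit                 (no hidden layer)
#   MLP2               : inputs -> fc1 -> logit          (one hidden layer)
#   MLP3               : inputs -> fc1 -> fc2 -> logit   (two hidden layers)
if __name__ == '__main__':
    net = MLP3(784, 10)
    # expected shapes: fc1 (?, 1024), fc2 (?, 1024), logit (?, 10)
    print(net.fc1.shape, net.fc2.shape, net.logit.shape)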

### nnclassifier170729.py
#
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import tensorflow as tf
import tensorflow.contrib as tfc


class NNClassifier():

    def __init__(self, netcfg, optimizer = None):

        ### definition for output computation
        #
        self.X = netcfg.inputs
        self.Y = netcfg.logit
        self.Z = netcfg.outputs
        self.cg_output = (self.Y, self.Z)

        ### definition for cost & accuracy
        #
        self.label = tf.placeholder(tf.int64, shape = [None])
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels = self.label, logits = self.Y)
        cost = tf.reduce_mean(cross_entropy)
        correct_prediction = tf.equal(tf.argmax(self.Y, 1), self.label)
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        self.cg_test = (cost, accuracy)

        ### definition for training
        #
        self.optimizer = optimizer
        if optimizer is not None:
            self.cg_train = self.optimizer.minimize(cost)

        ### definition for parameter initialization
        #
        self.cg_init = tf.global_variables_initializer()

        ### starting the session
        #
        self.sess = tf.InteractiveSession()

    def init(self):
        rv = self.sess.run(self.cg_init)
        return rv

    def output(self, X):
        d = {self.X: X}
        rv = self.sess.run(self.cg_output, feed_dict = d)
        return rv

    def train(self, X, lab):
        d = {self.X: X, self.label: lab}
        rv = self.sess.run(self.cg_train, feed_dict = d)
        return rv

    def test(self, X, lab):
        d = {self.X: X, self.label: lab}
        rv = self.sess.run(self.cg_test, feed_dict = d)
        return rv

    def save(self, path):
        saver = tf.train.Saver()
        saver.save(self.sess, path)

    def restore(self, path):
        saver = tf.train.Saver()
        saver.restore(self.sess, path)
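
# Typical usage (a sketch; the training and evaluation scripts above show the
# full version):
#   nncfg = cnn.CNN([28, 28, 1], 10)                   # or an MLP from mlp170415
#   clf = NNClassifier(nncfg, tf.train.MomentumOptimizer(0.1, 0.9))
#   clf.init()                                         # run the variable initializers
#   clf.train(Xbatch, labbatch)                        # one optimizer step on a mini-batch
#   ce, acc = clf.test(X, lab)                         # mean cross-entropy and accuracy
#   clf.save('hoge')                                   # checkpoint the session to disk
#   clf.restore('hoge')                                # reload it later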

### nnlearner170415.py
#
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import tensorflow as tf
import tensorflow.contrib as tfc


class NNLearner():

    def __init__(self, netcfg, optimizer):

        ### definition for output computation
        #
        self.X = netcfg.inputs
        self.Y = netcfg.logit
        self.Z = netcfg.outputs
        self.cg_output = (self.Y, self.Z)

        ### definition for cost & accuracy
        #
        self.label = tf.placeholder(tf.int64, shape = [None])
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels = self.label, logits = self.Y)
        cost = tf.reduce_mean(cross_entropy)
        correct_prediction = tf.equal(tf.argmax(self.Y, 1), self.label)
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        self.cg_test = (cost, accuracy)
        ### definition for training
        #
        self.optimizer = optimizer
        self.cg_train = self.optimizer.minimize(cost)

        ### initialization
        #
        self.sess = tf.InteractiveSession()
        tf.global_variables_initializer().run()

    def output(self, X):
        d = {self.X: X}
        rv = self.sess.run(self.cg_output, feed_dict = d)
        return rv

    def train(self, X, lab):
        d = {self.X: X, self.label: lab}
        rv = self.sess.run(self.cg_train, feed_dict = d)
        return rv

    def test(self, X, lab):
        d = {self.X: X, self.label: lab}
        rv = self.sess.run(self.cg_test, feed_dict = d)
        return rv

### execution log (runs with LogisticRegression / MLP2 / MLP3 / CNN)
#
In [5]: %time %run ex170415.py
# nncfg: <class 'mlp170415.LogisticRegression'>
# nn: <class 'nnlearner170415.NNLearner'>
# ceL accL ceV accV ceT accT
0 2.390957 93.36 2.387799 92.89
1 0.303419 8.71 0.293427 8.32
2 0.284006 8.01 0.276468 7.79
3 0.274720 7.79 0.272869 7.77
4 0.266706 7.46 0.271088 7.53
5 0.261407 7.46 0.268279 7.67
6 0.258741 7.27 0.265806 7.51
7 0.256548 7.18 0.265217 7.19
8 0.256009 7.19 0.267336 7.38
9 0.251504 6.98 0.265981 7.33
10 0.252759 7.06 0.264854 7.32
20 0.240434 6.64 0.269577 7.44 0.277376 7.66
CPU times: user 25.6 s, sys: 6.32 s, total: 31.9 s
Wall time: 18 s
# nncfg: <class 'mlp170415.MLP2'>
# nn: <class 'nnlearner170415.NNLearner'>
# ceL accL ceV accV ceT accT
0 2.331943 90.57 2.335567 90.88
1 0.091150 2.74 0.114948 3.43
2 0.046451 1.30 0.091733 2.68
3 0.029083 0.80 0.080276 2.28
4 0.016001 0.36 0.071696 2.01
5 0.009606 0.15 0.068113 1.75
6 0.004956 0.05 0.067724 1.73
7 0.003210 0.01 0.064313 1.66
8 0.002276 0.00 0.065525 1.64
9 0.001812 0.00 0.067270 1.70
10 0.001529 0.00 0.066747 1.66
20 0.000611 0.00 0.071750 1.68 0.063062 1.60
CPU times: user 8min 46s, sys: 34.5 s, total: 9min 20s
Wall time: 1min 18s
# nncfg: <class 'mlp170415.MLP3'>
# nn: <class 'nnlearner170415.NNLearner'>
# ceL accL ceV accV ceT accT
0 2.310501 90.04 2.311282 90.08
1 0.069830 2.16 0.092541 2.87
2 0.036888 1.15 0.083483 2.52
3 0.022205 0.67 0.082944 2.46
4 0.022174 0.74 0.102332 2.50
5 0.007903 0.24 0.081572 2.00
6 0.007393 0.23 0.089041 1.98
7 0.009970 0.31 0.087729 1.99
8 0.003919 0.12 0.088191 1.96
9 0.001342 0.03 0.085937 1.80
10 0.000602 0.01 0.079922 1.63
20 0.000047 0.00 0.088163 1.54 0.074268 1.55
CPU times: user 19min 27s, sys: 58.5 s, total: 20min 25s
Wall time: 2min 26s
# nncfg: <class 'cnn170415.CNN'>
# nn: <class 'nnlearner170415.NNLearner'>
# ceL accL ceV accV ceT accT
0 2.304340 92.57 2.304393 93.10
1 0.055175 1.58 0.065980 1.69
2 0.031726 0.97 0.056063 1.63
3 0.018410 0.54 0.050434 1.44
4 0.010353 0.30 0.042201 1.13
5 0.009955 0.29 0.053747 1.34
6 0.009532 0.35 0.064481 1.24
7 0.009785 0.33 0.065854 1.41
8 0.016824 0.58 0.076978 1.51
9 0.004220 0.14 0.059005 1.16
10 0.003715 0.14 0.052485 1.14
20 0.000019 0.00 0.065079 0.96 0.040852 0.73
CPU times: user 20min 20s, sys: 4min 58s, total: 25min 19s
Wall time: 3min 28s