Create a gist now

Instantly share code, notes, and snippets.

Embed
What would you like to do?
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import tensorflow as tf
import tensorflow.contrib as tfc
import numpy as np
class CNN(object):
    """Small convolutional softmax classifier built with the TF1 graph API.

    Layer stack: conv(5x5, 32 maps) -> max-pool -> conv(5x5, 64 maps) ->
    max-pool -> fully connected (1024 units, ReLU) -> K logits.  Both
    convolutions and both poolings use stride 2 with 'SAME' padding.
    Constructing an instance builds the graph and opens a session; the
    variables are only initialised once init() is called (or setWeight()
    assigns every parameter).

    Attributes:
        X: float32 input placeholder of shape [None] + Xshape.
        label: int64 placeholder of shape [None] holding class indices.
        Y: logits tensor.
        Z: softmax of the logits.
        params: dict mapping 'layer/W' / 'layer/b' names to the variables.
        cg_output, cg_test, cg_train, cg_init: fetches run by the methods;
            cg_train is None when no optimizer was supplied.
        sess: the tf.InteractiveSession used by every method.
    """

    def __init__(self, Xshape, K, optimizer = None):
        """Build the computation graph and start the session.

        Args:
            Xshape: list with the shape of one input example.  The first
                convolution filter is declared with a single input channel,
                so this is presumably [rows, cols, 1] -- confirm with callers.
            K: number of classes, i.e. the width of the logit layer.
            optimizer: optional object providing minimize(cost); when omitted
                the network can be evaluated but not trained.
        """
        self.X = tf.placeholder(tf.float32, shape = [None] + Xshape)
        self.Xshape = Xshape
        self.params = dict()

        ### definition of the network

        ### conv1
        #
        Wshape = [5, 5, 1, 32]
        W = tf.get_variable('conv1/W', shape = Wshape, initializer = tfc.layers.xavier_initializer_conv2d())
        Y = tf.nn.conv2d(self.X, W, [1, 2, 2, 1], padding = 'SAME')
        b = tf.get_variable('conv1/b', shape = Wshape[-1], initializer = tf.zeros_initializer)
        conv1 = tf.nn.relu(Y + b)
        self.params['conv1/W'] = W
        self.params['conv1/b'] = b

        ### pool1
        #
        Wshape = [1, 2, 2, 1]
        pool1 = tf.nn.max_pool(conv1, Wshape, [1, 2, 2, 1], padding = 'SAME')

        ### conv2
        #
        Wshape = [5, 5, 32, 64]
        W = tf.get_variable('conv2/W', shape = Wshape, initializer = tfc.layers.xavier_initializer_conv2d())
        Y = tf.nn.conv2d(pool1, W, [1, 2, 2, 1], padding = 'SAME')
        b = tf.get_variable('conv2/b', shape = Wshape[-1], initializer = tf.zeros_initializer)
        conv2 = tf.nn.relu(Y + b)
        self.params['conv2/W'] = W
        self.params['conv2/b'] = b

        ### pool2
        #
        Wshape = [1, 2, 2, 1]
        pool2 = tf.nn.max_pool(conv2, Wshape, [1, 2, 2, 1], padding = 'SAME')

        ### fc
        #
        pool2_flat = tfc.layers.flatten(pool2)
        # the input width of the dense layer is read off the flattened tensor
        Wshape = [pool2_flat.shape[1], 1024]
        W = tf.get_variable('fc/W', shape = Wshape, initializer = tfc.layers.xavier_initializer())
        b = tf.get_variable('fc/b', shape = Wshape[-1], initializer = tf.zeros_initializer)
        fc = tf.nn.relu(tf.matmul(pool2_flat, W) + b)
        self.params['fc/W'] = W
        self.params['fc/b'] = b

        ### logit
        #
        Wshape = [1024, K]
        W = tf.get_variable('logit/W', shape = Wshape, initializer = tfc.layers.xavier_initializer())
        b = tf.get_variable('logit/b', shape = Wshape[-1], initializer = tf.zeros_initializer)
        logit = tf.matmul(fc, W) + b
        self.params['logit/W'] = W
        self.params['logit/b'] = b

        ### softmax
        #
        self.Y = logit
        self.Z = tf.nn.softmax(logit)

        ### definition for output computation
        #
        self.cg_output = self.Z

        ### definition for cost
        #
        self.label = tf.placeholder(tf.int64, shape = [None])
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels = self.label, logits = self.Y)
        cost = tf.reduce_mean(cross_entropy)
        correct_prediction = tf.equal(tf.argmax(self.Y, 1), self.label)
        # test() fetches the un-averaged tensors so that the caller can
        # accumulate exact totals over batches of different sizes
        self.cg_test = (cross_entropy, correct_prediction)

        ### definition for training
        #
        self.optimizer = optimizer
        # always define cg_train so that train() can report a missing
        # optimizer explicitly instead of failing on a missing attribute
        self.cg_train = None
        if optimizer is not None:
            self.cg_train = self.optimizer.minimize(cost)

        ### definition for parameter initialization
        #
        self.cg_init = tf.global_variables_initializer()

        ### starting the session
        #
        self.sess = tf.InteractiveSession()

    def init(self):
        """Run the variable initializer and return the result of sess.run."""
        return self.sess.run(self.cg_init)

    def output(self, X):
        """Return the softmax output for the input batch X."""
        return self.sess.run(self.cg_output, feed_dict = {self.X: X})

    def train(self, X, lab):
        """Run one optimizer step on the batch (X, lab).

        Raises:
            RuntimeError: if the network was constructed without an optimizer.
        """
        if self.cg_train is None:
            raise RuntimeError('train() needs an optimizer, but none was given to the constructor')
        return self.sess.run(self.cg_train, feed_dict = {self.X: X, self.label: lab})

    def test(self, X, lab):
        """Return (cross_entropy, correct_prediction) for the batch (X, lab).

        Both values are per-example arrays, not averages.
        """
        return self.sess.run(self.cg_test, feed_dict = {self.X: X, self.label: lab})

    def getWeight(self):
        """Return a dict with the current value of every entry of params."""
        return self.sess.run(self.params)

    def setWeight(self, vals_dict):
        """Assign the values in vals_dict to the parameters with the same keys.

        Args:
            vals_dict: mapping from parameter name (a key of params) to the new
                value.  Only keys() and item access are used, so an opened
                .npz archive works as well as a dict.
        """
        # NOTE: tf.assign adds new ops to the graph on every call
        assign_ops = [tf.assign(self.params[k], vals_dict[k]) for k in vals_dict.keys()]
        self.sess.run(assign_ops)
if __name__ == '__main__':
    # smoke test: build the network for 28x28 single-channel inputs and
    # 10 classes in a graph of its own, then initialise its variables
    graph = tf.Graph()
    with graph.as_default():
        cnn = CNN([28, 28, 1], 10)
        cnn.init()
In [5]: %time %run ex170817cnnL.py
# ceL accL ceV accV
0 0 2.30353644867 10.39 2.30287331848 10.94
500 63712 0.0529104816699 98.304 0.0640928661048 98.08
1000 127568 0.0356398470223 98.844 0.0654105557561 98.14
1500 191472 0.0205459920326 99.386 0.0544750001237 98.7
2000 255376 0.0141205127522 99.574 0.0527081821941 98.8
2500 319376 0.011272611057 99.668 0.0542131656239 98.75
3000 383328 0.0118199069849 99.61 0.0593135959094 98.52
3500 447280 0.0135101142739 99.596 0.0672322839323 98.56
4000 511232 0.00633511856047 99.784 0.0694293337396 98.59
4500 575136 0.00304744189318 99.928 0.0528560796064 98.9
5000 639040 0.00146097178215 99.962 0.0548275192713 98.89
5500 702848 0.000773658882029 99.98 0.0585367139032 98.9
6000 766752 0.000948838318876 99.964 0.0618521307983 98.91
6500 830704 0.00160421159322 99.954 0.0627802032718 98.86
7000 894656 0.00151222500126 99.958 0.0758055595694 98.84
7500 958512 0.00518531769998 99.826 0.0722700397006 98.71
8000 1022416 0.00358739747816 99.87 0.0621865498303 98.74
8500 1086416 0.00173747777842 99.942 0.0603051847215 98.87
9000 1150368 0.000801627419269 99.974 0.0712650403303 98.89
9500 1214368 0.00163371050193 99.956 0.0699203371461 98.9
CPU times: user 27min 26s, sys: 5min 1s, total: 32min 28s
Wall time: 4min 14s
In [6]: %time %run ex170817cnnL.py /gpu:0 # GeForce GTX 1080
# ceL accL ceV accV
0 0 2.30373031067 10.176 2.30391277542 10.14
500 64000 0.0722305047369 97.722 0.0771701710686 97.66
1000 127952 0.050542394675 98.454 0.076588276845 97.87
1500 191952 0.0286524880698 99.148 0.0578248742606 98.33
2000 255856 0.021113182172 99.328 0.0630280425681 98.33
2500 319856 0.0216274167825 99.288 0.0653740325063 98.38
3000 383808 0.0138078262459 99.536 0.056337662564 98.58
3500 447616 0.0120193564079 99.638 0.0564322202036 98.67
4000 511616 0.00810228453235 99.752 0.0563732981964 98.78
4500 575568 0.00540681313567 99.814 0.0546471179079 98.91
5000 639568 0.00769762589023 99.74 0.0681068109604 98.56
5500 703472 0.0031355497104 99.886 0.0656713479554 98.89
6000 767424 0.0166499800466 99.478 0.102558242047 98.27
6500 831232 0.00446105945512 99.858 0.0731693506698 98.84
7000 895136 0.00344797703806 99.904 0.0804681016457 98.73
7500 959136 0.00410345322252 99.858 0.0687284505218 98.68
8000 1023088 0.00648627110681 99.808 0.0738402955466 98.84
8500 1087088 0.00132427385638 99.956 0.0721427872962 98.88
9000 1151040 0.00365969986003 99.866 0.0850411816598 98.73
9500 1214944 0.00201407230972 99.932 0.0669418959407 98.97
CPU times: user 1min 42s, sys: 9.94 s, total: 1min 52s
Wall time: 1min 8s
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import tensorflow as tf
import numpy as np
import os
import sys
import getdata170817 as getdata
import cnn170817 as cnn
def evaluate(nn, X, lab, bindex):
    """Evaluate the network batch by batch over a whole data set.

    Args:
        nn: object whose test(X, lab) returns per-example cross entropies and
            per-example correctness flags.
        X: array of inputs, indexed along its first axis by the rows of bindex.
        lab: array of labels, indexed the same way as X.
        bindex: 2-D array with one row per batch; each row selects the
            examples of that batch, and the number of columns is used as the
            total number of examples.

    Returns:
        (summed cross entropy / ndat, summed correct count / ndat), where ndat
        is the number of columns of bindex.
    """
    nbatch, ndat = bindex.shape
    ceSums = np.empty(nbatch)
    hitCounts = np.empty(nbatch, dtype = int)
    for ib, select in enumerate(bindex):
        ce1, cp1 = nn.test(X[select], lab[select])
        ceSums[ib], hitCounts[ib] = np.sum(ce1), np.sum(cp1)
    return ceSums.sum() / ndat, hitCounts.sum() / ndat
if __name__ == '__main__':
    # usage: <script> [device]   -- the device defaults to '/cpu:0'
    dirResult = 'result_ex170817'
    if len(sys.argv) == 1:
        dev = '/cpu:0'
    elif len(sys.argv) == 2:
        dev = sys.argv[1]
    else:
        sys.exit('usage: %s [device]' % sys.argv[0])

    # Resolve the output file before training.  Only the base name of the
    # script is used, so the parameters always land inside dirResult even when
    # the script is started through a path, and the directory is created up
    # front so that a missing directory cannot lose a finished training run.
    scriptName = os.path.splitext(os.path.basename(sys.argv[0]))[0]
    fnParams = os.path.join(dirResult, scriptName + '-params.npz')
    if not os.path.isdir(dirResult):
        os.makedirs(dirResult)

    ### reading and preparing the training data
    #
    data = getdata.Data('../150117-mnist', nV = 10000)
    K = data.nclass
    XL, labL = data.getData('L')
    XV, labV = data.getData('V')
    # append a channel axis of size 1 for the convolutional network
    Xshape = [data.nrow, data.ncol, 1]
    XL = XL.reshape([-1] + Xshape)
    XV = XV.reshape([-1] + Xshape)
    NL = XL.shape[0]
    NV = XV.shape[0]

    ### initializing the network
    #
    eta = 0.1
    mu = 0.9
    optimizer = tf.train.MomentumOptimizer(eta, mu)
    g = tf.Graph()
    with g.as_default():
        with g.device(dev):
            nn = cnn.CNN(Xshape, K, optimizer = optimizer)
            nn.init()

    ### training
    #
    batchsize = 128
    bindexL = getdata.makeBatchIndex(NL, batchsize)
    nbatchL = bindexL.shape[0]
    bindexV = getdata.makeBatchIndex(NV, batchsize)
    print('# ceL accL ceV accV')
    nitr = 10000
    nd = 0    # number of training examples processed so far
    for i in range(nitr):
        # report every 100 iterations at the beginning, every 500 afterwards
        if (i < 500 and i % 100 == 0) or (i % 500 == 0):
            ceL, accL = evaluate(nn, XL, labL, bindexL)
            ceV, accV = evaluate(nn, XV, labV, bindexV)
            print(i, nd, ceL, accL*100, ceV, accV*100)
        # one training step on a randomly chosen mini batch
        ib = np.random.randint(0, nbatchL)
        ii = bindexL[ib, :]
        Xbatch = XL[ii]    # apply the mask once and reuse the result
        nn.train(Xbatch, labL[ii])
        nd += Xbatch.shape[0]

    ### saving the learned parameters
    #
    params_dict = nn.getWeight()
    np.savez_compressed(fnParams, **params_dict)
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import tensorflow as tf
import numpy as np
import os
import sys
import getdata170817 as getdata
import cnn170817 as cnn
import ex170817cnnL
if __name__ == '__main__':
    dirResult = 'result_ex170817'

    ### reading the three data subsets and shaping them as image tensors
    #
    data = getdata.Data('../150117-mnist', nV = 10000)
    Xshape = [data.nrow, data.ncol, 1]
    K = data.nclass
    XL, labL = data.getData('L')
    XV, labV = data.getData('V')
    XT, labT = data.getData('T')
    tensorShape = [-1] + Xshape
    XL, XV, XT = [X.reshape(tensorShape) for X in (XL, XV, XT)]
    NL, NV, NT = XL.shape[0], XV.shape[0], XT.shape[0]

    ### building the network and restoring the parameters saved by training
    #
    with tf.Graph().as_default():
        nn = cnn.CNN(Xshape, K)
        fnParams = 'result_ex170817/ex170817cnnL-params.npz'
        with np.load(fnParams) as saved:
            nn.setWeight(saved)

    ### test
    #
    batchsize = 128
    bindexL = getdata.makeBatchIndex(NL, batchsize)
    bindexV = getdata.makeBatchIndex(NV, batchsize)
    bindexT = getdata.makeBatchIndex(NT, batchsize)
    # the evaluation routine is shared with the training script
    evaluate = ex170817cnnL.evaluate
    ceL, accL = evaluate(nn, XL, labL, bindexL)
    ceV, accV = evaluate(nn, XV, labV, bindexV)
    ceT, accT = evaluate(nn, XT, labT, bindexT)
    print('# ceL accL ceV accV ceT accT')
    print(ceL, accL*100, ceV, accV*100, ceT, accT*100)
In [3]: %time %run ex170817mlpL.py
# ceL accL ceV accV
0 0 2.3555366156 6.496 2.35637270432 6.45
500 63808 0.092499746201 97.128 0.107396060586 96.74
1000 127712 0.0474980571806 98.54 0.0857262135327 97.53
1500 191712 0.0336637791467 98.948 0.081242669785 97.71
2000 255664 0.020895914976 99.326 0.0812034955919 97.72
2500 319568 0.0218699501713 99.306 0.0936575091742 97.83
3000 383520 0.0119512152641 99.6 0.0822023220219 97.93
3500 447376 0.00651968133517 99.814 0.0794938009925 98.12
4000 511376 0.00644965353139 99.818 0.0850548474327 98.15
4500 575280 0.00783101631869 99.73 0.0931775015838 98.03
5000 639184 0.00184893320624 99.956 0.0824613762986 98.27
5500 703136 0.000875626418707 99.978 0.0844872379154 98.36
6000 767136 0.000925099569694 99.978 0.0881945440144 98.34
6500 831088 0.000187862729854 100.0 0.081537760105 98.5
7000 894944 0.000126250376148 100.0 0.0831974908057 98.49
7500 958752 9.08676589659e-05 100.0 0.0836290217037 98.48
8000 1022752 7.74996343546e-05 100.0 0.0851261680826 98.53
8500 1086656 6.8074029616e-05 100.0 0.0858370101379 98.51
9000 1150656 6.13513418444e-05 100.0 0.0866725751117 98.51
9500 1214608 5.54295608489e-05 100.0 0.0867583423773 98.49
CPU times: user 25min 33s, sys: 45 s, total: 26min 18s
Wall time: 3min 2s
In [4]: %time %run ex170817mlpL.py /gpu:0 # GeForce GTX 1080
# ceL accL ceV accV
0 0 2.33889436676 9.486 2.33994577332 9.08
500 63904 0.079622519722 97.646 0.101889857554 96.93
1000 127856 0.0444144365889 98.674 0.0856933221579 97.44
1500 191808 0.0248322945541 99.276 0.0734001988351 97.93
2000 255808 0.0281905970865 99.082 0.0885640421927 97.68
2500 319760 0.0199096138212 99.364 0.0922179355264 97.75
3000 383712 0.0121254081642 99.632 0.0845830722094 97.97
3500 447664 0.0050818561705 99.84 0.0805605934888 98.17
4000 511664 0.00352444288196 99.892 0.080305925148 98.22
4500 575664 0.0045654893967 99.868 0.0812806605175 98.11
5000 639568 0.000961035514828 99.978 0.0796218807612 98.3
5500 703424 0.0032331705864 99.906 0.0886421674587 98.08
6000 767424 0.000682797787583 99.982 0.0828221257709 98.39
6500 831328 0.000751670261975 99.984 0.082546234253 98.42
7000 895232 0.000205983215098 99.996 0.0809072538737 98.41
7500 959232 0.000106152159986 100.0 0.0818330566301 98.43
8000 1023232 8.4684333175e-05 100.0 0.0838491139621 98.42
8500 1087136 9.03705945995e-05 99.998 0.0850743832564 98.43
9000 1151088 6.04695013876e-05 100.0 0.085774672149 98.41
9500 1215040 5.42799999355e-05 100.0 0.0863653538564 98.43
CPU times: user 1min 10s, sys: 6.76 s, total: 1min 17s
Wall time: 47.8 s
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import tensorflow as tf
import numpy as np
import os
import sys
import getdata170817 as getdata
import mlp170817 as mlp
def evaluate(nn, X, lab, bindex):
    """Return the mean cross entropy and the accuracy over all batches.

    nn.test() is called once for every row of bindex with the examples that
    row selects from X and lab.  The per-example cross entropies and
    correctness flags it returns are summed per batch, then over the batches,
    and both totals are divided by the number of columns of bindex.
    """
    nbatch, ndat = bindex.shape
    # run every batch first, then reduce the results
    perBatch = [nn.test(X[rows], lab[rows]) for rows in bindex]
    ceTotal = np.empty(nbatch)
    nCorrect = np.empty(nbatch, dtype = int)
    ceTotal[:] = [np.sum(ce) for ce, _ in perBatch]
    nCorrect[:] = [np.sum(cp) for _, cp in perBatch]
    meanCE = np.sum(ceTotal) / ndat
    accuracy = np.sum(nCorrect) / ndat
    return meanCE, accuracy
if __name__ == '__main__':
    # usage: <script> [device]   -- the device defaults to '/cpu:0'
    dirResult = 'result_ex170817'
    if len(sys.argv) == 1:
        dev = '/cpu:0'
    elif len(sys.argv) == 2:
        dev = sys.argv[1]
    else:
        sys.exit('usage: %s [device]' % sys.argv[0])

    # Resolve the output file before training.  Only the base name of the
    # script is used, so the parameters always land inside dirResult even when
    # the script is started through a path, and the directory is created up
    # front so that a missing directory cannot lose a finished training run.
    scriptName = os.path.splitext(os.path.basename(sys.argv[0]))[0]
    fnParams = os.path.join(dirResult, scriptName + '-params.npz')
    if not os.path.isdir(dirResult):
        os.makedirs(dirResult)

    ### reading and preparing the training data
    #
    data = getdata.Data('../150117-mnist', nV = 10000)
    D = data.nrow * data.ncol
    K = data.nclass
    XL, labL = data.getData('L')
    XV, labV = data.getData('V')
    NL = XL.shape[0]
    NV = XV.shape[0]

    ### initializing the network
    #
    eta = 0.1
    mu = 0.9
    optimizer = tf.train.MomentumOptimizer(eta, mu)
    g = tf.Graph()
    with g.as_default():
        with g.device(dev):
            nn = mlp.MLP(D, K, optimizer = optimizer)
            nn.init()

    ### training
    #
    batchsize = 128
    bindexL = getdata.makeBatchIndex(NL, batchsize)
    nbatchL = bindexL.shape[0]
    bindexV = getdata.makeBatchIndex(NV, batchsize)
    print('# ceL accL ceV accV')
    nitr = 10000
    nd = 0    # number of training examples processed so far
    for i in range(nitr):
        # report every 100 iterations at the beginning, every 500 afterwards
        if (i < 500 and i % 100 == 0) or (i % 500 == 0):
            ceL, accL = evaluate(nn, XL, labL, bindexL)
            ceV, accV = evaluate(nn, XV, labV, bindexV)
            print(i, nd, ceL, accL*100, ceV, accV*100)
        # one training step on a randomly chosen mini batch
        ib = np.random.randint(0, nbatchL)
        ii = bindexL[ib, :]
        Xbatch = XL[ii]    # apply the mask once and reuse the result
        nn.train(Xbatch, labL[ii])
        nd += Xbatch.shape[0]

    ### saving the learned parameters
    #
    params_dict = nn.getWeight()
    np.savez_compressed(fnParams, **params_dict)
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import tensorflow as tf
import numpy as np
import os
import sys
import getdata170817 as getdata
import mlp170817 as mlp
import ex170817mlpL
if __name__ == '__main__':
    dirResult = 'result_ex170817'

    ### reading the three data subsets
    #
    data = getdata.Data('../150117-mnist', nV = 10000)
    D = data.nrow * data.ncol
    K = data.nclass
    XL, labL = data.getData('L')
    XV, labV = data.getData('V')
    XT, labT = data.getData('T')
    NL, NV, NT = XL.shape[0], XV.shape[0], XT.shape[0]

    ### building the network and restoring the parameters saved by training
    #
    with tf.Graph().as_default():
        nn = mlp.MLP(D, K)
        fnParams = 'result_ex170817/ex170817mlpL-params.npz'
        with np.load(fnParams) as saved:
            nn.setWeight(saved)

    ### test
    #
    batchsize = 128
    bindexL = getdata.makeBatchIndex(NL, batchsize)
    bindexV = getdata.makeBatchIndex(NV, batchsize)
    bindexT = getdata.makeBatchIndex(NT, batchsize)
    # the evaluation routine is shared with the training script
    evaluate = ex170817mlpL.evaluate
    ceL, accL = evaluate(nn, XL, labL, bindexL)
    ceV, accV = evaluate(nn, XV, labV, bindexV)
    ceT, accT = evaluate(nn, XT, labT, bindexT)
    print('# ceL accL ceV accV ceT accT')
    print(ceL, accL*100, ceV, accV*100, ceT, accT*100)
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import numpy as np
import mnist
class Data(object):
    """Lazy access to the learning ('L'), validation ('V') and test ('T') data.

    The 'L' set delivered by the MNIST reader is split in two: its last nV
    examples form the validation subset and the rest the learning subset.
    Images are divided by 255 when they are loaded.  Nothing is read until
    getData() first asks for a subset; afterwards the arrays are kept as
    attributes and returned directly.
    """

    def __init__(self, pathMNIST, nV = 0):
        """Open the reader for pathMNIST and keep nV, the validation size."""
        reader = mnist.MNIST(pathMNIST = pathMNIST)
        self.mnist = reader
        self.nrow, self.ncol, self.nclass = reader.nrow, reader.ncol, reader.nclass
        self.nV = nV
        # the data themselves are loaded on demand by getData()

    def _getLV(self):
        """Load the 'L' set and split it into learning and validation parts."""
        images = self.mnist.getImage('L') / 255
        labels = self.mnist.getLabel('L')
        nLearn = images.shape[0] - self.nV
        self.nL = nLearn
        self.XL, self.XV = images[:nLearn], images[nLearn:]
        self.labL, self.labV = labels[:nLearn], labels[nLearn:]

    def _getT(self):
        """Load the 'T' set."""
        images = self.mnist.getImage('T') / 255
        self.XT = images
        self.labT = self.mnist.getLabel('T')
        self.nT = images.shape[0]

    def getData(self, LVT):
        """Return (X, lab) for the requested subset, loading it if necessary.

        LVT is 'L' for learning or 'V' for validation data; every other value
        selects the test data.
        """
        if LVT in ('L', 'V'):
            subset, load = LVT, self._getLV
        else:
            subset, load = 'T', self._getT
        xName, labName = 'X' + subset, 'lab' + subset
        if not (hasattr(self, xName) and hasattr(self, labName)):
            load()
        return getattr(self, xName), getattr(self, labName)
### mini batch indices for stochastic gradient descent
#
def makeBatchIndex(N, batchsize):
    """Randomly partition N examples into mini batches given as boolean masks.

    Args:
        N: number of examples.
        batchsize: number of examples per batch; the last batch holds the
            remainder when N is not a multiple of batchsize.

    Returns:
        Boolean array of shape (ceil(N / batchsize), N).  Row ib is True
        exactly at the examples of batch ib, and every example belongs to
        exactly one batch.  For N == 0 the result has shape (0, 0).

    Raises:
        ValueError: if batchsize is not positive.
    """
    if batchsize <= 0:
        raise ValueError('batchsize must be positive, got %r' % (batchsize,))
    idx = np.random.permutation(N)
    nbatch = int(np.ceil(float(N) / batchsize))
    idxB = np.zeros(( nbatch, N ), dtype = bool)
    # a slice running past the end of idx is simply shorter, so the last
    # (possibly smaller) batch needs no special treatment
    for ib in range(nbatch):
        idxB[ib, idx[ib*batchsize:(ib+1)*batchsize]] = True
    return idxB
if __name__ == '__main__':
    # smoke test: load every subset once and show the shapes of the inputs
    data = Data('../150117-mnist', nV = 10000)
    (XL, labL), (XV, labV), (XT, labT) = [data.getData(which) for which in ('L', 'V', 'T')]
    print(XL.shape, XV.shape, XT.shape)
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import tensorflow as tf
import tensorflow.contrib as tfc
import numpy as np
class MLP(object):
    """Multi-layer perceptron softmax classifier built with the TF1 graph API.

    Layer stack: two fully connected ReLU layers of 1024 units each, followed
    by a linear layer producing K logits.  Constructing an instance builds the
    graph and opens a session; the variables are only initialised once init()
    is called (or setWeight() assigns every parameter).

    Attributes:
        X: float32 input placeholder of shape [None, Xdim].
        label: int64 placeholder of shape [None] holding class indices.
        Y: logits tensor.
        Z: softmax of the logits.
        params: dict mapping 'layer/W' / 'layer/b' names to the variables.
        cg_output, cg_test, cg_train, cg_init: fetches run by the methods;
            cg_train is None when no optimizer was supplied.
        sess: the tf.InteractiveSession used by every method.
    """

    def __init__(self, Xdim, K, optimizer = None):
        """Build the computation graph and start the session.

        Args:
            Xdim: number of input features per example.
            K: number of classes, i.e. the width of the logit layer.
            optimizer: optional object providing minimize(cost); when omitted
                the network can be evaluated but not trained.
        """
        self.X = tf.placeholder(tf.float32, shape = [None, Xdim])
        self.Xdim = Xdim
        self.params = dict()

        ### definition of the network
        #
        Nfc1 = 1024
        Nfc2 = 1024

        ### fc1
        #
        Wshape = [Xdim, Nfc1]
        W = tf.get_variable('fc1/W', shape = Wshape, initializer = tfc.layers.xavier_initializer())
        b = tf.get_variable('fc1/b', shape = Wshape[-1], initializer = tf.zeros_initializer)
        fc1 = tf.nn.relu(tf.matmul(self.X, W) + b)
        self.params['fc1/W'] = W
        self.params['fc1/b'] = b

        ### fc2
        #
        Wshape = [Nfc1, Nfc2]
        W = tf.get_variable('fc2/W', shape = Wshape, initializer = tfc.layers.xavier_initializer())
        b = tf.get_variable('fc2/b', shape = Wshape[-1], initializer = tf.zeros_initializer)
        fc2 = tf.nn.relu(tf.matmul(fc1, W) + b)
        self.params['fc2/W'] = W
        self.params['fc2/b'] = b

        ### logit
        #
        Wshape = [Nfc2, K]
        W = tf.get_variable('logit/W', shape = Wshape, initializer = tfc.layers.xavier_initializer())
        b = tf.get_variable('logit/b', shape = Wshape[-1], initializer = tf.zeros_initializer)
        logit = tf.matmul(fc2, W) + b
        self.params['logit/W'] = W
        self.params['logit/b'] = b

        ### softmax
        #
        self.Y = logit
        self.Z = tf.nn.softmax(logit)

        ### definition for output computation
        #
        self.cg_output = self.Z

        ### definition for cost
        #
        self.label = tf.placeholder(tf.int64, shape = [None])
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels = self.label, logits = self.Y)
        cost = tf.reduce_mean(cross_entropy)
        correct_prediction = tf.equal(tf.argmax(self.Y, 1), self.label)
        # test() fetches the un-averaged tensors so that the caller can
        # accumulate exact totals over batches of different sizes
        self.cg_test = (cross_entropy, correct_prediction)

        ### definition for training
        #
        self.optimizer = optimizer
        # always define cg_train so that train() can report a missing
        # optimizer explicitly instead of failing on a missing attribute
        self.cg_train = None
        if optimizer is not None:
            self.cg_train = self.optimizer.minimize(cost)

        ### definition for parameter initialization
        #
        self.cg_init = tf.global_variables_initializer()

        ### starting the session
        #
        self.sess = tf.InteractiveSession()

    def init(self):
        """Run the variable initializer and return the result of sess.run."""
        return self.sess.run(self.cg_init)

    def output(self, X):
        """Return the softmax output for the input batch X."""
        return self.sess.run(self.cg_output, feed_dict = {self.X: X})

    def train(self, X, lab):
        """Run one optimizer step on the batch (X, lab).

        Raises:
            RuntimeError: if the network was constructed without an optimizer.
        """
        if self.cg_train is None:
            raise RuntimeError('train() needs an optimizer, but none was given to the constructor')
        return self.sess.run(self.cg_train, feed_dict = {self.X: X, self.label: lab})

    def test(self, X, lab):
        """Return (cross_entropy, correct_prediction) for the batch (X, lab).

        Both values are per-example arrays, not averages.
        """
        return self.sess.run(self.cg_test, feed_dict = {self.X: X, self.label: lab})

    def getWeight(self):
        """Return a dict with the current value of every entry of params."""
        return self.sess.run(self.params)

    def setWeight(self, vals_dict):
        """Assign the values in vals_dict to the parameters with the same keys.

        Args:
            vals_dict: mapping from parameter name (a key of params) to the new
                value.  Only keys() and item access are used, so an opened
                .npz archive works as well as a dict.
        """
        # NOTE: tf.assign adds new ops to the graph on every call
        assign_ops = [tf.assign(self.params[k], vals_dict[k]) for k in vals_dict.keys()]
        self.sess.run(assign_ops)
if __name__ == '__main__':
    # smoke test: build the network for flattened 28x28 inputs
    # (28 * 28 = 784 features) and 10 classes, then initialise its variables
    with tf.Graph().as_default():
        mlp = MLP(784, 10)
        mlp.init()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment