Create a gist now

Instantly share code, notes, and snippets.

Embed
What would you like to do?

TensorFlow で MLP & CNN ver.20170916

Batch Normalization を組み込んだ MLP と CNN

MLP

CNN

import tensorflow as tf
import tensorflow.contrib as tfc
import numpy as np
class CNN_BN(object):
    """Convolutional classifier built on the TensorFlow 1.x graph API.

    Architecture (all defined in ``__init__``):
    conv 5x5/stride 2 (32 maps) -> normalize -> ReLU -> 2x2 max pool ->
    conv 5x5/stride 2 (64 maps) -> normalize -> ReLU -> 2x2 max pool ->
    flatten -> fully connected (1024) -> normalize -> ReLU -> linear logits (K).

    The "normalize" step calls ``tf.nn.batch_normalization`` with ``mean`` and
    ``var`` that are ordinary graph variables (initialized to 0 and 1) and
    with no offset/scale; no per-batch statistics are computed in this class.

    Every variable is registered in ``self.params`` under its TensorFlow name
    (e.g. ``'conv1/W:0'``) so that weights can be exported and restored with
    ``getWeight`` / ``setWeight``.
    """

    def __init__(self, Xshape, K, optimizer = None):
        """Build the graph in the current default graph and open a session.

        Args:
            Xshape: list ``[height, width, channels]`` describing one input
                sample (must be a list because it is concatenated with
                ``[None]`` for the batch dimension).
            K: number of output classes (width of the logit layer).
            optimizer: optional optimizer object providing ``minimize(cost)``.
                When omitted, no training op is built and ``train`` raises.
        """
        self.X = tf.placeholder(tf.float32, shape = [None] + Xshape)
        self.Xshape = Xshape
        self.params = dict()

        ### definition of the network
        #
        BN_offset = None
        BN_scale = None
        BN_eps = 0.001

        ### conv1
        #
        # The input-channel count is taken from Xshape instead of being fixed
        # to 1, so multi-channel inputs work as well.
        Wshape = [5, 5, Xshape[-1], 32]
        with tf.variable_scope('conv1'):
            W, mean, var = self._bn_variables(Wshape, tfc.layers.xavier_initializer_conv2d())
            WX = tf.nn.conv2d(self.X, W, [1, 2, 2, 1], padding = 'SAME')
            Y = tf.nn.batch_normalization(WX, mean, var, BN_offset, BN_scale, BN_eps)
            conv1 = tf.nn.relu(Y)

        ### pool1
        #
        pool1 = tf.nn.max_pool(conv1, [1, 2, 2, 1], [1, 2, 2, 1], padding = 'SAME')

        ### conv2
        #
        Wshape = [5, 5, 32, 64]
        with tf.variable_scope('conv2'):
            W, mean, var = self._bn_variables(Wshape, tfc.layers.xavier_initializer_conv2d())
            WX = tf.nn.conv2d(pool1, W, [1, 2, 2, 1], padding = 'SAME')
            Y = tf.nn.batch_normalization(WX, mean, var, BN_offset, BN_scale, BN_eps)
            conv2 = tf.nn.relu(Y)

        ### pool2
        #
        pool2 = tf.nn.max_pool(conv2, [1, 2, 2, 1], [1, 2, 2, 1], padding = 'SAME')

        ### fc
        #
        pool2_flat = tfc.layers.flatten(pool2)
        Wshape = [pool2_flat.shape[1], 1024]
        with tf.variable_scope('fc'):
            W, mean, var = self._bn_variables(Wshape, tfc.layers.xavier_initializer())
            WX = tf.matmul(pool2_flat, W)
            Y = tf.nn.batch_normalization(WX, mean, var, BN_offset, BN_scale, BN_eps)
            fc = tf.nn.relu(Y)

        ### logit
        #
        Wshape = [1024, K]
        with tf.variable_scope('logit'):
            W = tf.get_variable('W', shape = Wshape, initializer = tfc.layers.xavier_initializer())
            b = tf.get_variable('b', shape = Wshape[-1], initializer = tf.zeros_initializer)
            logit = tf.matmul(fc, W) + b
            self.params[W.name] = W
            self.params[b.name] = b

        ### softmax
        #
        self.Y = logit
        self.Z = tf.nn.softmax(logit)

        ### definition for output computation
        #
        self.cg_output = self.Z

        ### definition for cost
        #
        # cg_test yields per-sample values (not batch means) so that callers
        # can aggregate over several batches themselves.
        self.label = tf.placeholder(tf.int64, shape = [None])
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels = self.label, logits = self.Y)
        cost = tf.reduce_mean(cross_entropy)
        correct_prediction = tf.equal(tf.argmax(self.Y, 1), self.label)
        self.cg_test = (cross_entropy, correct_prediction)

        ### definition for training
        #
        self.optimizer = optimizer
        # Always define cg_train so that train() can report a clear error
        # instead of failing with an AttributeError.
        self.cg_train = None
        if optimizer is not None:
            self.cg_train = self.optimizer.minimize(cost)

        ### definition for parameter initialization
        #
        self.cg_init = tf.global_variables_initializer()

        ### starting the session
        #
        self.sess = tf.InteractiveSession()

    def _bn_variables(self, Wshape, initializer):
        """Create W, mean and var in the current variable scope and register them in self.params."""
        W = tf.get_variable('W', shape = Wshape, initializer = initializer)
        mean = tf.get_variable('mean', shape = Wshape[-1], initializer = tf.zeros_initializer)
        var = tf.get_variable('var', shape = Wshape[-1], initializer = tf.ones_initializer)
        for v in (W, mean, var):
            self.params[v.name] = v
        return W, mean, var

    def init(self):
        """Run the global variable initializer and return the result of the run."""
        rv = self.sess.run(self.cg_init)
        return rv

    def output(self, X):
        """Return the softmax output of the network for the inputs X."""
        d = {self.X: X}
        rv = self.sess.run(self.cg_output, feed_dict = d)
        return rv

    def train(self, X, lab):
        """Run one optimizer step on the batch (X, lab).

        Raises:
            RuntimeError: if the network was constructed without an optimizer.
        """
        if self.cg_train is None:
            raise RuntimeError('this network was built without an optimizer and cannot be trained')
        d = {self.X: X, self.label: lab}
        rv = self.sess.run(self.cg_train, feed_dict = d)
        return rv

    def test(self, X, lab):
        """Return (per-sample cross entropy, per-sample correctness) for the batch (X, lab)."""
        d = {self.X: X, self.label: lab}
        rv = self.sess.run(self.cg_test, feed_dict = d)
        return rv

    def getWeight(self):
        """Return the current values of all registered variables, keyed by variable name."""
        return self.sess.run(self.params)

    def setWeight(self, vals_dict):
        """Overwrite registered variables with the values in vals_dict.

        Args:
            vals_dict: mapping (anything with ``keys()`` and item access, e.g.
                a dict or the object returned by ``np.load`` on an .npz file)
                from variable name to the new value.

        Raises:
            KeyError: if vals_dict contains a name that is not a registered
                variable; nothing is modified in that case.
        """
        keys = list(vals_dict.keys())
        unknown = [k for k in keys if k not in self.params]
        if unknown:
            raise KeyError('unknown parameter name(s): %s' % ', '.join(str(k) for k in unknown))
        # Variable.load writes the value without adding ops to the graph,
        # so repeated calls do not make the graph grow.
        for k in keys:
            self.params[k].load(vals_dict[k], self.sess)
if __name__ == '__main__':
    # Smoke test: build the network for 28x28 single-channel inputs and
    # 10 classes in a private graph, then initialize its variables.
    graph = tf.Graph()
    with graph.as_default():
        cnn = CNN_BN([28, 28, 1], 10)
        cnn.init()
import tensorflow as tf
import numpy as np
import os
import sys
import getdata170817 as getdata
import cnn170916 as cnn
def evaluate(nn, X, lab, bindex):
    """Evaluate the network batch by batch and return (cross entropy, accuracy).

    Args:
        nn: object whose ``test(X, lab)`` returns a pair
            (per-sample cross entropies, per-sample correctness flags).
        X: input array, indexed along its first axis by the rows of bindex.
        lab: label array, indexed the same way as X.
        bindex: 2-D array; row ``i`` is used as the index selecting batch ``i``.

    Returns:
        Tuple (summed cross entropy / ndat, number of correct predictions / ndat)
        where ``ndat = bindex.shape[1]``.

    NOTE(review): the totals over ALL batches are divided by the row length of
    bindex. That is the mean over the data set only if each row is a boolean
    mask over all samples -- confirm against getdata.makeBatchIndex.
    """
    _, ndat = bindex.shape

    ceSums = []
    nCorrect = []
    for batch in bindex:
        ce, hit = nn.test(X[batch], lab[batch])
        ceSums.append(np.sum(ce))
        nCorrect.append(np.sum(hit))

    # Per-batch totals are kept in arrays (float / int) before the final sum.
    ceTotal = np.sum(np.array(ceSums, dtype = float))
    hitTotal = np.sum(np.array(nCorrect, dtype = int))
    return ceTotal/ndat, hitTotal/ndat
if __name__ == '__main__':
    # Training script: trains CNN_BN and stores the learned parameters in
    # <dirResult>/<script name>-params.npz.

    dirResult = 'result_ex170916'

    ### /gpu:0 GTX 1080, /gpu:1 Tesla K20c on tortoise3
    #
    if len(sys.argv) == 1:
        dev = '/cpu:0'
    elif len(sys.argv) == 2:
        dev = sys.argv[1]
    else:
        sys.exit('usage: %s [device]' % sys.argv[0])

    # Resolve the output file before training: create the result directory
    # now so that the save at the very end cannot fail after the whole run,
    # and use only the base name of the script so that invoking it with a
    # directory prefix or an absolute path still writes into dirResult.
    if not os.path.isdir(dirResult):
        os.makedirs(dirResult)
    scriptName = os.path.splitext(os.path.basename(sys.argv[0]))[0]
    fnParams = os.path.join(dirResult, scriptName + '-params.npz')

    ### reading and preparing the training data
    #
    # 'L' and 'V' are presumably the learning and validation subsets --
    # confirm against getdata170817.
    data = getdata.Data('../150117-mnist', nV = 10000)
    K = data.nclass
    XL, labL = data.getData('L')
    XV, labV = data.getData('V')
    Xshape = [data.nrow, data.ncol, 1]
    XL = XL.reshape([-1] + Xshape)
    XV = XV.reshape([-1] + Xshape)
    NL = XL.shape[0]
    NV = XV.shape[0]

    ### initializing the network
    #
    eta = 0.1
    mu = 0.9
    optimizer = tf.train.MomentumOptimizer(eta, mu)
    #optimizer = tf.train.AdamOptimizer(learning_rate = 1e-4)
    g = tf.Graph()
    with g.as_default():
        with g.device(dev):
            nn = cnn.CNN_BN(Xshape, K, optimizer = optimizer)
            nn.init()

    ### training
    #
    batchsize = 128
    bindexL = getdata.makeBatchIndex(NL, batchsize)
    nbatchL = bindexL.shape[0]
    bindexV = getdata.makeBatchIndex(NV, batchsize)
    print('# ceL accL ceV accV')
    nitr = 10000
    nd = 0  # number of training samples processed so far
    for i in range(nitr):
        # report the current performance every 500 iterations
        if (i % 500 == 0):
            ceL, accL = evaluate(nn, XL, labL, bindexL)
            ceV, accV = evaluate(nn, XV, labV, bindexV)
            print(i, nd, ceL, accL*100, ceV, accV*100)
        # one update on a randomly chosen batch
        ib = np.random.randint(0, nbatchL)
        ii = bindexL[ib, :]
        nn.train(XL[ii], labL[ii])
        nd += XL[ii].shape[0]

    ### saving the learned parameters
    #
    params_dict = nn.getWeight()
    np.savez_compressed(fnParams, **params_dict)
import tensorflow as tf
import numpy as np
import os
import sys
import getdata170817 as getdata
import cnn170916 as cnn
import ex170916cnnL as ex
if __name__ == '__main__':
    # Test script: restores the parameters saved by the CNN training script
    # and reports cross entropy / accuracy on the 'L', 'V' and 'T' subsets.

    dirResult = 'result_ex170916'

    ### reading and preparing the data
    #
    # 'L', 'V', 'T' are presumably learning / validation / test subsets --
    # confirm against getdata170817.
    data = getdata.Data('../150117-mnist', nV = 10000)
    Xshape = [data.nrow, data.ncol, 1]
    K = data.nclass
    imageShape = [-1] + Xshape
    subsets = []
    for tag in ('L', 'V', 'T'):
        Xs, labs = data.getData(tag)
        subsets.append((Xs.reshape(imageShape), labs))

    ### initializing the network
    #
    with tf.Graph().as_default():
        nn = cnn.CNN_BN(Xshape, K)
        fnParams = 'result_ex170916/ex170916cnnL-params.npz'
        with np.load(fnParams) as hoge:
            nn.setWeight(hoge)

    ### test
    #
    batchsize = 128
    # build all batch indices first, then evaluate, in the order L, V, T
    batchIndices = [getdata.makeBatchIndex(Xs.shape[0], batchsize) for Xs, _ in subsets]
    scores = [ex.evaluate(nn, Xs, labs, bindex)
              for (Xs, labs), bindex in zip(subsets, batchIndices)]
    (ceL, accL), (ceV, accV), (ceT, accT) = scores
    print('# ceL accL ceV accV ceT accT')
    print(ceL, accL*100, ceV, accV*100, ceT, accT*100)
import tensorflow as tf
import numpy as np
import os
import sys
import getdata170817 as getdata
import mlp170916 as mlp
def evaluate(nn, X, lab, bindex):
    """Evaluate the network batch by batch and return (cross entropy, accuracy).

    Args:
        nn: object whose ``test(X, lab)`` returns a pair
            (per-sample cross entropies, per-sample correctness flags).
        X: input array, indexed along its first axis by the rows of bindex.
        lab: label array, indexed the same way as X.
        bindex: 2-D array; row ``i`` is used as the index selecting batch ``i``.

    Returns:
        Tuple (summed cross entropy / ndat, number of correct predictions / ndat)
        where ``ndat = bindex.shape[1]``.

    NOTE(review): the totals over ALL batches are divided by the row length of
    bindex. That is the mean over the data set only if each row is a boolean
    mask over all samples -- confirm against getdata.makeBatchIndex.
    """
    _, ndat = bindex.shape

    ceSums = []
    nCorrect = []
    for batch in bindex:
        ce, hit = nn.test(X[batch], lab[batch])
        ceSums.append(np.sum(ce))
        nCorrect.append(np.sum(hit))

    # Per-batch totals are kept in arrays (float / int) before the final sum.
    ceTotal = np.sum(np.array(ceSums, dtype = float))
    hitTotal = np.sum(np.array(nCorrect, dtype = int))
    return ceTotal/ndat, hitTotal/ndat
if __name__ == '__main__':
    # Training script: trains MLP_BN and stores the learned parameters in
    # <dirResult>/<script name>-params.npz.

    dirResult = 'result_ex170916'

    ### /gpu:0 GTX 1080, /gpu:1 Tesla K20c on tortoise3
    #
    if len(sys.argv) == 1:
        dev = '/cpu:0'
    elif len(sys.argv) == 2:
        dev = sys.argv[1]
    else:
        sys.exit('usage: %s [device]' % sys.argv[0])

    # Resolve the output file before training: create the result directory
    # now so that the save at the very end cannot fail after the whole run,
    # and use only the base name of the script so that invoking it with a
    # directory prefix or an absolute path still writes into dirResult.
    if not os.path.isdir(dirResult):
        os.makedirs(dirResult)
    scriptName = os.path.splitext(os.path.basename(sys.argv[0]))[0]
    fnParams = os.path.join(dirResult, scriptName + '-params.npz')

    ### reading and preparing the training data
    #
    # 'L' and 'V' are presumably the learning and validation subsets --
    # confirm against getdata170817.
    data = getdata.Data('../150117-mnist', nV = 10000)
    D = data.nrow * data.ncol
    K = data.nclass
    XL, labL = data.getData('L')
    XV, labV = data.getData('V')
    NL = XL.shape[0]
    NV = XV.shape[0]

    ### initializing the network
    #
    eta = 0.1
    mu = 0.9
    optimizer = tf.train.MomentumOptimizer(eta, mu)
    #optimizer = tf.train.AdamOptimizer(learning_rate = 1e-4)
    g = tf.Graph()
    with g.as_default():
        with g.device(dev):
            nn = mlp.MLP_BN(D, K, optimizer = optimizer)
            nn.init()

    ### training
    #
    batchsize = 128
    bindexL = getdata.makeBatchIndex(NL, batchsize)
    nbatchL = bindexL.shape[0]
    bindexV = getdata.makeBatchIndex(NV, batchsize)
    print('# ceL accL ceV accV')
    nitr = 10000
    nd = 0  # number of training samples processed so far
    for i in range(nitr):
        # report the current performance every 500 iterations
        if (i % 500 == 0):
            ceL, accL = evaluate(nn, XL, labL, bindexL)
            ceV, accV = evaluate(nn, XV, labV, bindexV)
            print(i, nd, ceL, accL*100, ceV, accV*100)
        # one update on a randomly chosen batch
        ib = np.random.randint(0, nbatchL)
        ii = bindexL[ib, :]
        nn.train(XL[ii], labL[ii])
        nd += XL[ii].shape[0]

    ### saving the learned parameters
    #
    params_dict = nn.getWeight()
    np.savez_compressed(fnParams, **params_dict)
import tensorflow as tf
import numpy as np
import os
import sys
import getdata170817 as getdata
import mlp170916 as mlp
import ex170916mlpL as ex
if __name__ == '__main__':
    # Test script: restores the parameters saved by the MLP training script
    # and reports cross entropy / accuracy on the 'L', 'V' and 'T' subsets.

    dirResult = 'result_ex170916'

    ### reading and preparing the data
    #
    # 'L', 'V', 'T' are presumably learning / validation / test subsets --
    # confirm against getdata170817.
    data = getdata.Data('../150117-mnist', nV = 10000)
    D = data.nrow * data.ncol
    K = data.nclass
    subsets = [data.getData(tag) for tag in ('L', 'V', 'T')]

    ### initializing the network
    #
    with tf.Graph().as_default():
        nn = mlp.MLP_BN(D, K)
        fnParams = 'result_ex170916/ex170916mlpL-params.npz'
        with np.load(fnParams) as hoge:
            nn.setWeight(hoge)

    ### test
    #
    batchsize = 128
    # build all batch indices first, then evaluate, in the order L, V, T
    batchIndices = [getdata.makeBatchIndex(Xs.shape[0], batchsize) for Xs, _ in subsets]
    scores = [ex.evaluate(nn, Xs, labs, bindex)
              for (Xs, labs), bindex in zip(subsets, batchIndices)]
    (ceL, accL), (ceV, accV), (ceT, accT) = scores
    print('# ceL accL ceV accV ceT accT')
    print(ceL, accL*100, ceV, accV*100, ceT, accT*100)
import tensorflow as tf
import tensorflow.contrib as tfc
import numpy as np
class MLP_BN(object):
    """Multi-layer perceptron classifier built on the TensorFlow 1.x graph API.

    Architecture (all defined in ``__init__``):
    fully connected (1024) -> normalize -> ReLU ->
    fully connected (1024) -> normalize -> ReLU -> linear logits (K).

    The "normalize" step calls ``tf.nn.batch_normalization`` with ``mean`` and
    ``var`` that are ordinary graph variables (initialized to 0 and 1) and
    with no offset/scale; no per-batch statistics are computed in this class.

    Every variable is registered in ``self.params`` under its TensorFlow name
    (e.g. ``'fc1/W:0'``) so that weights can be exported and restored with
    ``getWeight`` / ``setWeight``.
    """

    def __init__(self, Xdim, K, optimizer = None):
        """Build the graph in the current default graph and open a session.

        Args:
            Xdim: dimensionality of one (flat) input sample.
            K: number of output classes (width of the logit layer).
            optimizer: optional optimizer object providing ``minimize(cost)``.
                When omitted, no training op is built and ``train`` raises.
        """
        self.X = tf.placeholder(tf.float32, shape = [None, Xdim])
        self.Xdim = Xdim
        self.params = dict()

        ### definition of the network
        #
        Nfc1 = 1024
        Nfc2 = 1024
        BN_offset = None
        BN_scale = None
        BN_eps = 0.001

        ### fc1
        #
        with tf.variable_scope('fc1'):
            W, mean, var = self._bn_variables([Xdim, Nfc1])
            WX = tf.matmul(self.X, W)
            Y = tf.nn.batch_normalization(WX, mean, var, BN_offset, BN_scale, BN_eps)
            fc1 = tf.nn.relu(Y)

        ### fc2
        #
        with tf.variable_scope('fc2'):
            W, mean, var = self._bn_variables([Nfc1, Nfc2])
            WX = tf.matmul(fc1, W)
            Y = tf.nn.batch_normalization(WX, mean, var, BN_offset, BN_scale, BN_eps)
            fc2 = tf.nn.relu(Y)

        ### logit
        #
        Wshape = [Nfc2, K]
        with tf.variable_scope('logit'):
            W = tf.get_variable('W', shape = Wshape, initializer = tfc.layers.xavier_initializer())
            b = tf.get_variable('b', shape = Wshape[-1], initializer = tf.zeros_initializer)
            logit = tf.matmul(fc2, W) + b
            self.params[W.name] = W
            self.params[b.name] = b

        ### softmax
        #
        self.Y = logit
        self.Z = tf.nn.softmax(logit)

        ### definition for output computation
        #
        self.cg_output = self.Z

        ### definition for cost
        #
        # cg_test yields per-sample values (not batch means) so that callers
        # can aggregate over several batches themselves.
        self.label = tf.placeholder(tf.int64, shape = [None])
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels = self.label, logits = self.Y)
        cost = tf.reduce_mean(cross_entropy)
        correct_prediction = tf.equal(tf.argmax(self.Y, 1), self.label)
        self.cg_test = (cross_entropy, correct_prediction)

        ### definition for training
        #
        self.optimizer = optimizer
        # Always define cg_train so that train() can report a clear error
        # instead of failing with an AttributeError.
        self.cg_train = None
        if optimizer is not None:
            self.cg_train = self.optimizer.minimize(cost)

        ### definition for parameter initialization
        #
        self.cg_init = tf.global_variables_initializer()

        ### starting the session
        #
        self.sess = tf.InteractiveSession()

    def _bn_variables(self, Wshape):
        """Create W, mean and var in the current variable scope and register them in self.params."""
        W = tf.get_variable('W', shape = Wshape, initializer = tfc.layers.xavier_initializer())
        mean = tf.get_variable('mean', shape = Wshape[-1], initializer = tf.zeros_initializer)
        var = tf.get_variable('var', shape = Wshape[-1], initializer = tf.ones_initializer)
        for v in (W, mean, var):
            self.params[v.name] = v
        return W, mean, var

    def init(self):
        """Run the global variable initializer and return the result of the run."""
        rv = self.sess.run(self.cg_init)
        return rv

    def output(self, X):
        """Return the softmax output of the network for the inputs X."""
        d = {self.X: X}
        rv = self.sess.run(self.cg_output, feed_dict = d)
        return rv

    def train(self, X, lab):
        """Run one optimizer step on the batch (X, lab).

        Raises:
            RuntimeError: if the network was constructed without an optimizer.
        """
        if self.cg_train is None:
            raise RuntimeError('this network was built without an optimizer and cannot be trained')
        d = {self.X: X, self.label: lab}
        rv = self.sess.run(self.cg_train, feed_dict = d)
        return rv

    def test(self, X, lab):
        """Return (per-sample cross entropy, per-sample correctness) for the batch (X, lab)."""
        d = {self.X: X, self.label: lab}
        rv = self.sess.run(self.cg_test, feed_dict = d)
        return rv

    def getWeight(self):
        """Return the current values of all registered variables, keyed by variable name."""
        return self.sess.run(self.params)

    def setWeight(self, vals_dict):
        """Overwrite registered variables with the values in vals_dict.

        Args:
            vals_dict: mapping (anything with ``keys()`` and item access, e.g.
                a dict or the object returned by ``np.load`` on an .npz file)
                from variable name to the new value.

        Raises:
            KeyError: if vals_dict contains a name that is not a registered
                variable; nothing is modified in that case.
        """
        keys = list(vals_dict.keys())
        unknown = [k for k in keys if k not in self.params]
        if unknown:
            raise KeyError('unknown parameter name(s): %s' % ', '.join(str(k) for k in unknown))
        # Variable.load writes the value without adding ops to the graph,
        # so repeated calls do not make the graph grow.
        for k in keys:
            self.params[k].load(vals_dict[k], self.sess)
if __name__ == '__main__':
    # Smoke test: build the network for 768-dimensional inputs and 10 classes
    # in a private graph, then initialize its variables.
    # NOTE(review): the experiment scripts use nrow*ncol as the input size
    # (28*28 = 784 for 28x28 images); 768 here is presumably a typo, though
    # harmless for a construction-only check -- confirm.
    graph = tf.Graph()
    with graph.as_default():
        mlp = MLP_BN(768, 10)
        mlp.init()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment