takatakamanbou/00_CNNbyTensorFlow3.md Secret

## 00_CNNbyTensorFlow3.md

      
    Raw
  

              00_CNNbyTensorFlow3.md
            
          
    TensorFlow で MLP & CNN ver.20170916

Batch Normalization を組み込んだ MLP と CNN

TensorFlow で MLP & CNN ver.20170415
TensorFlow で MLP & CNN ver.20170817
https://www-tlab.math.ryukoku.ac.jp/tlab/?takataka/note/2017-09-16

MLP


mlp170916.py MLP の定義
ex170916mlpL.py MLP の学習
ex170916mlpT.py  テスト
getdata170817.py データの読み込み
mnist.py MNISTデータの読み込み

CNN


cnn170916.py CNN の定義
ex170916cnnL.py CNN の学習
ex170916cnnT.py テスト
getdata170817.py データの読み込み
mnist.py MNISTデータの読み込み


## cnn170916.py
import tensorflow as tf
import tensorflow.contrib as tfc
import numpy as np


class CNN_BN(object):
  """Convolutional classifier with a normalization step after each hidden layer.

  Layer stack built by ``__init__``:
  conv 5x5 / stride 2 (32 ch) -> normalize -> ReLU -> max-pool 2x2 / stride 2 ->
  conv 5x5 / stride 2 (64 ch) -> normalize -> ReLU -> max-pool 2x2 / stride 2 ->
  flatten -> fully connected (1024) -> normalize -> ReLU -> linear (K) -> softmax.

  NOTE(review): the ``mean`` / ``var`` tensors handed to
  ``tf.nn.batch_normalization`` are plain variables initialised to 0 and 1;
  no per-batch moments are computed anywhere in this class.  They are created
  with the ``tf.get_variable`` defaults, so presumably the optimizer updates
  them together with the weights -- confirm that this is intended.

  Every variable is registered in ``self.params`` under its TensorFlow name
  (``W.name`` etc.) so that ``getWeight`` / ``setWeight`` can round-trip them.
  """

  # Epsilon passed to tf.nn.batch_normalization.
  _BN_EPS = 0.001

  def __init__(self, Xshape, K, optimizer = None):
    """Build the computation graph in the current default graph and open a session.

    Args:
      Xshape: list giving the shape of one input sample; it is concatenated
        to ``[None]`` to form the placeholder shape, so it must be a list.
        The first convolution kernel is ``[5, 5, 1, 32]``, i.e. it expects
        one input channel.
      K: number of units of the final (logit) layer.
      optimizer: object providing ``minimize(cost)``.  When ``None`` no
        training op is built and ``train`` raises ``RuntimeError``.
    """

    self.X = tf.placeholder(tf.float32, shape = [None] + Xshape)
    self.Xshape = Xshape
    self.params = dict()

    ### conv1 -> pool1
    #
    W, mean, var = self._layer_variables('conv1', [5, 5, 1, 32], tfc.layers.xavier_initializer_conv2d())
    WX = tf.nn.conv2d(self.X, W, [1, 2, 2, 1], padding = 'SAME')
    conv1 = self._normalize_relu(WX, mean, var)
    pool1 = tf.nn.max_pool(conv1, [1, 2, 2, 1], [1, 2, 2, 1], padding = 'SAME')

    ### conv2 -> pool2
    #
    W, mean, var = self._layer_variables('conv2', [5, 5, 32, 64], tfc.layers.xavier_initializer_conv2d())
    WX = tf.nn.conv2d(pool1, W, [1, 2, 2, 1], padding = 'SAME')
    conv2 = self._normalize_relu(WX, mean, var)
    pool2 = tf.nn.max_pool(conv2, [1, 2, 2, 1], [1, 2, 2, 1], padding = 'SAME')

    ### fc
    #
    pool2_flat = tfc.layers.flatten(pool2)
    W, mean, var = self._layer_variables('fc', [pool2_flat.shape[1], 1024], tfc.layers.xavier_initializer())
    WX = tf.matmul(pool2_flat, W)
    fc = self._normalize_relu(WX, mean, var)

    ### logit
    #
    with tf.variable_scope('logit'):
      W = tf.get_variable('W', shape = [1024, K], initializer = tfc.layers.xavier_initializer())
      b = tf.get_variable('b', shape = K, initializer = tf.zeros_initializer)

    logit = tf.matmul(fc, W) + b
    self.params[W.name] = W
    self.params[b.name] = b

    ### softmax
    #
    self.Y = logit
    self.Z = tf.nn.softmax(logit)

    ### definition for output computation
    #
    self.cg_output = self.Z

    ### definition for cost
    #
    # cg_test deliberately exposes the per-sample cross entropy and the
    # per-sample correctness flags (not their means) so that callers can
    # aggregate over batches themselves.
    self.label = tf.placeholder(tf.int64, shape = [None])
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels = self.label, logits = self.Y)
    cost = tf.reduce_mean(cross_entropy)
    correct_prediction = tf.equal(tf.argmax(self.Y, 1), self.label)
    self.cg_test = (cross_entropy, correct_prediction)

    ### definition for training
    #
    # cg_train always exists; it stays None when no optimizer was supplied.
    self.optimizer = optimizer
    self.cg_train = None
    if optimizer is not None:
      self.cg_train = self.optimizer.minimize(cost)

    ### definition for parameter initialization
    #
    # Created after minimize() so that variables the optimizer may have
    # added are covered by the initializer as well.
    self.cg_init = tf.global_variables_initializer()

    ### starting the session
    #
    self.sess = tf.InteractiveSession()


  def _layer_variables(self, scope, Wshape, initializer):
    """Create ``W``, ``mean`` and ``var`` under variable scope *scope* and register them in ``self.params``."""

    with tf.variable_scope(scope):
      W = tf.get_variable('W', shape = Wshape, initializer = initializer)
      mean = tf.get_variable('mean', shape = Wshape[-1], initializer = tf.zeros_initializer)
      var = tf.get_variable('var', shape = Wshape[-1], initializer = tf.ones_initializer)

    for v in (W, mean, var):
      self.params[v.name] = v
    return W, mean, var


  def _normalize_relu(self, WX, mean, var):
    """Apply ``tf.nn.batch_normalization`` (no offset, no scale) followed by ReLU."""

    Y = tf.nn.batch_normalization(WX, mean, var, None, None, self._BN_EPS)
    return tf.nn.relu(Y)


  def init(self):
    """Run the variable initializer and return the result of ``sess.run``."""

    rv = self.sess.run(self.cg_init)
    return rv


  def output(self, X):
    """Return the softmax output of the network for the input batch ``X``."""

    d = {self.X: X}
    rv = self.sess.run(self.cg_output, feed_dict = d)
    return rv


  def train(self, X, lab):
    """Run one optimizer step on the batch ``(X, lab)``.

    Raises:
      RuntimeError: if the network was constructed without an optimizer.
    """

    if self.cg_train is None:
      raise RuntimeError('this network was constructed without an optimizer; train() is unavailable')

    d = {self.X: X, self.label: lab}
    rv = self.sess.run(self.cg_train, feed_dict = d)
    return rv


  def test(self, X, lab):
    """Return ``(cross_entropy, correct_prediction)`` evaluated on the batch ``(X, lab)``."""

    d = {self.X: X, self.label: lab}
    rv = self.sess.run(self.cg_test, feed_dict = d)
    return rv


  def getWeight(self):
    """Return a dict mapping each registered variable name to its current value."""

    return self.sess.run(self.params)


  def setWeight(self, vals_dict):
    """Assign the values in ``vals_dict`` to the variables of the same name.

    ``vals_dict`` must provide ``keys()`` and item access; every key has to
    be a name registered in ``self.params`` (otherwise ``KeyError``).
    Each call creates fresh ``tf.assign`` ops, so it is meant for occasional
    use such as loading saved parameters, not for use inside a loop.
    """

    assign_ops = [tf.assign(self.params[k], vals_dict[k]) for k in vals_dict.keys()]
    self.sess.run(assign_ops)


if __name__ == '__main__':

  # Smoke test: build the network for 28x28 single-channel input with 10
  # output units inside a fresh graph, then initialize its variables.
  graph = tf.Graph()
  with graph.as_default():
    cnn = CNN_BN([28, 28, 1], 10)

  cnn.init()

## ex170916cnnL.py
import tensorflow as tf
import numpy as np
import os
import sys

import getdata170817 as getdata
import cnn170916 as cnn


def evaluate(nn, X, lab, bindex):
    """Evaluate ``nn`` batch by batch and return ``(cross entropy, accuracy)``.

    Each row of ``bindex`` is used to index ``X`` and ``lab``; the selected
    samples are passed to ``nn.test``, which must return a pair
    ``(per-sample cross entropy, per-sample correctness)``.  Both quantities
    are summed over all batches and divided by ``bindex.shape[1]``.

    NOTE(review): dividing by ``bindex.shape[1]`` yields per-sample averages
    only if that dimension equals the total number of samples, e.g. when
    ``bindex`` is an (nbatch, N) boolean mask -- confirm against
    ``getdata.makeBatchIndex``.
    """

    n_batches, n_total = bindex.shape
    ce_sums = np.zeros(n_batches)
    hit_counts = np.zeros(n_batches, dtype = int)

    for k, selector in enumerate(bindex):
        ce, hits = nn.test(X[selector], lab[selector])
        ce_sums[k] = np.sum(ce)
        hit_counts[k] = np.sum(hits)

    mean_ce = np.sum(ce_sums) / n_total
    accuracy = np.sum(hit_counts) / n_total
    return mean_ce, accuracy


if __name__ == '__main__':

    # Training script: trains CNN_BN on the training split, reports loss and
    # accuracy on the training and validation splits every 500 iterations,
    # and finally stores all parameters as a compressed .npz file.

    dirResult = 'result_ex170916'

    ###  /gpu:0  GTX 1080,  /gpu:1  Tesla K20c  on tortoise3
    #
    # The optional single argument selects the device; default is the CPU.
    if len(sys.argv) == 1:
        dev = '/cpu:0'
    elif len(sys.argv) == 2:
        dev = sys.argv[1]
    else:
        sys.exit('usage: %s [device]' % sys.argv[0])

    ### preparing the output location
    #
    # Done before training so that a missing directory cannot discard the
    # result at the very end.  basename() keeps the file inside dirResult
    # even when the script is started through a relative or absolute path.
    scriptName = os.path.splitext(os.path.basename(sys.argv[0]))[0]
    fnParams = os.path.join(dirResult, scriptName + '-params.npz')
    if not os.path.isdir(dirResult):
        os.makedirs(dirResult)

    ### reading and preparing the training data
    #
    data = getdata.Data('../150117-mnist', nV = 10000)
    K = data.nclass
    XL, labL = data.getData('L')
    XV, labV = data.getData('V')
    Xshape = [data.nrow, data.ncol, 1]
    XL = XL.reshape([-1] + Xshape)
    XV = XV.reshape([-1] + Xshape)
    NL = XL.shape[0]
    NV = XV.shape[0]

    ### initializing the network
    #
    eta = 0.1
    mu = 0.9
    optimizer = tf.train.MomentumOptimizer(eta, mu)
    #optimizer = tf.train.AdamOptimizer(learning_rate = 1e-4)

    g = tf.Graph()
    with g.as_default():
        with g.device(dev):
            nn = cnn.CNN_BN(Xshape, K, optimizer = optimizer)

    nn.init()

    ### training
    #
    batchsize = 128
    bindexL = getdata.makeBatchIndex(NL, batchsize)
    nbatchL = bindexL.shape[0]
    bindexV = getdata.makeBatchIndex(NV, batchsize)

    print('#     ceL accL       ceV accV')

    nitr = 10000
    nd = 0   # number of training samples processed so far
    for i in range(nitr):
        #if (i < 500 and i % 100 == 0) or (i % 500 == 0):
        if (i % 500 == 0):
            ceL, accL = evaluate(nn, XL, labL, bindexL)
            ceV, accV = evaluate(nn, XV, labV, bindexV)
            print(i, nd, ceL, accL*100, ceV, accV*100)

        # one update on a randomly chosen batch
        ib = np.random.randint(0, nbatchL)
        ii = bindexL[ib, :]
        nn.train(XL[ii], labL[ii])
        nd += XL[ii].shape[0]


    ### saving the learned parameters
    #
    params_dict = nn.getWeight()
    np.savez_compressed(fnParams, **params_dict)


## ex170916cnnT.py
import tensorflow as tf
import numpy as np
import os
import sys

import getdata170817 as getdata
import cnn170916 as cnn
import ex170916cnnL as ex


if __name__ == '__main__':

    # Test script: loads the parameters saved by ex170916cnnL.py and reports
    # cross entropy and accuracy on the training, validation and test splits.

    dirResult = 'result_ex170916'

    ### reading and preparing the data
    #
    data = getdata.Data('../150117-mnist', nV = 10000)
    Xshape = [data.nrow, data.ncol, 1]
    K = data.nclass
    XL, labL = data.getData('L')
    XV, labV = data.getData('V')
    XT, labT = data.getData('T')
    XL = XL.reshape([-1] + Xshape)
    XV = XV.reshape([-1] + Xshape)
    XT = XT.reshape([-1] + Xshape)
    NL = XL.shape[0]
    NV = XV.shape[0]
    NT = XT.shape[0]


    ### initializing the network
    #
    # No optimizer is passed: the network is only evaluated here.
    with tf.Graph().as_default():
        nn = cnn.CNN_BN(Xshape, K)

    # The path is derived from dirResult, the directory the training script
    # writes to, instead of repeating the directory name in a literal.
    fnParams = os.path.join(dirResult, 'ex170916cnnL-params.npz')
    with np.load(fnParams) as savedParams:
        nn.setWeight(savedParams)


    ### test
    #
    batchsize = 128
    bindexL = getdata.makeBatchIndex(NL, batchsize)
    bindexV = getdata.makeBatchIndex(NV, batchsize)
    bindexT = getdata.makeBatchIndex(NT, batchsize)
    ceL, accL = ex.evaluate(nn, XL, labL, bindexL)
    ceV, accV = ex.evaluate(nn, XV, labV, bindexV)
    ceT, accT = ex.evaluate(nn, XT, labT, bindexT)
    print('# ceL accL       ceV accV       ceT accT')
    print(ceL, accL*100, ceV, accV*100, ceT, accT*100)


## ex170916mlpL.py
import tensorflow as tf
import numpy as np
import os
import sys

import getdata170817 as getdata
import mlp170916 as mlp


def evaluate(nn, X, lab, bindex):
    """Evaluate ``nn`` batch by batch and return ``(cross entropy, accuracy)``.

    Each row of ``bindex`` is used to index ``X`` and ``lab``; the selected
    samples are passed to ``nn.test``, which must return a pair
    ``(per-sample cross entropy, per-sample correctness)``.  Both quantities
    are summed over all batches and divided by ``bindex.shape[1]``.

    NOTE(review): dividing by ``bindex.shape[1]`` yields per-sample averages
    only if that dimension equals the total number of samples, e.g. when
    ``bindex`` is an (nbatch, N) boolean mask -- confirm against
    ``getdata.makeBatchIndex``.
    """

    n_batches, n_total = bindex.shape
    ce_sums = np.zeros(n_batches)
    hit_counts = np.zeros(n_batches, dtype = int)

    for k, selector in enumerate(bindex):
        ce, hits = nn.test(X[selector], lab[selector])
        ce_sums[k] = np.sum(ce)
        hit_counts[k] = np.sum(hits)

    mean_ce = np.sum(ce_sums) / n_total
    accuracy = np.sum(hit_counts) / n_total
    return mean_ce, accuracy


if __name__ == '__main__':

    # Training script: trains MLP_BN on the training split, reports loss and
    # accuracy on the training and validation splits every 500 iterations,
    # and finally stores all parameters as a compressed .npz file.

    dirResult = 'result_ex170916'

    ###  /gpu:0  GTX 1080,  /gpu:1  Tesla K20c  on tortoise3
    #
    # The optional single argument selects the device; default is the CPU.
    if len(sys.argv) == 1:
        dev = '/cpu:0'
    elif len(sys.argv) == 2:
        dev = sys.argv[1]
    else:
        sys.exit('usage: %s [device]' % sys.argv[0])

    ### preparing the output location
    #
    # Done before training so that a missing directory cannot discard the
    # result at the very end.  basename() keeps the file inside dirResult
    # even when the script is started through a relative or absolute path.
    scriptName = os.path.splitext(os.path.basename(sys.argv[0]))[0]
    fnParams = os.path.join(dirResult, scriptName + '-params.npz')
    if not os.path.isdir(dirResult):
        os.makedirs(dirResult)

    ### reading and preparing the training data
    #
    data = getdata.Data('../150117-mnist', nV = 10000)
    D = data.nrow * data.ncol
    K = data.nclass
    XL, labL = data.getData('L')
    XV, labV = data.getData('V')
    NL = XL.shape[0]
    NV = XV.shape[0]

    ### initializing the network
    #
    eta = 0.1
    mu = 0.9
    optimizer = tf.train.MomentumOptimizer(eta, mu)
    #optimizer = tf.train.AdamOptimizer(learning_rate = 1e-4)

    g = tf.Graph()
    with g.as_default():
        with g.device(dev):
            nn = mlp.MLP_BN(D, K, optimizer = optimizer)

    nn.init()

    ### training
    #
    batchsize = 128
    bindexL = getdata.makeBatchIndex(NL, batchsize)
    nbatchL = bindexL.shape[0]
    bindexV = getdata.makeBatchIndex(NV, batchsize)

    print('#     ceL accL       ceV accV')

    nitr = 10000
    nd = 0   # number of training samples processed so far
    for i in range(nitr):
        #if (i < 500 and i % 100 == 0) or (i % 500 == 0):
        if (i % 500 == 0):
            ceL, accL = evaluate(nn, XL, labL, bindexL)
            ceV, accV = evaluate(nn, XV, labV, bindexV)
            print(i, nd, ceL, accL*100, ceV, accV*100)

        # one update on a randomly chosen batch
        ib = np.random.randint(0, nbatchL)
        ii = bindexL[ib, :]
        nn.train(XL[ii], labL[ii])
        nd += XL[ii].shape[0]


    ### saving the learned parameters
    #
    params_dict = nn.getWeight()
    np.savez_compressed(fnParams, **params_dict)


## ex170916mlpT.py
import tensorflow as tf
import numpy as np
import os
import sys

import getdata170817 as getdata
import mlp170916 as mlp
import ex170916mlpL as ex


if __name__ == '__main__':

    # Test script: loads the parameters saved by ex170916mlpL.py and reports
    # cross entropy and accuracy on the training, validation and test splits.

    dirResult = 'result_ex170916'

    ### reading and preparing the data
    #
    data = getdata.Data('../150117-mnist', nV = 10000)
    D = data.nrow * data.ncol
    K = data.nclass
    XL, labL = data.getData('L')
    XV, labV = data.getData('V')
    XT, labT = data.getData('T')
    NL = XL.shape[0]
    NV = XV.shape[0]
    NT = XT.shape[0]


    ### initializing the network
    #
    # No optimizer is passed: the network is only evaluated here.
    with tf.Graph().as_default():
        nn = mlp.MLP_BN(D, K)

    # The path is derived from dirResult, the directory the training script
    # writes to, instead of repeating the directory name in a literal.
    fnParams = os.path.join(dirResult, 'ex170916mlpL-params.npz')
    with np.load(fnParams) as savedParams:
        nn.setWeight(savedParams)


    ### test
    #
    batchsize = 128
    bindexL = getdata.makeBatchIndex(NL, batchsize)
    bindexV = getdata.makeBatchIndex(NV, batchsize)
    bindexT = getdata.makeBatchIndex(NT, batchsize)
    ceL, accL = ex.evaluate(nn, XL, labL, bindexL)
    ceV, accV = ex.evaluate(nn, XV, labV, bindexV)
    ceT, accT = ex.evaluate(nn, XT, labT, bindexT)
    print('# ceL accL       ceV accV       ceT accT')
    print(ceL, accL*100, ceV, accV*100, ceT, accT*100)


## mlp170916.py
import tensorflow as tf
import tensorflow.contrib as tfc
import numpy as np


class MLP_BN(object):
  """Multi-layer perceptron with a normalization step after each hidden layer.

  Layer stack built by ``__init__``:
  fully connected (1024) -> normalize -> ReLU ->
  fully connected (1024) -> normalize -> ReLU -> linear (K) -> softmax.

  NOTE(review): the ``mean`` / ``var`` tensors handed to
  ``tf.nn.batch_normalization`` are plain variables initialised to 0 and 1;
  no per-batch moments are computed anywhere in this class.  They are created
  with the ``tf.get_variable`` defaults, so presumably the optimizer updates
  them together with the weights -- confirm that this is intended.

  Every variable is registered in ``self.params`` under its TensorFlow name
  (``W.name`` etc.) so that ``getWeight`` / ``setWeight`` can round-trip them.
  """

  # Epsilon passed to tf.nn.batch_normalization.
  _BN_EPS = 0.001

  # Number of units of the two hidden layers.
  _NFC1 = 1024
  _NFC2 = 1024

  def __init__(self, Xdim, K, optimizer = None):
    """Build the computation graph in the current default graph and open a session.

    Args:
      Xdim: dimension of one input vector; the input placeholder has shape
        ``[None, Xdim]``.
      K: number of units of the final (logit) layer.
      optimizer: object providing ``minimize(cost)``.  When ``None`` no
        training op is built and ``train`` raises ``RuntimeError``.
    """

    self.X = tf.placeholder(tf.float32, shape = [None, Xdim])
    self.Xdim = Xdim
    self.params = dict()

    ### fc1
    #
    W, mean, var = self._layer_variables('fc1', [Xdim, self._NFC1])
    fc1 = self._normalize_relu(tf.matmul(self.X, W), mean, var)

    ### fc2
    #
    W, mean, var = self._layer_variables('fc2', [self._NFC1, self._NFC2])
    fc2 = self._normalize_relu(tf.matmul(fc1, W), mean, var)

    ### logit
    #
    with tf.variable_scope('logit'):
      W = tf.get_variable('W', shape = [self._NFC2, K], initializer = tfc.layers.xavier_initializer())
      b = tf.get_variable('b', shape = K, initializer = tf.zeros_initializer)

    logit = tf.matmul(fc2, W) + b
    self.params[W.name] = W
    self.params[b.name] = b

    ### softmax
    #
    self.Y = logit
    self.Z = tf.nn.softmax(logit)

    ### definition for output computation
    #
    self.cg_output = self.Z

    ### definition for cost
    #
    # cg_test deliberately exposes the per-sample cross entropy and the
    # per-sample correctness flags (not their means) so that callers can
    # aggregate over batches themselves.
    self.label = tf.placeholder(tf.int64, shape = [None])
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels = self.label, logits = self.Y)
    cost = tf.reduce_mean(cross_entropy)
    correct_prediction = tf.equal(tf.argmax(self.Y, 1), self.label)
    self.cg_test = (cross_entropy, correct_prediction)

    ### definition for training
    #
    # cg_train always exists; it stays None when no optimizer was supplied.
    self.optimizer = optimizer
    self.cg_train = None
    if optimizer is not None:
      self.cg_train = self.optimizer.minimize(cost)

    ### definition for parameter initialization
    #
    # Created after minimize() so that variables the optimizer may have
    # added are covered by the initializer as well.
    self.cg_init = tf.global_variables_initializer()

    ### starting the session
    #
    self.sess = tf.InteractiveSession()


  def _layer_variables(self, scope, Wshape):
    """Create ``W``, ``mean`` and ``var`` under variable scope *scope* and register them in ``self.params``."""

    with tf.variable_scope(scope):
      W = tf.get_variable('W', shape = Wshape, initializer = tfc.layers.xavier_initializer())
      mean = tf.get_variable('mean', shape = Wshape[-1], initializer = tf.zeros_initializer)
      var = tf.get_variable('var', shape = Wshape[-1], initializer = tf.ones_initializer)

    for v in (W, mean, var):
      self.params[v.name] = v
    return W, mean, var


  def _normalize_relu(self, WX, mean, var):
    """Apply ``tf.nn.batch_normalization`` (no offset, no scale) followed by ReLU."""

    Y = tf.nn.batch_normalization(WX, mean, var, None, None, self._BN_EPS)
    return tf.nn.relu(Y)


  def init(self):
    """Run the variable initializer and return the result of ``sess.run``."""

    rv = self.sess.run(self.cg_init)
    return rv


  def output(self, X):
    """Return the softmax output of the network for the input batch ``X``."""

    d = {self.X: X}
    rv = self.sess.run(self.cg_output, feed_dict = d)
    return rv


  def train(self, X, lab):
    """Run one optimizer step on the batch ``(X, lab)``.

    Raises:
      RuntimeError: if the network was constructed without an optimizer.
    """

    if self.cg_train is None:
      raise RuntimeError('this network was constructed without an optimizer; train() is unavailable')

    d = {self.X: X, self.label: lab}
    rv = self.sess.run(self.cg_train, feed_dict = d)
    return rv


  def test(self, X, lab):
    """Return ``(cross_entropy, correct_prediction)`` evaluated on the batch ``(X, lab)``."""

    d = {self.X: X, self.label: lab}
    rv = self.sess.run(self.cg_test, feed_dict = d)
    return rv


  def getWeight(self):
    """Return a dict mapping each registered variable name to its current value."""

    return self.sess.run(self.params)


  def setWeight(self, vals_dict):
    """Assign the values in ``vals_dict`` to the variables of the same name.

    ``vals_dict`` must provide ``keys()`` and item access; every key has to
    be a name registered in ``self.params`` (otherwise ``KeyError``).
    Each call creates fresh ``tf.assign`` ops, so it is meant for occasional
    use such as loading saved parameters, not for use inside a loop.
    """

    assign_ops = [tf.assign(self.params[k], vals_dict[k]) for k in vals_dict.keys()]
    self.sess.run(assign_ops)


if __name__ == '__main__':

  # Smoke test: build the network inside a fresh graph and initialize its
  # variables.  The input dimension is 28 * 28 = 784, matching the 28x28
  # input used by the CNN smoke test (the previous literal 768 did not).
  with tf.Graph().as_default():
    mlp = MLP_BN(28 * 28, 10)

  mlp.init()
	import tensorflow as tf
	import tensorflow.contrib as tfc
	import numpy as np


	class CNN_BN(object):
	  """Convolutional classifier with a normalization step after each hidden layer.

	  Layer stack built by ``__init__``:
	  conv 5x5 / stride 2 (32 ch) -> normalize -> ReLU -> max-pool 2x2 / stride 2 ->
	  conv 5x5 / stride 2 (64 ch) -> normalize -> ReLU -> max-pool 2x2 / stride 2 ->
	  flatten -> fully connected (1024) -> normalize -> ReLU -> linear (K) -> softmax.

	  NOTE(review): the ``mean`` / ``var`` tensors handed to
	  ``tf.nn.batch_normalization`` are plain variables initialised to 0 and 1;
	  no per-batch moments are computed anywhere in this class.  They are created
	  with the ``tf.get_variable`` defaults, so presumably the optimizer updates
	  them together with the weights -- confirm that this is intended.

	  Every variable is registered in ``self.params`` under its TensorFlow name
	  (``W.name`` etc.) so that ``getWeight`` / ``setWeight`` can round-trip them.
	  """

	  # Epsilon passed to tf.nn.batch_normalization.
	  _BN_EPS = 0.001

	  def __init__(self, Xshape, K, optimizer = None):
	    """Build the computation graph in the current default graph and open a session.

	    Args:
	      Xshape: list giving the shape of one input sample; it is concatenated
	        to ``[None]`` to form the placeholder shape, so it must be a list.
	        The first convolution kernel is ``[5, 5, 1, 32]``, i.e. it expects
	        one input channel.
	      K: number of units of the final (logit) layer.
	      optimizer: object providing ``minimize(cost)``.  When ``None`` no
	        training op is built and ``train`` raises ``RuntimeError``.
	    """

	    self.X = tf.placeholder(tf.float32, shape = [None] + Xshape)
	    self.Xshape = Xshape
	    self.params = dict()

	    ### conv1 -> pool1
	    #
	    W, mean, var = self._layer_variables('conv1', [5, 5, 1, 32], tfc.layers.xavier_initializer_conv2d())
	    WX = tf.nn.conv2d(self.X, W, [1, 2, 2, 1], padding = 'SAME')
	    conv1 = self._normalize_relu(WX, mean, var)
	    pool1 = tf.nn.max_pool(conv1, [1, 2, 2, 1], [1, 2, 2, 1], padding = 'SAME')

	    ### conv2 -> pool2
	    #
	    W, mean, var = self._layer_variables('conv2', [5, 5, 32, 64], tfc.layers.xavier_initializer_conv2d())
	    WX = tf.nn.conv2d(pool1, W, [1, 2, 2, 1], padding = 'SAME')
	    conv2 = self._normalize_relu(WX, mean, var)
	    pool2 = tf.nn.max_pool(conv2, [1, 2, 2, 1], [1, 2, 2, 1], padding = 'SAME')

	    ### fc
	    #
	    pool2_flat = tfc.layers.flatten(pool2)
	    W, mean, var = self._layer_variables('fc', [pool2_flat.shape[1], 1024], tfc.layers.xavier_initializer())
	    WX = tf.matmul(pool2_flat, W)
	    fc = self._normalize_relu(WX, mean, var)

	    ### logit
	    #
	    with tf.variable_scope('logit'):
	      W = tf.get_variable('W', shape = [1024, K], initializer = tfc.layers.xavier_initializer())
	      b = tf.get_variable('b', shape = K, initializer = tf.zeros_initializer)

	    logit = tf.matmul(fc, W) + b
	    self.params[W.name] = W
	    self.params[b.name] = b

	    ### softmax
	    #
	    self.Y = logit
	    self.Z = tf.nn.softmax(logit)

	    ### definition for output computation
	    #
	    self.cg_output = self.Z

	    ### definition for cost
	    #
	    # cg_test deliberately exposes the per-sample cross entropy and the
	    # per-sample correctness flags (not their means) so that callers can
	    # aggregate over batches themselves.
	    self.label = tf.placeholder(tf.int64, shape = [None])
	    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels = self.label, logits = self.Y)
	    cost = tf.reduce_mean(cross_entropy)
	    correct_prediction = tf.equal(tf.argmax(self.Y, 1), self.label)
	    self.cg_test = (cross_entropy, correct_prediction)

	    ### definition for training
	    #
	    # cg_train always exists; it stays None when no optimizer was supplied.
	    self.optimizer = optimizer
	    self.cg_train = None
	    if optimizer is not None:
	      self.cg_train = self.optimizer.minimize(cost)

	    ### definition for parameter initialization
	    #
	    # Created after minimize() so that variables the optimizer may have
	    # added are covered by the initializer as well.
	    self.cg_init = tf.global_variables_initializer()

	    ### starting the session
	    #
	    self.sess = tf.InteractiveSession()


	  def _layer_variables(self, scope, Wshape, initializer):
	    """Create ``W``, ``mean`` and ``var`` under variable scope *scope* and register them in ``self.params``."""

	    with tf.variable_scope(scope):
	      W = tf.get_variable('W', shape = Wshape, initializer = initializer)
	      mean = tf.get_variable('mean', shape = Wshape[-1], initializer = tf.zeros_initializer)
	      var = tf.get_variable('var', shape = Wshape[-1], initializer = tf.ones_initializer)

	    for v in (W, mean, var):
	      self.params[v.name] = v
	    return W, mean, var


	  def _normalize_relu(self, WX, mean, var):
	    """Apply ``tf.nn.batch_normalization`` (no offset, no scale) followed by ReLU."""

	    Y = tf.nn.batch_normalization(WX, mean, var, None, None, self._BN_EPS)
	    return tf.nn.relu(Y)


	  def init(self):
	    """Run the variable initializer and return the result of ``sess.run``."""

	    rv = self.sess.run(self.cg_init)
	    return rv


	  def output(self, X):
	    """Return the softmax output of the network for the input batch ``X``."""

	    d = {self.X: X}
	    rv = self.sess.run(self.cg_output, feed_dict = d)
	    return rv


	  def train(self, X, lab):
	    """Run one optimizer step on the batch ``(X, lab)``.

	    Raises:
	      RuntimeError: if the network was constructed without an optimizer.
	    """

	    if self.cg_train is None:
	      raise RuntimeError('this network was constructed without an optimizer; train() is unavailable')

	    d = {self.X: X, self.label: lab}
	    rv = self.sess.run(self.cg_train, feed_dict = d)
	    return rv


	  def test(self, X, lab):
	    """Return ``(cross_entropy, correct_prediction)`` evaluated on the batch ``(X, lab)``."""

	    d = {self.X: X, self.label: lab}
	    rv = self.sess.run(self.cg_test, feed_dict = d)
	    return rv


	  def getWeight(self):
	    """Return a dict mapping each registered variable name to its current value."""

	    return self.sess.run(self.params)


	  def setWeight(self, vals_dict):
	    """Assign the values in ``vals_dict`` to the variables of the same name.

	    ``vals_dict`` must provide ``keys()`` and item access; every key has to
	    be a name registered in ``self.params`` (otherwise ``KeyError``).
	    Each call creates fresh ``tf.assign`` ops, so it is meant for occasional
	    use such as loading saved parameters, not for use inside a loop.
	    """

	    assign_ops = [tf.assign(self.params[k], vals_dict[k]) for k in vals_dict.keys()]
	    self.sess.run(assign_ops)


	if __name__ == '__main__':

	  # Smoke test: build the network for 28x28 single-channel input with 10
	  # output units inside a fresh graph, then initialize its variables.
	  # (Block structure restored; the pasted copy had lost its indentation.)
	  with tf.Graph().as_default():
	    cnn = CNN_BN([28, 28, 1], 10)

	  cnn.init()
	import tensorflow as tf
	import numpy as np
	import os
	import sys

	import getdata170817 as getdata
	import cnn170916 as cnn


	def evaluate(nn, X, lab, bindex):
	    """Evaluate ``nn`` batch by batch and return ``(cross entropy, accuracy)``.

	    Each row of ``bindex`` is used to index ``X`` and ``lab``; the selected
	    samples are passed to ``nn.test``, which must return a pair
	    ``(per-sample cross entropy, per-sample correctness)``.  Both quantities
	    are summed over all batches and divided by ``bindex.shape[1]``.

	    NOTE(review): dividing by ``bindex.shape[1]`` yields per-sample averages
	    only if that dimension equals the total number of samples, e.g. when
	    ``bindex`` is an (nbatch, N) boolean mask -- confirm against
	    ``getdata.makeBatchIndex``.
	    """

	    nbatch, ndat = bindex.shape
	    cross_entropy = np.empty(nbatch)
	    correct_prediction = np.empty(nbatch, dtype = int)
	    for ib in range(nbatch):
	        ii = bindex[ib, :]
	        ce1, cp1 = nn.test(X[ii], lab[ii])
	        cross_entropy[ib] = np.sum(ce1)
	        correct_prediction[ib] = np.sum(cp1)

	    return np.sum(cross_entropy)/ndat, np.sum(correct_prediction)/ndat


	if __name__ == '__main__':

	    # Training script: trains CNN_BN on the training split, reports loss and
	    # accuracy on the training and validation splits every 500 iterations,
	    # and finally stores all parameters as a compressed .npz file.
	    # (Block structure restored; the pasted copy had lost its indentation.)

	    dirResult = 'result_ex170916'

	    ### /gpu:0 GTX 1080, /gpu:1 Tesla K20c on tortoise3
	    #
	    # The optional single argument selects the device; default is the CPU.
	    if len(sys.argv) == 1:
	        dev = '/cpu:0'
	    elif len(sys.argv) == 2:
	        dev = sys.argv[1]
	    else:
	        sys.exit('usage: %s [device]' % sys.argv[0])

	    ### preparing the output location
	    #
	    # Done before training so that a missing directory cannot discard the
	    # result at the very end.  basename() keeps the file inside dirResult
	    # even when the script is started through a relative or absolute path.
	    scriptName = os.path.splitext(os.path.basename(sys.argv[0]))[0]
	    fnParams = os.path.join(dirResult, scriptName + '-params.npz')
	    if not os.path.isdir(dirResult):
	        os.makedirs(dirResult)

	    ### reading and preparing the training data
	    #
	    data = getdata.Data('../150117-mnist', nV = 10000)
	    K = data.nclass
	    XL, labL = data.getData('L')
	    XV, labV = data.getData('V')
	    Xshape = [data.nrow, data.ncol, 1]
	    XL = XL.reshape([-1] + Xshape)
	    XV = XV.reshape([-1] + Xshape)
	    NL = XL.shape[0]
	    NV = XV.shape[0]

	    ### initializing the network
	    #
	    eta = 0.1
	    mu = 0.9
	    optimizer = tf.train.MomentumOptimizer(eta, mu)
	    #optimizer = tf.train.AdamOptimizer(learning_rate = 1e-4)

	    g = tf.Graph()
	    with g.as_default():
	        with g.device(dev):
	            nn = cnn.CNN_BN(Xshape, K, optimizer = optimizer)

	    nn.init()

	    ### training
	    #
	    batchsize = 128
	    bindexL = getdata.makeBatchIndex(NL, batchsize)
	    nbatchL = bindexL.shape[0]
	    bindexV = getdata.makeBatchIndex(NV, batchsize)

	    print('# ceL accL ceV accV')

	    nitr = 10000
	    nd = 0   # number of training samples processed so far
	    for i in range(nitr):
	        #if (i < 500 and i % 100 == 0) or (i % 500 == 0):
	        if (i % 500 == 0):
	            ceL, accL = evaluate(nn, XL, labL, bindexL)
	            ceV, accV = evaluate(nn, XV, labV, bindexV)
	            # accuracies are printed as percentages; the '*' operators had
	            # been lost in the pasted copy ("accL100" is an undefined name)
	            print(i, nd, ceL, accL*100, ceV, accV*100)

	        # one update on a randomly chosen batch
	        ib = np.random.randint(0, nbatchL)
	        ii = bindexL[ib, :]
	        nn.train(XL[ii], labL[ii])
	        nd += XL[ii].shape[0]


	    ### saving the learned parameters
	    #
	    params_dict = nn.getWeight()
	    np.savez_compressed(fnParams, **params_dict)