# koshian2/case5.py

## case5.py
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import History, LearningRateScheduler
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.contrib.tpu.python.tpu import keras_support
from tensorflow.keras.optimizers import SGD
import tensorflow.keras.backend as K
from keras.regularizers import l2

import numpy as np
import os, json
from keras.datasets import cifar10
from keras.utils import to_categorical

def conv_bn_relu(input, ch):
    """Apply a 3x3 "same"-padded convolution, batch normalization and ReLU.

    Args:
        input: Tensor fed to the convolution.
        ch: Number of filters of the convolution.

    Returns:
        The tensor produced by the final ReLU activation.
    """
    stages = (
        layers.Conv2D(ch, 3, padding="same"),
        layers.BatchNormalization(),
        layers.Activation("relu"),
    )
    out = input
    for stage in stages:
        out = stage(out)
    return out

def create_network():
    """Build the convolutional classifier for 32x32x3 inputs.

    The model stacks three stages of three ``conv_bn_relu`` blocks with 64,
    128 and 256 filters. A 2x2 average pooling sits between consecutive
    stages; the last stage is followed by global average pooling and a
    10-way softmax ``Dense`` layer.

    Returns:
        An uncompiled ``Model`` mapping the input tensor to the softmax output.
    """
    inputs = layers.Input((32, 32, 3))
    x = inputs
    for stage, width in enumerate((64, 128, 256)):
        if stage > 0:
            # Downsample between stages (not before the first one).
            x = layers.AveragePooling2D(2)(x)
        for _ in range(3):
            x = conv_bn_relu(x, width)
    x = layers.GlobalAveragePooling2D()(x)
    outputs = layers.Dense(10, activation="softmax")(x)
    return Model(inputs, outputs)

def normal_generator(X, y, batch_size):
    """Endlessly yield shuffled, rescaled mini-batches of ``(X, y)``.

    Every pass draws a fresh random permutation of the samples and yields
    ``X.shape[0] // batch_size`` full batches; samples that do not fill a
    complete batch are left out of that pass.

    Args:
        X: Array of samples indexed along axis 0; values are divided by 255.0.
        y: Array of targets aligned with ``X`` along axis 0.
        batch_size: Number of samples per yielded batch.

    Yields:
        Tuples ``(X_batch, y_batch)`` where ``X_batch`` is float32.
    """
    n_samples = X.shape[0]
    n_batches = n_samples // batch_size
    while True:
        order = np.random.permutation(n_samples)
        for start in range(0, n_batches * batch_size, batch_size):
            picked = order[start:start + batch_size]
            scaled = X[picked] / 255.0
            yield scaled.astype(np.float32), y[picked]

def acc(y_true, y_pred):
    """Per-sample accuracy computed from the arg-max class of each tensor.

    Both tensors are reduced to class indices with ``argmax`` over the last
    axis, so a blended target is scored by its highest-weighted class.

    Args:
        y_true: Target tensor with classes on the last axis.
        y_pred: Prediction tensor with classes on the last axis.

    Returns:
        A "float" tensor that is 1 where the indices agree and 0 elsewhere.
    """
    matches = K.equal(K.argmax(y_true, axis=-1), K.argmax(y_pred, axis=-1))
    return K.cast(matches, "float")

def bclearning_generator(base_generator, batch_size, sample_steps, n_steps):
    """Yield batches of class-mixed samples built from ``base_generator``.

    For every source batch of ``sample_steps`` samples, each sample is blended
    ``batch_size // sample_steps`` times with a randomly chosen sample of a
    different class from the same source batch. The primary sample gets a
    weight in [0.5, 1.0]; images and targets are blended with the same weights.
    A sample with no different-class partner in its source batch is emitted
    unmixed instead of raising.

    Args:
        base_generator: Iterator yielding ``(images, onehots)`` pairs. Source
            batches whose first dimension is not ``sample_steps`` are skipped.
            Targets are decoded as one-hot rows. Images are presumably in the
            0-255 range, since the output is divided by 255.0 -- confirm
            against the caller.
        batch_size: Number of samples in every yielded batch; must be a
            multiple of ``sample_steps``.
        sample_steps: Expected number of samples in each source batch.
        n_steps: Number of batches produced per pass of the inner loop; the
            generator itself never terminates.

    Yields:
        Tuples ``(X_batch, y_batch)`` of float32 arrays with ``batch_size``
        rows each.
    """
    assert batch_size >= sample_steps, "batch_size must be >= sample_steps"
    assert batch_size % sample_steps == 0, \
        "batch_size must be a multiple of sample_steps"
    repeats = batch_size // sample_steps
    while True:
        for _ in range(n_steps):
            # Skip short source batches (e.g. the tail of an epoch).
            while True:
                current_images, current_onehots = next(base_generator)
                if (current_images.shape[0] == sample_steps
                        and current_onehots.shape[0] == sample_steps):
                    break
            # Decode one-hot rows into class indices.
            current_labels = np.sum(
                np.arange(current_onehots.shape[1]) * current_onehots, axis=-1)
            X_cache, y_cache = [], []
            for _ in range(repeats):
                for k in range(sample_steps):
                    diff_indices = np.where(current_labels != current_labels[k])[0]
                    if diff_indices.size == 0:
                        # No other class in this source batch (always true for
                        # sample_steps == 1): np.random.choice would raise on
                        # the empty pool, so pass the sample through unmixed.
                        X_cache.append(current_images[k])
                        y_cache.append(current_onehots[k])
                        continue
                    mix_ind = np.random.choice(diff_indices)
                    rnd = np.random.rand()
                    # Keep the primary image's weight >= 0.5 so that it stays
                    # the dominant one (avoids biasing the main image).
                    if rnd < 0.5:
                        rnd = 1.0 - rnd
                    mix_img = rnd * current_images[k] + (1.0-rnd) * current_images[mix_ind]
                    mix_onehot = rnd * current_onehots[k] + (1.0-rnd) * current_onehots[mix_ind]
                    X_cache.append(mix_img)
                    y_cache.append(mix_onehot)
            X_batch = np.asarray(X_cache, dtype=np.float32) / 255.0
            y_batch = np.asarray(y_cache, dtype=np.float32)
            yield X_batch, y_batch

def step_decay(epoch):
    """Return the learning rate for ``epoch`` under a stepwise schedule.

    The rate is 1e-3 before epoch 100, 2e-4 from epoch 100, 4e-5 from
    epoch 150 and 8e-6 from epoch 200.

    Args:
        epoch: Epoch index the rate is requested for.

    Returns:
        The learning rate as a float.
    """
    # Test the highest threshold first: checking ``epoch >= 100`` first made
    # the 150 and 200 steps unreachable.
    if epoch >= 200:
        return 8e-6
    if epoch >= 150:
        return 4e-5
    if epoch >= 100:
        return 2e-4
    return 1e-3

def train(use_bc, step_size):
    """Train the network on CIFAR-10 through a Colab TPU and save the history.

    Args:
        use_bc: When true, train on ``bclearning_generator`` batches with the
            KL-divergence loss and the custom ``acc`` metric; otherwise train
            on plain shuffled batches with categorical cross-entropy.
        step_size: Batch size of the augmented source batches (BC mode) and
            of the validation batches; it also sets the number of steps per
            epoch as ``n_samples // step_size``.

    Side effects:
        Reads the ``COLAB_TPU_ADDR`` environment variable and writes the
        recorded history to ``bc_learning_{use_bc}_{step_size}.json``.
    """
    (train_images, train_labels), (test_images, test_labels) = cifar10.load_data()
    train_onehots = to_categorical(train_labels)
    test_onehots = to_categorical(test_labels)
    n_train, n_test = train_images.shape[0], test_images.shape[0]
    batch_size = 128

    model = create_network()
    if use_bc:
        model.compile("adam", "kullback_leibler_divergence", [acc])
        augmenter = ImageDataGenerator(
            horizontal_flip=True,
            width_shift_range=4.0/32.0,
            height_shift_range=4.0/32.0,
        )
        source_batches = augmenter.flow(train_images, train_onehots, step_size)
        train_gen = bclearning_generator(
            source_batches, batch_size, step_size, n_train // step_size)
    else:
        model.compile(tf.train.AdamOptimizer(1e-3), "categorical_crossentropy", ["acc"])
        train_gen = normal_generator(train_images, train_onehots, batch_size)
    val_gen = normal_generator(test_images, test_onehots, step_size)

    # Convert the compiled Keras model into a TPU model at the Colab address.
    resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
        "grpc://"+os.environ["COLAB_TPU_ADDR"])
    strategy = keras_support.TPUDistributionStrategy(resolver)
    model = tf.contrib.tpu.keras_to_tpu_model(model, strategy=strategy)

    hist = History()
    callbacks = [hist, LearningRateScheduler(step_decay)]
    model.fit_generator(
        train_gen,
        steps_per_epoch=n_train // step_size,
        validation_data=val_gen,
        validation_steps=n_test // step_size,
        callbacks=callbacks,
        epochs=250,
    )

    with open(f"bc_learning_{use_bc}_{step_size}.json", "w") as fp:
        json.dump(hist.history, fp)


if __name__ == "__main__":
    # Reset the Keras backend session, then run the class-mixing experiment
    # with a step size of 128.
    K.clear_session()
    train(use_bc=True, step_size=128)
	import tensorflow as tf
	from tensorflow.keras import layers
	from tensorflow.keras.models import Model
	from tensorflow.keras.callbacks import History, LearningRateScheduler
	from tensorflow.keras.preprocessing.image import ImageDataGenerator
	from tensorflow.contrib.tpu.python.tpu import keras_support
	from tensorflow.keras.optimizers import SGD
	import tensorflow.keras.backend as K
	from keras.regularizers import l2

	import numpy as np
	import os, json
	from keras.datasets import cifar10
	from keras.utils import to_categorical

	def conv_bn_relu(input, ch):
	    """Apply a 3x3 "same"-padded convolution, batch normalization and ReLU.

	    Args:
	        input: Tensor fed to the convolution.
	        ch: Number of filters of the convolution.

	    Returns:
	        The tensor produced by the final ReLU activation.
	    """
	    x = layers.Conv2D(ch, 3, padding="same")(input)
	    x = layers.BatchNormalization()(x)
	    return layers.Activation("relu")(x)

	def create_network():
	    """Build the convolutional classifier for 32x32x3 inputs.

	    The model stacks three stages of three ``conv_bn_relu`` blocks with 64,
	    128 and 256 filters. A 2x2 average pooling sits between consecutive
	    stages; the last stage is followed by global average pooling and a
	    10-way softmax ``Dense`` layer.

	    Returns:
	        An uncompiled ``Model`` mapping the input tensor to the softmax output.
	    """
	    inputs = layers.Input((32, 32, 3))
	    x = inputs
	    for _ in range(3):
	        x = conv_bn_relu(x, 64)
	    x = layers.AveragePooling2D(2)(x)
	    for _ in range(3):
	        x = conv_bn_relu(x, 128)
	    x = layers.AveragePooling2D(2)(x)
	    for _ in range(3):
	        x = conv_bn_relu(x, 256)
	    x = layers.GlobalAveragePooling2D()(x)
	    x = layers.Dense(10, activation="softmax")(x)

	    return Model(inputs, x)

	def normal_generator(X, y, batch_size):
	    """Endlessly yield shuffled, rescaled mini-batches of ``(X, y)``.

	    Every pass draws a fresh random permutation of the samples and yields
	    ``X.shape[0] // batch_size`` full batches; samples that do not fill a
	    complete batch are left out of that pass.

	    Args:
	        X: Array of samples indexed along axis 0; values are divided by 255.0.
	        y: Array of targets aligned with ``X`` along axis 0.
	        batch_size: Number of samples per yielded batch.

	    Yields:
	        Tuples ``(X_batch, y_batch)`` where ``X_batch`` is float32.
	    """
	    while True:
	        indices = np.random.permutation(X.shape[0])
	        for i in range(X.shape[0] // batch_size):
	            # The slice bounds are products; the ``*`` operators had been
	            # stripped from this line.
	            current_indices = indices[i * batch_size:(i + 1) * batch_size]
	            X_batch = (X[current_indices] / 255.0).astype(np.float32)
	            y_batch = y[current_indices]
	            yield X_batch, y_batch

	def acc(y_true, y_pred):
	    """Per-sample accuracy computed from the arg-max class of each tensor.

	    Both tensors are reduced to class indices with ``argmax`` over the last
	    axis, so a blended target is scored by its highest-weighted class.

	    Args:
	        y_true: Target tensor with classes on the last axis.
	        y_pred: Prediction tensor with classes on the last axis.

	    Returns:
	        A "float" tensor that is 1 where the indices agree and 0 elsewhere.
	    """
	    true_label = K.argmax(y_true, axis=-1)
	    pred_label = K.argmax(y_pred, axis=-1)
	    return K.cast(K.equal(true_label, pred_label), "float")

	def bclearning_generator(base_generator, batch_size, sample_steps, n_steps):
	    """Yield batches of class-mixed samples built from ``base_generator``.

	    For every source batch of ``sample_steps`` samples, each sample is blended
	    ``batch_size // sample_steps`` times with a randomly chosen sample of a
	    different class from the same source batch. The primary sample gets a
	    weight in [0.5, 1.0]; images and targets are blended with the same weights.
	    A sample with no different-class partner in its source batch is emitted
	    unmixed instead of raising.

	    Args:
	        base_generator: Iterator yielding ``(images, onehots)`` pairs. Source
	            batches whose first dimension is not ``sample_steps`` are skipped.
	            Targets are decoded as one-hot rows. Images are presumably in the
	            0-255 range, since the output is divided by 255.0 -- confirm
	            against the caller.
	        batch_size: Number of samples in every yielded batch; must be a
	            multiple of ``sample_steps``.
	        sample_steps: Expected number of samples in each source batch.
	        n_steps: Number of batches produced per pass of the inner loop; the
	            generator itself never terminates.

	    Yields:
	        Tuples ``(X_batch, y_batch)`` of float32 arrays with ``batch_size``
	        rows each.
	    """
	    assert batch_size >= sample_steps, "batch_size must be >= sample_steps"
	    assert batch_size % sample_steps == 0, \
	        "batch_size must be a multiple of sample_steps"
	    repeats = batch_size // sample_steps
	    while True:
	        for _ in range(n_steps):
	            # Skip short source batches (e.g. the tail of an epoch).
	            while True:
	                current_images, current_onehots = next(base_generator)
	                if (current_images.shape[0] == sample_steps
	                        and current_onehots.shape[0] == sample_steps):
	                    break
	            # Decode one-hot rows into class indices.
	            current_labels = np.sum(
	                np.arange(current_onehots.shape[1]) * current_onehots, axis=-1)
	            X_cache, y_cache = [], []
	            for _ in range(repeats):
	                for k in range(sample_steps):
	                    diff_indices = np.where(current_labels != current_labels[k])[0]
	                    if diff_indices.size == 0:
	                        # No other class in this source batch (always true for
	                        # sample_steps == 1): np.random.choice would raise on
	                        # the empty pool, so pass the sample through unmixed.
	                        X_cache.append(current_images[k])
	                        y_cache.append(current_onehots[k])
	                        continue
	                    mix_ind = np.random.choice(diff_indices)
	                    rnd = np.random.rand()
	                    # Keep the primary image's weight >= 0.5 so that it stays
	                    # the dominant one (avoids biasing the main image).
	                    if rnd < 0.5:
	                        rnd = 1.0 - rnd
	                    mix_img = rnd * current_images[k] + (1.0-rnd) * current_images[mix_ind]
	                    mix_onehot = rnd * current_onehots[k] + (1.0-rnd) * current_onehots[mix_ind]
	                    X_cache.append(mix_img)
	                    y_cache.append(mix_onehot)
	            X_batch = np.asarray(X_cache, dtype=np.float32) / 255.0
	            y_batch = np.asarray(y_cache, dtype=np.float32)
	            yield X_batch, y_batch

	def step_decay(epoch):
	    """Return the learning rate for ``epoch`` under a stepwise schedule.

	    The rate is 1e-3 before epoch 100, 2e-4 from epoch 100, 4e-5 from
	    epoch 150 and 8e-6 from epoch 200.

	    Args:
	        epoch: Epoch index the rate is requested for.

	    Returns:
	        The learning rate as a float.
	    """
	    # Test the highest threshold first: checking ``epoch >= 100`` first made
	    # the 150 and 200 steps unreachable.
	    if epoch >= 200:
	        return 8e-6
	    if epoch >= 150:
	        return 4e-5
	    if epoch >= 100:
	        return 2e-4
	    return 1e-3

	def train(use_bc, step_size):
	    """Train the network on CIFAR-10 through a Colab TPU and save the history.

	    Args:
	        use_bc: When true, train on ``bclearning_generator`` batches with the
	            KL-divergence loss and the custom ``acc`` metric; otherwise train
	            on plain shuffled batches with categorical cross-entropy.
	        step_size: Batch size of the augmented source batches (BC mode) and
	            of the validation batches; it also sets the number of steps per
	            epoch as ``n_samples // step_size``.

	    Side effects:
	        Reads the ``COLAB_TPU_ADDR`` environment variable and writes the
	        recorded history to ``bc_learning_{use_bc}_{step_size}.json``.
	    """
	    (X_train, y_train), (X_test, y_test) = cifar10.load_data()
	    y_train = to_categorical(y_train)
	    y_test = to_categorical(y_test)

	    model = create_network()
	    if use_bc:
	        model.compile("adam", "kullback_leibler_divergence", [acc])
	    else:
	        model.compile(tf.train.AdamOptimizer(1e-3), "categorical_crossentropy", ["acc"])

	    batch_size = 128

	    if use_bc:
	        base_gen = ImageDataGenerator(horizontal_flip=True, width_shift_range=4.0/32.0,
	                                      height_shift_range=4.0/32.0).flow(X_train, y_train, step_size)
	        train_gen = bclearning_generator(base_gen, batch_size, step_size, X_train.shape[0]//step_size)
	    else:
	        train_gen = normal_generator(X_train, y_train, batch_size)
	    val_gen = normal_generator(X_test, y_test, step_size)

	    # Convert the compiled Keras model into a TPU model at the Colab address.
	    tpu_grpc_url = "grpc://"+os.environ["COLAB_TPU_ADDR"]
	    tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(tpu_grpc_url)
	    strategy = keras_support.TPUDistributionStrategy(tpu_cluster_resolver)
	    model = tf.contrib.tpu.keras_to_tpu_model(model, strategy=strategy)

	    hist = History()
	    scheduler = LearningRateScheduler(step_decay)

	    model.fit_generator(train_gen, steps_per_epoch=X_train.shape[0]//step_size,
	                        validation_data=val_gen, validation_steps=X_test.shape[0]//step_size,
	                        callbacks=[hist, scheduler], epochs=250)

	    history = hist.history
	    with open(f"bc_learning_{use_bc}_{step_size}.json", "w") as fp:
	        json.dump(history, fp)


	if __name__ == "__main__":
	    # Reset the Keras backend session, then run the class-mixing experiment
	    # with a step size of 128.
	    K.clear_session()
	    train(True, 128)