Created February 18, 2019
Don't Decay the Learning Rate, Increase the Batch Size
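A CIFAR-10 experiment reproducing the idea of Smith et al. (2018), "Don't Decay the Learning Rate, Increase the Batch Size": a baseline run that decays the learning rate in steps is compared against a run that instead increases the batch size (128 → 640 → 3200 → 16000) at the same epochs, plus two runs that use a larger batch (512) combined with either increased momentum (0.975) or an increased initial learning rate (0.5).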
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import History, LearningRateScheduler
from tensorflow.keras.optimizers import SGD
from tensorflow.contrib.tpu.python.tpu import keras_support  # TF 1.x only; tf.contrib was removed in TF 2.x
import tensorflow.keras.backend as K
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.utils import to_categorical
import pickle, os
def create_block(input, ch, reps):
    # Stack of `reps` Conv-BN-ReLU layers with `ch` output channels
    x = input
    for i in range(reps):
        x = layers.Conv2D(ch, 3, padding="same")(x)
        x = layers.BatchNormalization()(x)
        x = layers.Activation("relu")(x)
    return x
def create_model():
    # VGG-style ConvNet for 32x32x3 CIFAR-10 inputs
    input = layers.Input((32,32,3))
    x = create_block(input, 64, 3)
    x = layers.AveragePooling2D(2)(x)
    x = create_block(x, 128, 3)
    x = layers.AveragePooling2D(2)(x)
    x = create_block(x, 256, 3)
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(10, activation="softmax")(x)
    return Model(input, x)
def step_decay(epoch):
    # Divide the learning rate by 5 at epochs 100, 150 and 200 (cumulative)
    x = 0.1
    if epoch >= 100: x /= 5.0
    if epoch >= 150: x /= 5.0
    if epoch >= 200: x /= 5.0
    return x
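# The batch-size counterpart of step_decay, shown only as an illustration
# (this hypothetical helper is NOT used by the script): mode 1 below
# realizes the same schedule by splitting training into phases of
# 128 -> 640 -> 3200 -> 16000, i.e. a x5 batch-size increase wherever
# step_decay would divide the learning rate by 5.
def step_batch_size(epoch):
    b = 128
    if epoch >= 100: b *= 5
    if epoch >= 150: b *= 5
    if epoch >= 200: b *= 5
    return b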
def train_with_some_batchsize(model, data, batch_size, nb_epoch, decay):
    X_train, y_train, X_test, y_test = data
    train_gen = ImageDataGenerator(rescale=1.0/255.0, horizontal_flip=True,
                                   width_shift_range=4.0/32.0, height_shift_range=4.0/32.0).flow(X_train, y_train, batch_size)
    test_gen = ImageDataGenerator(rescale=1.0/255.0).flow(X_test, y_test, 128)  # keep the test generator's batch size fixed
    hist = History()
    model.fit_generator(train_gen, steps_per_epoch=X_train.shape[0]//batch_size,
                        validation_data=test_gen, validation_steps=X_test.shape[0]//128,
                        epochs=nb_epoch, callbacks=[hist, decay])
    return hist.history
def train(train_mode):
    # train_mode
    # 0 = baseline:          batch_size=128, lr=0.1, momentum=0.9   -> decay lr
    # 1 = increase batch:    batch_size=128, lr=0.1, momentum=0.9   -> increase batch_size
    # 2 = increase momentum: batch_size=512, lr=0.1, momentum=0.975 -> decay lr
    # 3 = increase init lr:  batch_size=512, lr=0.5, momentum=0.9   -> decay lr
    # Modes 2 and 3 scale the batch size together with momentum / learning
    # rate so the SGD noise scale stays roughly constant, as in the paper.
    model = create_model()
    if train_mode <= 1:
        model.compile(SGD(0.1, 0.9), "categorical_crossentropy", ["acc"])
    elif train_mode == 2:
        model.compile(SGD(0.1, 0.975), "categorical_crossentropy", ["acc"])
    elif train_mode == 3:
        model.compile(SGD(0.5, 0.9), "categorical_crossentropy", ["acc"])

    (X_train, y_train), (X_test, y_test) = cifar10.load_data()
    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)
    data = (X_train, y_train, X_test, y_test)

    # Convert to a TPU model (works only in a TF 1.x Colab TPU runtime)
    tpu_grpc_url = "grpc://" + os.environ["COLAB_TPU_ADDR"]
    tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(tpu_grpc_url)
    strategy = keras_support.TPUDistributionStrategy(tpu_cluster_resolver)
    model = tf.contrib.tpu.keras_to_tpu_model(model, strategy=strategy)
    # Modes 0, 2 and 3 train in a single run with LR decay;
    # mode 1 splits training into phases with growing batch sizes
    histories = []
    decay = LearningRateScheduler(step_decay)
    same_lr = LearningRateScheduler(lambda epoch: 0.1)
    if train_mode == 0:
        histories.append(train_with_some_batchsize(model, data, 128, 250, decay))
    if train_mode == 1:
        histories.append(train_with_some_batchsize(model, data, 128, 100, same_lr))
        histories.append(train_with_some_batchsize(model, data, 640, 50, same_lr))
        histories.append(train_with_some_batchsize(model, data, 3200, 50, same_lr))
        histories.append(train_with_some_batchsize(model, data, 16000, 50, same_lr))
    if train_mode == 2:
        histories.append(train_with_some_batchsize(model, data, 512, 250, decay))
    if train_mode == 3:
        histories.append(train_with_some_batchsize(model, data, 512, 250, decay))

    # Merge the per-phase histories into one
    joined_history = histories[0]
    for i in range(1, len(histories)):
        for key, value in histories[i].items():
            joined_history[key] = joined_history[key] + value

    with open(f"increase_batchsize_mode_{train_mode}.dat", "wb") as fp:
        pickle.dump(joined_history, fp)
if __name__ == "__main__":
    tf.logging.set_verbosity(tf.logging.ERROR)
    K.clear_session()
    train(0)
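The pickled file holds the merged Keras history dict. A minimal sketch for inspecting it afterwards (the matplotlib plotting is an assumption, not part of the gist; "val_acc" is the key tf.keras 1.x uses for validation accuracy with the ["acc"] metric):

import pickle
import matplotlib.pyplot as plt

with open("increase_batchsize_mode_0.dat", "rb") as fp:
    history = pickle.load(fp)

plt.plot(history["val_acc"])
plt.xlabel("epoch")
plt.ylabel("validation accuracy")
plt.show()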
Oh, sorry! I forgot to mention: run this code in the Google Colab TPU environment.
May I ask how to solve this problem when running in PyCharm?
with open(f"increase_batchsize_mode_{train_mode}.dat", "wb") as fp:
raise KeyError(key) from None
KeyError: 'COLAB_TPU_ADDR'
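The KeyError comes from os.environ["COLAB_TPU_ADDR"] in train(): that environment variable only exists inside a Colab TPU runtime, not in PyCharm. A minimal guard, assuming you are fine with falling back to ordinary CPU/GPU Keras training outside Colab:

tpu_addr = os.environ.get("COLAB_TPU_ADDR")  # None outside a Colab TPU runtime
if tpu_addr is not None:
    tpu_grpc_url = "grpc://" + tpu_addr
    tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(tpu_grpc_url)
    strategy = keras_support.TPUDistributionStrategy(tpu_cluster_resolver)
    model = tf.contrib.tpu.keras_to_tpu_model(model, strategy=strategy)
# otherwise keep the plain Keras model and train on CPU/GPU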