@koshian2
Created February 18, 2019 03:49
Don't Decay the Learning Rate, Increase the Batch Size
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import History, LearningRateScheduler
from tensorflow.keras.optimizers import SGD
from tensorflow.contrib.tpu.python.tpu import keras_support
import tensorflow.keras.backend as K
from keras.datasets import cifar10
from keras.utils import to_categorical
import pickle, os

def create_block(input, ch, reps):
    # reps x (3x3 Conv -> BatchNorm -> ReLU) with ch channels
    x = input
    for i in range(reps):
        x = layers.Conv2D(ch, 3, padding="same")(x)
        x = layers.BatchNormalization()(x)
        x = layers.Activation("relu")(x)
    return x

def create_model():
    # VGG-style network for CIFAR-10: three conv blocks separated by average pooling
    input = layers.Input((32, 32, 3))
    x = create_block(input, 64, 3)
    x = layers.AveragePooling2D(2)(x)
    x = create_block(x, 128, 3)
    x = layers.AveragePooling2D(2)(x)
    x = create_block(x, 256, 3)
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(10, activation="softmax")(x)
    return Model(input, x)

def step_decay(epoch):
    # Divide the learning rate by 5 at epochs 100, 150 and 200 (cumulative)
    x = 0.1
    if epoch >= 100: x /= 5.0
    if epoch >= 150: x /= 5.0
    if epoch >= 200: x /= 5.0
    return x

def train_with_some_batchsize(model, data, batch_size, nb_epoch, decay):
    X_train, y_train, X_test, y_test = data
    train_gen = ImageDataGenerator(rescale=1.0/255.0, horizontal_flip=True,
                                   width_shift_range=4.0/32.0,
                                   height_shift_range=4.0/32.0).flow(X_train, y_train, batch_size)
    # Keep the test generator's batch size fixed at 128
    test_gen = ImageDataGenerator(rescale=1.0/255.0).flow(X_test, y_test, 128)
    hist = History()
    model.fit_generator(train_gen, steps_per_epoch=X_train.shape[0]//batch_size,
                        validation_data=test_gen,
                        validation_steps=X_test.shape[0]//128,  # test generator batch size is 128
                        epochs=nb_epoch, callbacks=[hist, decay])
    return hist.history

def train(train_mode):
    # train_mode
    # 0 = baseline:          batch_size=128, lr=0.1, momentum=0.9   → decay lr
    # 1 = increase batch:    batch_size=128, lr=0.1, momentum=0.9   → increase batch_size
    # 2 = increase momentum: batch_size=512, lr=0.1, momentum=0.975 → decay lr
    # 3 = increase initial lr: batch_size=512, lr=0.5, momentum=0.9 → decay lr
    model = create_model()
    if train_mode <= 1:
        model.compile(SGD(0.1, 0.9), "categorical_crossentropy", ["acc"])
    elif train_mode == 2:
        model.compile(SGD(0.1, 0.975), "categorical_crossentropy", ["acc"])
    elif train_mode == 3:
        model.compile(SGD(0.5, 0.9), "categorical_crossentropy", ["acc"])

    (X_train, y_train), (X_test, y_test) = cifar10.load_data()
    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)
    data = (X_train, y_train, X_test, y_test)

    # Convert the model to a TPU model (requires the Colab TPU runtime)
    tpu_grpc_url = "grpc://" + os.environ["COLAB_TPU_ADDR"]
    tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(tpu_grpc_url)
    strategy = keras_support.TPUDistributionStrategy(tpu_cluster_resolver)
    model = tf.contrib.tpu.keras_to_tpu_model(model, strategy=strategy)

    # Modes 0, 2 and 3 train in a single run with LR decay; mode 1 splits
    # training into stages and increases the batch size instead
    histories = []
    decay = LearningRateScheduler(step_decay)
    same_lr = LearningRateScheduler(lambda epoch: 0.1)
    if train_mode == 0:
        histories.append(train_with_some_batchsize(model, data, 128, 250, decay))
    if train_mode == 1:
        histories.append(train_with_some_batchsize(model, data, 128, 100, same_lr))
        histories.append(train_with_some_batchsize(model, data, 640, 50, same_lr))
        histories.append(train_with_some_batchsize(model, data, 3200, 50, same_lr))
        histories.append(train_with_some_batchsize(model, data, 16000, 50, same_lr))
    if train_mode == 2:
        histories.append(train_with_some_batchsize(model, data, 512, 250, decay))
    if train_mode == 3:
        histories.append(train_with_some_batchsize(model, data, 512, 250, decay))

    # Merge the per-stage histories into one
    joined_history = histories[0]
    for i in range(1, len(histories)):
        for key, value in histories[i].items():
            joined_history[key] = joined_history[key] + value

    with open(f"increase_batchsize_mode_{train_mode}.dat", "wb") as fp:
        pickle.dump(joined_history, fp)

if __name__ == "__main__":
    tf.logging.set_verbosity(tf.logging.ERROR)
    K.clear_session()
    train(0)
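
The run only pickles the merged Keras history dict to increase_batchsize_mode_{train_mode}.dat. A minimal sketch for comparing runs afterwards (assuming all four modes have been trained and that validation accuracy is stored under the "val_acc" key, as in this tf.keras version):

import pickle
import matplotlib.pyplot as plt

# Load each pickled history and plot validation accuracy per epoch
for mode in range(4):
    with open(f"increase_batchsize_mode_{mode}.dat", "rb") as fp:
        history = pickle.load(fp)
    plt.plot(history["val_acc"], label=f"mode {mode}")

plt.xlabel("epoch")
plt.ylabel("validation accuracy")
plt.legend()
plt.show()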
@ShuuTsubaki

May I ask how to solve this problem in PyCharm?

with open(f"increase_batchsize_mode_{train_mode}.dat", "wb") as fp:
raise KeyError(key) from None
KeyError: 'COLAB_TPU_ADDR'

@koshian2 (Author) commented May 4, 2019

Oh, sorry! I forgot to mention it: run this code in the Google Colab TPU environment. The script reads the TPU address from the COLAB_TPU_ADDR environment variable, which is only set there.
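
If you want to run it elsewhere anyway, a minimal guard could look like this (a sketch, assuming you are fine falling back to plain CPU/GPU Keras training when no TPU address is set):

import os
import tensorflow as tf
from tensorflow.contrib.tpu.python.tpu import keras_support

def maybe_convert_to_tpu(model):
    # COLAB_TPU_ADDR is only defined inside a Colab runtime with a TPU attached
    tpu_addr = os.environ.get("COLAB_TPU_ADDR")
    if tpu_addr is None:
        print("No TPU detected; training on CPU/GPU instead.")
        return model
    tpu_grpc_url = "grpc://" + tpu_addr
    tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(tpu_grpc_url)
    strategy = keras_support.TPUDistributionStrategy(tpu_cluster_resolver)
    return tf.contrib.tpu.keras_to_tpu_model(model, strategy=strategy)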
