@jainxy
Created March 3, 2021 04:39
Keras and related code samples
"""
Training
Validation on a holdout set generated from the original training data
Evaluation on the test data
- correct and test batch generation
- Normalize input by 255?
- add batchnorm layers? use model(x, training=False) then
- tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(batch_size) ? dataset = dataset.cache()?
- get_compiled_model()
- test last batch having non-dividing batch-size aka residual batch issue
- model.evaluate(test_dataset) -> setup command
- https://keras.io/api/models/model_training_apis/#evaluate-method
- Try Y-channel only
- tf.data.dataset.prefetch(buffer_size)
- tf.one_hot(y,num_classes) to get tensor form
- NN model for tabular data
- Checkpoint
"""
# ======================================================DATA
# Preprocess the data (these are NumPy arrays)
x_train = x_train.reshape(60000, 784).astype("float32") / 255
x_test = x_test.reshape(10000, 784).astype("float32") / 255
y_train = y_train.astype("float32")
y_test = y_test.astype("float32")
# Reserve 10,000 samples for validation
x_val = x_train[-10000:]
y_val = y_train[-10000:]
x_train = x_train[:-10000]
y_train = y_train[:-10000]
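# A minimal sketch (not from the original gist) wiring together the tf.data pieces listed
# in the notes above: from_tensor_slices, one_hot, cache, batch, prefetch. The batch size
# and class count are illustrative assumptions; x_train/y_train are the arrays prepared above.
import tensorflow as tf

batch_size = 64   # illustrative
num_classes = 10  # MNIST-style label count

train_dataset = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .map(lambda x, y: (x, tf.one_hot(tf.cast(y, tf.int32), num_classes)))  # one-hot labels
    .cache()                        # cache after the cheap map step
    .shuffle(buffer_size=1024)
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)     # tf.data.experimental.AUTOTUNE on older TF
)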
# ======================================================DATASET
ds_counter = tf.data.Dataset.from_generator(count, args=[25], output_types=tf.int32, output_shapes = (), )
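# The `count` used above with args=[25] is assumed to be the simple counting generator from
# the tf.data from_generator guide, not the batched count(start, end, batch_size) defined in
# the BATCHING section below. A sketch of that simple version:
def count(stop):
    i = 0
    while i < stop:
        yield i
        i += 1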
# ======================================================COMPILE
model.compile(
    optimizer=keras.optimizers.RMSprop(learning_rate=1e-3),
    loss=keras.losses.SparseCategoricalCrossentropy(),
    metrics=[keras.metrics.SparseCategoricalAccuracy()],
)
model.compile(
    optimizer="rmsprop",
    loss="sparse_categorical_crossentropy",
    metrics=["sparse_categorical_accuracy"],
)
## OPTIMIZERS: SGD() (w/ or w/o momentum) - RMSprop() - Adam()
## LOSS: SparseCategoricalCrossentropy() - CategoricalCrossentropy()
## METRICS: AUC() - Precision() - Recall()
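# A hedged sketch of the metric classes listed above in a hypothetical binary-classification
# compile (the sigmoid-output model itself is assumed, not part of this gist):
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-3),
    loss=keras.losses.BinaryCrossentropy(),
    metrics=[keras.metrics.AUC(), keras.metrics.Precision(), keras.metrics.Recall()],
)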
# ======================================================LR Decay
initial_learning_rate = 0.1
lr_schedule = keras.optimizers.schedules.ExponentialDecay(initial_learning_rate, decay_steps=100000, decay_rate=0.96, staircase=True)
optimizer = keras.optimizers.RMSprop(learning_rate=lr_schedule)
## Static LR Decays: ExponentialDecay, PiecewiseConstantDecay, PolynomialDecay, and InverseTimeDecay
## Dynamic LR Decays:
reduce_lr = keras.callbacks.ReduceLROnPlateau(
    monitor="val_loss", factor=0.2, patience=5, min_lr=0.0
)
model.fit(X_train, Y_train, callbacks=[reduce_lr])
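# A sketch of another static schedule named above, PiecewiseConstantDecay; the boundaries
# and values are illustrative assumptions:
boundaries = [100000, 110000]     # step boundaries
values = [1.0, 0.5, 0.1]          # LR before, between, and after the boundaries
lr_schedule = keras.optimizers.schedules.PiecewiseConstantDecay(boundaries, values)
optimizer = keras.optimizers.SGD(learning_rate=lr_schedule)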
# ======================================================CALLBACKS
# -- General --
# global callback syntax
on_(train|test|predict)_(begin|end)(self, logs=None)
# batch-level
on_(train|test|predict)_batch_(begin|end)(self, batch, logs=None) # For batch_end, logs is a dict containing metrics results
# epoch-level
on_epoch_(begin|end)(self, epoch, logs=None)
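# A minimal custom-callback sketch using the hooks listed above; the class name and print
# format are illustrative assumptions:
class LoggingCallback(keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        # logs is a dict of metric results at the end of the epoch
        print(f"epoch {epoch}: {logs}")

    def on_train_batch_end(self, batch, logs=None):
        if batch % 100 == 0:
            print(f"batch {batch}: {logs}")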
# -- Early stopping --
tf.keras.callbacks.EarlyStopping(patience=1)
# -- Checkpoint --
# Prepare a directory to store all the checkpoints.
checkpoint_dir = "./ckpt"
if not os.path.exists(checkpoint_dir):
    os.makedirs(checkpoint_dir)

def make_or_restore_model():
    # Either restore the latest model, or create a fresh one
    # if there is no checkpoint available.
    checkpoints = [checkpoint_dir + "/" + name for name in os.listdir(checkpoint_dir)]
    if checkpoints:
        latest_checkpoint = max(checkpoints, key=os.path.getctime)
        print("Restoring from", latest_checkpoint)
        return keras.models.load_model(latest_checkpoint)
    print("Creating a new model")
    return get_compiled_model()

model = make_or_restore_model()
callbacks = [
    # This callback saves a SavedModel every epoch.
    # We include the current epoch in the folder name.
    keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_dir + "/model_3dcnn_<HP-values>-{epoch}",
        save_freq="epoch",  # or an integer number of batches, e.g. 100
    )
]
callbacks = [
    keras.callbacks.ModelCheckpoint(
        # Path where to save the model. The two parameters below mean that we will
        # overwrite the current checkpoint if and only if the `val_loss` score has improved.
        # The saved model name will include the current epoch.
        filepath="mymodel_{epoch}",
        save_best_only=True,  # Only save a model if `val_loss` has improved.
        monitor="val_loss",
        verbose=1,
    )
]
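# A sketch of passing the checkpoint callbacks to training; validation_data is needed so the
# `val_loss`-monitored checkpoint has something to monitor (epoch count is illustrative):
model.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=10,
    validation_data=(x_val, y_val),
    callbacks=callbacks,
)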
# -- Lambda: print logs after each batch --
from keras.callbacks import LambdaCallback
callbacks = [LambdaCallback(on_batch_end=lambda batch, logs: print(logs))]
# ======================================================FIT/TRAIN
print("Fit model on training data")
history = model.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=2,
    # We pass some validation data for
    # monitoring validation loss and metrics
    # at the end of each epoch
    validation_data=(x_val, y_val),
)
history.history
# ======================================================EVALUATE/PREDICT
# Evaluate the model on the test data using `evaluate`
print("Evaluate on test data")
results = model.evaluate(x_test, y_test, batch_size=128)
print("test loss, test acc:", results)
dict(zip(model.metrics_names, results))
# Generate predictions (probabilities -- the output of the last layer)
# on new data using `predict`
print("Generate predictions for 3 samples")
predictions = model.predict(x_test[:3])
print("predictions shape:", predictions.shape)
# ====================================================== Function/Class model
## Create a function for model definition and compilation, for repeated calls. CAN PARAMETRIZE to customize things during search.
## DO IT FOR THE DATA GENERATOR AS WELL
def get_uncompiled_model():
    inputs = keras.Input(shape=(784,), name="digits")
    x = layers.Dense(64, activation="relu", name="dense_1")(inputs)
    x = layers.Dense(64, activation="relu", name="dense_2")(x)
    outputs = layers.Dense(10, activation="softmax", name="predictions")(x)
    model = keras.Model(inputs=inputs, outputs=outputs)
    return model

def get_compiled_model():
    model = get_uncompiled_model()
    model.compile(
        optimizer="rmsprop",
        loss="sparse_categorical_crossentropy",
        metrics=["sparse_categorical_accuracy"],
    )
    return model
# ======================================================BATCHING
"""
if using 'steps_per_epoch' -> create an infinitely-looping Dataset
"""
# Dataset.padded_batch(batch_size, padded_shapes=None, padding_values=None, drop_remainder=False)
dataset = dataset.batch(5).shuffle(3, reshuffle_each_iteration=True).repeat(4)
ds_series = tf.data.Dataset.from_generator(
    gen_series,
    output_types=(tf.int32, tf.float32),
    output_shapes=((), (None,)),
)
ds_counter = tf.data.Dataset.from_generator(count, args=[25], output_types=tf.int32, output_shapes = (), )
def count(start, end, batch_size):
    sample_count = end - start
    n_batches = int(sample_count // batch_size)
    remainder_samples = sample_count % batch_size
    if remainder_samples > 0:
        n_batches = n_batches + 1
    for idx in range(0, n_batches):
        if idx == n_batches - 1:
            # Pad the residual batch up to batch_size with randomly repeated indices
            pad = random.choices(range(start + idx * batch_size, end), k=(start + batch_size * (idx + 1) - end))
            batch = list(range(start + idx * batch_size, end)) + pad
        else:
            batch = list(range(start + idx * batch_size, start + idx * batch_size + batch_size))
        yield batch

ds_counter = tf.data.Dataset.from_generator(count, args=[6, 100, 5], output_types=tf.int32, output_shapes=(5,))
for count_batch in ds_counter.repeat().batch(10, drop_remainder=False).take(10):
print(count_batch.numpy())
# Assumes the simple single-argument count(stop) generator (see the DATASET section), not the batched count above.
ds_counter = tf.data.Dataset.from_generator(count, args=[25], output_types=tf.int32, output_shapes=())
ds_counter = ds_counter.padded_batch(5, padded_shapes=None, drop_remainder=True).shuffle(100)
for count_batch in ds_counter:
print(count_batch.numpy())
dataset2 = dataset.padded_batch(
    2,
    padded_shapes=([4], [None]),
    padding_values=(-1, 100),
)
## ========================================================EVALUATE/ANALYSE
# Generate generalization metrics
score = model.evaluate(X_test, targets_test, verbose=0)
print(f'Test loss: {score[0]} / Test accuracy: {score[1]}')
# Plot history: Categorical crossentropy & Accuracy
plt.plot(history.history['loss'], label='Categorical crossentropy (training data)')
plt.plot(history.history['val_loss'], label='Categorical crossentropy (validation data)')
plt.plot(history.history['accuracy'], label='Accuracy (training data)')
plt.plot(history.history['val_accuracy'], label='Accuracy (validation data)')
plt.title('Model performance for 3D MNIST Keras Conv3D example')
plt.ylabel('Loss value')
plt.xlabel('No. epoch')
plt.legend(loc="upper left")
plt.show()
##
fig, ax = plt.subplots(1, 2, figsize=(20, 3))
ax = ax.ravel()
for i, metric in enumerate(["acc", "loss"]):
    ax[i].plot(model.history.history[metric])
    ax[i].plot(model.history.history["val_" + metric])
    ax[i].set_title("Model {}".format(metric))
    ax[i].set_xlabel("epochs")
    ax[i].set_ylabel(metric)
    ax[i].legend(["train", "val"])
#======================== Tensorboard
bucket = sagemaker_session.default_bucket()
prefix = 'tensorboard_keras_cifar10'
tensorflow_logs_path = "s3://{}/{}/logs".format(bucket, prefix)
print('Bucket: {}'.format(bucket))
print('SageMaker ver: ' + sagemaker.__version__)
print('Tensorflow ver: ' + tf.__version__)
writer = tf.io.TFRecordWriter(filename)  # filename assumed defined elsewhere
aws_region = sagemaker_session.boto_region_name
!AWS_REGION={aws_region} tensorboard --logdir {tensorflow_logs_path}
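# The command above points TensorBoard at the S3 log path, but nothing in this gist writes
# logs there; a sketch of the TensorBoard callback that would (reusing tensorflow_logs_path):
tensorboard_callback = tf.keras.callbacks.TensorBoard(
    log_dir=tensorflow_logs_path,  # the s3://... path built above
    histogram_freq=1,              # illustrative: log weight histograms every epoch
)
model.fit(x_train, y_train, epochs=2, validation_data=(x_val, y_val),
          callbacks=[tensorboard_callback])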
#========================
# Print the number of batches; print the epoch number every 10th epoch; add a save-model check
# Print the lr; save stdout => checkpoints and logs
"""
"""
# Let's check:
np.testing.assert_allclose(model.predict(test_input), reconstructed_model.predict(test_input))
## DO analysis on source files -> feature generation etc.