HW2

# NOTE: `FCLayer`, `ConvLayer`, `build_cnn_model`, `sample_shuffle_data`,
# and `confirm_cell_completion` are assumed to be defined earlier in the
# problem set notebook and are not reproduced here.
import functools
import os
import shutil

import tensorflow as tf
from tensorflow import keras as K


def get_cnn_model_weights(model):
    """retrieves the regularizable weights from a model

    given a cnn model (according to the spec above) this function will
    return a flattened `tf.Tensor` of all the model weights

    Arguments:
        model: the `K.models.Model` object

    Returns:
        weights: a 1D `tf.Tensor` vector of the regularizable weights
    """
    ######################## BEGIN YOUR ANSWER ########################
    # collect the kernels of the fully connected and convolutional layers,
    # then flatten each one and concatenate into a single vector
    weight_list = []
    for layer in model.layers:
        if isinstance(layer, (FCLayer, ConvLayer)):
            weight_list.append(layer.w)
    weights = tf.concat([K.backend.flatten(w) for w in weight_list], axis=0)
    ######################### END YOUR ANSWER #########################
    return weights
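
# Sanity check (a sketch; `build_cnn_model` is assumed from the problem
# set). The concatenated weights should come back as a rank-1 tensor:
#   demo_model = build_cnn_model(dropout_rate=0.5)
#   tf.print(tf.rank(get_cnn_model_weights(demo_model)))  # 1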

def compute_cce(y, p):
    """computes categorical cross entropy loss

    Arguments:
        y: true labels
        p: predicted probabilities

    Returns:
        cce: the cross entropy as a `tf.Tensor` scalar
    """
    ######################## BEGIN YOUR ANSWER ########################
    # per-example cross entropy, averaged over the batch
    cce_vector = -tf.reduce_sum(y * tf.math.log(p), axis=1)
    cce = tf.reduce_mean(cce_vector)
    ######################### END YOUR ANSWER #########################
    return cce
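
# Worked example (a sketch, assuming eager TF 2.x): a one-hot label on
# class 0 with predicted probability 0.8 gives -log(0.8) ~= 0.223.
y_demo = tf.constant([[1.0, 0.0]])
p_demo = tf.constant([[0.8, 0.2]])
tf.print(compute_cce(y_demo, p_demo))  # ~0.223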

def compute_l2(w):
    """computes the L2 norm of a vector of weights

    Arguments:
        w: a 1D `tf.Tensor` of weights

    Returns:
        l2: the computed norm
    """
    ######################## BEGIN YOUR ANSWER ########################
    # square root of the sum of squares; the cast keeps the result float32
    l2 = tf.cast(tf.sqrt(tf.reduce_sum(w * w)), dtype='float32')
    ######################### END YOUR ANSWER #########################
    return l2
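
# Sanity check (a sketch): the L2 norm of [3, 4] is sqrt(9 + 16) = 5.
tf.print(compute_l2(tf.constant([3.0, 4.0])))  # 5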

def compute_cnn_objective(model_outputs,
                          labels,
                          weights,
                          reg_coeff):
    """computes the cnn loss

    computes the loss as described in the problem set cnn model
    specification

    Arguments:
        model_outputs: $\hat{y}$, the logits/probabilities output by the
            model
        labels: $y$, the one-hot encoded labels
        weights: a 1D `tf.Tensor` of the regularizable weights
        reg_coeff: $\lambda$, the regularization coefficient

    Returns:
        total_loss: the loss as a `tf.Tensor` scalar
    """
    ######################## BEGIN YOUR ANSWER ########################
    # total objective: cross entropy plus the lambda-weighted L2 penalty
    cce = compute_cce(labels, model_outputs)
    reg_loss = compute_l2(weights)
    total_loss = cce + reg_coeff * reg_loss
    ######################### END YOUR ANSWER #########################
    return total_loss
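
# The objective is total_loss = CCE(y, y_hat) + lambda * ||w||_2. A tiny
# worked example (a sketch) reusing the demo tensors from above:
tf.print(compute_cnn_objective(p_demo, y_demo,
                               tf.constant([3.0, 4.0]),
                               reg_coeff=0.01))  # ~0.223 + 0.01 * 5 = ~0.273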

def cnn_train_step(x, y, model, loss, optimizer):
    """performs a single training step

    performs one training step on a model given a loss, optimizer, inputs,
    and labels.

    Arguments:
        x: model training inputs
        y: model training labels
        model: the model on which the pass will be performed
        loss: the loss function to be evaluated, from which the gradients
            will be computed
        optimizer: an object from `tf.optimizers` defining the optimization
            scheme

    Returns:
        pass_loss: the computed loss for the forward training pass
    """
    ######################## BEGIN YOUR ANSWER ########################
    # record the forward pass on the tape (training=True so dropout is
    # active), then backpropagate and apply the gradient update
    with tf.GradientTape() as tape:
        predictions = model(x, training=True)
        pass_loss = loss(predictions, y, get_cnn_model_weights(model))
    gradients = tape.gradient(pass_loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    ######################### END YOUR ANSWER #########################
    return pass_loss
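
# Usage sketch (hypothetical names; assumes `build_cnn_model` from the
# problem set and mini-batches `x_batch`, `y_batch`):
#   demo_model = build_cnn_model(dropout_rate=0.5)
#   demo_loss = functools.partial(compute_cnn_objective, reg_coeff=0.01)
#   demo_opt = tf.optimizers.RMSprop(learning_rate=1e-3, rho=0.9)
#   cnn_train_step(x_batch, y_batch, demo_model, demo_loss, demo_opt)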

def cnn_train(training_dataset,
              validation_dataset,
              dropout_rate,
              reg_coeff,
              learning_rate,
              batch_size,
              num_epochs,
              num_training=None,
              num_validation=None,
              do_save_model=False,
              model_dir=None,
              pbars=None,
              verbose=False,
              do_create_graph=True):
    """main training loop for the cnn model

    trains the supplied model according to the passed hyperparameters and
    the above cnn model specification

    Arguments:
        training_dataset: training data and labels
            * an iterable of length 2 containing: (inputs, labels)
        validation_dataset: validation data and labels
            * an iterable of length 2 containing: (inputs, labels)
        dropout_rate: dropout rate (1 - keep probability) for the dropout
            layer
        reg_coeff: coefficient $\lambda$ for L2 regularization
        learning_rate: learning rate for RMSprop optimizer
        batch_size: training mini-batch size
        num_epochs: number of epochs to train
        num_training: (None) number of training input/label pairs to use out
            of `training_dataset`. If not specified or `None`, all training
            examples will be batched and used.
        num_validation: (None) number of validation input/label pairs to use
            out of `validation_dataset`. If not specified or `None`, all
            validation examples will be used.
        do_save_model: (False) flag specifying whether the best model should
            be saved
            * bool
        model_dir: (None) location where the model will be saved
        pbars: (None) `tqdm` objects to be dynamically updated to display
            training progress
            * len(pbars) == 2
        verbose: (False) flag for printing losses at each epoch
            * bool
        do_create_graph: (True) flag for wrapping the gradient update
            function with `tf.function` for AutoGraphing and optimization of
            gradient update steps.

    Returns:
        best_loss: best validation loss
    """
    # get model, loss, and optimizer functions/objects
    model = build_cnn_model(dropout_rate=dropout_rate)
    loss = functools.partial(compute_cnn_objective, reg_coeff=reg_coeff)
    optimizer = tf.optimizers.RMSprop(learning_rate=learning_rate,
                                      rho=0.9)
    # defining gradient update function
    if do_create_graph:
        # creating optimized tensorflow graph for gradient update
        _train_step = tf.function(cnn_train_step)
    else:
        _train_step = cnn_train_step
    # if train or validation length not explicitly passed, use all elements
    num_training = (training_dataset[0].shape[0]
                    if num_training is None else num_training)
    num_validation = (validation_dataset[0].shape[0]
                      if num_validation is None else num_validation)
    # subsample and/or shuffle the training data
    x_train, y_train = sample_shuffle_data(training_dataset, num_training)
    # subsample and/or shuffle the validation data
    x_val, y_val = sample_shuffle_data(validation_dataset, num_validation)
    # initialize `best_loss` to untrained loss
    best_loss = loss(model_outputs=model(x_val, training=False),
                     labels=y_val,
                     weights=get_cnn_model_weights(model))
    if verbose:
        tf.print('>> epoch: __, val loss: {:5.3f} (untrained)'
                 .format(float(best_loss)))

    # helpful logging function
    def print_epoch_loss(epoch, val_loss):
        if verbose:
            tf.print('>> epoch: {:02d}, val loss: {:5.3f}'
                     .format(epoch, float(val_loss)))
    # Implement the training loop
    ######################## BEGIN YOUR ANSWER ########################
    for epoch in range(num_epochs):
        # number of full mini-batches per epoch
        total_batch = x_train.shape[0] // batch_size
        for step in range(total_batch):
            range_begin = step * batch_size
            range_end = range_begin + batch_size
            batch_x = x_train[range_begin:range_end, :]
            batch_y = y_train[range_begin:range_end, :]
            # use the (possibly `tf.function`-wrapped) train step
            epoch_loss = _train_step(batch_x, batch_y, model, loss,
                                     optimizer)
        # evaluate on the validation set at the end of each epoch
        val_loss = loss(model_outputs=model(x_val, training=False),
                        labels=y_val,
                        weights=get_cnn_model_weights(model))
        print_epoch_loss(epoch, val_loss)
        # track the best (lowest) validation loss; optionally save the model
        if val_loss <= best_loss:
            best_loss = val_loss
            if do_save_model:
                if os.path.isdir(os.path.join('.', model_dir)):
                    shutil.rmtree(os.path.join('.', model_dir))
                model.save(os.path.join('.', model_dir))
    ######################### END YOUR ANSWER #########################
    if pbars is not None:
        pbars[0].refresh()
        pbars[1].refresh()
    return best_loss


confirm_cell_completion('Model training function defined.')
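
# Example invocation (a sketch with hypothetical hyperparameters; assumes
# `train_data` and `val_data` are (inputs, one-hot labels) pairs):
#   best = cnn_train(train_data, val_data,
#                    dropout_rate=0.5, reg_coeff=0.01, learning_rate=1e-3,
#                    batch_size=64, num_epochs=10, verbose=True)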