Created
March 24, 2021 15:02
-
-
Save maarten-devries/514206e765ebf9fb97bd371ebb369aaa to your computer and use it in GitHub Desktop.
HW2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_cnn_model_weights(model):
    """Collect the regularizable weights of a cnn model into one vector.

    Walks `model.layers` in order, takes the `w` attribute of every
    `FCLayer` and `ConvLayer` (subclasses included), flattens each one and
    concatenates the results.

    Arguments:
        model: the `K.models.Model` object

    Returns:
        weights: a 1D `tf.Tensor` vector of the regularizable weights
    """
    # `isinstance` instead of an exact `type(...) is ...` match so that layers
    # derived from FCLayer/ConvLayer are regularized as well.
    flat_weights = [
        K.backend.flatten(layer.w)
        for layer in model.layers
        if isinstance(layer, (FCLayer, ConvLayer))
    ]
    weights = tf.concat(flat_weights, axis=0)
    return weights
def compute_cce(y, p):
    """Compute the mean categorical cross entropy of a batch.

    The per-row loss is `-sum(y * log(p))` taken along axis 1; the returned
    value is the mean of the per-row losses.

    Arguments:
        y: true labels, same shape as `p`; cast to the dtype of `p` before use
        p: predicted probabilities

    Returns:
        cce: the cross entropy as a `tf.Tensor` scalar
    """
    p = tf.convert_to_tensor(p)
    # Labels may arrive with a different dtype than the predictions; the
    # elementwise product below requires matching dtypes.
    y = tf.cast(y, p.dtype)
    # log(0) is -inf and 0 * -inf is nan, so one exactly-zero probability
    # would turn the whole batch mean into nan. Clipping keeps the log finite
    # and leaves every probability >= 1e-7 untouched.
    safe_p = tf.clip_by_value(p, 1e-7, 1.0)
    per_example = -tf.reduce_sum(y * tf.math.log(safe_p), axis=1)
    cce = tf.reduce_mean(per_example)
    return cce
def compute_l2(w):
    """Return the Euclidean (L2) norm of a weight vector.

    Arguments:
        w: a 1D `tf.Tensor` Tensor of weights

    Returns:
        l2: the square root of the sum of squared entries of `w`, cast to
            float32
    """
    sum_of_squares = tf.reduce_sum(w * w)
    norm = tf.sqrt(sum_of_squares)
    return tf.cast(norm, dtype='float32')
def compute_cnn_objective(model_outputs,
                          labels,
                          weights,
                          reg_coeff):
    """Compute the regularized cnn training objective.

    The objective is the categorical cross entropy between `labels` and
    `model_outputs` plus `reg_coeff` times the L2 norm of `weights`.

    Arguments:
        model_outputs: the probabilities output by the model, passed to
            `compute_cce` as the predictions
        labels: the true labels, passed to `compute_cce` as the targets
        weights: the weights whose L2 norm is penalized (see `compute_l2`)
        reg_coeff: the regularization coefficient multiplying the L2 norm

    Returns:
        total_loss: the loss as a `tf.Tensor` scalar
    """
    data_term = compute_cce(labels, model_outputs)
    penalty = reg_coeff * compute_l2(weights)
    return data_term + penalty
def cnn_train_step(x, y, model, loss, optimizer):
    """Perform a single training step.

    Runs one forward pass in training mode, evaluates `loss` on the result,
    and applies one optimizer update to the model's trainable variables.

    Arguments:
        x: model training inputs
        y: model training labels
        model: the model on which the pass will be performed; called as
            `model(x, training=True)`
        loss: the loss function to be evaluated, called as
            `loss(predictions, y, weights)`, from which the gradients will be
            computed
        optimizer: an object from `tf.optimizers` defining the optimization
            scheme

    Returns:
        pass_loss: the computed loss for the forward training pass
    """
    with tf.GradientTape() as tape:
        # `training=True` is required so that training-only behaviour (e.g.
        # dropout) is active; without it the layers run in inference mode.
        predictions = model(x, training=True)
        pass_loss = loss(predictions, y, get_cnn_model_weights(model))
    trainable = model.trainable_variables
    gradients = tape.gradient(pass_loss, trainable)
    optimizer.apply_gradients(zip(gradients, trainable))
    return pass_loss
def cnn_train(training_dataset,
              validation_dataset,
              dropout_rate,
              reg_coeff,
              learning_rate,
              batch_size,
              num_epochs,
              num_training=None,
              num_validation=None,
              do_save_model=False,
              model_dir=None,
              pbars=None,
              verbose=False,
              do_create_graph=True):
    """Main training loop for the cnn model.

    Builds a fresh model, trains it with mini-batch RMSprop for `num_epochs`
    epochs, evaluates the regularized loss on the validation data after every
    epoch and keeps track of the lowest value seen.

    Arguments:
        training_dataset: training data and labels
            * an iterable of length 2 containing: (inputs, labels)
        validation_dataset: validation data and labels
            * an iterable of length 2 containing: (inputs, labels)
        dropout_rate: dropout rate passed to `build_cnn_model`
        reg_coeff: coefficient for the L2 regularization term
        learning_rate: learning rate for the RMSprop optimizer
        batch_size: training mini-batch size
        num_epochs: number of epochs to train
        num_training: (None) number of training input/label pairs to use out
            of `training_dataset`. If `None`, all training examples are used.
        num_validation: (None) number of validation input/label pairs to use
            out of `validation_dataset`. If `None`, all validation examples
            are used.
        do_save_model: (False) if True, the model is saved to `model_dir`
            every time the validation loss matches or improves on the best
            value so far (replacing any previous save)
        model_dir: (None) location where the model will be saved, relative to
            the current directory; required when `do_save_model` is True
        pbars: (None) `tqdm` objects; both are refreshed once training ends
            * len(pbars) == 2
        verbose: (False) flag for printing the validation loss at each epoch
        do_create_graph: (True) flag for wrapping the gradient update function
            with `tf.function`

    Returns:
        best_loss: best (lowest) validation loss

    Raises:
        ValueError: if `do_save_model` is True but `model_dir` is None
    """
    # Fail before any training happens instead of with a TypeError from
    # `os.path.join` after the first epoch.
    if do_save_model and model_dir is None:
        raise ValueError(
            '`model_dir` must be given when `do_save_model` is True')

    # get model, loss, and optimizer functions/objects
    model = build_cnn_model(dropout_rate=dropout_rate)
    loss = functools.partial(compute_cnn_objective, reg_coeff=reg_coeff)
    optimizer = tf.optimizers.RMSprop(learning_rate=learning_rate,
                                      rho=0.9)

    # defining gradient update function
    if do_create_graph:
        # creating optimized tensorflow graph for gradient update
        _train_step = tf.function(cnn_train_step)
    else:
        _train_step = cnn_train_step

    # if train or validation length not explicitly passed, use all elements
    num_training = (training_dataset[0].shape[0]
                    if num_training is None else num_training)
    num_validation = (validation_dataset[0].shape[0]
                      if num_validation is None else num_validation)

    # subsample and/or shuffle the training data
    x_train, y_train = sample_shuffle_data(training_dataset, num_training)
    # subsample and/or shuffle the validation data
    x_val, y_val = sample_shuffle_data(validation_dataset, num_validation)

    def validation_loss():
        # regularized objective on the validation set, in inference mode
        return loss(model_outputs=model(x_val, training=False),
                    labels=y_val,
                    weights=get_cnn_model_weights(model))

    # initialize `best_loss` to untrained loss
    best_loss = validation_loss()
    if verbose:
        tf.print('>> epoch: __, val loss: {:5.3f} (untrained)'
                 .format(best_loss))

    # helpful logging function
    def print_epoch_loss(epoch, val_loss):
        if verbose:
            tf.print('>> epoch: {:02d}, val loss: {:5.3f}'
                     .format(epoch, val_loss))

    save_path = os.path.join('.', model_dir) if do_save_model else None
    num_examples = x_train.shape[0]

    for epoch in range(num_epochs):
        # Walk the data in consecutive, non-overlapping slices. The final
        # slice may be shorter than `batch_size`, so every example is used
        # exactly once per epoch.
        for range_begin in range(0, num_examples, batch_size):
            range_end = range_begin + batch_size
            # `_train_step` (not `cnn_train_step`) so that `do_create_graph`
            # actually takes effect.
            _train_step(x_train[range_begin:range_end],
                        y_train[range_begin:range_end],
                        model, loss, optimizer)

        val_loss = validation_loss()
        print_epoch_loss(epoch, val_loss)

        # Lower is better; compare the raw values rather than magnitudes.
        if val_loss <= best_loss:
            best_loss = val_loss
            if do_save_model:
                # replace any previously saved copy of the model
                if os.path.isdir(save_path):
                    shutil.rmtree(save_path)
                model.save(save_path)

    # NOTE(review): the progress bars are only refreshed here, never
    # advanced; confirm whether callers expect per-epoch/per-batch updates.
    if pbars is not None:
        pbars[0].refresh()
        pbars[1].refresh()
    return best_loss
# Presumably a notebook helper that reports this cell ran successfully;
# its definition is not part of this file -- confirm against the notebook.
confirm_cell_completion('Model training function defined.')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment