# maarten-devries/Hw2.py

## Hw2.py
def get_cnn_model_weights(model):
    """Retrieve the regularizable weights from a model.

    Collects the `w` attribute of every `FCLayer` and `ConvLayer` found in
    `model.layers`, flattens each one and concatenates them into a single
    vector. Layers of any other type are skipped.

    Arguments:
      model: the `K.models.Model` object

    Returns:
      weights: a 1D `tf.Tensor` vector of the regularizable weights; an
        empty float32 vector when the model contains no `FCLayer` or
        `ConvLayer`
    """

    ######################## BEGIN YOUR ANSWER ########################
    # `isinstance` (instead of an exact `type(...) is` match) also selects
    # subclasses of the two regularized layer types.
    flat_weights = [K.backend.flatten(layer.w)
                    for layer in model.layers
                    if isinstance(layer, (FCLayer, ConvLayer))]
    if flat_weights:
        weights = tf.concat(flat_weights, axis=0)
    else:
        # `tf.concat` cannot concatenate an empty list; fall back to an
        # empty vector so callers still receive a 1D tensor.
        weights = tf.zeros([0], dtype=tf.float32)
    ######################### END YOUR ANSWER #########################
    return weights


def compute_cce(y, p):
    """Compute the categorical cross entropy loss.

    For every row the loss is `-sum(y * log(p))` taken over axis 1; the
    returned value is the mean of those per-row losses.

    Arguments:
      y: true labels (multiplied elementwise with `log(p)`)
      p: predicted probabilities

    Returns:
      cce: the cross entropy as a `tf.Tensor` scalar
    """
    ######################## BEGIN YOUR ANSWER ########################
    p = tf.convert_to_tensor(p)
    # Labels may arrive with a different dtype than the predictions; the
    # elementwise product below needs both operands to share one dtype.
    y = tf.cast(y, p.dtype)
    # Floor the probabilities at a tiny positive value: log(0) is -inf and
    # 0 * -inf is NaN, which would poison the loss and its gradients.
    eps = 1e-7
    log_p = tf.math.log(tf.maximum(p, eps))
    cce_vector = -tf.reduce_sum(y * log_p, axis=1)
    cce = tf.reduce_mean(cce_vector)
    ######################### END YOUR ANSWER #########################
    return cce


def compute_l2(w):
    """Return the L2 (Euclidean) norm of a weight tensor.

    Arguments:
      w: a 1D `tf.Tensor` Tensor of weights

    Returns:
      l2: the square root of the sum of squared entries of `w`, cast to
        float32
    """
    ######################## BEGIN YOUR ANSWER ########################
    sum_of_squares = tf.reduce_sum(w * w)
    norm = tf.sqrt(sum_of_squares)
    l2 = tf.cast(norm, dtype='float32')
    ######################### END YOUR ANSWER #########################
    return l2


def compute_cnn_objective(model_outputs,
                          labels,
                          weights,
                          reg_coeff):
    r"""Compute the regularized cnn loss.

    The loss is the categorical cross entropy between `labels` and
    `model_outputs` (via `compute_cce`) plus `reg_coeff` times the L2 norm
    of `weights` (via `compute_l2`).

    Arguments:
      model_outputs: $\hat{y}$, the model output, passed to `compute_cce`
        as the predicted probabilities
      labels: $y$, the one-hot encoded labels
      weights: the weights to regularize, passed to `compute_l2`
      reg_coeff: $\lambda$, the regularization coefficient

    Returns:
      total_loss: the loss as a `tf.Tensor` scalar
    """
    ######################## BEGIN YOUR ANSWER ########################
    cce = compute_cce(labels, model_outputs)
    reg_loss = compute_l2(weights)
    total_loss = cce + reg_coeff * reg_loss
    ######################### END YOUR ANSWER #########################
    return total_loss


def cnn_train_step(x, y, model, loss, optimizer):
    """Perform a single training step.

    Runs one forward pass of `model` on `x` under a gradient tape,
    evaluates `loss`, and applies the resulting gradients to
    `model.trainable_variables` with `optimizer`.

    Arguments:
      x: model training inputs
      y: model training labels
      model: the model on which the pass will be performed
      loss: the loss function to be evaluated, from which the gradients will be
        computed; called as `loss(predictions, y, weights)`
      optimizer: an object from `tf.optimizers` defining the optimization scheme

    Returns:
      pass_loss: the computed loss for the forward training pass
    """
    ######################## BEGIN YOUR ANSWER ########################
    with tf.GradientTape() as tape:
        # Request training-mode behaviour explicitly, mirroring the explicit
        # `training=False` that `cnn_train` passes for validation. Without
        # it, training-only layers (the model is built with a dropout rate)
        # may run in inference mode.
        predictions = model(x, training=True)
        pass_loss = loss(predictions, y, get_cnn_model_weights(model))

    gradients = tape.gradient(pass_loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    ######################### END YOUR ANSWER #########################
    return pass_loss

def cnn_train(training_dataset,
              validation_dataset,
              dropout_rate,
              reg_coeff,
              learning_rate,
              batch_size,
              num_epochs,
              num_training=None,
              num_validation=None,
              do_save_model=False,
              model_dir=None,
              pbars=None,
              verbose=False,
              do_create_graph=True):
    r"""Main training loop for the cnn model.

    Builds a model with `build_cnn_model`, trains it with RMSprop on
    mini-batches of the sampled training data, and evaluates the
    regularized loss on the sampled validation data after every epoch.
    The lowest validation loss seen (starting from the untrained model's
    loss) is tracked and returned; when `do_save_model` is set, the model
    is saved every time that loss improves.

    Arguments:
      training_dataset: training data and labels
        * an iterable of length 2 containing: (inputs, labels)
      validation_dataset: validation data and labels
        * an iterable of length 2 containing: (inputs, labels)
      dropout_rate: dropout rate (1 - keep probability) for the dropout layer
      reg_coeff: coefficient $\lambda$ for L2 regularization
      learning_rate: learning rate for RMSprop optimizer
      batch_size: training mini-batch size
      num_epochs: number of epochs to train
      num_training: (None) number of training input/label pairs to use out of
        `training_dataset`. If not specified or `None` all training
        examples will be batched and used.
      num_validation: (None) number of validation input/label pairs to use out
        of `validation_dataset`. If not specified or `None` all validation
        examples will be used.
      do_save_model: (False) flag for specifying if the best model be saved
        * bool
      model_dir: (None) location where model will be saved
      pbars: (None) `tqdm` objects to be dynamically updated to display training
        progress
        * len(pbars) == 2
      verbose: (False) flag for printing losses at each epoch
        * bool
      do_create_graph: (True) flag for wrapping the gradient update function
        with `tf.function` for AutoGraphing and optimization of gradient update
        steps.

    Returns:
      best_loss: best validation loss

    Raises:
      ValueError: if `batch_size` is not positive, or if `do_save_model` is
        set while `model_dir` is `None`
    """
    if batch_size <= 0:
        raise ValueError('batch_size must be a positive integer')
    if do_save_model and model_dir is None:
        raise ValueError('model_dir is required when do_save_model is True')

    # get model, loss, and optimizer functions/objects
    model = build_cnn_model(dropout_rate=dropout_rate)
    loss = functools.partial(compute_cnn_objective, reg_coeff=reg_coeff)
    optimizer = tf.optimizers.RMSprop(learning_rate=learning_rate,
                                      rho=0.9)

    # defining gradient update function
    if do_create_graph:
        # creating optimized tensorflow graph for gradient update
        _train_step = tf.function(cnn_train_step)
    else:
        _train_step = cnn_train_step

    # if train or validation length not explicitly passed, use all elements
    num_training = (training_dataset[0].shape[0]
                    if num_training is None else num_training)
    num_validation = (validation_dataset[0].shape[0]
                      if num_validation is None else num_validation)

    # subsample and/or shuffle the training data
    x_train, y_train = sample_shuffle_data(training_dataset, num_training)

    # subsample and/or shuffle the validation data
    x_val, y_val = sample_shuffle_data(validation_dataset, num_validation)

    # regularized loss of the current model on the validation data
    def validation_loss():
        return loss(model_outputs=model(x_val, training=False),
                    labels=y_val,
                    weights=get_cnn_model_weights(model))

    # initialize `best_loss` to untrained loss
    best_loss = validation_loss()
    if verbose:
        tf.print('>> epoch: __, val loss: {:5.3f} (untrained)'
                 .format(best_loss))

    # helpful logging function
    def print_epoch_loss(epoch, val_loss):
        if verbose:
            tf.print('>> epoch: {:02d}, val loss: {:5.3f}'
                     .format(epoch, val_loss))

    # Implement the training loop
    ######################## BEGIN YOUR ANSWER ########################
    save_path = os.path.join('.', model_dir) if do_save_model else None
    num_samples = x_train.shape[0]
    for epoch in range(num_epochs):
        # Walk the data in consecutive windows. The final window may be
        # shorter than `batch_size`, so every sample is used once per epoch
        # (slicing past the end simply truncates).
        for range_begin in range(0, num_samples, batch_size):
            range_end = range_begin + batch_size
            # `_train_step` is the (optionally `tf.function`-wrapped) update
            # selected above, so `do_create_graph` actually takes effect.
            _train_step(x_train[range_begin:range_end],
                        y_train[range_begin:range_end],
                        model, loss, optimizer)

        # Evaluate once per epoch rather than after every mini-batch.
        val_loss = validation_loss()
        print_epoch_loss(epoch, val_loss)
        if val_loss < best_loss:
            best_loss = val_loss
            if do_save_model:
                # replace any previously saved model with the new best one
                if os.path.isdir(save_path):
                    shutil.rmtree(save_path)
                model.save(save_path)
    ######################### END YOUR ANSWER #########################

    if pbars is not None:
        pbars[0].refresh()
        pbars[1].refresh()

    return best_loss

# Report that the definitions above were evaluated. `confirm_cell_completion`
# is defined outside this file section (presumably a notebook progress helper).
confirm_cell_completion('Model training function defined.')
	def get_cnn_model_weights(model):
	    """Retrieve the regularizable weights from a model.

	    Collects the `w` attribute of every `FCLayer` and `ConvLayer` found in
	    `model.layers`, flattens each one and concatenates them into a single
	    vector. Layers of any other type are skipped.

	    Arguments:
	      model: the `K.models.Model` object

	    Returns:
	      weights: a 1D `tf.Tensor` vector of the regularizable weights; an
	        empty float32 vector when the model contains no `FCLayer` or
	        `ConvLayer`
	    """

	    ######################## BEGIN YOUR ANSWER ########################
	    # `isinstance` (instead of an exact `type(...) is` match) also selects
	    # subclasses of the two regularized layer types.
	    flat_weights = [K.backend.flatten(layer.w)
	                    for layer in model.layers
	                    if isinstance(layer, (FCLayer, ConvLayer))]
	    if flat_weights:
	        weights = tf.concat(flat_weights, axis=0)
	    else:
	        # `tf.concat` cannot concatenate an empty list; fall back to an
	        # empty vector so callers still receive a 1D tensor.
	        weights = tf.zeros([0], dtype=tf.float32)
	    ######################### END YOUR ANSWER #########################
	    return weights


	def compute_cce(y, p):
	    """Compute the categorical cross entropy loss.

	    For every row the loss is `-sum(y * log(p))` taken over axis 1; the
	    returned value is the mean of those per-row losses.

	    Arguments:
	      y: true labels (multiplied elementwise with `log(p)`)
	      p: predicted probabilities

	    Returns:
	      cce: the cross entropy as a `tf.Tensor` scalar
	    """
	    ######################## BEGIN YOUR ANSWER ########################
	    p = tf.convert_to_tensor(p)
	    # Labels may arrive with a different dtype than the predictions; the
	    # elementwise product below needs both operands to share one dtype.
	    y = tf.cast(y, p.dtype)
	    # Floor the probabilities at a tiny positive value: log(0) is -inf and
	    # 0 * -inf is NaN, which would poison the loss and its gradients.
	    eps = 1e-7
	    log_p = tf.math.log(tf.maximum(p, eps))
	    cce_vector = -tf.reduce_sum(y * log_p, axis=1)
	    cce = tf.reduce_mean(cce_vector)
	    ######################### END YOUR ANSWER #########################
	    return cce


	def compute_l2(w):
	    """Return the L2 (Euclidean) norm of a weight tensor.

	    Arguments:
	      w: a 1D `tf.Tensor` Tensor of weights

	    Returns:
	      l2: the square root of the sum of squared entries of `w`, cast to
	        float32
	    """
	    ######################## BEGIN YOUR ANSWER ########################
	    l2 = tf.cast(tf.sqrt(tf.reduce_sum(w * w)), dtype='float32')
	    ######################### END YOUR ANSWER #########################
	    return l2


	def compute_cnn_objective(model_outputs,
	                          labels,
	                          weights,
	                          reg_coeff):
	    r"""Compute the regularized cnn loss.

	    The loss is the categorical cross entropy between `labels` and
	    `model_outputs` (via `compute_cce`) plus `reg_coeff` times the L2 norm
	    of `weights` (via `compute_l2`).

	    Arguments:
	      model_outputs: $\hat{y}$, the model output, passed to `compute_cce`
	        as the predicted probabilities
	      labels: $y$, the one-hot encoded labels
	      weights: the weights to regularize, passed to `compute_l2`
	      reg_coeff: $\lambda$, the regularization coefficient

	    Returns:
	      total_loss: the loss as a `tf.Tensor` scalar
	    """
	    ######################## BEGIN YOUR ANSWER ########################
	    cce = compute_cce(labels, model_outputs)
	    reg_loss = compute_l2(weights)
	    total_loss = cce + reg_coeff * reg_loss
	    ######################### END YOUR ANSWER #########################
	    return total_loss




	def cnn_train_step(x, y, model, loss, optimizer):
	    """Perform a single training step.

	    Runs one forward pass of `model` on `x` under a gradient tape,
	    evaluates `loss`, and applies the resulting gradients to
	    `model.trainable_variables` with `optimizer`.

	    Arguments:
	      x: model training inputs
	      y: model training labels
	      model: the model on which the pass will be performed
	      loss: the loss function to be evaluated, from which the gradients will be
	        computed; called as `loss(predictions, y, weights)`
	      optimizer: an object from `tf.optimizers` defining the optimization scheme

	    Returns:
	      pass_loss: the computed loss for the forward training pass
	    """
	    ######################## BEGIN YOUR ANSWER ########################
	    with tf.GradientTape() as tape:
	        # Request training-mode behaviour explicitly, mirroring the explicit
	        # `training=False` that `cnn_train` passes for validation. Without
	        # it, training-only layers (the model is built with a dropout rate)
	        # may run in inference mode.
	        predictions = model(x, training=True)
	        pass_loss = loss(predictions, y, get_cnn_model_weights(model))

	    gradients = tape.gradient(pass_loss, model.trainable_variables)
	    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
	    ######################### END YOUR ANSWER #########################
	    return pass_loss

	def cnn_train(training_dataset,
	              validation_dataset,
	              dropout_rate,
	              reg_coeff,
	              learning_rate,
	              batch_size,
	              num_epochs,
	              num_training=None,
	              num_validation=None,
	              do_save_model=False,
	              model_dir=None,
	              pbars=None,
	              verbose=False,
	              do_create_graph=True):
	    r"""Main training loop for the cnn model.

	    Builds a model with `build_cnn_model`, trains it with RMSprop on
	    mini-batches of the sampled training data, and evaluates the
	    regularized loss on the sampled validation data after every epoch.
	    The lowest validation loss seen (starting from the untrained model's
	    loss) is tracked and returned; when `do_save_model` is set, the model
	    is saved every time that loss improves.

	    Arguments:
	      training_dataset: training data and labels
	        * an iterable of length 2 containing: (inputs, labels)
	      validation_dataset: validation data and labels
	        * an iterable of length 2 containing: (inputs, labels)
	      dropout_rate: dropout rate (1 - keep probability) for the dropout layer
	      reg_coeff: coefficient $\lambda$ for L2 regularization
	      learning_rate: learning rate for RMSprop optimizer
	      batch_size: training mini-batch size
	      num_epochs: number of epochs to train
	      num_training: (None) number of training input/label pairs to use out of
	        `training_dataset`. If not specified or `None` all training
	        examples will be batched and used.
	      num_validation: (None) number of validation input/label pairs to use out
	        of `validation_dataset`. If not specified or `None` all validation
	        examples will be used.
	      do_save_model: (False) flag for specifying if the best model be saved
	        * bool
	      model_dir: (None) location where model will be saved
	      pbars: (None) `tqdm` objects to be dynamically updated to display training
	        progress
	        * len(pbars) == 2
	      verbose: (False) flag for printing losses at each epoch
	        * bool
	      do_create_graph: (True) flag for wrapping the gradient update function
	        with `tf.function` for AutoGraphing and optimization of gradient update
	        steps.

	    Returns:
	      best_loss: best validation loss

	    Raises:
	      ValueError: if `batch_size` is not positive, or if `do_save_model` is
	        set while `model_dir` is `None`
	    """
	    if batch_size <= 0:
	        raise ValueError('batch_size must be a positive integer')
	    if do_save_model and model_dir is None:
	        raise ValueError('model_dir is required when do_save_model is True')

	    # get model, loss, and optimizer functions/objects
	    model = build_cnn_model(dropout_rate=dropout_rate)
	    loss = functools.partial(compute_cnn_objective, reg_coeff=reg_coeff)
	    optimizer = tf.optimizers.RMSprop(learning_rate=learning_rate,
	                                      rho=0.9)

	    # defining gradient update function
	    if do_create_graph:
	        # creating optimized tensorflow graph for gradient update
	        _train_step = tf.function(cnn_train_step)
	    else:
	        _train_step = cnn_train_step

	    # if train or validation length not explicitly passed, use all elements
	    num_training = (training_dataset[0].shape[0]
	                    if num_training is None else num_training)
	    num_validation = (validation_dataset[0].shape[0]
	                      if num_validation is None else num_validation)

	    # subsample and/or shuffle the training data
	    x_train, y_train = sample_shuffle_data(training_dataset, num_training)

	    # subsample and/or shuffle the validation data
	    x_val, y_val = sample_shuffle_data(validation_dataset, num_validation)

	    # regularized loss of the current model on the validation data
	    def validation_loss():
	        return loss(model_outputs=model(x_val, training=False),
	                    labels=y_val,
	                    weights=get_cnn_model_weights(model))

	    # initialize `best_loss` to untrained loss
	    best_loss = validation_loss()
	    if verbose:
	        tf.print('>> epoch: __, val loss: {:5.3f} (untrained)'
	                 .format(best_loss))

	    # helpful logging function
	    def print_epoch_loss(epoch, val_loss):
	        if verbose:
	            tf.print('>> epoch: {:02d}, val loss: {:5.3f}'
	                     .format(epoch, val_loss))

	    # Implement the training loop
	    ######################## BEGIN YOUR ANSWER ########################
	    save_path = os.path.join('.', model_dir) if do_save_model else None
	    num_samples = x_train.shape[0]
	    for epoch in range(num_epochs):
	        # Walk the data in consecutive windows. The final window may be
	        # shorter than `batch_size`, so every sample is used once per epoch
	        # (slicing past the end simply truncates).
	        for range_begin in range(0, num_samples, batch_size):
	            range_end = range_begin + batch_size
	            # `_train_step` is the (optionally `tf.function`-wrapped) update
	            # selected above, so `do_create_graph` actually takes effect.
	            _train_step(x_train[range_begin:range_end],
	                        y_train[range_begin:range_end],
	                        model, loss, optimizer)

	        # Evaluate once per epoch rather than after every mini-batch.
	        val_loss = validation_loss()
	        print_epoch_loss(epoch, val_loss)
	        if val_loss < best_loss:
	            best_loss = val_loss
	            if do_save_model:
	                # replace any previously saved model with the new best one
	                if os.path.isdir(save_path):
	                    shutil.rmtree(save_path)
	                model.save(save_path)
	    ######################### END YOUR ANSWER #########################

	    if pbars is not None:
	        pbars[0].refresh()
	        pbars[1].refresh()

	    return best_loss

	# Report that the definitions above were evaluated. `confirm_cell_completion`
	# is defined outside this file section (presumably a notebook progress helper).
	confirm_cell_completion('Model training function defined.')