Mohana Roy Chowdhury (MohanaRC)

import numpy as np

model = base_model()
# Iterate over epochs
epochs = 10
epochs_val_losses, epochs_train_losses = [], []
for epoch in range(epochs):
    print('Start of epoch %d' % (epoch,))
    # Perform training using gradient tape, then run validation
    losses_train = train_data_for_one_epoch()
    losses_val = perform_validation()
    # Track the mean loss for the epoch and reset the accuracy metrics
    epochs_train_losses.append(np.mean(losses_train))
    epochs_val_losses.append(np.mean(losses_val))
    print('Train acc: %.4f, val acc: %.4f' % (float(train_acc_metric.result()), float(val_acc_metric.result())))
    train_acc_metric.reset_states()
    val_acc_metric.reset_states()
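
A possible follow-up (my addition, not part of the original gist): the per-epoch loss lists collected above can be plotted to inspect convergence, assuming matplotlib is available.

import matplotlib.pyplot as plt
plt.plot(epochs_train_losses, label='train')
plt.plot(epochs_val_losses, label='validation')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend()
plt.show()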

def perform_validation():
    """
    Function for computing the validation loss and accuracy at the end of an epoch.
    """
    losses = []
    for x_val, y_val in test:
        val_logits = model(x_val)
        val_loss = loss_object(y_true=y_val, y_pred=val_logits)
        losses.append(val_loss)
        val_acc_metric(y_val, val_logits)
    return losses

def train_data_for_one_epoch():
    """
    Function for computing gradients and updating the weights for one epoch of training.
    """
    losses = []
    # Iterate over batches of the dataset and call apply_gradient on each batch
    for step, (x_batch_train, y_batch_train) in enumerate(train):
        logits, loss_value = apply_gradient(optimizer, model, x_batch_train, y_batch_train)
        losses.append(loss_value)
        train_acc_metric(y_batch_train, logits)
    return losses

def apply_gradient(optimizer, model, x, y):
    """
    Function for computing the loss and gradients for one batch and updating the model weights.
    """
    with tf.GradientTape() as tape:
        # Get the model prediction and compute the loss
        logits = model(x)
        loss_value = loss_object(y_true=y, y_pred=logits)
    # Calculate the gradients using tape.gradient, then update the weights via the optimizer
    gradients = tape.gradient(loss_value, model.trainable_weights)
    optimizer.apply_gradients(zip(gradients, model.trainable_weights))
    return logits, loss_value
import tensorflow as tf

# Define the optimizer and loss function
optimizer = tf.keras.optimizers.Adam()
loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
# Define the metrics
train_acc_metric = tf.keras.metrics.SparseCategoricalAccuracy()
val_acc_metric = tf.keras.metrics.SparseCategoricalAccuracy()
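
A minimal sketch (my addition) of how this loss behaves: SparseCategoricalCrossentropy with its default from_logits=False expects integer class labels and probability outputs, which matches the softmax final layer of the model defined below.

# Hypothetical 3-class example: the true class is 1, predicted with probability 0.8
y_true = tf.constant([1])
y_pred = tf.constant([[0.1, 0.8, 0.1]])
print(loss_object(y_true, y_pred).numpy())  # ~0.2231, i.e. -log(0.8)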

# Define the model
def base_model():
    """
    Define the model architecture: a fully connected network with two hidden layers.
    """
    inputs = tf.keras.Input(shape=(784,), name='digits')
    x = tf.keras.layers.Dense(64, activation='relu', name='dense_1')(inputs)
    x = tf.keras.layers.Dense(64, activation='relu', name='dense_2')(x)
    outputs = tf.keras.layers.Dense(10, activation='softmax', name='predictions')(x)
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    return model
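
A quick sanity check (my addition): a forward pass on a random input confirms the model maps 784-dimensional vectors to 10 class probabilities.

sample = tf.random.uniform((1, 784))
probs = base_model()(sample)
print(probs.shape)                   # (1, 10)
print(tf.reduce_sum(probs).numpy())  # ~1.0, since the final layer is softmax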
batch_size = 64
train_data = train_data.map(format_image)
test_data = test_data.map(format_image)
train = train_data.shuffle(buffer_size=1024).batch(batch_size)
test = test_data.batch(batch_size=batch_size)
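
A quick check on the pipeline (my addition): one batch drawn from train should contain flattened, normalized images of shape (batch_size, 784) and integer labels of shape (batch_size,).

for images, labels in train.take(1):
    print(images.shape)  # (64, 784)
    print(labels.shape)  # (64,)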

def format_image(data):
    """
    Function to reshape, format and normalize an input image to make it compatible with the deep learning model.
    Inputs:
        data: Dataset element containing "image" and "label" keys
    Outputs:
        image: Flattened, normalized image tensor of shape (784,)
        data["label"]: Training label associated with the image
    """
    image = data["image"]
    # Flatten the 28x28x1 image to a 784-element vector and scale pixels to [0, 1]
    image = tf.reshape(image, [-1])
    image = tf.cast(image, 'float32') / 255.0
    return image, data["label"]
import tensorflow_datasets as tfds

train_data, info = tfds.load("mnist", split="train", with_info=True)
test_data = tfds.load("mnist", split="test")

def tf_gradient_tape_persistent(x):
    """
    Simple implementation to understand how gradient tape handles the chain rule
    with persistent set to True.
    Inputs:
        x: Tensor value
    Returns:
        EagerTensor: Derivative of y with respect to the input tensor x, and derivative of z with respect to x
    """
    with tf.GradientTape(persistent=True) as t:
        t.watch(x)
        y = x * x  # assumed example ops; the original snippet is truncated here
        z = y * y
    dy_dx = t.gradient(y, x)  # persistent=True allows calling gradient more than once
    dz_dx = t.gradient(z, x)
    del t  # release tape resources once finished
    return dy_dx, dz_dx
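
A usage sketch (my addition, assuming the y = x**2, z = y**2 body filled in above): at x = 3.0, dy/dx = 2x = 6 and dz/dx = 4x**3 = 108.

x = tf.constant(3.0)
dy_dx, dz_dx = tf_gradient_tape_persistent(x)
print(dy_dx.numpy(), dz_dx.numpy())  # 6.0 108.0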