# VXU1230/s8.py

## s8.py
@tf.function
def train_step(model, loss_fn, optimizer, target, context, label):
    """Run one optimisation step and report the loss and a gradient statistic.

    Calls ``model(target, context)`` under a gradient tape, evaluates
    ``loss_fn(label, predictions)``, clips every gradient element-wise to
    ``[-5, 5]`` and applies the clipped gradients through ``optimizer``.

    Args:
        model: Callable invoked as ``model(target, context)``; must expose
            ``trainable_variables``.
        loss_fn: Callable invoked as ``loss_fn(label, predictions)``.
        optimizer: Object exposing ``apply_gradients(grads_and_vars)``.
        target: First argument forwarded to ``model``.
        context: Second argument forwarded to ``model``.
        label: First argument forwarded to ``loss_fn``.

    Returns:
        A ``(batch_loss, grad_norm)`` tuple. ``grad_norm`` is
        ``sqrt(sum_i mean(g_i) ** 2)`` over the clipped gradients, i.e. it is
        built from per-variable *means* and is not the global L2 norm.
    """
    with tf.GradientTape() as tape:
        predictions = model(target, context)
        batch_loss = loss_fn(label, predictions)

    variables = model.trainable_variables
    gradients = tape.gradient(batch_loss, variables)

    # Pair each gradient with its variable BEFORE dropping the None entries.
    # Filtering the gradient list on its own and zipping afterwards shifts
    # every gradient that follows a None onto the wrong variable.
    clipped_pairs = [
        (tf.clip_by_value(grad, -5., 5.), var)
        for grad, var in zip(gradients, variables)
        if grad is not None
    ]
    optimizer.apply_gradients(clipped_pairs)

    # Accumulate the squared mean of each clipped gradient.
    g2 = 0
    for grad, _ in clipped_pairs:
        g2 += tf.square(tf.reduce_mean(grad))
    grad_norm = tf.sqrt(g2)
    return batch_loss, grad_norm
	@tf.function
	def train_step(model, loss_fn, optimizer, target, context, label):
		"""Perform a single training update; return the loss and a gradient statistic.

		The forward pass ``model(target, context)`` and the loss
		``loss_fn(label, predictions)`` are recorded on a gradient tape. Each
		gradient is clipped element-wise to ``[-5, 5]`` and then handed to
		``optimizer.apply_gradients`` together with the variable it belongs to.

		Args:
			model: Callable invoked as ``model(target, context)``; must expose
				``trainable_variables``.
			loss_fn: Callable invoked as ``loss_fn(label, predictions)``.
			optimizer: Object exposing ``apply_gradients(grads_and_vars)``.
			target: First argument forwarded to ``model``.
			context: Second argument forwarded to ``model``.
			label: First argument forwarded to ``loss_fn``.

		Returns:
			A ``(batch_loss, grad_norm)`` tuple, where ``grad_norm`` is the
			square root of the summed squared per-variable means of the clipped
			gradients (not the global L2 norm).
		"""
		# NOTE(review): the lines this block replaces had lost their nesting
		# (every line sat at one tab); the structure below is reconstructed
		# from the statements themselves.
		with tf.GradientTape() as tape:
			predictions = model(target, context)
			batch_loss = loss_fn(label, predictions)

		trainable = model.trainable_variables
		gradients = tape.gradient(batch_loss, trainable)

		# Keep gradient/variable pairs together while skipping None gradients;
		# filtering the gradients alone would misalign them with the variables.
		grads_and_vars = []
		for grad, var in zip(gradients, trainable):
			if grad is None:
				continue
			grads_and_vars.append((tf.clip_by_value(grad, -5., 5.), var))
		optimizer.apply_gradients(grads_and_vars)

		# Sum of squared means of the clipped gradients, one term per variable.
		g2 = 0
		for grad, _ in grads_and_vars:
			g2 += tf.square(tf.reduce_mean(grad))
		grad_norm = tf.sqrt(g2)
		return batch_loss, grad_norm