ChunML/transformer_38.py

## transformer_38.py
@tf.function
def train_step(source_seq, target_seq_in, target_seq_out):
    """Run one optimisation step on a single batch and return its loss.

    The source batch is passed through ``encoder``; ``decoder`` is then run
    on ``target_seq_in`` together with the encoder output, and ``loss_func``
    scores the decoder output against ``target_seq_out``. The gradients of
    that loss are applied to the trainable variables of both ``encoder``
    and ``decoder`` via the module-level ``optimizer``.

    Args:
        source_seq: Source batch; entries equal to 0 are treated as padding.
        target_seq_in: Target sequence fed to the decoder.
        target_seq_out: Target sequence the loss is computed against.

    Returns:
        The value produced by ``loss_func`` for this batch.
    """
    # 1.0 for every non-zero entry of source_seq, 0.0 where the entry is 0.
    # The mask involves no trainable variables, so it is built before the
    # tape starts recording.
    is_padding = tf.cast(tf.equal(source_seq, 0), dtype=tf.float32)
    keep = 1 - is_padding

    # Insert two singleton axes right after the batch axis so the mask's
    # shape becomes (batch_size, 1, 1, seq_len).
    padding_mask = tf.expand_dims(tf.expand_dims(keep, axis=1), axis=1)

    with tf.GradientTape() as tape:
        encoder_output = encoder(source_seq, padding_mask)
        decoder_output = decoder(target_seq_in, encoder_output, padding_mask)
        loss = loss_func(target_seq_out, decoder_output)

    # Encoder and decoder are optimised jointly from the same loss.
    trainable = encoder.trainable_variables + decoder.trainable_variables
    grads = tape.gradient(loss, trainable)
    optimizer.apply_gradients(zip(grads, trainable))

    return loss
	@tf.function
	def train_step(source_seq, target_seq_in, target_seq_out):
		"""Run one optimisation step on a single batch and return its loss.

		Encodes ``source_seq``, decodes ``target_seq_in`` against the encoder
		output, scores the decoder output against ``target_seq_out`` with
		``loss_func``, and applies the resulting gradients to the trainable
		variables of ``encoder`` and ``decoder`` through ``optimizer``.

		Args:
			source_seq: Source batch; entries equal to 0 are treated as padding.
			target_seq_in: Target sequence fed to the decoder.
			target_seq_out: Target sequence the loss is computed against.

		Returns:
			The value produced by ``loss_func`` for this batch.
		"""
		# NOTE(review): an identical train_step is already defined earlier in
		# this file; this copy looks like a paste artifact whose indentation
		# was flattened. Nesting is restored here; consider keeping only one.
		with tf.GradientTape() as tape:
			# 1.0 for non-zero entries of source_seq, 0.0 where the entry is 0.
			padding_mask = 1 - tf.cast(tf.equal(source_seq, 0), dtype=tf.float32)

			# Manually add two more dimensions
			# so that the mask's shape becomes (batch_size, 1, 1, seq_len)
			padding_mask = tf.expand_dims(padding_mask, axis=1)
			padding_mask = tf.expand_dims(padding_mask, axis=1)

			encoder_output = encoder(source_seq, padding_mask)

			decoder_output = decoder(target_seq_in, encoder_output, padding_mask)

			loss = loss_func(target_seq_out, decoder_output)

		# Encoder and decoder are optimised jointly from the same loss.
		variables = encoder.trainable_variables + decoder.trainable_variables
		gradients = tape.gradient(loss, variables)
		optimizer.apply_gradients(zip(gradients, variables))

		return loss