@ChunML
Created April 30, 2019 03:35
@tf.function
def train_step(source_seq, target_seq_in, target_seq_out):
    with tf.GradientTape() as tape:
        # Padding mask of the source sequence,
        # used in the Encoder and in the middle
        # Multi-Head Attention of the Decoder
        padding_mask = 1 - tf.cast(tf.equal(source_seq, 0), dtype=tf.float32)
        encoder_output = encoder(source_seq, padding_mask)
        decoder_output = decoder(target_seq_in, encoder_output, padding_mask)
        loss = loss_func(target_seq_out, decoder_output)

    variables = encoder.trainable_variables + decoder.trainable_variables
    gradients = tape.gradient(loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))

    return loss
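The `padding_mask` line marks real tokens with 1.0 and padded positions (token id 0) with 0.0, so attention can ignore padding. A plain-Python sketch of that same logic, framework-free (the function name `padding_mask` is mine, not from the gist):

```python
def padding_mask(source_seq):
    """Return 1.0 for real tokens and 0.0 for padding (token id 0),
    mirroring 1 - tf.cast(tf.equal(source_seq, 0), tf.float32)."""
    return [[0.0 if tok == 0 else 1.0 for tok in seq] for seq in source_seq]

# A batch of one sequence, right-padded with zeros:
padding_mask([[5, 3, 0, 0]])  # [[1.0, 1.0, 0.0, 0.0]]
```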