import time

import tensorflow as tf


@tf.function
def train_step(inp, targ, enc_hidden):
    loss = 0

    with tf.GradientTape() as tape:
        enc_output, enc_hidden = encoder(inp, enc_hidden)
        dec_hidden = enc_hidden

        # initial decoder input - the <start> (SOS) token for every sequence in the batch
        dec_input = tf.expand_dims([targ_lang.word_index['<start>']] * BATCH_SIZE, 1)

        # Teacher forcing - feeding the target as the next input
        for t in range(1, targ.shape[1]):
            predictions, dec_hidden = decoder(dec_input, dec_hidden, enc_output)
            loss += loss_function(targ[:, t], predictions)

            # using teacher forcing - the decoder input at the next time step
            # is the target of the current time step
            dec_input = tf.expand_dims(targ[:, t], 1)

    batch_loss = (loss / int(targ.shape[1]))
    variables = encoder.trainable_variables + decoder.trainable_variables

    # calculate gradients of the loss with respect to the model's trainable
    # variables - this is where autodiff happens
    gradients = tape.gradient(loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))

    return batch_loss
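
# --- Assumed helper definitions (not part of the original gist) ---
# train_step above refers to a `loss_function` and an `optimizer` that are
# defined elsewhere in the surrounding notebook. A minimal sketch of what they
# might look like, assuming padding tokens are index 0 and the decoder outputs
# logits; treat this as an illustration, not the author's exact code.

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction='none')
optimizer = tf.keras.optimizers.Adam()

def loss_function(real, pred):
    # mask out padding positions so they do not contribute to the loss
    mask = tf.math.logical_not(tf.math.equal(real, 0))
    loss_ = loss_object(real, pred)
    loss_ *= tf.cast(mask, dtype=loss_.dtype)
    return tf.reduce_mean(loss_)
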
## To execute the training process
EPOCHS = 100
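
# The training loop below saves checkpoints via a `checkpoint` object and a
# `checkpoint_prefix` path that the original gist does not define. A minimal
# sketch, assuming the usual tf.train.Checkpoint pattern (the directory name
# './training_checkpoints' is illustrative):

import os

checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, 'ckpt')
checkpoint = tf.train.Checkpoint(optimizer=optimizer,
                                 encoder=encoder,
                                 decoder=decoder)
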
for epoch in range(EPOCHS):
    start = time.time()

    enc_hidden = encoder.initialize_hidden_state()
    total_loss = 0

    for (batch, (inp, targ)) in enumerate(dataset.take(steps_per_epoch)):
        batch_loss = train_step(inp, targ, enc_hidden)
        total_loss += batch_loss

        if batch % 100 == 0:
            print('Epoch {} Batch {} Loss {:.4f}'.format(epoch + 1,
                                                         batch,
                                                         batch_loss.numpy()))

    # saving (checkpoint) the model every 2 epochs
    if (epoch + 1) % 2 == 0:
        checkpoint.save(file_prefix=checkpoint_prefix)

    print('Epoch {} Loss {:.4f}'.format(epoch + 1,
                                        total_loss / steps_per_epoch))
    print('Time taken for 1 epoch {} sec\n'.format(time.time() - start))
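
# After training finishes, the weights saved above can be restored from the
# most recent checkpoint (e.g. before running inference/translation). This
# again assumes the illustrative `checkpoint_dir` from the sketch above:

checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))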