Simplified Encoder implementation for "NLP From Scratch: Translation with a Sequence to Sequence Network and Attention" tutorial
#
# The following is a simplification of the EncoderRNN code from the
# "NLP From Scratch: Translation with a Sequence to Sequence Network and Attention"
# PyTorch tutorial (link: https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html)
#
# In fact, the `nn.GRU` module can iterate over an entire input sequence on its
# own. In the tutorial, the sentence is presented as a tensor of 1-word sequences,
# hence the loop is handled "manually" in `train` and `evaluate`. Given that
# `nn.Embedding` handles a batch of indices and `nn.GRU` processes the whole
# sequence when given an input shaped (seq_len, batch_size, elem_dim), the
# encoder can be constructed as follows:
#
import torch
import torch.nn as nn
class EncoderRNNSimplified(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(EncoderRNNSimplified, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        # `nn.Embedding` outputs [batch_size, seq_len, hidden_dim],
        # while `nn.GRU` expects [seq_len, batch_size, hidden_dim] by default,
        # so we either need `permute(1, 0, 2)` to get the proper view of the
        # embedding tensor or set `batch_first` to `True` (in which case
        # `nn.GRU` accepts [batch_size, seq_len, hidden_dim])
        self.rnn = nn.GRU(hidden_size, hidden_size, batch_first=True)

    # `hidden` defaults to a properly shaped tensor of zeros when not given
    def forward(self, x, hidden=None):
        return self.rnn(self.embedding(x), hidden)
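#
# A minimal sketch of the `permute(1, 0, 2)` alternative mentioned above
# (`EncoderRNNPermuted` is a hypothetical name): keep the default
# `batch_first=False` and transpose the embedding output instead. Note that
# `outputs` then comes back as [seq_len, batch_size, hidden_dim].
#
class EncoderRNNPermuted(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(EncoderRNNPermuted, self).__init__()
        self.embedding = nn.Embedding(input_size, hidden_size)
        # default layout: `nn.GRU` expects [seq_len, batch_size, hidden_dim]
        self.rnn = nn.GRU(hidden_size, hidden_size)

    def forward(self, x, hidden=None):
        # [batch_size, seq_len, hidden_dim] -> [seq_len, batch_size, hidden_dim]
        embedded = self.embedding(x).permute(1, 0, 2)
        return self.rnn(embedded, hidden)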
#
# Now, here's how the encoder should be used in `evaluate` (using the simple
# decoder as an example; a decoder with attention will work just the same).
# `train` is similar.
#
def sentence_to_tensor_simplified(lang, sentence):
    # array of indices, len = (num words + 1 for EOS)
    idx = sentence_to_index(lang, sentence)
    idx.append(EOS_TOKEN)
    # note that we don't transpose the tensor here
    return torch.tensor(idx, device=device)
def evaluate_simplified(encoder, decoder, sentence, max_length=MAX_LENGTH):
    with torch.no_grad():
        input_tensor = sentence_to_tensor_simplified(input_lang, sentence)
        # `unsqueeze` here is used to create a batch of size 1;
        # in most practical cases, working with `DataLoader`s, that won't be
        # necessary, as loaders typically return data in batches
        encoder_outputs, encoder_hidden = encoder(input_tensor.unsqueeze(0))
        # `squeeze` to "ignore" batching; the result is exactly the same as
        # what was previously obtained by gathering output tensors in a for
        # loop and injecting them into the outer `encoder_outputs` tensor
        encoder_outputs = encoder_outputs.squeeze(0)
        # `encoder_hidden` has a shape of (1, batch_size, hidden_dim) and can
        # be passed to the decoder "as is"
        decoder_hidden = encoder_hidden
        decoder_input = torch.tensor([[SOS_TOKEN]], device=device)
        decoded_words = []
        for di in range(max_length):
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
            topi = torch.argmax(decoder_output)
            if topi.item() == EOS_TOKEN:
                break
            decoded_words.append(output_lang.index2word[topi.item()])
            decoder_input = topi.detach()
        return " ".join(decoded_words)
#
# How it works step by step with an already trained `encoder`:
#
# sentence = "i am just going for a walk"
# #> 7 words
# input_tensor = sentence_to_tensor_simplified(input_lang, sentence)
# #> torch.Size([8])
# input_batch = input_tensor.unsqueeze(0)
# #> torch.Size([1, 8])
# embedding_batch = encoder.embedding(input_batch)
# #> torch.Size([1, 8, 256])
# outputs, hidden = encoder.rnn(embedding_batch)
# #> (torch.Size([1, 8, 256]), torch.Size([1, 1, 256]))
# outputs = outputs.squeeze(0)
# #> torch.Size([8, 256])
#
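# A quick sanity check of the core claim (hypothetical helper, not part of the
# tutorial): a single `nn.GRU` call over the whole sequence gives the same
# result as feeding tokens one by one and carrying the hidden state manually.
#
def check_encoder_equivalence(encoder, input_tensor):
    batch = input_tensor.unsqueeze(0)     # [1, seq_len]
    outputs, hidden = encoder(batch)      # one call over the whole sequence
    step_outputs, step_hidden = [], None
    for i in range(batch.size(1)):
        # 1-token sequence, reusing the hidden state from the previous step
        out, step_hidden = encoder(batch[:, i:i + 1], step_hidden)
        step_outputs.append(out)
    step_outputs = torch.cat(step_outputs, dim=1)  # [1, seq_len, hidden_dim]
    assert torch.allclose(outputs, step_outputs, atol=1e-6)
    assert torch.allclose(hidden, step_hidden, atol=1e-6)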