@epwalsh
Created September 10, 2019 17:23
from typing import Dict

import torch

from allennlp.models.model import Model


class CopyNetSeq2Seq(Model):
    # snip...

    def _get_copy_scores(self, state: Dict[str, torch.Tensor]) -> torch.Tensor:
        # NOTE: here `trimmed_source_length` refers to the input sequence length minus 2,
        # so that the special START and END tokens in the source are ignored. We also need
        # to ignore PAD tokens, but that happens elsewhere using a mask.

        # shape: (batch_size, trimmed_source_length, encoder_output_dim)
        trimmed_encoder_outputs = state["encoder_outputs"][:, 1:-1]

        # shape: (batch_size, trimmed_source_length, decoder_output_dim)
        copy_projection = self._output_copying_layer(trimmed_encoder_outputs)

        # shape: (batch_size, trimmed_source_length, decoder_output_dim)
        copy_projection = torch.tanh(copy_projection)

        # shape: (batch_size, trimmed_source_length)
        copy_scores = copy_projection.bmm(state["decoder_hidden"].unsqueeze(-1)).squeeze(-1)

        return copy_scores
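For intuition, the same computation can be run standalone with dummy tensors: trim the
START/END positions, project the encoder outputs into the decoder's space, squash with
tanh, and take a batched dot product against the decoder hidden state to get one copy
score per source token. This is a minimal sketch; the sizes and the `Linear` layer
standing in for `self._output_copying_layer` are illustrative assumptions, not the
model's actual configuration.

import torch

# Hypothetical sizes, chosen only for illustration.
batch_size, source_length = 2, 7
encoder_output_dim, decoder_output_dim = 16, 12

# Dummy encoder outputs (including START and END positions) and decoder hidden state.
encoder_outputs = torch.randn(batch_size, source_length, encoder_output_dim)
decoder_hidden = torch.randn(batch_size, decoder_output_dim)

# Stand-in for `self._output_copying_layer`.
output_copying_layer = torch.nn.Linear(encoder_output_dim, decoder_output_dim)

# shape: (batch_size, source_length - 2, encoder_output_dim)
trimmed = encoder_outputs[:, 1:-1]
# shape: (batch_size, source_length - 2, decoder_output_dim)
projection = torch.tanh(output_copying_layer(trimmed))
# shape: (batch_size, source_length - 2) -- one score per (non-special) source token
scores = projection.bmm(decoder_hidden.unsqueeze(-1)).squeeze(-1)

assert scores.shape == (batch_size, source_length - 2)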