import tensorflow as tf

class ScaledDotProductAttentionLayer():
    def calculate_output_weights(self, q, k, v, mask):
        # Raw attention scores: QK^T
        qk = tf.matmul(q, k, transpose_b=True)

        # Scale by sqrt of the key dimension to keep softmax gradients stable
        dk = tf.cast(tf.shape(k)[-1], tf.float32)
        scaled_attention = qk / tf.math.sqrt(dk)

        # Push masked positions toward -inf so softmax assigns them ~0 weight
        if mask is not None:
            scaled_attention += (mask * -1e9)

        # Normalize scores into attention weights, then apply them to the values
        weights = tf.nn.softmax(scaled_attention, axis=-1)
        output = tf.matmul(weights, v)
        return output, weights
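
# A minimal usage sketch. The tensor shapes below (batch=1, seq_len=3,
# depth=4) are illustrative assumptions, not part of the original gist.
layer = ScaledDotProductAttentionLayer()
q = tf.random.uniform((1, 3, 4))
k = tf.random.uniform((1, 3, 4))
v = tf.random.uniform((1, 3, 4))
output, weights = layer.calculate_output_weights(q, k, v, mask=None)
print(output.shape)   # (1, 3, 4) - one attended vector per query position
print(weights.shape)  # (1, 3, 3) - attention distribution over key positions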