def call(self, sequence, encoder_output):
    # EMBEDDING AND POSITIONAL ENCODING
    embed_out = []
    for i in range(sequence.shape[1]):
        embed = self.embedding(tf.expand_dims(sequence[:, i], axis=1))
        embed_out.append(embed + pes[i, :])
    embed_out = tf.concat(embed_out, axis=1)
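Both the encoder and decoder snippets reference a module-level table pes that never appears in the embeds. A minimal sketch of the standard sinusoidal positional encoding from the paper, assuming pes is precomputed as a (max_len, model_size) constant; the sizes below are placeholders, not the author's values:

import numpy as np
import tensorflow as tf

def positional_encoding(max_len, model_size):
    # Sinusoidal encoding from "Attention Is All You Need":
    # even dimensions use sine, odd dimensions use cosine.
    pes = np.zeros((max_len, model_size), dtype=np.float32)
    for pos in range(max_len):
        for i in range(0, model_size, 2):
            pes[pos, i] = np.sin(pos / 10000 ** (i / model_size))
            if i + 1 < model_size:
                pes[pos, i + 1] = np.cos(pos / 10000 ** (i / model_size))
    return tf.constant(pes)

# max_len=100 and model_size=128 are placeholder values
pes = positional_encoding(100, 128)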
class Decoder(tf.keras.Model):
    def __init__(self, vocab_size, model_size, num_layers, h):
        super(Decoder, self).__init__()
        self.model_size = model_size
        self.num_layers = num_layers
        self.h = h
        self.embedding = tf.keras.layers.Embedding(vocab_size, model_size)
        self.attention_bot = [MultiHeadAttention(model_size, h) for _ in range(num_layers)]
        self.attention_bot_norm = [tf.keras.layers.BatchNormalization() for _ in range(num_layers)]
        self.attention_mid = [MultiHeadAttention(model_size, h) for _ in range(num_layers)]
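The embed is clipped mid-constructor. Based on the attribute names the decoder uses elsewhere (a second attention-plus-norm stack, per-layer FFN weights, and a final vocabulary projection), the remainder plausibly mirrors the encoder; a hypothetical reconstruction, not the author's exact code:

        # Hypothetical continuation, inferred from attribute names used in
        # the surrounding snippets; the FFN hidden width is an assumption.
        self.attention_mid_norm = [tf.keras.layers.BatchNormalization() for _ in range(num_layers)]
        self.dense_1 = [tf.keras.layers.Dense(model_size * 4, activation='relu') for _ in range(num_layers)]
        self.dense_2 = [tf.keras.layers.Dense(model_size) for _ in range(num_layers)]
        self.ffn_norm = [tf.keras.layers.BatchNormalization() for _ in range(num_layers)]
        self.dense = tf.keras.layers.Dense(vocab_size)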
def call(self, sequence):
    sub_in = []
    for i in range(sequence.shape[1]):
        # Compute the embedded vector
        embed = self.embedding(tf.expand_dims(sequence[:, i], axis=1))
        # Add positional encoding to the embedded vector
        sub_in.append(embed + pes[i, :])
    # Concatenate the result so that the shape is (batch_size, length, model_size)
    sub_in = tf.concat(sub_in, axis=1)
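As a side note, the position-by-position loop is equivalent to a single vectorized call, since the Embedding layer accepts a whole (batch, length) batch at once and the first length rows of pes broadcast across it. A small self-contained check (all sizes here are arbitrary placeholders):

import numpy as np
import tensorflow as tf

vocab_size, model_size, length = 10, 8, 5
embedding = tf.keras.layers.Embedding(vocab_size, model_size)
pes_demo = tf.constant(np.random.rand(length, model_size).astype(np.float32))
sequence = tf.constant([[1, 2, 3, 4, 5]])

# Per-position loop, as in the snippet above
looped = tf.concat(
    [embedding(tf.expand_dims(sequence[:, i], axis=1)) + pes_demo[i, :]
     for i in range(length)], axis=1)
# One vectorized call
vectorized = embedding(sequence) + pes_demo[:length, :]
print(np.allclose(looped.numpy(), vectorized.numpy()))  # True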
# The FFN input is the output of the Multi-Head Attention
ffn_in = sub_out
ffn_out = self.dense_2[i](self.dense_1[i](ffn_in))
# Add the residual connection
ffn_out = ffn_in + ffn_out
# Normalize the output
ffn_out = self.ffn_norm[i](ffn_out)
# Assign the FFN output to the next layer's Multi-Head Attention input
sub_in = ffn_out
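Because Dense layers in Keras operate on the last axis, this two-layer feed-forward network is automatically position-wise: every position in the sequence is transformed by the same weights. A quick standalone check (the hidden width 32 is an arbitrary placeholder):

import numpy as np
import tensorflow as tf

dense_1 = tf.keras.layers.Dense(32, activation='relu')
dense_2 = tf.keras.layers.Dense(8)
x = tf.random.normal((2, 5, 8))  # (batch_size, length, model_size)
out = dense_2(dense_1(x))
print(out.shape)  # (2, 5, 8)

# Applying the same layers to a single position gives the same result
single = dense_2(dense_1(x[:, 0:1, :]))
print(np.allclose(out[:, 0:1, :].numpy(), single.numpy()))  # True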
# Residual connection
sub_out = sub_in + sub_out
# Normalize the output
sub_out = self.attention_norm[i](sub_out)
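One thing to note: this implementation normalizes with BatchNormalization, whereas the original paper uses layer normalization. If you wanted to match the paper, the same residual-plus-normalize step would use tf.keras.layers.LayerNormalization instead; a minimal standalone sketch with placeholder shapes:

import tensorflow as tf

# Residual + normalization in isolation; LayerNormalization is what the
# paper uses, and all shapes here are placeholders.
batch_size, length, model_size = 2, 5, 8
sub_in = tf.random.normal((batch_size, length, model_size))
sub_out = tf.random.normal((batch_size, length, model_size))  # stand-in for attention output
norm = tf.keras.layers.LayerNormalization(epsilon=1e-6)
print(norm(sub_in + sub_out).shape)  # (2, 5, 8)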
# We will have num_layers of (Attention + FFN)
for i in range(self.num_layers):
    sub_out = []
    # Iterate along the sequence length
    for j in range(sub_in.shape[1]):
        # Compute the context vector for position j against the whole sequence
        attention = self.attention[i](
            tf.expand_dims(sub_in[:, j, :], axis=1), sub_in)
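The embed is clipped inside the inner loop. Presumably each per-position context vector is appended and the list is concatenated back to (batch_size, length, model_size) before the residual step shown above; a hypothetical continuation:

        # Hypothetical continuation, inferred from the residual step above
        sub_out.append(attention)
    # Concatenate the per-position context vectors back to
    # (batch_size, length, model_size)
    sub_out = tf.concat(sub_out, axis=1)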
class Encoder(tf.keras.Model):
    def __init__(self, vocab_size, model_size, num_layers, h):
        super(Encoder, self).__init__()
        self.model_size = model_size
        self.num_layers = num_layers
        self.h = h
        # One Embedding layer
        self.embedding = tf.keras.layers.Embedding(vocab_size, model_size)
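This embed is also clipped after the Embedding layer. Judging from the attributes the encoder's call method uses (self.attention, self.attention_norm, self.dense_1, self.dense_2, self.ffn_norm), the rest of the constructor plausibly looks like the following; the FFN hidden width is an assumption, not the author's code:

        # Hypothetical continuation, inferred from the attributes the call
        # method uses; the FFN hidden width is an assumption.
        self.attention = [MultiHeadAttention(model_size, h) for _ in range(num_layers)]
        self.attention_norm = [tf.keras.layers.BatchNormalization() for _ in range(num_layers)]
        self.dense_1 = [tf.keras.layers.Dense(model_size * 4, activation='relu') for _ in range(num_layers)]
        self.dense_2 = [tf.keras.layers.Dense(model_size) for _ in range(num_layers)]
        self.ffn_norm = [tf.keras.layers.BatchNormalization() for _ in range(num_layers)]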
def call(self, query, value):
    # query has shape (batch, query_len, model_size)
    # value has shape (batch, value_len, model_size)
    heads = []
    for i in range(self.h):
        score = tf.matmul(self.wq[i](query), self.wk[i](value), transpose_b=True)
        # Here we scale the score as described in the paper
        score /= tf.math.sqrt(tf.dtypes.cast(self.key_size, tf.float32))
        # score has shape (batch, query_len, value_len)
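The embed cuts off after the scaling step. A plausible completion of the per-head loop, mirroring the single-head call below: softmax over the value axis, a context matmul, then concatenating the heads through an output projection. The self.wv and self.wo layers are assumptions consistent with the wq/wk naming:

        # Hypothetical completion, mirroring the single-head version below;
        # self.wv and self.wo are assumed projection layers.
        alignment = tf.nn.softmax(score, axis=2)
        head = tf.matmul(alignment, self.wv[i](value))
        heads.append(head)
    # Concatenate all heads and project back to model_size
    heads = tf.concat(heads, axis=2)
    heads = self.wo(heads)
    return heads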
def call(self, query, value):
    # query has shape (batch, query_len, model_size)
    # value has shape (batch, value_len, model_size)
    score = tf.matmul(query, value, transpose_b=True) / tf.math.sqrt(tf.dtypes.cast(self.key_size, tf.float32))
    # score has shape (batch, query_len, value_len)
    alignment = tf.nn.softmax(score, axis=2)
    # alignment has shape (batch, query_len, value_len)
    context = tf.matmul(alignment, value)
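For a quick sanity check outside any class, the same scaled dot-product attention runs on raw tensors; the shapes below are arbitrary:

import tensorflow as tf

# Scaled dot-product attention on random tensors; all sizes are arbitrary.
batch_size, query_len, value_len, model_size = 2, 3, 4, 8
query = tf.random.normal((batch_size, query_len, model_size))
value = tf.random.normal((batch_size, value_len, model_size))

score = tf.matmul(query, value, transpose_b=True) / tf.math.sqrt(tf.cast(model_size, tf.float32))
alignment = tf.nn.softmax(score, axis=2)  # each row sums to 1 over value_len
context = tf.matmul(alignment, value)
print(context.shape)  # (2, 3, 8)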