Trung Tran (ChunML) · https://trungtran.io
def call(self, sequence, encoder_output):
    # EMBEDDING AND POSITIONAL EMBEDDING
    embed_out = []
    for i in range(sequence.shape[1]):
        embed = self.embedding(tf.expand_dims(sequence[:, i], axis=1))
        embed_out.append(embed + pes[i, :])

    embed_out = tf.concat(embed_out, axis=1)
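The call() snippets on this page index a precomputed table pes of sinusoidal positional encodings that is not shown here. Below is a minimal sketch of how such a table could be built, following the formula from the Transformer paper; the names positional_encoding, max_length, and MODEL_SIZE are illustrative assumptions, not the author's exact code.

import numpy as np
import tensorflow as tf

def positional_encoding(pos, model_size):
    # One sinusoidal encoding vector for position `pos` (Vaswani et al., 2017)
    PE = np.zeros((1, model_size))
    for i in range(model_size):
        if i % 2 == 0:
            PE[:, i] = np.sin(pos / 10000 ** (i / model_size))
        else:
            PE[:, i] = np.cos(pos / 10000 ** ((i - 1) / model_size))
    return PE

# Hypothetical sizes; pes is the table indexed as pes[i, :] in the call() methods
max_length = 20
MODEL_SIZE = 128
pes = np.concatenate([positional_encoding(i, MODEL_SIZE) for i in range(max_length)], axis=0)
pes = tf.constant(pes, dtype=tf.float32)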
class Decoder(tf.keras.Model):
    def __init__(self, vocab_size, model_size, num_layers, h):
        super(Decoder, self).__init__()
        self.model_size = model_size
        self.num_layers = num_layers
        self.h = h
        self.embedding = tf.keras.layers.Embedding(vocab_size, model_size)
        self.attention_bot = [MultiHeadAttention(model_size, h) for _ in range(num_layers)]
        self.attention_bot_norm = [tf.keras.layers.BatchNormalization() for _ in range(num_layers)]
        self.attention_mid = [MultiHeadAttention(model_size, h) for _ in range(num_layers)]
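        # The gist preview is truncated at this point. The remaining layers of
        # Decoder.__init__ are not shown; the lines below are a sketch of what they
        # plausibly look like, mirroring the Encoder (the hidden width 512 is an assumption).
        self.attention_mid_norm = [tf.keras.layers.BatchNormalization() for _ in range(num_layers)]

        # Position-wise feed-forward network per layer
        self.dense_1 = [tf.keras.layers.Dense(512, activation='relu') for _ in range(num_layers)]
        self.dense_2 = [tf.keras.layers.Dense(model_size) for _ in range(num_layers)]
        self.ffn_norm = [tf.keras.layers.BatchNormalization() for _ in range(num_layers)]

        # Final projection back to the vocabulary
        self.dense = tf.keras.layers.Dense(vocab_size)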
def call(self, sequence):
    sub_in = []
    for i in range(sequence.shape[1]):
        # Compute the embedded vector
        embed = self.embedding(tf.expand_dims(sequence[:, i], axis=1))
        # Add positional encoding to the embedded vector
        sub_in.append(embed + pes[i, :])

    # Concatenate the result so that the shape is (batch_size, length, model_size)
    sub_in = tf.concat(sub_in, axis=1)

    # We will have num_layers of (Attention + FFN)
    for i in range(self.num_layers):
        sub_out = []

        # Iterate along the sequence length
        for j in range(sub_in.shape[1]):
            # Compute the context vector towards the whole sequence
            attention = self.attention[i](
                tf.expand_dims(sub_in[:, j, :], axis=1), sub_in)
            sub_out.append(attention)

        # Concatenate the result so that the shape is (batch_size, length, model_size)
        sub_out = tf.concat(sub_out, axis=1)

        # Residual connection
        sub_out = sub_in + sub_out
        # Normalize the output
        sub_out = self.attention_norm[i](sub_out)

        # The FFN input is the output of the Multi-Head Attention
        ffn_in = sub_out

        ffn_out = self.dense_2[i](self.dense_1[i](ffn_in))
        # Add the residual connection
        ffn_out = ffn_in + ffn_out
        # Normalize the output
        ffn_out = self.ffn_norm[i](ffn_out)

        # Assign the FFN output to the next layer's Multi-Head Attention input
        sub_in = ffn_out
class Encoder(tf.keras.Model):
    def __init__(self, vocab_size, model_size, num_layers, h):
        super(Encoder, self).__init__()
        self.model_size = model_size
        self.num_layers = num_layers
        self.h = h

        # One Embedding layer
        self.embedding = tf.keras.layers.Embedding(vocab_size, model_size)
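        # The gist preview is truncated here. Judging from the attributes used in
        # the Encoder's call() above (attention, attention_norm, dense_1, dense_2,
        # ffn_norm), the rest of Encoder.__init__ presumably looks roughly like the
        # sketch below (the hidden width 512 is an assumption).
        # num_layers Multi-Head Attention blocks, each with its own normalization
        self.attention = [MultiHeadAttention(model_size, h) for _ in range(num_layers)]
        self.attention_norm = [tf.keras.layers.BatchNormalization() for _ in range(num_layers)]

        # num_layers position-wise feed-forward networks, each with its own normalization
        self.dense_1 = [tf.keras.layers.Dense(512, activation='relu') for _ in range(num_layers)]
        self.dense_2 = [tf.keras.layers.Dense(model_size) for _ in range(num_layers)]
        self.ffn_norm = [tf.keras.layers.BatchNormalization() for _ in range(num_layers)]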
def call(self, query, value):
    # query has shape (batch, query_len, model_size)
    # value has shape (batch, value_len, model_size)
    heads = []
    for i in range(self.h):
        score = tf.matmul(self.wq[i](query), self.wk[i](value), transpose_b=True)

        # Here we scale the score as described in the paper
        score /= tf.math.sqrt(tf.dtypes.cast(self.key_size, tf.float32))
        # score has shape (batch, query_len, value_len)
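        # The preview stops mid-loop. The lines below are a sketch of the usual
        # continuation, assuming self.wv and self.wo are defined in __init__
        # alongside self.wq and self.wk.
        alignment = tf.nn.softmax(score, axis=2)
        # alignment has shape (batch, query_len, value_len)
        head = tf.matmul(alignment, self.wv[i](value))
        # head has shape (batch, query_len, key_size)
        heads.append(head)

    # Concatenate all the attention heads and project back to model_size
    heads = tf.concat(heads, axis=2)
    heads = self.wo(heads)
    # heads has shape (batch, query_len, model_size)
    return heads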
def call(self, query, value):
    # query has shape (batch, query_len, model_size)
    # value has shape (batch, value_len, model_size)
    score = tf.matmul(query, value, transpose_b=True) / tf.math.sqrt(tf.dtypes.cast(self.key_size, tf.float32))
    # score has shape (batch, query_len, value_len)

    alignment = tf.nn.softmax(score, axis=2)
    # alignment has shape (batch, query_len, value_len)

    context = tf.matmul(alignment, value)
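For reference, here is a minimal self-contained sketch of how this single-head scaled dot-product attention can be wrapped as a Keras layer and exercised with random tensors; the class name DotProductAttention and the wrapper itself are illustrative assumptions, not the author's exact code.

import tensorflow as tf

class DotProductAttention(tf.keras.layers.Layer):
    # Hypothetical wrapper around the single-head call() shown above
    def __init__(self, key_size):
        super(DotProductAttention, self).__init__()
        self.key_size = key_size

    def call(self, query, value):
        # Scaled dot-product attention: softmax(Q K^T / sqrt(d_k)) V,
        # with `value` also acting as the key
        score = tf.matmul(query, value, transpose_b=True) / tf.math.sqrt(
            tf.dtypes.cast(self.key_size, tf.float32))
        alignment = tf.nn.softmax(score, axis=2)
        context = tf.matmul(alignment, value)
        return context

# Quick shape check
att = DotProductAttention(key_size=64)
q = tf.random.normal((2, 5, 64))   # (batch, query_len, model_size)
v = tf.random.normal((2, 7, 64))   # (batch, value_len, model_size)
print(att(q, v).shape)             # -> (2, 5, 64)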