@tf.function
def train_step(source_seq, target_seq_in, target_seq_out):
    with tf.GradientTape() as tape:
        encoder_output = encoder(source_seq)
        decoder_output = decoder(target_seq_in, encoder_output)
        loss = loss_func(target_seq_out, decoder_output)

    variables = encoder.trainable_variables + decoder.trainable_variables
    # The snippet was cut off here; computing and applying the gradients is
    # the standard GradientTape completion of this step
    gradients = tape.gradient(loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))

    return loss
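For context, a minimal loop that could drive train_step is sketched below; the Adam optimizer, the dataset variable, and NUM_EPOCHS are assumptions rather than code taken from the article.

optimizer = tf.keras.optimizers.Adam()   # assumed optimizer choice
NUM_EPOCHS = 10                          # hypothetical constant

for epoch in range(NUM_EPOCHS):
    # `dataset` is assumed to yield (source, shifted-in target, shifted-out target)
    for source_seq, target_seq_in, target_seq_out in dataset:
        loss = train_step(source_seq, target_seq_in, target_seq_out)
    print('Epoch {} loss {:.4f}'.format(epoch + 1, loss.numpy()))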
crossentropy = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True)

def loss_func(targets, logits):
    # Zero-weight the padding positions (token id 0) so they don't
    # contribute to the cross-entropy
    mask = tf.math.logical_not(tf.math.equal(targets, 0))
    mask = tf.cast(mask, dtype=tf.int64)
    loss = crossentropy(targets, logits, sample_weight=mask)

    return loss
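A quick check with made-up tensors shows the mask at work: the two padded time steps receive zero weight.

# Hypothetical batch: one sentence, 4 time steps, vocabulary of 5,
# with the last two steps padded (id 0)
targets = tf.constant([[3, 1, 0, 0]], dtype=tf.int64)
logits = tf.random.normal((1, 4, 5))
print(loss_func(targets, logits))  # only the two real tokens are weighted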
import re
import unicodedata

def unicode_to_ascii(s):
    # Strip diacritics: decompose to NFD, then drop combining marks ('Mn')
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn')

def normalize_string(s):
    s = unicode_to_ascii(s)
    s = re.sub(r'([!.?])', r' \1', s)       # detach . ! ? from words
    s = re.sub(r'[^a-zA-Z.!?]+', r' ', s)   # keep letters and . ! ? only
    # The snippet was cut off here; collapsing whitespace and returning is the natural completion
    s = re.sub(r'\s+', ' ', s)
    return s.strip()
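As a quick illustration of the pipeline (the output is hand-checked against the regexes above):

print(normalize_string('Déjà vu!'))
# -> 'Deja vu !'  (diacritics stripped, punctuation split into its own token)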
Input vocabulary size   98
Encoder input shape     (2, 10)
Encoder output shape    (2, 10, 128)
Target vocabulary size  110
Decoder input shape     (2, 14)
Decoder output shape    (2, 14, 110)
H = 2               # number of attention heads
NUM_LAYERS = 2
MODEL_SIZE = 128    # implied by the encoder output shape logged above

en_vocab_size = len(en_tokenizer.word_index) + 1   # +1 for the padding id 0

encoder = Encoder(en_vocab_size, MODEL_SIZE, NUM_LAYERS, H)

# Dummy batch of two identical, zero-padded token sequences
en_sequence_in = tf.constant([[1, 2, 3, 4, 6, 7, 8, 0, 0, 0],
                              [1, 2, 3, 4, 6, 7, 8, 0, 0, 0]])
encoder_output = encoder(en_sequence_in)
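The decoder half of the smoke test, which would produce the remaining shapes in the log, could look like the sketch below; fr_tokenizer and the Decoder constructor signature are assumed to mirror the encoder side.

fr_vocab_size = len(fr_tokenizer.word_index) + 1             # assumed name
decoder = Decoder(fr_vocab_size, MODEL_SIZE, NUM_LAYERS, H)  # assumed signature

fr_sequence_in = tf.constant([[1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0],
                              [1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0]])
decoder_output = decoder(fr_sequence_in, encoder_output)
print('Decoder output shape', decoder_output.shape)          # (2, 14, 110)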
def call(self, sequence, encoder_output):
    # EMBEDDING AND POSITIONAL EMBEDDING
    embed_out = []
    for i in range(sequence.shape[1]):
        embed = self.embedding(tf.expand_dims(sequence[:, i], axis=1))
        # pes is the precomputed positional-encoding matrix (see below)
        embed_out.append(embed + pes[i, :])

    embed_out = tf.concat(embed_out, axis=1)
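The global pes matrix added to each embedding is not defined in this excerpt; below is a minimal sketch of how it could be computed, assuming the standard sinusoidal encoding from "Attention Is All You Need" and a hypothetical max_length bound.

import numpy as np

def positional_encoding(max_length, model_size):
    # pes[pos, 2i] = sin(pos / 10000^(2i/d)), pes[pos, 2i+1] = cos(...)
    pes = np.zeros((max_length, model_size), dtype=np.float32)
    for pos in range(max_length):
        for i in range(0, model_size, 2):
            angle = pos / 10000 ** (i / model_size)
            pes[pos, i] = np.sin(angle)
            pes[pos, i + 1] = np.cos(angle)
    return tf.constant(pes)

pes = positional_encoding(max_length=14, model_size=MODEL_SIZE)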
# Final projection of the last layer's output to target-vocabulary logits
logits = self.dense(ffn_out)

return logits
# FFN
ffn_in = mid_sub_out

ffn_out = self.dense_2[i](self.dense_1[i](ffn_in))
ffn_out = ffn_out + ffn_in            # residual connection
ffn_out = self.ffn_norm[i](ffn_out)   # layer normalization

bot_sub_in = ffn_out   # feeds the next decoder layer's bottom sub-layer
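The dense_1/dense_2 pairs above form the position-wise feed-forward network. A plausible construction in the decoder's __init__ is sketched below; the 4x hidden expansion is an assumption borrowed from the original Transformer paper rather than from this article.

# Sketch of the matching __init__ members: one FFN and one LayerNormalization
# per decoder layer (the 4x expansion factor is assumed)
self.dense_1 = [tf.keras.layers.Dense(model_size * 4, activation='relu')
                for _ in range(num_layers)]
self.dense_2 = [tf.keras.layers.Dense(model_size)
                for _ in range(num_layers)]
self.ffn_norm = [tf.keras.layers.LayerNormalization()
                 for _ in range(num_layers)]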
# MIDDLE MULTIHEAD SUB LAYER (attends over the encoder output)
mid_sub_in = bot_sub_out

mid_sub_out = []
for j in range(mid_sub_in.shape[1]):
    attention = self.attention_mid[i](
        tf.expand_dims(mid_sub_in[:, j, :], axis=1), encoder_output)

    mid_sub_out.append(attention)

# Cut off here in the snippet; completed with the same residual-plus-norm pattern as the FFN block
mid_sub_out = tf.concat(mid_sub_out, axis=1)
mid_sub_out = mid_sub_out + mid_sub_in
mid_sub_out = self.attention_mid_norm[i](mid_sub_out)  # assumed layer name
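Neither attention layer is defined in these excerpts. Below is a compact sketch of a multi-head attention layer with the same (query, value) call signature, using standard scaled dot-product attention; it is an illustration, not the article's exact implementation.

class MultiHeadAttention(tf.keras.Model):
    def __init__(self, model_size, h):
        super().__init__()
        self.head_size = model_size // h
        self.h = h
        self.wq = [tf.keras.layers.Dense(self.head_size) for _ in range(h)]
        self.wk = [tf.keras.layers.Dense(self.head_size) for _ in range(h)]
        self.wv = [tf.keras.layers.Dense(self.head_size) for _ in range(h)]
        self.wo = tf.keras.layers.Dense(model_size)

    def call(self, query, value):
        heads = []
        for i in range(self.h):
            # scaled dot-product attention for one head
            score = tf.matmul(self.wq[i](query), self.wk[i](value),
                              transpose_b=True)
            score /= tf.math.sqrt(tf.cast(self.head_size, tf.float32))
            alignment = tf.nn.softmax(score, axis=-1)
            heads.append(tf.matmul(alignment, self.wv[i](value)))

        return self.wo(tf.concat(heads, axis=-1))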
bot_sub_in = embed_out

for i in range(self.num_layers):
    # BOTTOM MULTIHEAD SUB LAYER (masked self-attention)
    bot_sub_out = []
    for j in range(bot_sub_in.shape[1]):
        # the value vectors must not contain tokens that lie to the right of
        # the current token; note :j + 1 rather than :j, so that position j
        # can still attend to itself
        values = bot_sub_in[:, :j + 1, :]

        attention = self.attention_bot[i](
            tf.expand_dims(bot_sub_in[:, j, :], axis=1), values)

        bot_sub_out.append(attention)
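A tiny standalone check with a made-up tensor confirms that the growing slice behaves like a look-ahead mask: at step j the values cover positions 0 through j only.

x = tf.reshape(tf.range(24, dtype=tf.float32), (2, 3, 4))  # (batch, seq, size)
for j in range(x.shape[1]):
    print(j, x[:, :j + 1, :].shape)  # (2, 1, 4), then (2, 2, 4), then (2, 3, 4)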