@tf.function
def train_step(source_seq, target_seq_in, target_seq_out):
    # Forward pass through both models, then backpropagate one step.
    with tf.GradientTape() as tape:
        encoder_output = encoder(source_seq)
        decoder_output = decoder(target_seq_in, encoder_output)
        loss = loss_func(target_seq_out, decoder_output)

    variables = encoder.trainable_variables + decoder.trainable_variables
    gradients = tape.gradient(loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))

    return loss
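
A minimal driver loop sketch for train_step, assuming a tf.data pipeline named dataset that yields (source_seq, target_seq_in, target_seq_out) batches; the dataset name, the Adam settings, and NUM_EPOCHS are assumptions, not part of the original snippets:

optimizer = tf.keras.optimizers.Adam()

NUM_EPOCHS = 10  # assumed value
for epoch in range(NUM_EPOCHS):
    for source_seq, target_seq_in, target_seq_out in dataset:
        loss = train_step(source_seq, target_seq_in, target_seq_out)
    print('Epoch {} loss {:.4f}'.format(epoch + 1, loss.numpy()))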
crossentropy = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True)

def loss_func(targets, logits):
    # Mask out padding (token id 0) so padded positions add nothing to the loss.
    mask = tf.math.logical_not(tf.math.equal(targets, 0))
    mask = tf.cast(mask, dtype=tf.int64)
    loss = crossentropy(targets, logits, sample_weight=mask)
    return loss
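
A quick check of the masking behavior; the token ids and shapes below are illustrative, with 110 taken from the target vocabulary size in the log further down:

targets = tf.constant([[5, 7, 2, 0, 0]])  # last two positions are padding
logits = tf.random.normal((1, 5, 110))    # (batch, seq_len, vocab)
print(loss_func(targets, logits))         # padded positions contribute zero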
import re
import unicodedata

def unicode_to_ascii(s):
    # Strip accents: decompose characters, then drop combining marks ('Mn').
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn')

def normalize_string(s):
    s = unicode_to_ascii(s)
    s = re.sub(r'([!.?])', r' \1', s)      # pad punctuation with a space
    s = re.sub(r'[^a-zA-Z.!?]+', r' ', s)  # keep only letters and . ! ?
    return s
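
A quick sanity check of the normalizer (the input sentence is illustrative):

print(normalize_string('Il a décidé!'))  # -> 'Il a decide !'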
Input vocabulary size 98
Encoder input shape (2, 10)
Encoder output shape (2, 10, 128)
Target vocabulary size 110
Decoder input shape (2, 14)
Decoder output shape (2, 14, 110)
MODEL_SIZE = 128  # matches the (2, 10, 128) encoder output in the log above
H = 2             # number of attention heads
NUM_LAYERS = 2

en_vocab_size = len(en_tokenizer.word_index) + 1
encoder = Encoder(en_vocab_size, MODEL_SIZE, NUM_LAYERS, H)

en_sequence_in = tf.constant([[1, 2, 3, 4, 6, 7, 8, 0, 0, 0],
                              [1, 2, 3, 4, 6, 7, 8, 0, 0, 0]])
encoder_output = encoder(en_sequence_in)
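
A matching decoder-side check, which together with the lines above would produce a log like the one shown earlier. The fr_tokenizer name, the token ids, and the Decoder constructor mirroring the Encoder's are assumptions; the decoder call signature matches its use in train_step:

fr_vocab_size = len(fr_tokenizer.word_index) + 1
decoder = Decoder(fr_vocab_size, MODEL_SIZE, NUM_LAYERS, H)

fr_sequence_in = tf.constant([[14, 24, 36, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                              [14, 24, 36, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])
decoder_output = decoder(fr_sequence_in, encoder_output)

print('Target vocabulary size', fr_vocab_size)
print('Decoder input shape', fr_sequence_in.shape)
print('Decoder output shape', decoder_output.shape)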
def call(self, sequence, encoder_output):
    # EMBEDDING AND POSITIONAL EMBEDDING
    # (pes is the precomputed positional-encoding matrix; a sketch follows this block)
    embed_out = []
    for i in range(sequence.shape[1]):
        embed = self.embedding(tf.expand_dims(sequence[:, i], axis=1))
        embed_out.append(embed + pes[i, :])

    embed_out = tf.concat(embed_out, axis=1)

    bot_sub_in = embed_out

    for i in range(self.num_layers):
        # BOTTOM MULTIHEAD SUB LAYER (masked self-attention)
        bot_sub_out = []
        for j in range(bot_sub_in.shape[1]):
            # the value vector must not contain tokens that lie on the right of the current token
            values = bot_sub_in[:, :j+1, :]
            attention = self.attention_bot[i](
                tf.expand_dims(bot_sub_in[:, j, :], axis=1), values)
            bot_sub_out.append(attention)
        bot_sub_out = tf.concat(bot_sub_out, axis=1)

        # residual connection and layer normalization
        bot_sub_out = bot_sub_out + bot_sub_in
        bot_sub_out = self.attention_bot_norm[i](bot_sub_out)

        # MIDDLE MULTIHEAD SUB LAYER (attention over the encoder output)
        mid_sub_in = bot_sub_out
        mid_sub_out = []
        for j in range(mid_sub_in.shape[1]):
            attention = self.attention_mid[i](
                tf.expand_dims(mid_sub_in[:, j, :], axis=1), encoder_output)
            mid_sub_out.append(attention)
        mid_sub_out = tf.concat(mid_sub_out, axis=1)

        # residual connection and layer normalization
        mid_sub_out = mid_sub_out + mid_sub_in
        mid_sub_out = self.attention_mid_norm[i](mid_sub_out)

        # FFN
        ffn_in = mid_sub_out
        ffn_out = self.dense_2[i](self.dense_1[i](ffn_in))
        ffn_out = ffn_out + ffn_in
        ffn_out = self.ffn_norm[i](ffn_out)

        bot_sub_in = ffn_out

    logits = self.dense(ffn_out)
    return logits
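
pes above is a precomputed positional-encoding matrix that these snippets never define. A minimal sketch of one way to build it, using the sinusoidal formula from "Attention Is All You Need"; the positional_encoding helper, max_length, and its value of 14 (the decoder input length above) are assumptions here:

import numpy as np

def positional_encoding(pos, model_size):
    # PE(pos, 2i)   = sin(pos / 10000^(2i / model_size))
    # PE(pos, 2i+1) = cos(pos / 10000^(2i / model_size))
    pe = np.zeros((1, model_size))
    for i in range(model_size):
        if i % 2 == 0:
            pe[:, i] = np.sin(pos / 10000 ** (i / model_size))
        else:
            pe[:, i] = np.cos(pos / 10000 ** ((i - 1) / model_size))
    return pe

max_length = 14  # assumed: the longest target sequence above
pes = np.concatenate([positional_encoding(i, MODEL_SIZE)
                      for i in range(max_length)], axis=0)
pes = tf.constant(pes, dtype=tf.float32)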