Epoch 10 Loss 2.2374
Average elapsed time: 8.79s
He acted like he owned the place .
[[7, 82, 83, 7, 84, 4, 85, 1]]
l vous vous vous vous les les . <end>
Epoch 20 Loss 2.0102
Average elapsed time: 6.29s
Did you plant pumpkins this year ?
[[13, 2, 56, 57, 19, 58, 3]]
H = 2
NUM_LAYERS = 2
en_vocab_size = len(en_tokenizer.word_index) + 1
encoder = Encoder(en_vocab_size, MODEL_SIZE, NUM_LAYERS, H)
en_sequence_in = tf.constant([[1, 2, 3, 4, 6, 7, 8, 0, 0, 0],
                              [1, 2, 3, 4, 6, 7, 8, 0, 0, 0]])
encoder_output = encoder(en_sequence_in)
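As a quick sanity check (a sketch; MODEL_SIZE is defined earlier in the tutorial, e.g. 128), the encoder should map the padded batch to a tensor of shape (batch_size, seq_len, MODEL_SIZE):

print(encoder_output.shape)
# Expected: (2, 10, MODEL_SIZE), i.e. (batch_size, seq_len, model_size)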
encoder = Encoder(en_vocab_size, MODEL_SIZE, NUM_LAYERS, H)
decoder = Decoder(fr_vocab_size, MODEL_SIZE, NUM_LAYERS, H)

NUM_EPOCHS = 100
start_time = time.time()
for e in range(NUM_EPOCHS):
    for batch, (source_seq, target_seq_in, target_seq_out) in enumerate(dataset.take(-1)):
        loss = train_step(source_seq, target_seq_in,
                          target_seq_out)
@tf.function
def train_step(source_seq, target_seq_in, target_seq_out):
    with tf.GradientTape() as tape:
        padding_mask = 1 - tf.cast(tf.equal(source_seq, 0), dtype=tf.float32)
        # Manually add two more dimensions
        # so that the mask's shape becomes (batch_size, 1, 1, seq_len)
        padding_mask = tf.expand_dims(padding_mask, axis=1)
        padding_mask = tf.expand_dims(padding_mask, axis=1)
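To see what the two expand_dims calls do, here is a minimal self-contained example (the input values are made up for illustration):

import tensorflow as tf

source_seq = tf.constant([[1, 2, 3, 0, 0]])  # (batch_size, seq_len) = (1, 5)
padding_mask = 1 - tf.cast(tf.equal(source_seq, 0), dtype=tf.float32)
print(padding_mask.shape)                             # (1, 5)
padding_mask = tf.expand_dims(padding_mask, axis=1)   # (1, 1, 5)
padding_mask = tf.expand_dims(padding_mask, axis=1)   # (1, 1, 1, 5)
print(padding_mask.shape)
# The two singleton axes broadcast over the attention heads and the query
# positions of the (batch_size, h, seq_len, seq_len) score tensor.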
class MultiHeadAttention(tf.keras.Model):
    def __init__(self, model_size, h):
        super(MultiHeadAttention, self).__init__()
        self.key_size = model_size // h
        self.h = h
        self.wq = tf.keras.layers.Dense(model_size)  # [tf.keras.layers.Dense(key_size) for _ in range(h)]
        self.wk = tf.keras.layers.Dense(model_size)  # [tf.keras.layers.Dense(key_size) for _ in range(h)]
        self.wv = tf.keras.layers.Dense(model_size)  # [tf.keras.layers.Dense(value_size) for _ in range(h)]
        self.wo = tf.keras.layers.Dense(model_size)
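The snippet above only shows the constructor. A call method along the following lines would complete it (a minimal sketch, not necessarily the exact implementation from the post; mask is the (batch_size, 1, 1, seq_len) padding mask built in train_step):

    def call(self, query, key, value, mask=None):
        batch_size = tf.shape(query)[0]

        def split_heads(x):
            # (batch, seq_len, model_size) -> (batch, h, seq_len, key_size)
            x = tf.reshape(x, (batch_size, -1, self.h, self.key_size))
            return tf.transpose(x, perm=[0, 2, 1, 3])

        q = split_heads(self.wq(query))
        k = split_heads(self.wk(key))
        v = split_heads(self.wv(value))

        # Scaled dot-product attention scores: (batch, h, seq_len_q, seq_len_k)
        score = tf.matmul(q, k, transpose_b=True)
        score /= tf.math.sqrt(tf.cast(self.key_size, tf.float32))
        if mask is not None:
            # Push padded positions toward -inf so softmax ignores them
            score += (1.0 - mask) * -1e9

        alignment = tf.nn.softmax(score, axis=-1)
        context = tf.matmul(alignment, v)  # (batch, h, seq_len_q, key_size)

        # Merge the heads back: (batch, seq_len_q, model_size)
        context = tf.transpose(context, perm=[0, 2, 1, 3])
        context = tf.reshape(context, (batch_size, -1, self.h * self.key_size))
        return self.wo(context)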
NUM_EPOCHS = 100
start_time = time.time()
for e in range(NUM_EPOCHS):
    for batch, (source_seq, target_seq_in, target_seq_out) in enumerate(dataset.take(-1)):
        loss = train_step(source_seq, target_seq_in,
                          target_seq_out)

    print('Epoch {} Loss {:.4f}'.format(
        e + 1, loss.numpy()))
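    # Sketch (hypothetical, not the author's exact code): the
    # 'Average elapsed time' lines in the log above suggest the loop
    # also reports timing every 10 epochs, using start_time from above.
    if (e + 1) % 10 == 0:
        end_time = time.time()
        print('Average elapsed time: {:.2f}s'.format(
            (end_time - start_time) / (e + 1)))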
@tf.function
def train_step(source_seq, target_seq_in, target_seq_out):
    with tf.GradientTape() as tape:
        # padding_mask of the source sequence,
        # to be used in the Encoder
        # and the middle Multi-Head Attention of the Decoder
        padding_mask = 1 - tf.cast(tf.equal(source_seq, 0), dtype=tf.float32)
        encoder_output = encoder(source_seq, padding_mask)
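        # Sketch of the remaining steps (truncated in the original snippet);
        # assumes loss_func and optimizer are defined elsewhere in the tutorial,
        # e.g. a masked sparse categorical cross-entropy and Adam.
        decoder_output = decoder(target_seq_in, encoder_output, padding_mask)
        loss = loss_func(target_seq_out, decoder_output)

    variables = encoder.trainable_variables + decoder.trainable_variables
    gradients = tape.gradient(loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))
    return loss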
    def call(self, sequence, encoder_output, padding_mask):
        # EMBEDDING AND POSITIONAL EMBEDDING
        seq_len = sequence.shape[1]
        embed_out = self.embedding(sequence)
        embed_out += pes[:seq_len, :]

        bot_sub_in = embed_out
        for i in range(self.num_layers):
            # BOTTOM MULTIHEAD SUB LAYER
            look_left_only_mask = tf.linalg.band_part(tf.ones((seq_len, seq_len)), -1, 0)
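With num_lower=-1 and num_upper=0, tf.linalg.band_part keeps only the lower triangle, so each query position can attend to itself and the positions to its left. A quick standalone check:

import tensorflow as tf

mask = tf.linalg.band_part(tf.ones((4, 4)), -1, 0)
print(mask.numpy())
# [[1. 0. 0. 0.]
#  [1. 1. 0. 0.]
#  [1. 1. 1. 0.]
#  [1. 1. 1. 1.]]
# Row i is the mask for query position i: keys 0..i are visible, later ones are not.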