https://github.com/DataTalksClub/nyc-tlc-data/releases/download/yellow/yellow_tripdata_2019-01.csv.gz
https://github.com/DataTalksClub/nyc-tlc-data/releases/download/yellow/yellow_tripdata_2019-02.csv.gz
https://github.com/DataTalksClub/nyc-tlc-data/releases/download/yellow/yellow_tripdata_2019-03.csv.gz
https://github.com/DataTalksClub/nyc-tlc-data/releases/download/yellow/yellow_tripdata_2019-04.csv.gz
https://github.com/DataTalksClub/nyc-tlc-data/releases/download/yellow/yellow_tripdata_2019-05.csv.gz
https://github.com/DataTalksClub/nyc-tlc-data/releases/download/yellow/yellow_tripdata_2019-06.csv.gz
https://github.com/DataTalksClub/nyc-tlc-data/releases/download/yellow/yellow_tripdata_2019-07.csv.gz
https://github.com/DataTalksClub/nyc-tlc-data/releases/download/yellow/yellow_tripdata_2019-08.csv.gz
https://github.com/DataTalksClub/nyc-tlc-data/releases/download/yellow/yellow_tripdata_2019-09.csv.gz
https://github.com/DataTalksClub/nyc-tlc-data/releases/download/yellow/yellow_tripdata_2019-10.csv.gz
http
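The file above is a plain list of download URLs for the NYC yellow taxi trip data, one month per line (the list is cut off after October). A minimal sketch of how one of these files could be loaded, assuming pandas and the yellow-taxi column names tpep_pickup_datetime / tpep_dropoff_datetime, which are not stated in the listing itself:

import pandas as pd

# Hypothetical usage of the first URL from the list above; pandas infers the
# gzip compression from the .csv.gz extension and reads straight from the URL
url = "https://github.com/DataTalksClub/nyc-tlc-data/releases/download/yellow/yellow_tripdata_2019-01.csv.gz"
df = pd.read_csv(url, nrows=100_000,
                 parse_dates=["tpep_pickup_datetime", "tpep_dropoff_datetime"])
print(df.shape)
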
# coding=utf-8
# Copyright 2023 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

{
  "public_identifier": "edumunozsala",
  "profile_pic_url": "https://s3.us-west-000.backblazeb2.com/proxycurl/person/edumunozsala/profile?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=0004d7f56a0400b0000000001%2F20230521%2Fus-west-000%2Fs3%2Faws4_request&X-Amz-Date=20230521T110202Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=35f2bd1038c2893d6f8ed5bc16c787b837376f4cecd39d4f9d6c803e9b437201",
  "background_cover_image_url": "https://s3.us-west-000.backblazeb2.com/proxycurl/person/edumunozsala/cover?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=0004d7f56a0400b0000000001%2F20230521%2Fus-west-000%2Fs3%2Faws4_request&X-Amz-Date=20230521T110202Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=b13820e856246444e5f2391103d2373e6ff9b61351206cb7a83b90d4e4d8e513",
  "first_name": "Eduardo Mu\u00f1oz",
  "last_name": "Sala",
  "full_name": "Eduardo Mu\u00f1oz Sala",
  "follower_count": null,
  "occupation": "Responsable de Proyectos de Aplicaciones e Integraci\u00f3n de datos at Berg\u00e9

from tensorflow.keras import layers

class MultiHeadAttention(layers.Layer):
    def __init__(self, n_heads):
        super(MultiHeadAttention, self).__init__()
        self.n_heads = n_heads

    def build(self, input_shape):
        self.d_model = input_shape[-1]
        # The model dimension must split evenly across the attention heads
        assert self.d_model % self.n_heads == 0
        # Calculate the dimension of every head or projection
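        # Hedged continuation (an assumption, not shown in the gist preview): the
        # per-head size plus the query/key/value and output projection layers that
        # a multi-head attention layer typically creates at this point. The names
        # d_head, query_lin, key_lin, value_lin and final_lin are hypothetical.
        self.d_head = self.d_model // self.n_heads
        self.query_lin = layers.Dense(units=self.d_model)
        self.key_lin = layers.Dense(units=self.d_model)
        self.value_lin = layers.Dense(units=self.d_model)
        self.final_lin = layers.Dense(units=self.d_model)
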
import tensorflow as tf

def scaled_dot_product_attention(queries, keys, values, mask):
    # Calculate the dot product, QK_transpose
    product = tf.matmul(queries, keys, transpose_b=True)
    # Get the scale factor
    keys_dim = tf.cast(tf.shape(keys)[-1], tf.float32)
    # Apply the scale factor to the dot product
    scaled_product = product / tf.math.sqrt(keys_dim)
    # Apply masking when it is required
    if mask is not None:
        scaled_product += (mask * -1e9)
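    # Hedged completion: the gist preview stops at the masking step; the standard
    # scaled dot-product attention finishes with a softmax over the key axis and a
    # weighted sum of the values. The variable names below are assumptions.
    attention_weights = tf.nn.softmax(scaled_product, axis=-1)
    attention = tf.matmul(attention_weights, values)
    return attention
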
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences

def predict_seq2seq_att(input_text, input_max_len, tokenizer_inputs, word2idx_outputs, idx2word_outputs):
    # Pick a random training sentence when no input text is given
    if input_text is None:
        input_text = input_data[np.random.choice(len(input_data))]
        print(input_text)
    # Tokenize the input text
    input_seq = tokenizer_inputs.texts_to_sequences([input_text])
    # Pad the sentence
    input_seq = pad_sequences(input_seq, maxlen=input_max_len, padding='post')
    # Get the encoder initial states
    en_initial_states = encoder.init_states(1)
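    # Hedged continuation (not from the gist): a typical greedy decoding loop for a
    # seq2seq model with attention. The encoder/decoder call signatures and the
    # '<sos>'/'<eos>' tokens are guesses about this particular implementation.
    en_outputs = encoder(tf.constant(input_seq), en_initial_states)
    de_input = tf.constant([[word2idx_outputs['<sos>']]])
    de_state_h, de_state_c = en_outputs[1:]
    out_words = []
    while True:
        de_output, de_state_h, de_state_c, _ = decoder(de_input, (de_state_h, de_state_c), en_outputs[0])
        de_input = tf.argmax(de_output, -1)
        out_words.append(idx2word_outputs[de_input.numpy()[0][0]])
        if out_words[-1] == '<eos>' or len(out_words) >= input_max_len:
            break
    return ' '.join(out_words)
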
import os
import tensorflow as tf

# Create an Adam optimizer and clip gradients by norm
optimizer = tf.keras.optimizers.Adam(clipnorm=5.0)
# Create a checkpoint object to save the model
checkpoint_dir = './training_ckpt_seq2seq'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
checkpoint = tf.train.Checkpoint(optimizer=optimizer,
                                 encoder=encoder,
                                 decoder=decoder)

losses, accuracies = main_train(encoder, decoder, dataset, EPOCHS, BATCH_SIZE, optimizer, checkpoint, checkpoint_prefix)
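Not shown in the snippet, but a checkpoint written this way can be brought back with the standard tf.train API. A minimal sketch, reusing the checkpoint and checkpoint_dir defined above:

# Restore the most recent checkpoint saved in checkpoint_dir, if any exists
latest = tf.train.latest_checkpoint(checkpoint_dir)
if latest is not None:
    checkpoint.restore(latest)
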
import tensorflow as tf

def loss_func(targets, logits):
    crossentropy = tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=True)
    # Mask padding values so they do not contribute to the loss
    mask = tf.math.logical_not(tf.math.equal(targets, 0))
    mask = tf.cast(mask, dtype=tf.int64)
    # Calculate the loss value
    loss = crossentropy(targets, logits, sample_weight=mask)
    return loss
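main_train above also returns accuracies; the matching metric is not shown in the preview, but a masked accuracy built along the same lines as loss_func could look like this (a sketch; the name acc_func is an assumption):

def acc_func(targets, logits):
    # Predicted token ids from the logits
    preds = tf.argmax(logits, axis=-1, output_type=tf.int64)
    # Ignore padding positions, exactly as in loss_func
    mask = tf.cast(tf.math.logical_not(tf.math.equal(targets, 0)), tf.float32)
    matches = tf.cast(tf.equal(tf.cast(targets, tf.int64), preds), tf.float32)
    return tf.reduce_sum(matches * mask) / tf.reduce_sum(mask)
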
from tensorflow.keras.preprocessing.sequence import pad_sequences

# pad the input sequences
encoder_inputs = pad_sequences(input_sequences, maxlen=input_max_len, padding='post')
print("encoder_inputs.shape:", encoder_inputs.shape)
print("encoder_inputs[0]:", encoder_inputs[0])
# pad the decoder input sequences
decoder_inputs = pad_sequences(target_sequences_inputs, maxlen=target_max_len, padding='post')
print("decoder_inputs[0]:", decoder_inputs[0])
print("decoder_inputs.shape:", decoder_inputs.shape)
# pad the target output sequences
decoder_targets = pad_sequences(target_sequences, maxlen=target_max_len, padding='post')
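A quick self-contained illustration of what padding='post' does, with toy data rather than the notebook's sequences:

from tensorflow.keras.preprocessing.sequence import pad_sequences

# Short sequences are padded with zeros on the right up to maxlen
print(pad_sequences([[5, 3], [7, 1, 9]], maxlen=4, padding='post'))
# [[5 3 0 0]
#  [7 1 9 0]]
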
# get the word to index mapping for input language
word2idx_inputs = tokenizer_inputs.word_index
print('Found %s unique input tokens.' % len(word2idx_inputs))
# get the word to index mapping for output language
word2idx_outputs = tokenizer_outputs.word_index
print('Found %s unique output tokens.' % len(word2idx_outputs))
# store number of output and input words for later
# remember to add 1 since indexing starts at 1
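# Hedged continuation (the variable names are assumptions, not from the gist):
# Keras Tokenizer indices start at 1 and 0 is reserved for padding, hence the +1
num_words_inputs = len(word2idx_inputs) + 1
num_words_output = len(word2idx_outputs) + 1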