
Eduardo M Sala edumunozsala

@edumunozsala
edumunozsala / uris_yellow_2019.tsv
Created February 20, 2024 07:13
URIs Yellow Taxi 2019
https://github.com/DataTalksClub/nyc-tlc-data/releases/download/yellow/yellow_tripdata_2019-01.csv.gz
https://github.com/DataTalksClub/nyc-tlc-data/releases/download/yellow/yellow_tripdata_2019-02.csv.gz
https://github.com/DataTalksClub/nyc-tlc-data/releases/download/yellow/yellow_tripdata_2019-03.csv.gz
https://github.com/DataTalksClub/nyc-tlc-data/releases/download/yellow/yellow_tripdata_2019-04.csv.gz
https://github.com/DataTalksClub/nyc-tlc-data/releases/download/yellow/yellow_tripdata_2019-05.csv.gz
https://github.com/DataTalksClub/nyc-tlc-data/releases/download/yellow/yellow_tripdata_2019-06.csv.gz
https://github.com/DataTalksClub/nyc-tlc-data/releases/download/yellow/yellow_tripdata_2019-07.csv.gz
https://github.com/DataTalksClub/nyc-tlc-data/releases/download/yellow/yellow_tripdata_2019-08.csv.gz
https://github.com/DataTalksClub/nyc-tlc-data/releases/download/yellow/yellow_tripdata_2019-09.csv.gz
https://github.com/DataTalksClub/nyc-tlc-data/releases/download/yellow/yellow_tripdata_2019-10.csv.gz
http
@edumunozsala
edumunozsala / finetune_llama_v2.py
Created July 19, 2023 17:33 — forked from younesbelkada/finetune_llama_v2.py
Fine tune Llama v2 models on Guanaco Dataset
# coding=utf-8
# Copyright 2023 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
@edumunozsala
edumunozsala / edumunozsala.json
Last active May 21, 2023 11:43
LinkedIn profile in JSON
{
"public_identifier": "edumunozsala",
"profile_pic_url": "https://s3.us-west-000.backblazeb2.com/proxycurl/person/edumunozsala/profile?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=0004d7f56a0400b0000000001%2F20230521%2Fus-west-000%2Fs3%2Faws4_request&X-Amz-Date=20230521T110202Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=35f2bd1038c2893d6f8ed5bc16c787b837376f4cecd39d4f9d6c803e9b437201",
"background_cover_image_url": "https://s3.us-west-000.backblazeb2.com/proxycurl/person/edumunozsala/cover?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=0004d7f56a0400b0000000001%2F20230521%2Fus-west-000%2Fs3%2Faws4_request&X-Amz-Date=20230521T110202Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=b13820e856246444e5f2391103d2373e6ff9b61351206cb7a83b90d4e4d8e513",
"first_name": "Eduardo Mu\\u00f1oz",
"last_name": "Sala",
"full_name": "Eduardo Mu\\u00f1oz Sala",
"follower_count": null,
"occupation": "Responsable de Proyectos de Aplicaciones e Integraci\\u00f3n de datos at Berg\\u00e9
@edumunozsala
edumunozsala / multihead_attention.py
Created October 26, 2020 18:48
Multi-head attention for Transformer
class MultiHeadAttention(layers.Layer):
    def __init__(self, n_heads):
        super(MultiHeadAttention, self).__init__()
        self.n_heads = n_heads

    def build(self, input_shape):
        self.d_model = input_shape[-1]
        assert self.d_model % self.n_heads == 0
        # Calculate the dimension of every head or projection
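The preview cuts off inside build(). A minimal sketch of how such a layer typically continues, assuming per-head Dense projections and the scaled_dot_product_attention function from the gist below; names like d_head, query_lin, split_heads and final_lin are illustrative assumptions, not the author's exact code:

import tensorflow as tf
from tensorflow.keras import layers

class MultiHeadAttention(layers.Layer):
    def __init__(self, n_heads):
        super(MultiHeadAttention, self).__init__()
        self.n_heads = n_heads

    def build(self, input_shape):
        self.d_model = input_shape[-1]
        assert self.d_model % self.n_heads == 0
        # Dimension of every head or projection (assumed name: d_head)
        self.d_head = self.d_model // self.n_heads
        # Linear projections for queries, keys and values, plus the output projection
        self.query_lin = layers.Dense(self.d_model)
        self.key_lin = layers.Dense(self.d_model)
        self.value_lin = layers.Dense(self.d_model)
        self.final_lin = layers.Dense(self.d_model)

    def split_heads(self, x, batch_size):
        # (batch, seq_len, d_model) -> (batch, n_heads, seq_len, d_head)
        x = tf.reshape(x, (batch_size, -1, self.n_heads, self.d_head))
        return tf.transpose(x, perm=[0, 2, 1, 3])

    def call(self, queries, keys, values, mask):
        batch_size = tf.shape(queries)[0]
        queries = self.split_heads(self.query_lin(queries), batch_size)
        keys = self.split_heads(self.key_lin(keys), batch_size)
        values = self.split_heads(self.value_lin(values), batch_size)
        # scaled_dot_product_attention is the helper defined in the gist below
        attention = scaled_dot_product_attention(queries, keys, values, mask)
        # Concatenate the heads and project back to d_model
        attention = tf.transpose(attention, perm=[0, 2, 1, 3])
        concat = tf.reshape(attention, (batch_size, -1, self.d_model))
        return self.final_lin(concat)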
@edumunozsala
edumunozsala / scaled_dot_product_attention.py
Created October 26, 2020 18:45
Scaled dot product attention for Transformer
def scaled_dot_product_attention(queries, keys, values, mask):
    # Calculate the dot product, QK_transpose
    product = tf.matmul(queries, keys, transpose_b=True)
    # Get the scale factor
    keys_dim = tf.cast(tf.shape(keys)[-1], tf.float32)
    # Apply the scale factor to the dot product
    scaled_product = product / tf.math.sqrt(keys_dim)
    # Apply masking when it is required
    if mask is not None:
        scaled_product += (mask * -1e9)
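The preview stops right after the mask is applied. For context, the standard remainder is a softmax over the key axis followed by a weighted sum of the values; a self-contained sketch of the full function (not the gist's verbatim ending):

import tensorflow as tf

def scaled_dot_product_attention(queries, keys, values, mask):
    # Dot product of queries and keys, QK^T
    product = tf.matmul(queries, keys, transpose_b=True)
    # Scale by the square root of the key dimension
    keys_dim = tf.cast(tf.shape(keys)[-1], tf.float32)
    scaled_product = product / tf.math.sqrt(keys_dim)
    # Mask out padded or future positions with a large negative value
    if mask is not None:
        scaled_product += (mask * -1e9)
    # Softmax turns the scores into attention weights over the keys
    attention_weights = tf.nn.softmax(scaled_product, axis=-1)
    # Weighted sum of the values is the attention output
    return tf.matmul(attention_weights, values)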
@edumunozsala
edumunozsala / predict_att_seq2seq.py
Created October 11, 2020 17:12
Make predictions for our seq2seq with attention model
def predict_seq2seq_att(input_text, input_max_len, tokenizer_inputs, word2idx_outputs, idx2word_outputs):
    if input_text is None:
        input_text = input_data[np.random.choice(len(input_data))]
    print(input_text)
    # Tokenize the input text
    input_seq = tokenizer_inputs.texts_to_sequences([input_text])
    # Pad the sentence
    input_seq = pad_sequences(input_seq, maxlen=input_max_len, padding='post')
    # Get the encoder initial states
    en_initial_states = encoder.init_states(1)
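The preview ends right after the encoder states are initialised. A hedged sketch of how a greedy decoding loop for such a model usually continues; the encoder/decoder call signatures, the returned state layout, and the '<sos>'/'<eos>' token keys are assumptions about this codebase, not its actual API:

    # Run the encoder once, then feed the decoder its own predictions step by step
    en_outputs = encoder(tf.constant(input_seq), en_initial_states)
    de_state_h, de_state_c = en_outputs[1:]
    de_input = tf.constant([[word2idx_outputs['<sos>']]])

    out_words = []
    while True:
        # One decoder step with attention over the encoder outputs (assumed signature)
        de_output, de_state_h, de_state_c, alignment = decoder(
            de_input, (de_state_h, de_state_c), en_outputs[0])
        # Greedy choice: most likely next token id
        next_id = int(tf.argmax(de_output, axis=-1).numpy()[0][0])
        word = idx2word_outputs[next_id]
        if word == '<eos>' or len(out_words) >= input_max_len:
            break
        out_words.append(word)
        de_input = tf.constant([[next_id]])

    print(' '.join(out_words))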
@edumunozsala
edumunozsala / train_ckpt_seq2seq.py
Created October 11, 2020 16:33
Train and checkpoint the seq2seq model
# Create an Adam optimizer and clips gradients by norm
optimizer = tf.keras.optimizers.Adam(clipnorm=5.0)
# Create a checkpoint object to save the model
checkpoint_dir = './training_ckpt_seq2seq'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
checkpoint = tf.train.Checkpoint(optimizer=optimizer,
                                 encoder=encoder,
                                 decoder=decoder)
losses, accuracies = main_train(encoder, decoder, dataset, EPOCHS, BATCH_SIZE, optimizer, checkpoint, checkpoint_prefix)
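After training, the same checkpoint object can bring the saved weights back, for example before running the prediction function above; a small usage sketch:

# Restore the most recent checkpoint written under checkpoint_dir
latest = tf.train.latest_checkpoint(checkpoint_dir)
if latest is not None:
    checkpoint.restore(latest)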
@edumunozsala
edumunozsala / loss_acc_fn_seq2seq.py
Created October 11, 2020 16:15
Create custom loss and accuracy functions for the seq2seq model
def loss_func(targets, logits):
    crossentropy = tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=True)
    # Mask padding values; they must not contribute to the loss
    mask = tf.math.logical_not(tf.math.equal(targets, 0))
    mask = tf.cast(mask, dtype=tf.int64)
    # Calculate the loss value
    loss = crossentropy(targets, logits, sample_weight=mask)
    return loss
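The gist title also mentions an accuracy function, but the preview is cut off before it appears. A masked-accuracy sketch in the same spirit (the name acc_func and its details are assumptions):

def acc_func(targets, logits):
    # Predicted token ids from the logits
    preds = tf.argmax(logits, axis=-1)
    targets = tf.cast(targets, preds.dtype)
    matches = tf.cast(tf.equal(targets, preds), tf.float32)
    # Ignore padding positions (target id 0) when averaging
    mask = tf.cast(tf.math.logical_not(tf.math.equal(targets, 0)), tf.float32)
    return tf.reduce_sum(matches * mask) / tf.reduce_sum(mask)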
@edumunozsala
edumunozsala / padding_seq2seq.py
Created October 11, 2020 16:07
Pad the input, target, and target-input sequences for the seq2seq model
# pad the input sequences
encoder_inputs = pad_sequences(input_sequences, maxlen=input_max_len, padding='post')
print("encoder_inputs.shape:", encoder_inputs.shape)
print("encoder_inputs[0]:", encoder_inputs[0])
# pad the decoder input sequences
decoder_inputs = pad_sequences(target_sequences_inputs, maxlen=target_max_len, padding='post')
print("decoder_inputs[0]:", decoder_inputs[0])
print("decoder_inputs.shape:", decoder_inputs.shape)
# pad the target output sequences
decoder_targets = pad_sequences(target_sequences, maxlen=target_max_len, padding='post')
@edumunozsala
edumunozsala / vocabularies_seq2seq.py
Created October 11, 2020 16:05
Create the vocabularies for the seq2seq model
# get the word to index mapping for input language
word2idx_inputs = tokenizer_inputs.word_index
print('Found %s unique input tokens.' % len(word2idx_inputs))
# get the word to index mapping for output language
word2idx_outputs = tokenizer_outputs.word_index
print('Found %s unique output tokens.' % len(word2idx_outputs))
# store number of output and input words for later
# remember to add 1 since indexing starts at 1
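The preview ends right before the sizes the last comment refers to are computed; the usual continuation (variable names assumed) is:

# Keras word_index starts at 1, so add 1 to get the vocabulary sizes
num_words_inputs = len(word2idx_inputs) + 1
num_words_output = len(word2idx_outputs) + 1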