Skip to content

Instantly share code, notes, and snippets.

@javichur
Created April 22, 2021 15:56
Show Gist options
  • Save javichur/7d460c048f9bef352dc8920fc5a1efb0 to your computer and use it in GitHub Desktop.
Save javichur/7d460c048f9bef352dc8920fc5a1efb0 to your computer and use it in GitHub Desktop.
modeling_bart_for_custom_generation_custom_loss
# Original version: https://github.com/huggingface/transformers/blob/master/src/transformers/models/bart/modeling_bart.py
# Attempt for NLP Spain AI challenge (http://www.spain-ai.com/hackathon2020_reto_NLP.php)
# Loss_v2 = number_of_wrong_products + fraction_of_wrong_tokens + 0.001 * default_loss
# Custom generation loss (fragment of the model's forward pass):
#   loss = (# sequences with at least one wrong token)
#        + (mean fraction of wrong tokens per sequence)
#        + 0.001 * (mean token-level cross-entropy)
# A token counts as "wrong" when its cross-entropy exceeds DELTA.
lm_logits = self.lm_head(outputs[0]) + self.final_logits_bias
masked_lm_loss = None
if labels is not None:
    # Margin: per-token cross-entropy at or below this value is treated as a hit.
    DELTA = 0.2

    # Take the batch size from the labels instead of hard-coding it, so a
    # smaller final batch or a different sequence length is handled correctly.
    # assumes labels is (batch, seq_len) -- TODO confirm against the caller
    batch_size = labels.size(0)

    # reduction='none' keeps one loss value per token instead of a scalar.
    loss_fct_array = nn.CrossEntropyLoss(reduction='none')
    distance_loss_array = loss_fct_array(
        lm_logits.view(-1, self.config.vocab_size),
        labels.view(-1),
    )
    distance_loss_mean = torch.mean(distance_loss_array)

    # Zero out every token whose loss is within the margin; what is left
    # non-zero marks the tokens considered wrong.
    distance_loss_array_con_margen = nn.functional.relu(distance_loss_array - DELTA)

    # One row per sequence, then count the wrong tokens in each row.
    # Everything stays on the device of lm_logits (no explicit 'cuda:0').
    wrong_mask = distance_loss_array_con_margen.view(batch_size, -1) != 0
    wrong_tokens_per_seq = wrong_mask.sum(dim=1)
    seq_len = wrong_mask.size(1)

    # Mean over the batch of the per-sequence fraction of wrong tokens.
    not_correct_num_tokens_percent = (wrong_tokens_per_seq / seq_len).sum() / batch_size

    # Number of sequences containing at least one wrong token (a count, not
    # averaged over the batch).
    uno_o_cero = (wrong_tokens_per_seq > 0).sum()

    # NOTE(review): both counting terms are built from comparisons, so they
    # carry no gradient; only the 0.001 * cross-entropy term drives training.
    masked_lm_loss = uno_o_cero + not_correct_num_tokens_percent + 0.001 * distance_loss_mean
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment