# coding=utf-8
# Copyright 2024 Google Inc. HuggingFace Inc. team. All rights reserved.
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
from packaging.version import parse

try:
    import tf_keras as keras
except (ModuleNotFoundError, ImportError):
    import keras

    if parse(keras.__version__).major > 2:
        raise ValueError(
            "Your currently installed version of Keras is Keras 3, but this is not yet supported in "
            "Transformers. Please install the backwards-compatible tf-keras package with "
            "`pip install tf-keras`."
        )
import numpy as np

# Assume labels is a possibly multidimensional array of categories / token indices
_, label_counts = np.unique(labels, axis=None, return_counts=True)  # Will flatten multidimensional arrays
# For multi-label classification you should normalize by the number of samples instead
label_frequencies = label_counts.astype(np.float64) / np.sum(label_counts)
label_logprobs = np.log(label_frequencies)
# Now you just need to assign the values in label_logprobs to your bias vector!
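A minimal sketch of that final step, assuming a Keras model whose output head is a Dense layer named "classifier" (the `model` variable and layer name are assumptions for illustration, not part of the gist):

# Hypothetical: `model` is a Keras classifier whose Dense output layer is named
# "classifier" and whose bias has one entry per label
classifier_head = model.get_layer("classifier")
classifier_head.bias.assign(label_logprobs.astype("float32"))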
import tensorflow as tf

def cosine_loss(y_true, y_pred):
    # Computes the cosine similarity loss across the last dimension.
    # The cosine similarity loss between two vectors a and b is defined as:
    #   In Keras:   -cos(a, b)
    #   In PyTorch: 1 - cos(a, b)
    # These definitions give the same gradient so it doesn't really matter; the PyTorch
    # version just avoids negative values for the loss.
    # To compute cos(a, b), just take the dot product of the two vectors
    # divided by the product of their magnitudes. If we normalize the vectors first,
    # their magnitudes are both 1, so cos(a, b) is just the dot product of the
    # normalized vectors.
    y_true = tf.math.l2_normalize(y_true, axis=-1)
    y_pred = tf.math.l2_normalize(y_pred, axis=-1)
    return -tf.reduce_sum(y_true * y_pred, axis=-1)
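A quick sanity check of the loss on toy vectors (values are illustrative):

print(cosine_loss(tf.constant([[1.0, 0.0]]), tf.constant([[1.0, 1.0]])).numpy())  # ≈ [-0.7071]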
@Rocketknight1
Rocketknight1 / keras_metrics.py
Created August 5, 2022 13:11
Keras metrics for use with Hugging Face transformers
import tensorflow as tf

class MaskedAccuracy(tf.keras.metrics.Metric):
    def __init__(self, name=None, dtype=None, clm=False, label_to_ignore=-100, **kwargs):
        super().__init__(name=name, dtype=dtype, **kwargs)
        self.label_to_ignore = label_to_ignore
        self.correct_predictions = self.add_weight(name='correct_predictions', initializer='zeros', dtype=tf.int64)
        self.all_predictions = self.add_weight(name='all_predictions', initializer='zeros', dtype=tf.int64)
        self.clm = clm
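    # The gist preview cuts off after __init__. A minimal sketch of the rest of the
    # metric, under the assumption that y_pred holds logits and clm=True means labels
    # should be shifted for causal LM; this completion is not part of the original gist.
    def update_state(self, y_true, y_pred, sample_weight=None):
        if self.clm:
            # Shift so that logits at position i are scored against the token at i+1
            y_true = y_true[:, 1:]
            y_pred = y_pred[:, :-1]
        predictions = tf.argmax(y_pred, axis=-1, output_type=tf.int64)
        y_true = tf.cast(y_true, tf.int64)
        mask = tf.not_equal(y_true, self.label_to_ignore)
        matches = tf.logical_and(mask, tf.equal(predictions, y_true))
        self.correct_predictions.assign_add(tf.math.count_nonzero(matches))
        self.all_predictions.assign_add(tf.math.count_nonzero(mask))

    def result(self):
        # Accuracy over non-ignored positions only
        return tf.math.divide_no_nan(
            tf.cast(self.correct_predictions, tf.float32),
            tf.cast(self.all_predictions, tf.float32),
        )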
# This is a new feature, so make sure to update to the latest version of transformers!
# You will also need to pip install tensorflow_text
import tensorflow as tf
from transformers import TFAutoModel, TFBertTokenizer

class EndToEndModel(tf.keras.Model):
    def __init__(self, checkpoint):
        super().__init__()
        self.tokenizer = TFBertTokenizer.from_pretrained(checkpoint)
        self.model = TFAutoModel.from_pretrained(checkpoint)
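    # The gist preview ends here. A minimal sketch of the forward pass, assuming the
    # model takes a batch of raw strings; this completion is an assumption, not part
    # of the original gist.
    def call(self, inputs):
        # TFBertTokenizer tokenizes in-graph and returns a dict of int tensors
        # (input_ids, attention_mask, token_type_ids) that the model accepts directly
        tokenized = self.tokenizer(inputs)
        return self.model(**tokenized)

# Usage sketch (checkpoint name is illustrative):
# model = EndToEndModel("bert-base-cased")
# outputs = model(tf.constant(["This model tokenizes its own inputs!"]))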