t5-base-dutch
import argparse

import pandas as pd
import torch
import pytorch_lightning as pl
from transformers import AutoTokenizer, T5Tokenizer, T5ForConditionalGeneration

tokenizer = AutoTokenizer.from_pretrained('yhavinga/t5-base-dutch')

# Read the emotion dataset (the file path was left blank in the original)
df = pd.read_csv()
# Map the English emotion labels to their Dutch equivalents
emotion_mapping = {
    'neutral': 'neutraal',
    'joy': 'vreugde',
    'fear': 'angst',
    'anger': 'woede',
    'sadness': 'verdriet',
    'love': 'liefde',
}
df['Category_updated'] = df['Category'].replace(emotion_mapping)
df['Category_updated']
# Unique Dutch emotion labels
emotions = list(df['Category_updated'].unique())
# Check how many tokens each label occupies; short labels keep the
# generation targets compact
for em in emotions:
    print(len(tokenizer.encode(em)))
class T5FineTuner(pl.LightningModule):
    def __init__(self, hparams):
        super(T5FineTuner, self).__init__()
        self.save_hyperparameters(hparams)
        self.model = T5ForConditionalGeneration.from_pretrained(hparams.model_name_or_path)
        self.tokenizer = T5Tokenizer.from_pretrained(hparams.tokenizer_name_or_path)
        self.validation_step_outputs = []
        self.losses = []
        self.learning_rate = hparams.learning_rate
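    # The gist shows only the constructor. Below is a minimal sketch of the
    # remaining hooks a LightningModule needs to train; the batch keys
    # ('input_ids', 'attention_mask', 'labels') are assumptions about how the
    # data loaders tokenize the dataset, not part of the original gist.
    def forward(self, input_ids, attention_mask=None, labels=None):
        return self.model(input_ids=input_ids,
                          attention_mask=attention_mask,
                          labels=labels)

    def training_step(self, batch, batch_idx):
        outputs = self(input_ids=batch['input_ids'],
                       attention_mask=batch['attention_mask'],
                       labels=batch['labels'])
        self.log('train_loss', outputs.loss)
        return outputs.loss

    def configure_optimizers(self):
        # Plain AdamW on the configured learning rate; the original may have
        # used warmup/weight-decay grouping (see warmup_steps/weight_decay below)
        return torch.optim.AdamW(self.model.parameters(), lr=self.learning_rate)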
args_dict = dict(
    data_dir="",  # path for data files
    output_dir="",  # path to save the checkpoints
    model_name_or_path="yhavinga/t5-base-dutch",
    tokenizer_name_or_path="yhavinga/t5-base-dutch",
    max_seq_length=512,
    learning_rate=3e-4,
    weight_decay=0.0,
    adam_epsilon=1e-8,
    warmup_steps=0,
    train_batch_size=8,
    eval_batch_size=8,
    num_train_epochs=2,
    gradient_accumulation_steps=16,  # effective batch size: 8 * 16 = 128
    n_gpu=1,
    fp_16=False,  # if True, train with 16-bit mixed precision (see `precision` below)
    max_grad_norm=1.0,
    seed=42,
)
args_dict.update({'data_dir': "Emotions", 'output_dir': "t5_emotion", 'num_train_epochs':2})
args = argparse.Namespace(**args_dict)
print(args_dict)
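# LoggingCallback is referenced below but not defined in the gist; a minimal
# placeholder, assuming it only needs to report metrics after each validation
# pass, could look like this:
class LoggingCallback(pl.Callback):
    def on_validation_end(self, trainer, pl_module):
        # Print whatever metrics Lightning has accumulated so far
        print(trainer.callback_metrics)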
train_params = dict(
    accumulate_grad_batches=args.gradient_accumulation_steps,
    max_epochs=args.num_train_epochs,
    devices=args.n_gpu,
    precision=16 if args.fp_16 else 32,
    gradient_clip_val=args.max_grad_norm,
    callbacks=[LoggingCallback()],
)
model = T5FineTuner(args)
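# The gist stops at constructing the model; to actually run training you would
# hand train_params to a Lightning Trainer. A sketch, assuming the data loaders
# are either defined inside T5FineTuner or passed to fit():
trainer = pl.Trainer(**train_params)
trainer.fit(model)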