Last active
June 2, 2021 11:22
-
-
Save dvsrepo/5fe4de2f842790fdfef8083a5a86a055 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer
from transformers import Trainer

# From here on, it's just regular fine-tuning with 🤗 transformers.
# The tokenizer and the model are both loaded from the same
# "distilbert-base-uncased" checkpoint; the sequence-classification model
# is requested with num_labels=4.
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased", num_labels=4
)
def tokenize_function(examples):
    """Tokenize the ``"text"`` field of ``examples`` with the module-level tokenizer.

    The tokenizer is called with ``padding="max_length"`` and
    ``truncation=True``, and its output is returned unchanged.

    Args:
        examples: A mapping with a ``"text"`` entry. This function is passed
            to ``dataset.map(..., batched=True)``, so ``examples["text"]`` is
            presumably a list of strings -- confirm against the dataset.

    Returns:
        Whatever the tokenizer call returns for the given text.
    """
    return tokenizer(examples["text"], padding="max_length", truncation=True)
# NOTE(review): `dataset` is not defined in this snippet; it is expected to
# already exist and to expose a "text" column -- confirm where it is created.
# Tokenize in batches, then shuffle with a fixed seed for reproducible order.
train_dataset = dataset.map(tokenize_function, batched=True).shuffle(seed=42)

# No `args` are passed, so the Trainer runs with its default settings.
trainer = Trainer(model=model, train_dataset=train_dataset)
trainer.train()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment