class BertForSequenceClassification(BertPreTrainedModel):
    r"""
        **labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size,)``:
            Labels for computing the sequence classification/regression loss.
            Indices should be in ``[0, ..., config.num_labels - 1]``.
            If ``config.num_labels == 1`` a regression loss is computed (Mean-Square loss),
            If ``config.num_labels > 1`` a classification loss is computed (Cross-Entropy).

    Outputs: `Tuple` comprising various elements depending on the configuration (config) and inputs:
        **loss**: (`optional`, returned when ``labels`` is provided) ``torch.FloatTensor`` of shape ``(1,)``:
            Classification (or regression if config.num_labels==1) loss.
        **logits**: ``torch.FloatTensor`` of shape ``(batch_size, config.num_labels)``
            Classification (or regression if config.num_labels==1) scores (before SoftMax).
        **hidden_states**: (`optional`, returned when ``config.output_hidden_states=True``)
            list of ``torch.FloatTensor`` (one for the output of each layer + the output of the embeddings)
            of shape ``(batch_size, sequence_length, hidden_size)``:
            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        **attentions**: (`optional`, returned when ``config.output_attentions=True``)
            list of ``torch.FloatTensor`` (one for each layer) of shape ``(batch_size, num_heads, sequence_length, sequence_length)``:
            Attention weights after the attention softmax, used to compute the weighted average in the self-attention heads.

    Examples::

        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
        labels = torch.tensor([1]).unsqueeze(0)  # Batch size 1
        outputs = model(input_ids, labels=labels)
        loss, logits = outputs[:2]

    """
    def __init__(self, config):
        super(BertForSequenceClassification, self).__init__(config)
        self.num_labels = config.num_labels

        self.bert = BertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, self.config.num_labels)

        self.apply(self.init_weights)

    def forward(self, input_ids, token_type_ids=None, attention_mask=None, labels=None,
                position_ids=None, head_mask=None):
        outputs = self.bert(input_ids, position_ids=position_ids, token_type_ids=token_type_ids,
                            attention_mask=attention_mask, head_mask=head_mask)
        pooled_output = outputs[1]

        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)

        outputs = (logits,) + outputs[2:]  # add hidden states and attentions if they are here

        if labels is not None:
            if self.num_labels == 1:
                # We are doing regression
                loss_fct = MSELoss()
                loss = loss_fct(logits.view(-1), labels.view(-1))
            else:
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
            outputs = (loss,) + outputs

        return outputs  # (loss), logits, (hidden_states), (attentions)
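The docstring example above only exercises the classification branch. Below is a minimal sketch of the regression branch (``config.num_labels == 1``, so the forward pass uses ``MSELoss`` instead of ``CrossEntropyLoss``); the ``pytorch_transformers`` import path and the ``num_labels`` keyword forwarding are assumptions based on the library these excerpts appear to come from, so adjust them to your install:

    import torch
    from pytorch_transformers import BertTokenizer, BertForSequenceClassification

    # num_labels=1 flips the head into regression mode: one output unit per example,
    # float targets, and MSE loss instead of cross-entropy.
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=1)

    input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
    labels = torch.tensor([0.7])  # one float target per example, shape (batch_size,)
    outputs = model(input_ids, labels=labels)
    loss, logits = outputs[:2]  # MSE loss and logits of shape (batch_size, 1)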
class RobertaForSequenceClassification(BertPreTrainedModel):
    r"""
        **labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size,)``:
            Labels for computing the sequence classification/regression loss.
            Indices should be in ``[0, ..., config.num_labels - 1]``.
            If ``config.num_labels == 1`` a regression loss is computed (Mean-Square loss),
            If ``config.num_labels > 1`` a classification loss is computed (Cross-Entropy).

    Outputs: `Tuple` comprising various elements depending on the configuration (config) and inputs:
        **loss**: (`optional`, returned when ``labels`` is provided) ``torch.FloatTensor`` of shape ``(1,)``:
            Classification (or regression if config.num_labels==1) loss.
        **logits**: ``torch.FloatTensor`` of shape ``(batch_size, config.num_labels)``
            Classification (or regression if config.num_labels==1) scores (before SoftMax).
        **hidden_states**: (`optional`, returned when ``config.output_hidden_states=True``)
            list of ``torch.FloatTensor`` (one for the output of each layer + the output of the embeddings)
            of shape ``(batch_size, sequence_length, hidden_size)``:
            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        **attentions**: (`optional`, returned when ``config.output_attentions=True``)
            list of ``torch.FloatTensor`` (one for each layer) of shape ``(batch_size, num_heads, sequence_length, sequence_length)``:
            Attention weights after the attention softmax, used to compute the weighted average in the self-attention heads.

    Examples::

        tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
        model = RobertaForSequenceClassification.from_pretrained('roberta-base')
        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
        labels = torch.tensor([1]).unsqueeze(0)  # Batch size 1
        outputs = model(input_ids, labels=labels)
        loss, logits = outputs[:2]

    """
    config_class = RobertaConfig
    pretrained_model_archive_map = ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP
    base_model_prefix = "roberta"

    def __init__(self, config):
        super(RobertaForSequenceClassification, self).__init__(config)
        self.num_labels = config.num_labels

        self.roberta = RobertaModel(config)
        self.classifier = RobertaClassificationHead(config)

    def forward(self, input_ids, token_type_ids=None, attention_mask=None, labels=None,
                position_ids=None, head_mask=None):
        outputs = self.roberta(input_ids, position_ids=position_ids, token_type_ids=token_type_ids,
                               attention_mask=attention_mask, head_mask=head_mask)
        sequence_output = outputs[0]
        logits = self.classifier(sequence_output)

        outputs = (logits,) + outputs[2:]  # add hidden states and attentions if they are here

        if labels is not None:
            if self.num_labels == 1:
                # We are doing regression
                loss_fct = MSELoss()
                loss = loss_fct(logits.view(-1), labels.view(-1))
            else:
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
            outputs = (loss,) + outputs

        return outputs  # (loss), logits, (hidden_states), (attentions)
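``RobertaClassificationHead`` is referenced above but not included in this gist. For context, in the library of this vintage it is roughly the module sketched below (reproduced as an illustrative sketch, not an authoritative copy): instead of BERT's pooled output followed by dropout and a linear layer, it takes the hidden state of the first token (``<s>``, RoBERTa's equivalent of ``[CLS]``) and runs it through dropout, a dense layer with tanh, dropout again, and the output projection:

    import torch
    from torch import nn

    class RobertaClassificationHead(nn.Module):
        """Head for sentence-level classification tasks (illustrative sketch)."""

        def __init__(self, config):
            super(RobertaClassificationHead, self).__init__()
            self.dense = nn.Linear(config.hidden_size, config.hidden_size)
            self.dropout = nn.Dropout(config.hidden_dropout_prob)
            self.out_proj = nn.Linear(config.hidden_size, config.num_labels)

        def forward(self, features, **kwargs):
            x = features[:, 0, :]  # take the <s> token (equivalent to [CLS])
            x = self.dropout(x)
            x = self.dense(x)
            x = torch.tanh(x)
            x = self.dropout(x)
            x = self.out_proj(x)
            return x

This is also why the RoBERTa forward pass above feeds ``outputs[0]`` (the full sequence output) to the classifier, whereas the BERT head uses ``outputs[1]`` (the pooled output).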