Text classification using transformers and PyTorch: a custom embedding layer plus an nn.TransformerEncoder, fine-tuned on the emotion dataset with the Hugging Face Trainer API.
import torch
from torch import nn
from transformers import AutoTokenizer, AutoConfig, Trainer, TrainingArguments
from datasets import load_dataset
from sklearn.metrics import accuracy_score, f1_score
class Embeddings(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.token_embeddings = nn.Embedding(config.vocab_size,   # 30522
                                             config.hidden_size)  # 768
        self.position_embeddings = nn.Embedding(config.max_position_embeddings,  # 512
                                                config.hidden_size)              # 768
        self.layer_norm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)  # 1e-12
        self.dropout = nn.Dropout(config.hidden_dropout_prob)  # 0.1. in the original code sample no argument is given, which defaults the probability to 0.5

    def forward(self, input_ids):
        seq_length = input_ids.size(1)  # number of columns in the batch matrix, i.e. tokens per sequence
        # vector [0, 1, 2, ..., seq_length - 1]; created on input_ids' device so the model also runs on GPU
        position_ids = torch.arange(seq_length, dtype=torch.long, device=input_ids.device).unsqueeze(0)
        token_embeddings = self.token_embeddings(input_ids)  # (batch, seq_length, 768) tensor, e.g. 64x85x768: each token id maps to a 768-d vector
        position_embeddings = self.position_embeddings(position_ids)  # (1, seq_length, 768) tensor, broadcast over the batch
        embeddings = token_embeddings + position_embeddings
        embeddings = self.layer_norm(embeddings)  # (batch, seq_length, 768) tensor
        embeddings = self.dropout(embeddings)  # randomly zeroes some entries during training
        return embeddings
class TransformerForSequenceClassification(nn.Module):
    def __init__(self, config, num_labels):
        super().__init__()
        self.embeddings = Embeddings(config)
        # batch_first=True so the encoder accepts the (batch, seq_length, hidden) tensors produced by Embeddings
        encoder_layer = nn.TransformerEncoderLayer(d_model=config.hidden_size,
                                                   nhead=config.num_attention_heads,  # 12
                                                   batch_first=True)
        self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=config.num_hidden_layers)  # 12
        self.dropout = nn.Dropout(config.hidden_dropout_prob)  # 0.1
        self.classifier = nn.Linear(config.hidden_size, num_labels)  # linear transformation from a 768-d vector to a num_labels-d (here 6-d) vector

    def forward(self, input_ids, attention_mask=None, labels=None, **kwargs):
        x = self.embeddings(input_ids)  # applies Embeddings.forward; each input_id lies in [0, vocab_size)
        # mask padding positions so they are not attended to; attention_mask is 1 for real tokens, 0 for padding
        padding_mask = (attention_mask == 0) if attention_mask is not None else None
        x = self.encoder(x, src_key_padding_mask=padding_mask)[:, 0, :]  # hidden state of the [CLS] token, commonly used as the feature for classification
        x = self.dropout(x)  # ~10% of entries are zeroed out; a regularization technique to prevent overfitting
        logits = self.classifier(x)  # (batch, num_labels) matrix, e.g. 64x6: unnormalized scores that softmax turns into label probabilities
        if labels is not None:
            # return the loss alongside the logits so the Hugging Face Trainer can train this plain nn.Module
            loss = nn.functional.cross_entropy(logits, labels)
            return {"loss": loss, "logits": logits}
        return {"logits": logits}
def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    f1 = f1_score(labels, preds, average="weighted")
    acc = accuracy_score(labels, preds)
    return {"accuracy": acc, "f1": f1}
def create_training_args(emotions_encoded):
    batch_size = 64
    logging_steps = len(emotions_encoded["train"]) // batch_size  # log once per epoch
    return TrainingArguments(output_dir="out",
                             num_train_epochs=2,
                             learning_rate=1e-5,
                             per_device_train_batch_size=batch_size,
                             per_device_eval_batch_size=batch_size,
                             weight_decay=0.01,
                             evaluation_strategy="epoch",
                             disable_tqdm=False,
                             logging_steps=logging_steps,
                             push_to_hub=False,
                             log_level="error")
if __name__ == "__main__":
    def tokenize(batch):
        # special tokens are kept (the default) so that [CLS] is prepended: the model classifies on its hidden state
        return tokenizer(batch["text"], padding=True, truncation=True)

    emotions = load_dataset("emotion")  # https://huggingface.co/docs/datasets/v1.1.3/loading_datasets.html
    model_ckpt = "bert-base-uncased"
    config = AutoConfig.from_pretrained(model_ckpt)
    tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
    transformer = TransformerForSequenceClassification(config, 6)  # there are 6 labels in the emotion dataset
    emotions_encoded = emotions.map(tokenize, batched=True, batch_size=None)  # batch_size=None tokenizes each split in one batch, so padding is uniform
    training_args = create_training_args(emotions_encoded)
    trainer = Trainer(model=transformer,
                      args=training_args,
                      compute_metrics=compute_metrics,
                      train_dataset=emotions_encoded["train"],
                      eval_dataset=emotions_encoded["validation"])  # no tokenizer is passed since the input is already tokenized
    trainer.train()
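
Once training finishes, the model can be queried directly. A minimal inference sketch, not part of the original gist; the label names below assume the standard ordering of the emotion dataset:

    text = "i am so happy today"
    device = next(transformer.parameters()).device  # the Trainer may have moved the model to GPU
    inputs = tokenizer(text, return_tensors="pt")
    transformer.eval()
    with torch.no_grad():
        out = transformer(input_ids=inputs["input_ids"].to(device),
                          attention_mask=inputs["attention_mask"].to(device))
    label_names = ["sadness", "joy", "love", "anger", "fear", "surprise"]  # assumed label order
    print(label_names[out["logits"].argmax(-1).item()])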