Skip to content

Instantly share code, notes, and snippets.

View Sayeet's full-sized avatar

Saeed Tamboli Sayeet

View GitHub Profile
import random
import numpy as np
'''
Based on the `run_glue.py` example script from Hugging Face transformers:
https://github.com/huggingface/transformers/blob/5bfcd0485ece086ebcbed2d008813037968a9e58/examples/run_glue.py#L128
and on this BERT fine-tuning tutorial:
https://mccormickml.com/2019/07/22/BERT-fine-tuning/
'''
# Bundle each split's input ids, attention masks and labels into a single
# TensorDataset, one dataset per split (train / validation / test).
train_dataset = TensorDataset(
    train_input_ids,
    train_attention_masks,
    train_labels,
)
val_dataset = TensorDataset(
    val_input_ids,
    val_attention_masks,
    val_labels,
)
test_dataset = TensorDataset(
    test_input_ids,
    test_attention_masks,
    test_labels,
)

# Loader for the training split: batches of 16 examples, with the sample
# order chosen by a RandomSampler over the training dataset.
train_dataloader = DataLoader(
    train_dataset,
    sampler=RandomSampler(train_dataset),
    batch_size=16,
)
def tokenize(comments,labels):
input_ids_list = []
attention_masks_list= []
for comment in comments:
encoded_dict = tokenizer.encode_plus(
comment,
add_special_tokens = True,
max_length = 64,