Charlie O'Neill (charlieoneill11) - GitHub Gists
class TicTacToe:
    def __init__(self):
        # Initialise an empty 3x3 board, stored as a flat list of nine cells
        self.board = ['-' for _ in range(9)]
        self.current_player = 'X'  # X will start

    def make_move(self, position):
        """Make a move on the board if the chosen square is free."""
        if self.board[position] == '-':
            self.board[position] = self.current_player
            # Hand the turn to the other player after a successful move
            self.current_player = 'O' if self.current_player == 'X' else 'X'
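A minimal usage sketch: make two moves and print the grid. The display loop is illustrative, not part of the gist.

game = TicTacToe()
game.make_move(4)  # X takes the centre
game.make_move(0)  # O replies in the top-left corner
for row in range(3):
    print(' '.join(game.board[row * 3:(row + 1) * 3]))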
import openai  # pre-1.0 openai client, matching the ChatCompletion.create call below

def generate_text(model, messages):
    """Send a chat request and return the model's reply as plain text."""
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
    )
    return response.choices[0].message.content.strip()

def generate_problematic_prompt(adversarial_model, messages):
    """Ask an adversarial model to generate a prompt designed to trip up another model."""
    problematic_prompt = generate_text(adversarial_model, messages)
    return problematic_prompt
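A hedged usage sketch: the model name and seed messages below are placeholders, not values from the gist.

seed_messages = [
    {"role": "system", "content": "You craft prompts that stress-test chatbots."},
    {"role": "user", "content": "Write one prompt likely to elicit a confusing reply."},
]
prompt = generate_problematic_prompt("gpt-4", seed_messages)  # model name is an assumption
print(prompt)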
preds_output = trainer.predict(offensive_encoded["validation"])
preds_output.metrics
> {'test_loss': 0.42069146037101746,
   'test_accuracy': 0.8013595166163142,
   'test_f1': 0.8079235239615667,
   'test_runtime': 3.3561,
   'test_samples_per_second': 394.507,
   'test_steps_per_second': 6.257}
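At roughly 80% accuracy and a weighted F1 of 0.81, the fine-tuned model comfortably beats the majority-class baseline below (about 65%). A possible next step, not in the gist, is to inspect the confusion matrix:

import numpy as np
from sklearn.metrics import confusion_matrix

y_preds = np.argmax(preds_output.predictions, axis=1)
y_valid = np.array(offensive_encoded["validation"]["label"])
print(confusion_matrix(y_valid, y_preds))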
from sklearn.dummy import DummyClassifier

# Majority-class baseline: always predict the most frequent label
dummy_clf = DummyClassifier(strategy="most_frequent")
dummy_clf.fit(X_train, y_train)
dummy_clf.score(X_valid, y_valid)
> 0.653323262839879
from transformers import Trainer

trainer = Trainer(model=model,
                  args=training_args,
                  compute_metrics=compute_metrics,
                  train_dataset=offensive_encoded["train"],
                  eval_dataset=offensive_encoded["validation"],
                  tokenizer=tokenizer)
trainer.train();
from transformers import Trainer, TrainingArguments

batch_size = 64
logging_steps = len(offensive_encoded["train"]) // batch_size
model_name = f"{model_ckpt}-finetuned-tweet_eval-offensive"
training_args = TrainingArguments(output_dir=model_name,
                                  num_train_epochs=2,
                                  learning_rate=2e-5,
                                  per_device_train_batch_size=batch_size,
                                  per_device_eval_batch_size=batch_size,
                                  logging_steps=logging_steps)
from sklearn.metrics import accuracy_score, f1_score

def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    f1 = f1_score(labels, preds, average="weighted")
    acc = accuracy_score(labels, preds)
    return {"accuracy": acc, "f1": f1}
import torch
from transformers import AutoModelForSequenceClassification

num_labels = 2
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = (AutoModelForSequenceClassification
         .from_pretrained(model_ckpt, num_labels=num_labels)
         .to(device))
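A small, illustrative sanity check that the classification head and device are as expected:

print(model.config.num_labels)          # 2
print(next(model.parameters()).device)  # cuda:0 if a GPU is available, else cpu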
offensive_encoded = offensive.map(tokenize, batched=True, batch_size=None)
print(offensive_encoded["train"].column_names)
> ['text', 'label', 'input_ids', 'attention_mask']
def tokenize(batch):
    return tokenizer(batch["text"], padding=True, truncation=True)

print(tokenize(offensive["train"][:2]))
> {'input_ids': [[101, 1030, 5310, 23648, 1012, 1012, 1012, 2040, 14977, 1012, 2574, 2111, 2097, 3305, 2008, 2027, 5114, 2498, 2013, 2206, 1037, 6887, 16585, 8958, 1012, 2468, 1037, 3003, 1997, 2115, 2111, 2612, 2030, 2393, 1998, 2490, 2115, 3507, 2406, 3549, 1012, 102], [101, 1030, 5310, 2809, 2086, 1996, 10643, 6380, 8112, 1521, 1055, 11214, 1012, 7987, 20175, 8237, 7747, 19006, 2003, 2004, 6887, 16585, 2004, 2037, 8275, 2343, 1012, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], 'attention_mask': [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]}
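In the output above, the shorter second tweet is padded with zeros and its attention_mask marks those padding positions with 0. Neither offensive, model_ckpt, nor tokenizer is defined anywhere in these previews; a plausible setup, assuming a DistilBERT checkpoint (an assumption, chosen because the input_ids match an uncased BERT-family vocabulary) and the tweet_eval dataset implied by the model_name above:

from datasets import load_dataset
from transformers import AutoTokenizer

offensive = load_dataset("tweet_eval", "offensive")  # dataset config inferred from model_name
model_ckpt = "distilbert-base-uncased"  # assumed checkpoint, not shown in the gist
tokenizer = AutoTokenizer.from_pretrained(model_ckpt)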