Skip to content

Instantly share code, notes, and snippets.

@3outeille
Created November 16, 2023 22:58
Show Gist options
  • Save 3outeille/b73be24d209b25e136bb39a57d78a807 to your computer and use it in GitHub Desktop.
Save 3outeille/b73be24d209b25e136bb39a57d78a807 to your computer and use it in GitHub Desktop.
from copy import deepcopy
import torch
from datasets import load_dataset
from torch.optim import SGD
from torch.utils.data import DataLoader
from transformers import AutoModelForCausalLM, AutoTokenizer
import random
import os
import numpy as np
def seed_everything(seed: int) -> None:
    """Seed every random number generator this script relies on.

    Seeds Python's ``random`` module, NumPy, and PyTorch (CPU and all CUDA
    devices), exports ``PYTHONHASHSEED``, and configures cuDNN for
    reproducible kernel selection.

    Args:
        seed: Value used to seed all generators.
    """
    random.seed(seed)
    # NOTE: this only affects hash randomization of child processes; the
    # current interpreter reads PYTHONHASHSEED once at startup.
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not just the current one. Silently ignored
    # when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode autotunes convolution algorithms and may select a
    # different one on each run, which defeats the determinism requested
    # above, so it must be disabled.
    torch.backends.cudnn.benchmark = False
if __name__ == "__main__":
    # Sanity check: fine-tune two identical copies of a causal LM, one on CPU
    # and one on GPU, on the same single sample, and log both losses to
    # Weights & Biases so the curves can be compared.
    import wandb

    # Alternative checkpoint: "bigscience/bloom-560m"
    MODEL = "gpt2"
    DATASET = "imdb"
    NUM_EPOCHS = 25
    LR = 1e-2
    SEED = 69

    torch.cuda.empty_cache()
    seed_everything(SEED)

    # Take a single example and overwrite its text with a fixed short prompt
    # so both models see exactly the same, tiny input every epoch.
    train_dataset = load_dataset(DATASET, split="train[:1]")
    train_dataset = train_dataset.map(lambda x: {"text": "I rented I"})
    train_dataloader = DataLoader(train_dataset, batch_size=1, shuffle=False)

    # Both models start from the exact same weights.
    model_cpu = AutoModelForCausalLM.from_pretrained(MODEL)
    model_gpu = deepcopy(model_cpu).to("cuda")

    tokenizer = AutoTokenizer.from_pretrained(MODEL)
    # Reuse EOS as the padding token. Registering a brand-new "[PAD]" token
    # here would assign it an id outside the model's embedding matrix (the
    # embeddings are never resized), so any real padding would index out of
    # range.
    tokenizer.pad_token = tokenizer.eos_token

    optim_cpu = SGD(model_cpu.parameters(), lr=LR)
    optim_gpu = SGD(model_gpu.parameters(), lr=LR)

    model_cpu.train()
    model_gpu.train()

    def get_time_name():
        """Return the current local time formatted for use in the run name."""
        import datetime

        today = datetime.datetime.now()
        return today.strftime("%d/%m/%Y_%H:%M:%S")

    wandb.init(
        project="sanity-check",
        name=f"{get_time_name()}.test",
        config={
            "model": MODEL,
            "dataset": DATASET,
            "epochs": NUM_EPOCHS,
            "learning_rate": LR,
            "seed": SEED,
        },
    )

    for epoch in range(NUM_EPOCHS):
        for batch in train_dataloader:
            inputs = tokenizer(batch["text"], padding=True, truncation=True, return_tensors="pt")
            inputs_cpu = dict(inputs.items())
            inputs_gpu = {name: tensor.to("cuda") for name, tensor in inputs.items()}

            # Causal-LM labels are the input ids themselves. Padded positions
            # are set to -100 so they are excluded from the loss (a no-op
            # while every batch holds a single, unpadded sample).
            labels_cpu = inputs_cpu["input_ids"].masked_fill(inputs_cpu["attention_mask"] == 0, -100)
            labels_gpu = inputs_gpu["input_ids"].masked_fill(inputs_gpu["attention_mask"] == 0, -100)

            outputs_cpu = model_cpu(**inputs_cpu, labels=labels_cpu)
            outputs_gpu = model_gpu(**inputs_gpu, labels=labels_gpu)

            optim_cpu.zero_grad()
            outputs_cpu.loss.backward()
            optim_cpu.step()

            optim_gpu.zero_grad()
            outputs_gpu.loss.backward()
            optim_gpu.step()

            # Log plain Python floats rather than live tensors so the logger
            # does not receive graph-attached / CUDA-resident objects.
            wandb.log(
                {
                    "train_loss_cpu": outputs_cpu.loss.item(),
                    "train_loss_gpu": outputs_gpu.loss.item(),
                    "epoch": epoch,
                }
            )

    wandb.finish()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment