Example of PEFT fine-tuning with TRL's SFTTrainer
!pip install transformers==4.30
!pip install accelerate
!pip install trl peft
!pip install bitsandbytes
!pip install xformers==0.0.22
!pip install autoawq
import torch  # needed if the 4-bit bnb_4bit_compute_dtype below is enabled
from peft import get_peft_model, LoraConfig, prepare_model_for_kbit_training
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments, GenerationConfig
from datasets import Dataset
target_modules = ['q_proj','k_proj','v_proj','o_proj','gate_proj','down_proj','up_proj','lm_head']
lora_config = LoraConfig(
    r=16,                            # LoRA rank
    target_modules=target_modules,
    lora_alpha=8,                    # scaling factor; updates are scaled by alpha / r
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)
output_dir = "./tinyllama" # Local directory where checkpoints are written (push_to_hub is disabled below)
per_device_train_batch_size = 3
gradient_accumulation_steps = 2
optim = "paged_adamw_32bit"
save_strategy="steps"
save_steps = 10
logging_steps = 10
learning_rate = 2e-3
max_grad_norm = 0.3 # Sets limit for gradient clipping
max_steps = 200 # Number of training steps
warmup_ratio = 0.03 # Portion of steps used for learning_rate to warmup from 0
lr_scheduler_type = "cosine" # I chose cosine to avoid learning plateaus
training_args = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_strategy=save_strategy,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    lr_scheduler_type=lr_scheduler_type,
    push_to_hub=False,
    report_to="none",
)
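# For reference: each optimizer step consumes
# per_device_train_batch_size * gradient_accumulation_steps = 3 * 2 = 6
# sequences per device, so 200 steps see roughly 1,200 sequences.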
# No quantization by default; uncomment an option here or use the 4-bit config below.
bnb_config = BitsAndBytesConfig(
    # load_in_8bit=True
)
# bnb_config = BitsAndBytesConfig(
# load_in_4bit=True,
# bnb_4bit_quant_type="nf4",
# bnb_4bit_use_double_quant=True,
# bnb_4bit_compute_dtype=torch.bfloat16,
# )
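# If the 4-bit config above is enabled, the usual PEFT recipe (a sketch, not
# exercised in this gist) is to prepare the quantized model before attaching
# the LoRA adapters, e.g. after from_pretrained(..., quantization_config=bnb_config):
# model = prepare_model_for_kbit_training(model)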
model_name = "PY007/TinyLlama-1.1B-step-50K-105b"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    # quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)
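# Optional sanity check (a sketch, not part of the original recipe): confirm
# that every name in target_modules actually appears as a submodule of this
# model before LoRA is attached.
leaf_names = {name.split(".")[-1] for name, _ in model.named_modules()}
missing = set(target_modules) - leaf_names
assert not missing, f"target_modules not found in model: {missing}"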
train_data_seg = ["This is very interesting.", "This is super interesting.", "This is nice"]
eval_data_seg = ["How is this?"]
# TODO: load train data
train_dataset = Dataset.from_dict({'text':train_data_seg})
eval_dataset = Dataset.from_dict({'text':eval_data_seg})
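# One way to replace the toy sentences above (a sketch; the dataset name is
# illustrative, not from the original gist — any dataset with a 'text' column works):
# from datasets import load_dataset
# train_dataset = load_dataset("imdb", split="train[:1000]")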
from trl import SFTTrainer
# Wrap the base model with the LoRA adapters defined above; the trainer and
# the final merge operate on this wrapped model.
peft_model = get_peft_model(model, lora_config)
peft_model.print_trainable_parameters()
trainer = SFTTrainer(
    peft_model,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    dataset_text_field="text",
    max_seq_length=256,
    args=training_args,
)
trainer.train()
# Fold the trained LoRA weights back into the base model and save it.
merged_model = peft_model.merge_and_unload()
merged_model.save_pretrained("/tmp/my-model")
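# A minimal inference sketch with the merged model (the prompt is illustrative;
# the tokenizer comes from the base checkpoint since the gist never saved one):
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.save_pretrained("/tmp/my-model")
inputs = tokenizer("This is", return_tensors="pt").to(merged_model.device)
outputs = merged_model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))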