@svpino
Last active January 25, 2024 13:12
Running a fine-tuned Llama 2 model
import torch
import peft
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM
from huggingface_hub.hf_api import HfFolder

# Authenticate with the Hugging Face Hub (required to download the gated Llama 2 weights).
ACCESS_TOKEN = "ENTER YOUR HUGGINGFACE ACCESS TOKEN HERE"
HfFolder.save_token(ACCESS_TOKEN)
# Set to True to quantize the base model to 8 bits and save GPU memory.
load_in_8bit = False

# Base Llama 2 chat model plus a LoRA adapter fine-tuned for code generation.
base_model = "meta-llama/Llama-2-7b-chat-hf"
lora_weights = "qblocks/llama2-7b-tiny-codes-code-generation"

# The adapter repository ships its own tokenizer, so load it from there.
tokenizer = AutoTokenizer.from_pretrained(lora_weights)

model = AutoModelForCausalLM.from_pretrained(
    base_model,
    device_map="auto",
    torch_dtype=torch.float16,
    load_in_8bit=load_in_8bit,  # use the flag defined above instead of a hardcoded value
)

# Apply the LoRA weights on top of the base model.
model = peft.PeftModel.from_pretrained(model, lora_weights)
# Wrap the adapted model in a text-generation pipeline.
generator = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)
prompt = "Write a Python function to divide 2 numbers and check for division by zero."
generation_config = transformers.GenerationConfig(
    temperature=0.4,         # low temperature keeps the code output focused
    top_p=0.99,
    top_k=40,
    num_beams=2,             # beam search over two candidate sequences
    max_new_tokens=400,
    repetition_penalty=1.3,  # discourage the model from repeating itself
)
# Run generation and print the completion (by default the pipeline echoes the prompt too).
output = generator(prompt, generation_config=generation_config)
finetuned_generation = output[0]["generated_text"]
print(finetuned_generation)
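
A note on prompt format: the prompt above goes to the model verbatim, but the base checkpoint is the chat variant of Llama 2, which was trained on requests wrapped in [INST] ... [/INST] markers. Whether this particular adapter was fine-tuned with that template is an assumption, so treat the following as a sketch; make_llama2_prompt is a hypothetical helper, not part of the original gist.

def make_llama2_prompt(user_message: str) -> str:
    # Wrap a plain request in the Llama 2 chat template (assumes the
    # adapter was trained with this format).
    return f"[INST] {user_message.strip()} [/INST]"

prompt = make_llama2_prompt(
    "Write a Python function to divide 2 numbers and check for division by zero."
)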
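If you plan to serve the model rather than just experiment with it, the LoRA deltas can be folded into the base weights so each forward pass skips the adapter computation. A minimal sketch, reusing the repo names from above; merge_and_unload() is PEFT's API for this, and the output directory name is made up for illustration.

import torch
import peft
from transformers import AutoModelForCausalLM

base = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-chat-hf",
    device_map="auto",
    torch_dtype=torch.float16,
)
adapted = peft.PeftModel.from_pretrained(base, "qblocks/llama2-7b-tiny-codes-code-generation")

# Fold the LoRA deltas into the base weights; the result is a plain
# transformers model with no adapter overhead at inference time.
merged = adapted.merge_and_unload()
merged.save_pretrained("llama2-7b-tiny-codes-merged")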