@billju · Created September 23, 2023 08:31
peft lora test
import os, torch
from glob import glob
from peft import PeftModel
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig, TextStreamer

# Hugging Face caches models as directories named "models--{org}--{name}";
# recover the original "org/name" identifiers for everything in the local cache.
cache_dirs = glob(os.path.expanduser('~/.cache/huggingface/hub/models--*'))
pretrains = [os.path.basename(path).replace('models--', '').replace('--', '/') for path in cache_dirs]
for i, pretrain in enumerate(pretrains): print(i, pretrain)
pretrain = pretrains[int(input('Select a pretrained model: '))]
tokenizer = AutoTokenizer.from_pretrained(pretrain)
model = AutoModelForCausalLM.from_pretrained(pretrain, device_map='auto', torch_dtype=torch.float16)
# Optionally wrap the base model with a LoRA adapter; leave blank to skip.
lora = input('LoRA path: ')
if lora: model = PeftModel.from_pretrained(model, lora)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
config = GenerationConfig()
config.do_sample = False  # greedy decoding; deterministic, replaces the original temperature=0
config.max_length = 256
config.repetition_penalty = 1.1
# Simple REPL: wrap each user turn in a Q/A template and stream the generation.
while True:
    prompt = 'Q:{Q}?\nA:'.format(Q=input('\nyou>'))
    inputs = tokenizer(prompt, return_tensors='pt', return_token_type_ids=False).to('cuda')
    outputs = model.generate(**inputs, generation_config=config, streamer=streamer)
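
For reference, an adapter directory like the one the script's "LoRA path" prompt expects can be produced with peft's get_peft_model / save_pretrained. The following is a minimal sketch, not part of the original gist: the base model 'gpt2' and the output path './my-lora' are placeholder assumptions, and the fine-tuning loop itself is elided.

# Hypothetical sketch: create and save a LoRA adapter that the script above can load.
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM

base = AutoModelForCausalLM.from_pretrained('gpt2')  # placeholder base model
peft_config = LoraConfig(
    r=8,                    # rank of the low-rank update matrices
    lora_alpha=16,          # scaling factor applied to the LoRA update
    lora_dropout=0.05,
    task_type='CAUSAL_LM',  # lets peft pick default target modules for causal LMs
)
model = get_peft_model(base, peft_config)
model.print_trainable_parameters()  # sanity check: only adapter weights are trainable

# ... fine-tune `model` here (e.g. with transformers.Trainer) ...

model.save_pretrained('./my-lora')  # writes adapter_config.json plus adapter weights

Pointing the script's "LoRA path" prompt at ./my-lora would then apply this adapter on top of the matching base model via PeftModel.from_pretrained.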