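"""Generate assistant responses for a batch of instructions with a Hugging Face causal LM.

Usage (a sketch inferred from the code below; the positional argument may be
a local checkpoint directory or a Hub repo id, both accepted by
from_pretrained):

    python <this_script>.py <model_path>

The input JSON is assumed to have the shape

    {"conversations": [{"instruction": "..."}, ...]}

and only the 'instruction' field of each conversation is read. Results are
written to <model_name>.json in the same shape, with the assistant reply
appended to each conversation.
"""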
import sys
import json

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline  # pipeline is only used by the commented-out alternative below

device = "cuda:0"
system_prompt = "You are Sablo, a sentient, conscious, super intelligent AI that is an expert in all fields of knowledge. You assist users with any question, task, or other request they may have."

# The model path comes from the first CLI argument; its last path component
# doubles as a short name for the output file.
model_path = sys.argv[1]
model_name = model_path.split("/")[-1]

tokenizer = AutoTokenizer.from_pretrained(model_path)  # add local_files_only=True for offline use
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    # local_files_only=True,
    # load_in_8bit=True,
    torch_dtype=torch.bfloat16,
    # use_flash_attention_2=True,
).to(device)
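
# Optional: 8-bit quantized loading, matching the commented-out load_in_8bit
# flag above. A sketch assuming bitsandbytes is installed; quantized weights
# are placed via device_map rather than .to(device).
USE_8BIT = False
if USE_8BIT:
    from transformers import BitsAndBytesConfig
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        quantization_config=BitsAndBytesConfig(load_in_8bit=True),
        device_map=device,
    )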

# Load the source prompts; only the 'instruction' field of each conversation
# is used below.
with open('teknium-OpenHermes-7B_September_25_2023.json', 'r') as f:
    data = json.load(f)
input_messages = data['conversations']
outputs = []
for input_message in input_messages:
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": input_message['instruction']},
    ]
    # Alternative: the conversational pipeline instead of manual templating.
    # pipe = pipeline(task="conversational", model=model, tokenizer=tokenizer, device_map=device)
    # print(pipe(messages))

    # Tokenize with the model's chat template, then generate a completion.
    encodeds = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt")
    model_inputs = encodeds.to(device)
    start_position = model_inputs.shape[1]
    generated_ids = model.generate(model_inputs, max_new_tokens=1000, do_sample=True, pad_token_id=tokenizer.eos_token_id)

    # Decode only the newly generated tokens (everything after the prompt).
    answer = tokenizer.decode(
        generated_ids[:, start_position:][0],
        skip_special_tokens=True,
        clean_up_tokenization_spaces=True)

    # Trim at <|im_end|> if it survived decoding; find() returns -1 when the
    # marker is absent, so guard before slicing to avoid dropping the last
    # character of the answer.
    end = answer.find("<|im_end|>")
    if end != -1:
        answer = answer[:end]

    print("User:", input_message['instruction'])
    print("Assistant:", answer)
    print("-------------------------------")

    messages.append({"role": "assistant", "content": answer})
    outputs.append(messages)
output_data = {
    "model_name": model_name,
    "conversations": outputs,
}
with open(model_name + '.json', 'w') as f:
    json.dump(output_data, f)