@eramax
Forked from Vaibhavs10/intel_chat_inference.py
Created November 22, 2023 19:44
import transformers
model_name = 'Intel/neural-chat-7b-v3-1'
model = transformers.AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)

def generate_response(system_input, user_input):
    # Format the input using the model's prompt template
    prompt = f"### System:\n{system_input}\n### User:\n{user_input}\n### Assistant:\n"

    # Tokenize and encode the prompt
    inputs = tokenizer.encode(prompt, return_tensors="pt")

    # Generate a response
    outputs = model.generate(inputs, max_length=1000, num_return_sequences=1)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Extract only the assistant's response
    return response.split("### Assistant:\n")[-1]

# Example usage
system_input = "Please answer all questions to the best of your ability."
user_input = "How does the neural-chat-7b-v3-1 model work?"
response = generate_response(system_input, user_input)
# Print the assistant's reply
print(response)
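
A minimal sketch for running the same script on a GPU, assuming a CUDA-capable PyTorch install; the encoded prompt must be on the same device as the model before calling generate:

import torch

# Pick a device and move the model to it (assumption: CUDA is available; falls back to CPU)
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)

# Inside generate_response, move the encoded prompt to the same device:
inputs = tokenizer.encode(prompt, return_tensors="pt").to(device)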