@MohanSaiTeki
Last active May 5, 2024 05:40
This gist is used in a blog post. It serves Meta-Llama-3-8B-Instruct behind a Flask /generate endpoint, loading the model in 4-bit precision.
from flask import Flask, request, jsonify
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import os

# Placeholder: replace with your actual Hugging Face access token
# (needed to download the gated Llama 3 weights).
os.environ["HF_TOKEN"] = "HF_TOKEN"

app = Flask(__name__)

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

# Load the model in 4-bit precision (requires the bitsandbytes package)
# and let device_map="auto" place it on the available device(s).
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype="auto",
    device_map="auto",
    load_in_4bit=True,
)
tokenizer = AutoTokenizer.from_pretrained(model_id)

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)
@app.route("/generate")
def generate_text():
query = request.args.get("query")
print("User query ", query)
messages = [
{
"role" : "system",
"content" : "Your a good chatbot who answers the users questions"},
{
"role" : "user",
"content" : query
},
]
response = pipe(messages, max_new_tokens=256)
return jsonify({"response" : response[0]["generated_text"][-1]["content"] })
if __name__ == '__main__':
app.run(debug=True)
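
For reference, here is a minimal client sketch showing how the endpoint might be called once the server is running. It assumes the default Flask port (5000) and uses the requests package; the query parameter name matches the one read by the route above, and the example question is illustrative.

# Minimal client sketch (assumes the server is running locally on
# the default Flask port, 5000, and that `requests` is installed).
import requests

resp = requests.get(
    "http://127.0.0.1:5000/generate",
    params={"query": "What is the capital of France?"},
)
print(resp.json()["response"])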