Skip to content

Instantly share code, notes, and snippets.

# --- Client-side snippet: interactive question loop against the local API ---
# NOTE(review): this fragment is truncated in the paste — the body of the
# `if question.lower() == "exit":` branch and the actual `requests` call to
# `llama2_url` are missing below this point, and indentation was lost.
import requests
llama2_url = "http://localhost:8000/streaming"
while True:
# Get user input for the question
question = input("Type your question: ")
# Check if the user wants to exit the loop
if question.lower() == "exit":
# --- Streaming endpoint ---
@app.post('/streaming/')
async def main(question: Question):
    """POST /streaming/ — stream the model's answer back as a text event stream.

    The request body is a ``Question``; the response is produced incrementally
    by ``answer_question`` and forwarded token-by-token to the client.
    """
    token_stream = answer_question(question)
    return StreamingResponse(token_stream, media_type='text/event-stream')
#Define function for Llama2 Streaming
# NOTE(review): truncated fragment — the body of the `for out in stream:` loop
# is missing and indentation was lost in the paste; restored from the original
# gist this should be a generator that yields token text chunks.
# NOTE(review): the `-> str` annotation looks wrong for a producer handed to
# StreamingResponse — presumably a generator/iterator of str; confirm.
def answer_question(question: Question) -> str:
# Non-greedy call into llama-cpp: stream=True makes `llm` return an iterator
# of partial completions instead of one final dict.
stream = llm( f"Question: {question.question} Answer:",
max_tokens=250,
# NOTE(review): "/n" is likely a typo for the newline stop token "\n" — confirm.
stop=["/n","Question:", "Q:"],
stream=True
)
print('Answer starts...')
for out in stream:
# --- Model and application setup ---
# Load the Llama 2 weights first; this is the slow part of startup.
# NOTE: the model path is machine-specific — adjust before running.
print('Loading model...')
llm = Llama(model_path="/home/blabla/llama-2-13b-chat.ggmlv3.q4_0.bin")
print('Model loaded, lets rock!')

# FastAPI application instance; route decorators elsewhere attach to it.
app = FastAPI()
#Create Question Model
class Question(BaseModel):
    """Request body accepted by the streaming endpoint."""

    # Optional display name of the asker; not used by the model itself.
    author: str | None = None
    # The question text forwarded to the LLM prompt.
    question: str
import copy
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from llama_cpp import Llama
#Create a second prompt
# Non-streaming completion: ask the model to compare itself with ChatGPT.
# Fixes vs. the original: the stop token "/n" was a typo for the newline
# "\n", and the extracted answer on the last line was a bare expression whose
# value was silently discarded in a script (a notebook-ism) — print it.
# (The "strenghts" typo in the prompt is left as-is: it is runtime text sent
# to the model, and fixing it would change the prompt.)
stream = llm(
    "Q: So, tell me about your strenghts and weaknesses compared with ChatGPT. A:",
    max_tokens=500,
    stop=["\n", "Question:", "Q:"],
    echo=True,
)
# echo=True returns prompt + completion; keep only the text after "A: ".
print(stream['choices'][0]['text'].split('A: ', 1)[1])
#Create a third prompt using context information
# RAG-style prompt: stuff the extracted PDF text into the prompt as context
# and ask for a summary.
# NOTE(review): `pdf_text_full` is never defined in this paste — presumably it
# is assembled from the PDF pages read below; confirm ordering in the original.
# Fixes vs. the original: "/n" stop-token typo -> "\n"; the final bare
# expression's value was discarded in a script — print it.
stream = llm(
    f"Q: Based on this information: {pdf_text_full}. Create a summary of what the song is about. A:",
    max_tokens=500,
    stop=["\n", "Question:", "Q:"],
    echo=True,
)
# echo=True returns prompt + completion; keep only the text after "A: ".
print(stream['choices'][0]['text'].split('A: ', 1)[1])
# --- PDF context extraction (truncated fragment) ---
# NOTE(review): the paste ends mid-fragment at "#Creating a page object"; the
# page-reading code that presumably builds `pdf_text_full` is missing.
#Set PDF path
pdf_path = './Stairway to heaven lyrics.pdf'
#Creating a pdf file object
# NOTE(review): the handle is never closed in the visible code — prefer a
# `with open(...)` block; confirm against the full source before changing,
# since PdfReader may read pages lazily from the open handle.
pdfFileObj = open(pdf_path, 'rb')
#Creating a pdf reader object
# NOTE(review): PyPDF2 is not imported anywhere in this paste — presumably
# `import PyPDF2` existed in the original file; confirm.
pdfReader = PyPDF2.PdfReader(pdfFileObj)
#Creating a page object