Created May 13, 2024 16:16
Save softaverse/b6346d7b68a8411b8ca2fccef7469292 to your computer and use it in GitHub Desktop.
Llama 3🦙🦙🦙 + llama.cpp + LangChain🦜🔗
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import bs4 | |
from langchain import hub | |
from langchain_community.document_loaders import WebBaseLoader | |
from langchain_chroma import Chroma | |
from langchain_core.output_parsers import StrOutputParser | |
from langchain_core.runnables import RunnablePassthrough | |
from langchain_community.embeddings import GPT4AllEmbeddings | |
from langchain.prompts import PromptTemplate | |
from langchain_text_splitters import RecursiveCharacterTextSplitter | |
from langchain_community.llms import LlamaCpp | |
# Local Llama 3 model served through the llama.cpp Python bindings.
_llm_config = {
    "model_path": "<your_gguf_model_path>",  # TODO: point at your local GGUF file
    "n_gpu_layers": -1,  # -1 = offload every layer to the GPU
    "n_batch": 512,      # prompt-processing batch size
    "f16_kv": True,      # half-precision key/value cache
    "temperature": 0,    # greedy decoding for reproducible answers
    "top_p": 1,
    "n_ctx": 8192,       # context window size
}
llm = LlamaCpp(**_llm_config)
# Fetch the target blog post, keeping only the main article sections
# (title, header, body) and discarding navigation/boilerplate markup.
_content_filter = bs4.SoupStrainer(
    class_=("post-content", "post-title", "post-header")
)
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": _content_filter},
)
docs = loader.load()

# Chunk the article into small overlapping-free pieces and index them
# in an in-memory Chroma store for similarity search.
splitter = RecursiveCharacterTextSplitter(chunk_size=250, chunk_overlap=0)
splits = splitter.split_documents(docs)
vectorstore = Chroma.from_documents(documents=splits, embedding=GPT4AllEmbeddings())
retriever = vectorstore.as_retriever()
# Pull a Llama-3-formatted RAG prompt from the LangChain Hub (network call).
_prompt_id = "rlm/rag-prompt-llama3"
prompt = hub.pull(_prompt_id)
# Offline alternative: build the equivalent prompt locally instead of
# pulling it from the hub.
# prompt = PromptTemplate(
#     template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are an assistant for question-answering tasks.
#     Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know.
#     Use three sentences maximum and keep the answer concise <|eot_id|><|start_header_id|>user<|end_header_id|>
#     Question: {question}
#     Context: {context}
#     Answer: <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
#     input_variables=["question", "context"],
# )
def format_docs(docs):
    """Join the page_content of each retrieved document, separated by blank lines."""
    bodies = [doc.page_content for doc in docs]
    return "\n\n".join(bodies)
# RAG pipeline: retrieve relevant chunks, format them into the prompt's
# {context} slot, pass the question through unchanged, generate with the
# local Llama 3 model, and parse the raw completion into a string.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Fix: the original invoked the chain twice and discarded both return
# values, so the script produced no visible output. Print each answer.
for _question in (
    "What is Task Decomposition in the article?",
    "Show me all of the memory types in the article.",
):
    print(f"Q: {_question}")
    print(f"A: {rag_chain.invoke(_question)}\n")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.