Skip to content

Instantly share code, notes, and snippets.

@softaverse
Created May 13, 2024 16:16
Show Gist options
  • Save softaverse/b6346d7b68a8411b8ca2fccef7469292 to your computer and use it in GitHub Desktop.
Llama 3🦙🦙🦙 + llama.cpp + LangChain🦜🔗
import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_chroma import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain.prompts import PromptTemplate
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.llms import LlamaCpp
# --- Local Llama 3 model served through llama.cpp ---
MODEL_PATH = "<your_gguf_model_path>"  # path to your local GGUF model file

llm = LlamaCpp(
    model_path=MODEL_PATH,
    n_gpu_layers=-1,  # -1 offloads all layers to the GPU
    n_batch=512,      # tokens processed per batch during prompt eval
    f16_kv=True,      # half-precision key/value cache
    temperature=0,    # greedy decoding for reproducible answers
    top_p=1,
    n_ctx=8192,       # context window size
)
# Load the blog post; the SoupStrainer keeps only the title, header and
# body of the article so page chrome is never parsed.
_article_only = bs4.SoupStrainer(
    class_=("post-content", "post-title", "post-header")
)
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": _article_only},
)
docs = loader.load()
# Chunk the article and index the chunks in an in-memory Chroma store.
splitter = RecursiveCharacterTextSplitter(chunk_size=250, chunk_overlap=0)
chunks = splitter.split_documents(docs)
vectorstore = Chroma.from_documents(documents=chunks, embedding=GPT4AllEmbeddings())
retriever = vectorstore.as_retriever()
# Pull a community RAG prompt pre-formatted for Llama 3 chat tokens.
prompt = hub.pull("rlm/rag-prompt-llama3")
# Offline alternative: an equivalent hand-written PromptTemplate, useful
# when the LangChain hub is unreachable. Uncomment to use it instead.
# prompt = PromptTemplate(
# template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are an assistant for question-answering tasks.
# Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know.
# Use three sentences maximum and keep the answer concise <|eot_id|><|start_header_id|>user<|end_header_id|>
# Question: {question}
# Context: {context}
# Answer: <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
# input_variables=["question", "context"],
# )
def format_docs(docs):
    """Concatenate the page contents of *docs*, separated by blank lines."""
    parts = [doc.page_content for doc in docs]
    return "\n\n".join(parts)
# Compose the RAG pipeline with LCEL: retrieve context for the question,
# fill the prompt, generate with the local model, parse to a plain string.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Fix: the original discarded both invoke() results — print the answers
# so running the script actually shows the model output.
print(rag_chain.invoke("What is Task Decomposition in the article?"))
print(rag_chain.invoke("Show me all of the memory types in the article."))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment