Skip to content

Instantly share code, notes, and snippets.

@softaverse
Created May 13, 2024 16:16
Show Gist options
  • Save softaverse/b6346d7b68a8411b8ca2fccef7469292 to your computer and use it in GitHub Desktop.
Llama 3🦙🦙🦙 + llama.cpp + LangChain🦜🔗
import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_chroma import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain.prompts import PromptTemplate
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.llms import LlamaCpp
# --- Local Llama 3 model served through llama.cpp ---
MODEL_PATH = "<your_gguf_model_path>"  # path to your local GGUF model file

llm = LlamaCpp(
    model_path=MODEL_PATH,
    n_gpu_layers=-1,  # -1 offloads all layers to the GPU
    n_batch=512,      # tokens processed per batch during prompt eval
    f16_kv=True,      # half-precision key/value cache
    temperature=0,    # greedy decoding for reproducible answers
    top_p=1,
    n_ctx=8192,       # context window size
)
# Load the blog post; the SoupStrainer keeps only the title, header and
# body of the article so page chrome is never parsed.
_article_only = bs4.SoupStrainer(
    class_=("post-content", "post-title", "post-header")
)
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": _article_only},
)
docs = loader.load()
# Chunk the article and index the chunks in an in-memory Chroma store.
splitter = RecursiveCharacterTextSplitter(chunk_size=250, chunk_overlap=0)
chunks = splitter.split_documents(docs)
vectorstore = Chroma.from_documents(documents=chunks, embedding=GPT4AllEmbeddings())
retriever = vectorstore.as_retriever()
# Pull a community RAG prompt pre-formatted for Llama 3 chat tokens.
prompt = hub.pull("rlm/rag-prompt-llama3")
# Offline alternative: an equivalent hand-written PromptTemplate, useful
# when the LangChain hub is unreachable. Uncomment to use it instead.
# prompt = PromptTemplate(
# template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are an assistant for question-answering tasks.
# Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know.
# Use three sentences maximum and keep the answer concise <|eot_id|><|start_header_id|>user<|end_header_id|>
# Question: {question}
# Context: {context}
# Answer: <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
# input_variables=["question", "context"],
# )
def format_docs(docs):
    """Concatenate the page contents of *docs*, separated by blank lines."""
    parts = [doc.page_content for doc in docs]
    return "\n\n".join(parts)
# Compose the RAG pipeline with LCEL: retrieve context for the question,
# fill the prompt, generate with the local model, parse to a plain string.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Fix: the original discarded both invoke() results — print the answers
# so running the script actually shows the model output.
print(rag_chain.invoke("What is Task Decomposition in the article?"))
print(rag_chain.invoke("Show me all of the memory types in the article."))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment