# czue/langchain_qa_chatgpt_bot.py

## langchain_qa_chatgpt_bot.py
# see https://www.youtube.com/watch?v=CsFpVdgEXCU for details of what this does

from typing import List

from langchain import ConversationChain
from langchain.chat_models import ChatOpenAI
from langchain.indexes.vectorstore import VectorStoreIndexWrapper
from langchain.memory import ConversationBufferMemory
from langchain.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder,
    SystemMessagePromptTemplate,
)
from langchain.schema import Document
from langchain.vectorstores import VectorStore

# Documents whose page_content is not longer than this many characters are
# dropped by filter_documents.
MIN_DOCUMENT_LENGTH = 20

# System message used by query_db. Tells the model to answer from the supplied
# documents and to say so when it falls back to general knowledge.
SYSTEM_PROMPT = """
You are Knowledge bot. In each message you will be given the extracted parts of a knowledge base
(labeled with DOCUMENT) and a question.
Answer the question using information from the knowledge base.
If the answer is not available in the documents or there are no documents,
still try to answer the question, but say that you used your general knowledge and not the documentation.
"""

# Variant of the system message that asks for a "SOURCES" section in the answer.
# NOTE(review): not referenced by any function visible in this module.
SYSTEM_PROMPT_WITH_SOURCES = """
You are Knowledge bot. In each message you will be given the extracted parts of a knowledge base
(labeled with DOCUMENT and SOURCE) and a question.
Answer the question using information from the knowledge base, including references ("SOURCES").
If you don't know the answer, just say that you don't know. Don't try to make up an answer.
ALWAYS return a "SOURCES" part in your answer.
"""

# Wrapper for a single document; formatted by construct_document_prompt.
# Placeholder: {content}.
DOCUMENT_TEMPLATE = """
------------ BEGIN DOCUMENT -------------
{content}
------------- END DOCUMENT --------------
"""

# Wrapper for a single document that also shows where it came from.
# Placeholders: {content} and {source}.
# NOTE(review): not referenced by any function visible in this module.
DOCUMENT_TEMPLATE_WITH_SOURCE = """
------------ BEGIN DOCUMENT -------------
--------------- CONTENT -----------------
{content}
---------------- SOURCE -----------------
{source}
------------- END DOCUMENT --------------
"""

# Full human message; formatted by construct_prompt.
# Placeholders: {documents} (the joined document blocks) and {question}.
PROMPT_TEMPLATE = """
=========== BEGIN DOCUMENTS =============
{documents}
============ END DOCUMENTS ==============

Question: {question}
"""


def construct_prompt(documents: List[Document], question: str):
    """Build the human-message text for one question.

    Each document is rendered with ``construct_document_prompt``; the rendered
    blocks are joined with newlines and substituted, together with
    ``question``, into ``PROMPT_TEMPLATE``.

    Args:
        documents: Documents to include in the prompt (may be empty).
        question: The question text to append after the documents.

    Returns:
        The formatted prompt string.
    """
    rendered_blocks = map(construct_document_prompt, documents)
    documents_section = "\n".join(rendered_blocks)
    return PROMPT_TEMPLATE.format(question=question, documents=documents_section)


def construct_document_prompt(document: Document) -> str:
    """Render a single document as a delimited block for the prompt.

    Only ``document.page_content`` is substituted: ``DOCUMENT_TEMPLATE`` has a
    ``{content}`` placeholder and nothing else. The metadata ``"source"`` entry
    is deliberately not read here, because the template never renders it and
    requiring the key would raise ``KeyError`` for documents that lack it.

    Args:
        document: The document to render.

    Returns:
        ``DOCUMENT_TEMPLATE`` with the document's text filled in.
    """
    return DOCUMENT_TEMPLATE.format(content=document.page_content)


def filter_documents(documents: List[Document]) -> List[Document]:
    """Drop documents whose text is too short to be useful.

    A document is kept only when the length of its ``page_content`` is strictly
    greater than ``MIN_DOCUMENT_LENGTH``. Input order is preserved.

    Args:
        documents: Candidate documents.

    Returns:
        A new list holding the documents that passed the length check.
    """
    kept = []
    for candidate in documents:
        if len(candidate.page_content) <= MIN_DOCUMENT_LENGTH:
            continue
        kept.append(candidate)
    return kept


def query_db(db: VectorStore, query: str):
    """Answer ``query`` with a chat model, using documents retrieved from ``db``.

    The vector store is asked for documents relevant to the query, short
    documents are filtered out, and the remainder is packed together with the
    question into a single human message that is sent through a
    ``ConversationChain``.

    Args:
        db: Vector store to retrieve documents from.
        query: The user's question; used both for retrieval and in the prompt.

    Returns:
        A dict with the single key ``"answer"`` holding the chain's reply.
    """
    wrapper = VectorStoreIndexWrapper(vectorstore=db)
    chat_model = ChatOpenAI(temperature=0)

    # Retrieve candidates for the query, drop the too-short ones, and build
    # the human message from what is left.
    hits = wrapper.vectorstore.as_retriever().get_relevant_documents(query)
    human_text = construct_prompt(filter_documents(hits), question=query)

    # Message order: system prompt, conversation history, then the human turn.
    message_templates = [
        SystemMessagePromptTemplate.from_template(SYSTEM_PROMPT),
        MessagesPlaceholder(variable_name="history"),
        HumanMessagePromptTemplate.from_template("{input}"),
    ]

    # TODO: the memory object is created inside this call and not kept
    # anywhere, so nothing is carried over between calls to query_db.
    chain = ConversationChain(
        memory=ConversationBufferMemory(return_messages=True),
        prompt=ChatPromptTemplate.from_messages(message_templates),
        llm=chat_model,
    )
    reply = chain.predict(input=human_text)

    # Wrapped in a dict so the result looks like the old API for now.
    return {"answer": reply}
	# see https://www.youtube.com/watch?v=CsFpVdgEXCU for details of what this does

	from typing import List

	from langchain import ConversationChain
	from langchain.chat_models import ChatOpenAI
	from langchain.indexes.vectorstore import VectorStoreIndexWrapper
	from langchain.memory import ConversationBufferMemory
	from langchain.prompts import (
	ChatPromptTemplate,
	HumanMessagePromptTemplate,
	MessagesPlaceholder,
	SystemMessagePromptTemplate,
	)
	from langchain.schema import Document
	from langchain.vectorstores import VectorStore

	# Documents whose page_content is not longer than this many characters are
	# dropped by filter_documents.
	MIN_DOCUMENT_LENGTH = 20

	# System message used by query_db.
	# NOTE(review): every line inside the triple-quoted literals below starts
	# with a tab character, and that tab is part of each string's value.
	SYSTEM_PROMPT = """
	You are Knowledge bot. In each message you will be given the extracted parts of a knowledge base
	(labeled with DOCUMENT) and a question.
	Answer the question using information from the knowledge base.
	If the answer is not available in the documents or there are no documents,
	still try to answer the question, but say that you used your general knowledge and not the documentation.
	"""

	# Variant of the system message that asks for a "SOURCES" section.
	# NOTE(review): not referenced by any function visible in this module.
	SYSTEM_PROMPT_WITH_SOURCES = """
	You are Knowledge bot. In each message you will be given the extracted parts of a knowledge base
	(labeled with DOCUMENT and SOURCE) and a question.
	Answer the question using information from the knowledge base, including references ("SOURCES").
	If you don't know the answer, just say that you don't know. Don't try to make up an answer.
	ALWAYS return a "SOURCES" part in your answer.
	"""

	# Wrapper for a single document; formatted by construct_document_prompt.
	# Placeholder: {content}.
	DOCUMENT_TEMPLATE = """
	------------ BEGIN DOCUMENT -------------
	{content}
	------------- END DOCUMENT --------------
	"""

	# Wrapper for a single document that also shows its source.
	# Placeholders: {content} and {source}.
	# NOTE(review): not referenced by any function visible in this module.
	DOCUMENT_TEMPLATE_WITH_SOURCE = """
	------------ BEGIN DOCUMENT -------------
	--------------- CONTENT -----------------
	{content}
	---------------- SOURCE -----------------
	{source}
	------------- END DOCUMENT --------------
	"""

	# Full human message; formatted by construct_prompt.
	# Placeholders: {documents} and {question}.
	PROMPT_TEMPLATE = """
	=========== BEGIN DOCUMENTS =============
	{documents}
	============ END DOCUMENTS ==============

	Question: {question}
	"""


	def construct_prompt(documents: List[Document], question: str):
	    """Build the human-message text for one question.

	    Each document is rendered with ``construct_document_prompt``; the
	    rendered blocks are joined with newlines and substituted, together with
	    ``question``, into ``PROMPT_TEMPLATE``.

	    Args:
	        documents: Documents to include in the prompt (may be empty).
	        question: The question text to append after the documents.

	    Returns:
	        The formatted prompt string.
	    """
	    return PROMPT_TEMPLATE.format(
	        documents="\n".join([construct_document_prompt(d) for d in documents]),
	        question=question,
	    )


	def construct_document_prompt(document: Document) -> str:
	    """Render a single document as a delimited block for the prompt.

	    Only ``document.page_content`` is substituted: ``DOCUMENT_TEMPLATE`` has
	    a ``{content}`` placeholder and nothing else. The metadata ``"source"``
	    entry is deliberately not read, because the template never renders it
	    and requiring the key would raise ``KeyError`` for documents without it.

	    Args:
	        document: The document to render.

	    Returns:
	        ``DOCUMENT_TEMPLATE`` with the document's text filled in.
	    """
	    return DOCUMENT_TEMPLATE.format(content=document.page_content)


	def filter_documents(documents: List[Document]) -> List[Document]:
	    """Drop documents whose text is too short to be useful.

	    A document is kept only when the length of its ``page_content`` is
	    strictly greater than ``MIN_DOCUMENT_LENGTH``. Input order is preserved.

	    Args:
	        documents: Candidate documents.

	    Returns:
	        A new list holding the documents that passed the length check.
	    """
	    return [d for d in documents if len(d.page_content) > MIN_DOCUMENT_LENGTH]


	def query_db(db: VectorStore, query: str):
	    """Answer ``query`` with a chat model, using documents retrieved from ``db``.

	    The vector store is asked for documents relevant to the query, short
	    documents are filtered out, and the remainder is packed together with
	    the question into a single human message that is sent through a
	    ``ConversationChain``.

	    Args:
	        db: Vector store to retrieve documents from.
	        query: The user's question; used for retrieval and in the prompt.

	    Returns:
	        A dict with the single key ``"answer"`` holding the chain's reply.
	    """
	    index = VectorStoreIndexWrapper(vectorstore=db)
	    llm = ChatOpenAI(temperature=0)
	    retriever = index.vectorstore.as_retriever()
	    documents = retriever.get_relevant_documents(query)
	    documents = filter_documents(documents)
	    chat_input = construct_prompt(documents, question=query)
	    system_prompt = SystemMessagePromptTemplate.from_template(SYSTEM_PROMPT)
	    # TODO: the memory object is created inside this call and not kept
	    # anywhere, so nothing is carried over between calls to query_db.
	    memory = ConversationBufferMemory(return_messages=True)
	    # Message order: system prompt, conversation history, then the human turn.
	    prompt = ChatPromptTemplate.from_messages(
	        [
	            system_prompt,
	            MessagesPlaceholder(variable_name="history"),
	            HumanMessagePromptTemplate.from_template("{input}"),
	        ]
	    )
	    conversation = ConversationChain(memory=memory, prompt=prompt, llm=llm)
	    response = conversation.predict(input=chat_input)
	    # Wrapped in a dict so the result looks like the old API for now.
	    return {"answer": response}