Skip to content

Instantly share code, notes, and snippets.

@ton77v
Created September 16, 2023 08:54
Show Gist options
  • Save ton77v/30e6ee168e1210bf57cd70ad495ad522 to your computer and use it in GitHub Desktop.
Save ton77v/30e6ee168e1210bf57cd70ad495ad522 to your computer and use it in GitHub Desktop.
MapRerankDocumentsChain issue: empty answers ranked 90-100
import os
import tempfile
from pathlib import Path
import pprint
import pdfkit # pip install pdfkit
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.schema import Document
from langchain.chains import MapRerankDocumentsChain
from langchain.chains.question_answering import load_qa_chain
from dotenv import load_dotenv
# Populate the process environment from a local .env file (python-dotenv)
# before the API key is looked up, so the key may live in either place.
load_dotenv()
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')

# Pretty-printer used further down to dump the chain result.
pp = pprint.PrettyPrinter(indent=2)
# The issue occurs often, but not on every run; with the Ethereum
# whitepaper it shows up many times.
ethereum_wp_url = 'https://ethereum.org/en/whitepaper/'

# Render the web page to a throwaway PDF and split it into documents.
# Everything that reads the PDF has to happen inside the ``with`` block:
# the temporary directory, and the PDF in it, is removed on exit, so no
# manual unlink of the file is needed.
with tempfile.TemporaryDirectory() as tmp_dir:
    tmp_pdf = Path(tmp_dir, '_eth.pdf')
    # Hand both libraries a plain string path rather than a Path object.
    pdf_path = str(tmp_pdf)
    pdfkit.from_url(ethereum_wp_url, pdf_path)
    loader = PyPDFLoader(pdf_path)
    pages: list[Document] = loader.load_and_split()
# Chat model with temperature 0 -- presumably to keep the answers as
# repeatable as possible between runs of this repro.
precise_chat_model = ChatOpenAI(
    openai_api_key=OPENAI_API_KEY,
    model_name='gpt-3.5-turbo',
    temperature=0,
)

# Question-answering chain of the 'map_rerank' type. Intermediate steps are
# returned as well, so the per-document results can be inspected.
# NOTE(review): the inspection suppression below presumably exists because
# load_qa_chain's declared return type is broader than the annotation.
# noinspection PyTypeChecker
qa_chain: MapRerankDocumentsChain = load_qa_chain(
    llm=precise_chat_model,
    chain_type='map_rerank',
    return_intermediate_steps=True,
    verbose=True,
)
# Ask the question against the first five documents only; this input is the
# one that demonstrates the problem.
question = 'please explain Code Execution'
query = {
    'input_documents': pages[:5],  # will get an empty answer
    'question': question,
}

# Run the chain, keeping only its outputs, and dump them for inspection.
answer = qa_chain(query, return_only_outputs=True)
pp.pprint(answer)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment