Skip to content

Instantly share code, notes, and snippets.

@fsndzomga
Created September 3, 2023 21:58
Show Gist options
  • Save fsndzomga/a05ac2b694e7a837544279d0abe35104 to your computer and use it in GitHub Desktop.
Save fsndzomga/a05ac2b694e7a837544279d0abe35104 to your computer and use it in GitHub Desktop.
PDF bot created using EmbedChain
import os
import PyPDF2
from embedchain import App
from config import Config
from embedchain.config import ChunkerConfig, AddConfig
# Chunking parameters passed to embedchain when the PDF text is added:
# chunk_size=500 with chunk_overlap=100.
chunker_config = ChunkerConfig(
    chunk_size=500,
    chunk_overlap=100,
)
# OpenAI API key read from the project's Config object.
OPENAI_KEY = Config.OPENAI_KEY
class PdfBot:
    """Question-answering bot over the text of a single PDF.

    On construction the PDF is read with PyPDF2, the extracted text is added
    to an embedchain ``App`` (chunked with the module-level
    ``chunker_config``), and questions are then answered via ``query_bot``.
    """

    def __init__(self, pdf_stream):
        """Create the embedchain app and index the PDF immediately.

        Args:
            pdf_stream: PDF source passed unchanged to ``PyPDF2.PdfReader``.
        """
        # Exported before App() is built; presumably embedchain picks the key
        # up from the environment -- confirm against the embedchain docs.
        os.environ["OPENAI_API_KEY"] = OPENAI_KEY
        self.pdf_bot = App()
        self.pdf_stream = pdf_stream
        self.initialize_bot()

    def initialize_bot(self):
        """Extract the text of every page and add it to the embedchain app."""
        pdf_reader = PyPDF2.PdfReader(self.pdf_stream)
        pdf_text = self._join_page_texts(pdf_reader.pages)
        self.pdf_bot.add(
            pdf_text,
            config=AddConfig(chunker=chunker_config),
            data_type='text',
        )

    @staticmethod
    def _join_page_texts(pages):
        """Return the extracted text of ``pages`` joined by newlines.

        A newline separator keeps the last word of one page from fusing with
        the first word of the next. A falsy result from ``extract_text()``
        (``None`` or ``""``) is treated as an empty page instead of raising
        ``TypeError`` during concatenation.

        Args:
            pages: Iterable of objects exposing ``extract_text()``.

        Returns:
            The combined text as a single string.
        """
        return "\n".join(page.extract_text() or "" for page in pages)

    def query_bot(self, question):
        """Return the embedchain app's ``chat`` answer to ``question``."""
        return self.pdf_bot.chat(question)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment