Skip to content

Instantly share code, notes, and snippets.

@youtube-jocoding
Created August 26, 2023 11:17
Show Gist options
  • Save youtube-jocoding/9dd7cd3a6cea48bc27eeb43923b7849b to your computer and use it in GitHub Desktop.
Save youtube-jocoding/9dd7cd3a6cea48bc27eeb43923b7849b to your computer and use it in GitHub Desktop.
__import__('pysqlite3')
import sys
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
import streamlit as st
import tempfile
import os
from streamlit_extras.buy_me_a_coffee import button
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
# Page header and inputs. Streamlit renders widgets in call order, so the
# sequence below is the top-to-bottom layout of the page.
# Floating widget from streamlit_extras' buy_me_a_coffee module.
button(username="jocoding", floating=True, width=221)
# Title
st.title("ChatPDF")
st.write("---")
# Ask the user for their OpenAI API key (type="password" input).
openai_key = st.text_input('OPEN_AI_API_KEY', type="password")
# File upload, restricted to the 'pdf' type.
uploaded_file = st.file_uploader("PDF 파일을 올려주세요!",type=['pdf'])
st.write("---")
def pdf_to_document(uploaded_file):
    """Load an uploaded PDF and return its pages as documents.

    The upload's bytes are written to a file in a temporary directory and
    that path is handed to ``PyPDFLoader``. The directory is removed before
    this function returns.

    Args:
        uploaded_file: Object exposing ``name`` and ``getvalue()``; at the
            call site this is the value returned by ``st.file_uploader``.

    Returns:
        The result of ``PyPDFLoader(path).load_and_split()``.
    """
    # basename() keeps a client-supplied file name from pointing outside the
    # temporary directory; fall back to a fixed name if nothing is left.
    safe_name = os.path.basename(uploaded_file.name) or "upload.pdf"

    # The context manager removes the directory deterministically instead of
    # relying on the TemporaryDirectory finalizer.
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_filepath = os.path.join(temp_dir, safe_name)
        with open(temp_filepath, "wb") as f:
            f.write(uploaded_file.getvalue())
        # Load while the file still exists on disk.
        pages = PyPDFLoader(temp_filepath).load_and_split()
    return pages
# Runs once a PDF has been uploaded.
if uploaded_file is not None:
    # The key may be typed into the form or, presumably, supplied through the
    # OPENAI_API_KEY environment variable (the usual OpenAI client fallback),
    # so only warn when neither is available.
    if not (openai_key or os.environ.get("OPENAI_API_KEY")):
        st.warning("OpenAI API 키를 먼저 입력해주세요.")
    else:
        import hashlib

        from langchain.callbacks.base import BaseCallbackHandler

        class StreamHandler(BaseCallbackHandler):
            """Callback that renders streamed LLM tokens into a container."""

            def __init__(self, container, initial_text=""):
                self.container = container
                self.text = initial_text

            def on_llm_new_token(self, token: str, **kwargs) -> None:
                # Append the token and re-render the whole answer so far.
                self.text += token
                self.container.markdown(self.text)

        # Streamlit re-executes this script on every widget interaction.
        # Keep the vector store in session state, keyed by the API key and
        # the file's bytes, so the PDF is parsed and embedded only once per
        # (key, file) pair instead of on every rerun.
        index_key = hashlib.sha256(
            openai_key.encode("utf-8") + b"\0" + uploaded_file.getvalue()
        ).hexdigest()
        if st.session_state.get("chatpdf_index_key") != index_key:
            pages = pdf_to_document(uploaded_file)

            # Split the pages into small overlapping chunks.
            text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=300,
                chunk_overlap=20,
                length_function=len,
                is_separator_regex=False,
            )
            texts = text_splitter.split_documents(pages)

            # Embed the chunks and load them into Chroma.
            embeddings_model = OpenAIEmbeddings(openai_api_key=openai_key)
            # Store the index before its key so a failure above never leaves
            # a key that points at a stale or missing index.
            st.session_state["chatpdf_index"] = Chroma.from_documents(
                texts, embeddings_model
            )
            st.session_state["chatpdf_index_key"] = index_key
        db = st.session_state["chatpdf_index"]

        # Question form
        st.header("PDF에게 질문해보세요!!")
        question = st.text_input('질문을 입력하세요')

        if st.button('질문하기'):
            if not question.strip():
                # Nothing to ask; skip the model call entirely.
                st.warning("질문을 입력해주세요.")
            else:
                with st.spinner('Wait for it...'):
                    # Placeholder that the stream handler keeps overwriting
                    # with the partial answer.
                    chat_box = st.empty()
                    stream_handler = StreamHandler(chat_box)
                    llm = ChatOpenAI(
                        model_name="gpt-3.5-turbo",
                        temperature=0,
                        openai_api_key=openai_key,
                        streaming=True,
                        callbacks=[stream_handler],
                    )
                    qa_chain = RetrievalQA.from_chain_type(
                        llm, retriever=db.as_retriever()
                    )
                    # The answer reaches the page through the streaming
                    # callback, so the return value is not used.
                    qa_chain({"query": question})
@gigegim
Copy link

gigegim commented Aug 26, 2023

thank you

@gigegim
Copy link

gigegim commented Aug 26, 2023

thank you

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment