# Swap in pysqlite3 as the stdlib sqlite3 module (Chroma needs a newer SQLite
# than some hosting environments provide, e.g. Streamlit Cloud)
__import__('pysqlite3')
import sys
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')

import os
import tempfile

import streamlit as st
from streamlit_extras.buy_me_a_coffee import button

from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.callbacks.base import BaseCallbackHandler

button(username="jocoding", floating=True, width=221)
# Title
st.title("ChatPDF")
st.write("---")

# OpenAI API key input
openai_key = st.text_input('OPEN_AI_API_KEY', type="password")

# File upload
uploaded_file = st.file_uploader("Please upload a PDF file!", type=['pdf'])
st.write("---")
def pdf_to_document(uploaded_file):
    # Write the uploaded file to a temporary path so PyPDFLoader can read it,
    # then load and split the PDF into per-page documents
    temp_dir = tempfile.TemporaryDirectory()
    temp_filepath = os.path.join(temp_dir.name, uploaded_file.name)
    with open(temp_filepath, "wb") as f:
        f.write(uploaded_file.getvalue())
    loader = PyPDFLoader(temp_filepath)
    pages = loader.load_and_split()
    return pages
# Runs once a file has been uploaded
if uploaded_file is not None:
    pages = pdf_to_document(uploaded_file)

    # Split the pages into smaller chunks
    text_splitter = RecursiveCharacterTextSplitter(
        # Deliberately small chunk size, just for demonstration
        chunk_size=300,
        chunk_overlap=20,
        length_function=len,
        is_separator_regex=False,
    )
    texts = text_splitter.split_documents(pages)

    # Embed the chunks with OpenAI embeddings
    embeddings_model = OpenAIEmbeddings(openai_api_key=openai_key)

    # Load the embedded chunks into Chroma
    db = Chroma.from_documents(texts, embeddings_model)
    # Callback handler that streams tokens into a Streamlit container
    class StreamHandler(BaseCallbackHandler):
        def __init__(self, container, initial_text=""):
            self.container = container
            self.text = initial_text

        def on_llm_new_token(self, token: str, **kwargs) -> None:
            self.text += token
            self.container.markdown(self.text)
    # Question
    st.header("Ask the PDF a question!")
    question = st.text_input('Enter your question')

    if st.button('Ask'):
        with st.spinner('Wait for it...'):
            chat_box = st.empty()
            stream_handler = StreamHandler(chat_box)
            llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, openai_api_key=openai_key,
                             streaming=True, callbacks=[stream_handler])
            qa_chain = RetrievalQA.from_chain_type(llm, retriever=db.as_retriever())
            qa_chain({"query": question})