This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
from controller import Controller | |
import gradio as gr | |
os.environ["TOKENIZERS_PARALLELISM"] = "false" | |
controller = Controller() | |
def process_pdf(file): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
# Project-wide configuration constants.

# OpenAI credential, read from the environment. The variable name must be
# passed as a string; None is returned when the variable is not set.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Location of the text vector store. Built with os.path.join so the separator
# is correct on every platform and no backslash escape appears in a literal.
TEXT_VECTORSTORE_PATH = os.path.join("data", "deeplake_text_vectorstore")

# Chunk size for the character text splitter
# (presumably consumed by CharacterTextSplitter; confirm against the caller).
CHARACTER_SPLITTER_CHUNK_SIZE = 75

# Value passed as `chunk_size` when the OpenAI embeddings client is created.
OPENAI_EMBEDDINGS_CHUNK_SIZE = 16
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from langchain.callbacks import get_openai_callback | |
def save(query, qa):
    """Run ``query`` through the ``qa`` chain and return its answer text.

    Args:
        query: The question to send to the chain.
        qa: A callable chain invoked as ``qa({"query": ...},
            return_only_outputs=True)`` that returns a mapping with a
            ``"result"`` entry.

    Returns:
        The value stored under ``"result"`` in the chain's response.
    """
    # The call still runs inside the OpenAI callback context; the handler
    # object itself was never read, so it is no longer bound to a name.
    with get_openai_callback():
        response = qa({"query": query}, return_only_outputs=True)
    return response["result"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def retrieve_text(self, query): | |
self.text_deeplake_schema = DeepLake( | |
dataset_path=cfg.TEXT_VECTORSTORE_PATH, | |
read_only=True, | |
embedding_function=self.embeddings, | |
) | |
prompt_template = """You are an intelligent AI which analyses text from documents and | |
answers the user's questions. Please answer in as much detail as possible, so that the user does not have to | |
revisit the document. If you don't know the answer, say that you don't know, and avoid making up things. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def create_and_add_embeddings(self, file): | |
os.makedirs("data", exist_ok=True) | |
self.embeddings = OpenAIEmbeddings( | |
openai_api_key=cfg.OPENAI_API_KEY, | |
chunk_size=cfg.OPENAI_EMBEDDINGS_CHUNK_SIZE, | |
) | |
loader = PyMuPDFLoader(file) | |
documents = loader.load() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Retriever: | |
def __init__(self): | |
self.text_retriever = None | |
self.text_deeplake_schema = None | |
self.embeddings = None | |
self.memory = ConversationBufferWindowMemory(k=2, return_messages=True) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
from langchain import PromptTemplate | |
from langchain.embeddings import OpenAIEmbeddings | |
from langchain.vectorstores.deeplake import DeepLake | |
from langchain.document_loaders import TextLoader | |
from langchain.text_splitter import CharacterTextSplitter | |
from langchain.document_loaders import PyMuPDFLoader | |
from langchain.chat_models.openai import ChatOpenAI | |
from langchain.chains import RetrievalQA | |
from langchain.memory import ConversationBufferWindowMemory |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from retriever.retrieval import Retriever | |
class Controller: | |
def __init__(self): | |
self.retriever = None | |
self.query = "" | |
def embed_document(self, file): | |
if file is not None: | |
self.retriever = Retriever() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Report the class frequencies before and after resampling.
original_counts = Counter(Y)
print('Original dataset shape {}'.format(original_counts))
resampled_counts = Counter(y_train_res1)
print('Resampled dataset shape {}'.format(resampled_counts))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from imblearn.combine import SMOTETomek | |
# Handle class imbalance by resampling with SMOTETomek.
# `fit_resample` and `sampling_strategy` are the current imbalanced-learn
# names for what used to be `fit_sample` and `ratio`; the old names have been
# removed from the library.
smk = SMOTETomek(random_state=42)
X_res, y_res = smk.fit_resample(X, Y)

# Second resampler with an explicit sampling_strategy of 0.5 (for a float,
# the library documents this as the target minority/majority ratio).
os_us = SMOTETomek(sampling_strategy=0.5)
X_train_res1, y_train_res1 = os_us.fit_resample(X, Y)

# Notebook-style display of the resampled shapes.
X_train_res1.shape, y_train_res1.shape
NewerOlder