This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from langchain.schema import Document | |
def _combine_documents(docs, document_prompt, document_separator="\n\n"):
    """Format every document with ``document_prompt`` and join the results.

    Parameters:
        docs: iterable of documents to render.
        document_prompt: prompt passed to ``format_document`` for each document.
        document_separator (str): string placed between rendered documents
            (default: a blank line).

    Returns:
        str: the rendered documents joined by ``document_separator``.
    """
    rendered = []
    for document in docs:
        rendered.append(format_document(document, document_prompt))
    return document_separator.join(rendered)
| # Retrieve relevant documents | |
| retrieved_documents = { | |
| "docs": itemgetter("standalone_question") | retriever, | |
| "question": lambda x: x["standalone_question"], |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # 1. Load the memory with a RunnableLambda, then extract its "chat_history" entry using itemgetter. | |
| # `RunnablePassthrough.assign` passes the input through unchanged and adds the result under the `chat_history` key. | |
| loaded_memory = RunnablePassthrough.assign( | |
| chat_history=RunnableLambda(memory.load_memory_variables) | itemgetter("chat_history"), | |
| ) | |
| # 2. Pass the follow-up question along with the chat history to the LLM, and parse the answer (standalone_question). | |
| condense_question_prompt = PromptTemplate( |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from langchain.memory import ConversationSummaryBufferMemory,ConversationBufferMemory | |
| def create_memory(model_name='gpt-3.5-turbo',memory_max_token=None): | |
| """Creates a ConversationSummaryBufferMemory for gpt-3.5-turbo. | |
| Creates a ConversationBufferMemory for the other models.""" | |
| if model_name=="gpt-3.5-turbo": | |
| if memory_max_token is None: | |
| memory_max_token = 1024 # max_tokens for 'gpt-3.5-turbo' = 4096 | |
| memory = ConversationSummaryBufferMemory( |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_environment_variable(key):
    """Return the value of the environment variable ``key``, prompting if unset.

    When ``key`` is present in ``os.environ`` its value is returned and an
    informational message is printed. Otherwise an error message is printed
    and the value is requested interactively through ``getpass``.

    Parameters:
        key (str): name of the environment variable to look up.

    Returns:
        str: the value found in the environment, or the one typed by the user.
    """
    # One lookup instead of a membership test followed by a second lookup.
    # ``None`` means "absent"; an empty string is a present value and is
    # returned unchanged, exactly as before.
    value = os.environ.get(key)
    if value is not None:
        print(f"\n[INFO]: {key} retrieved successfully.")
        return value

    print(f"\n[ERROR]: {key} is not found in your environment variables.")
    return getpass(f"Insert your {key}")
| openai_api_key = get_environment_variable("OPENAI_API_KEY") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def instantiate_LLM(LLM_provider,api_key,temperature=0.5,top_p=0.95,model_name=None): | |
| """Instantiate LLM in Langchain. | |
| Parameters: | |
| LLM_provider (str): the LLM provider; in ["OpenAI","Google","HuggingFace"] | |
| model_name (str): in ["gpt-3.5-turbo", "gpt-3.5-turbo-0125", "gpt-4-turbo-preview", | |
| "gemini-pro", "mistralai/Mistral-7B-Instruct-v0.2"]. | |
| api_key (str): google_api_key or openai_api_key or huggingfacehub_api_token | |
| temperature (float): Range: 0.0 - 1.0; default = 0.5 | |
| top_p (float): Range: 0.0 - 1.0; default = 0.95. | |
| """ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from langchain.retrievers import ContextualCompressionRetriever | |
| from langchain.retrievers.document_compressors import CohereRerank | |
| from langchain_community.llms import Cohere | |
| def CohereRerank_retriever( | |
| base_retriever, | |
| cohere_api_key,cohere_model="rerank-multilingual-v2.0", top_n=8 | |
| ): | |
| """Build a ContextualCompressionRetriever using Cohere Rerank endpoint to reorder the results based on relevance. | |
| Parameters: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from langchain.retrievers.document_compressors import DocumentCompressorPipeline | |
| from langchain.text_splitter import CharacterTextSplitter | |
| from langchain_community.document_transformers import EmbeddingsRedundantFilter,LongContextReorder | |
| from langchain.retrievers.document_compressors import EmbeddingsFilter | |
| from langchain.retrievers import ContextualCompressionRetriever | |
| def create_compression_retriever(embeddings, base_retriever, chunk_size=500, k=16, similarity_threshold=None): | |
| """Build a ContextualCompressionRetriever. | |
| We wrap the base_retriever (a vectorstore-backed retriever) into a ContextualCompressionRetriever. | |
| The compressor here is a Document Compressor Pipeline, which splits documents |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def Vectorstore_backed_retriever( | |
| vectorstore,search_type="similarity",k=4,score_threshold=None | |
| ): | |
| """Create a vectorstore-backed retriever. | |
| Parameters: | |
| search_type: Defines the type of search that the Retriever should perform. | |
| Can be "similarity" (default), "mmr", or "similarity_score_threshold" | |
| k: number of documents to return (Default: 4) | |
| score_threshold: Minimum relevance threshold for similarity_score_threshold (default=None) | |
| """ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def langchain_document_loader(TMP_DIR): | |
| """ | |
| Load documents from the temporary directory (TMP_DIR). | |
| Files can be in txt, pdf, CSV or docx format. | |
| """ | |
| documents = [] | |
| txt_loader = DirectoryLoader( | |
| TMP_DIR.as_posix(), glob="**/*.txt", loader_cls=TextLoader, show_progress=True |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from tqdm.auto import tqdm | |
| def train_and_evaluate(model: torch.nn.Module, | |
| train_dataloader: torch.utils.data.DataLoader, | |
| test_dataloader: torch.utils.data.DataLoader, | |
| epochs: int = 5, | |
| learning_rate = 0.001): | |
| """Train the model for a number of epochs, evaluate the model and track the results.""" | |
| # Create empty results dictionary |
NewerOlder