Ala Eddine GRINE AlaGrine

## Langchain_create_answer_cahin.py
from langchain.schema import Document

def _combine_documents(docs, document_prompt, document_separator="\n\n"):
    doc_strings = [format_document(doc, document_prompt) for doc in docs]
    return document_separator.join(doc_strings)

# Retrieve relevant documents
retrieved_documents = {
    "docs": itemgetter("standalone_question") | retriever,
    "question": lambda x: x["standalone_question"],

## Langchain_Create_standalone_question_chain.py
# 1. Load the conversation memory.
#    `RunnableLambda` wraps `memory.load_memory_variables`, and `itemgetter` keeps
#    only the "chat_history" entry of what it returns.
#    `RunnablePassthrough.assign` then adds that value to the chain input
#    under the `chat_history` key.

loaded_memory = RunnablePassthrough.assign(
    chat_history=(
        RunnableLambda(memory.load_memory_variables)
        | itemgetter("chat_history")
    ),
)

# 2. Pass the follow-up question along with the chat history to the LLM, and parse the answer (standalone_question).

condense_question_prompt = PromptTemplate(

## Langchain_create_Memory.py
from langchain.memory import ConversationSummaryBufferMemory,ConversationBufferMemory

def create_memory(model_name='gpt-3.5-turbo',memory_max_token=None):
    """Creates a ConversationSummaryBufferMemory for gpt-3.5-turbo.
    Creates a ConversationBufferMemory for the other models."""

    if model_name=="gpt-3.5-turbo":
        if memory_max_token is None:
            memory_max_token = 1024 # max_tokens for 'gpt-3.5-turbo' = 4096
        memory = ConversationSummaryBufferMemory(

## get_environment_varaibles.py
def get_environment_variable(key):
    """Return the value of the environment variable `key`.

    When the variable is set, its value is returned and an info message is
    printed. When it is missing, an error message is printed and the user is
    asked to type the value through `getpass`, so it is not echoed.

    Parameters:
        key (str): name of the environment variable.

    Returns:
        The value read from the environment, or the value typed by the user.
    """
    value = os.environ.get(key)
    if value is None:
        # Variable is not set: fall back to an interactive, hidden prompt.
        print(f"\n[ERROR]: {key} is not found in your environment variables.")
        return getpass(f"Insert your {key}")

    print(f"\n[INFO]: {key} retrieved successfully.")
    return value

openai_api_key = get_environment_variable("OPENAI_API_KEY")

## Langchain_instantiate_LLM.py
def instantiate_LLM(LLM_provider,api_key,temperature=0.5,top_p=0.95,model_name=None):
    """Instantiate LLM in Langchain.

    Parameters:
        LLM_provider (str): the LLM provider; in ["OpenAI","Google","HuggingFace"]
        api_key (str): google_api_key or openai_api_key or huggingfacehub_api_token
        temperature (float): Range: 0.0 - 1.0; default = 0.5
        top_p (float): Range: 0.0 - 1.0; default = 0.95
        model_name (str): in ["gpt-3.5-turbo", "gpt-3.5-turbo-0125", "gpt-4-turbo-preview",
            "gemini-pro", "mistralai/Mistral-7B-Instruct-v0.2"]; default = None
    """

## Langchain_CohereRerank_retriever.py
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CohereRerank
from langchain_community.llms import Cohere

def CohereRerank_retriever(
    base_retriever,
    cohere_api_key,cohere_model="rerank-multilingual-v2.0", top_n=8
):
    """Build a ContextualCompressionRetriever using Cohere Rerank endpoint to reorder the results based on relevance.
    Parameters:

## Langchain_create_compression_retriever.py
from langchain.retrievers.document_compressors import DocumentCompressorPipeline
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_transformers import EmbeddingsRedundantFilter,LongContextReorder
from langchain.retrievers.document_compressors import EmbeddingsFilter
from langchain.retrievers import ContextualCompressionRetriever

def create_compression_retriever(embeddings, base_retriever, chunk_size=500, k=16, similarity_threshold=None):
    """Build a ContextualCompressionRetriever.
    We wrap the base_retriever (a vectorstore-backed retriever) into a ContextualCompressionRetriever.
    The compressor here is a Document Compressor Pipeline, which splits documents

## Langchain_Vectorstore_backed_retriever.py
def Vectorstore_backed_retriever(
vectorstore,search_type="similarity",k=4,score_threshold=None
):
    """Create a vectorstore-backed retriever.

    Parameters:
        vectorstore: the vectorstore that backs the retriever.
        search_type: Defines the type of search that the Retriever should perform.
            Can be "similarity" (default), "mmr", or "similarity_score_threshold"
        k: number of documents to return (Default: 4)
        score_threshold: Minimum relevance threshold for similarity_score_threshold (default=None)
    """

## Langchain_Documment_Loader.py
def langchain_document_loader(TMP_DIR):
    """
    Load documents from the temporary directory (TMP_DIR).
    Files can be in txt, pdf, CSV or docx format.
    """

    documents = []

    txt_loader = DirectoryLoader(
        TMP_DIR.as_posix(), glob="**/*.txt", loader_cls=TextLoader, show_progress=True

## PyTorch_train_test_loop.py
from tqdm.auto import tqdm

def train_and_evaluate(model: torch.nn.Module,
                       train_dataloader: torch.utils.data.DataLoader,
                       test_dataloader: torch.utils.data.DataLoader,
                       epochs: int = 5,
                       learning_rate = 0.001):
    """Train the model for a number of epochs, evaluate the model and track the results."""

    # Create empty results dictionary
	from langchain.schema import Document

	def _combine_documents(docs, document_prompt, document_separator="\n\n"):
	doc_strings = [format_document(doc, document_prompt) for doc in docs]
	return document_separator.join(doc_strings)

	# Retrieve relevant documents
	retrieved_documents = {
	"docs": itemgetter("standalone_question") \| retriever,
	"question": lambda x: x["standalone_question"],
	# 1. Load the conversation memory.
	#    `RunnableLambda` wraps `memory.load_memory_variables`, and `itemgetter` keeps
	#    only the "chat_history" entry of what it returns.
	#    `RunnablePassthrough.assign` then adds that value to the chain input
	#    under the `chat_history` key.

	loaded_memory = RunnablePassthrough.assign(
		# The pipe operator was written as the markdown escape `\|`, which is not valid Python.
		chat_history=RunnableLambda(memory.load_memory_variables) | itemgetter("chat_history"),
	)

	# 2. Pass the follow-up question along with the chat history to the LLM, and parse the answer (standalone_question).

	condense_question_prompt = PromptTemplate(
	from langchain.memory import ConversationSummaryBufferMemory,ConversationBufferMemory

	def create_memory(model_name='gpt-3.5-turbo',memory_max_token=None):
	"""Creates a ConversationSummaryBufferMemory for gpt-3.5-turbo.
	Creates a ConversationBufferMemory for the other models."""

	if model_name=="gpt-3.5-turbo":
	if memory_max_token is None:
	memory_max_token = 1024 # max_tokens for 'gpt-3.5-turbo' = 4096
	memory = ConversationSummaryBufferMemory(
	def get_environment_variable(key):
		"""Return the value of the environment variable `key`.

		When the variable is set, its value is returned and an info message is
		printed. When it is missing, an error message is printed and the user is
		asked to type the value through `getpass`, so it is not echoed.

		Parameters:
			key (str): name of the environment variable.

		Returns:
			The value read from the environment, or the value typed by the user.
		"""
		# The if/else structure had lost its indentation; it is restored here.
		if key in os.environ:
			value = os.environ.get(key)
			print(f"\n[INFO]: {key} retrieved successfully.")
		else:
			print(f"\n[ERROR]: {key} is not found in your environment variables.")
			value = getpass(f"Insert your {key}")
		return value

	openai_api_key = get_environment_variable("OPENAI_API_KEY")
	def instantiate_LLM(LLM_provider,api_key,temperature=0.5,top_p=0.95,model_name=None):
	"""Instantiate LLM in Langchain.
	Parameters:
	LLM_provider (str): the LLM provider; in ["OpenAI","Google","HuggingFace"]
	model_name (str): in ["gpt-3.5-turbo", "gpt-3.5-turbo-0125", "gpt-4-turbo-preview",
	"gemini-pro", "mistralai/Mistral-7B-Instruct-v0.2"].
	api_key (str): google_api_key or openai_api_key or huggingfacehub_api_token
	temperature (float): Range: 0.0 - 1.0; default = 0.5
	top_p (float): Range: 0.0 - 1.0; default = 0.95
	"""
	from langchain.retrievers import ContextualCompressionRetriever
	from langchain.retrievers.document_compressors import CohereRerank
	from langchain_community.llms import Cohere

	def CohereRerank_retriever(
	base_retriever,
	cohere_api_key,cohere_model="rerank-multilingual-v2.0", top_n=8
	):
	"""Build a ContextualCompressionRetriever using Cohere Rerank endpoint to reorder the results based on relevance.
	Parameters:
	from langchain.retrievers.document_compressors import DocumentCompressorPipeline
	from langchain.text_splitter import CharacterTextSplitter
	from langchain_community.document_transformers import EmbeddingsRedundantFilter,LongContextReorder
	from langchain.retrievers.document_compressors import EmbeddingsFilter
	from langchain.retrievers import ContextualCompressionRetriever

	def create_compression_retriever(embeddings, base_retriever, chunk_size=500, k=16, similarity_threshold=None):
	"""Build a ContextualCompressionRetriever.
	We wrap the base_retriever (a vectorstore-backed retriever) into a ContextualCompressionRetriever.
	The compressor here is a Document Compressor Pipeline, which splits documents
	def Vectorstore_backed_retriever(
	vectorstore,search_type="similarity",k=4,score_threshold=None
	):
	"""create a vectorsore-backed retriever
	Parameters:
	search_type: Defines the type of search that the Retriever should perform.
	Can be "similarity" (default), "mmr", or "similarity_score_threshold"
	k: number of documents to return (Default: 4)
	score_threshold: Minimum relevance threshold for similarity_score_threshold (default=None)
	"""
	def langchain_document_loader(TMP_DIR):
	"""
	Load documents from the temporary directory (TMP_DIR).
	Files can be in txt, pdf, CSV or docx format.
	"""

	documents = []

	txt_loader = DirectoryLoader(
	TMP_DIR.as_posix(), glob="*/.txt", loader_cls=TextLoader, show_progress=True
	from tqdm.auto import tqdm

	def train_and_evaluate(model: torch.nn.Module,
	train_dataloader: torch.utils.data.DataLoader,
	test_dataloader: torch.utils.data.DataLoader,
	epochs: int = 5,
	learning_rate = 0.001):
	"""Train the model for a number of epochs, evaluate the model and track the results."""

	# Create empty results dictionary