Ala Eddine GRINE (AlaGrine)
AlaGrine / Langchain_create_answer_cahin.py
Created February 14, 2024 22:27
Create an answer chain in LangChain: retrieve relevant documents, augment the LLM prompt, and query the LLM.
from operator import itemgetter

from langchain.schema import Document, format_document

def _combine_documents(docs, document_prompt, document_separator="\n\n"):
    """Combine the retrieved documents into a single string for the LLM prompt."""
    doc_strings = [format_document(doc, document_prompt) for doc in docs]
    return document_separator.join(doc_strings)

# Retrieve relevant documents
retrieved_documents = {
    "docs": itemgetter("standalone_question") | retriever,
    "question": lambda x: x["standalone_question"],
}
AlaGrine / Langchain_Create_standalone_question_chain.py
Last active February 14, 2024 22:24
Create a standalone-question chain in LangChain: condense the follow-up question and chat history into a standalone question.
from operator import itemgetter

from langchain.prompts import PromptTemplate
from langchain.schema.runnable import RunnableLambda, RunnablePassthrough

# 1. Load memory using RunnableLambda and extract the chat_history key with itemgetter.
#    `RunnablePassthrough.assign` adds chat_history to the chain's input dict.
loaded_memory = RunnablePassthrough.assign(
    chat_history=RunnableLambda(memory.load_memory_variables) | itemgetter("chat_history"),
)

# 2. Pass the follow-up question along with the chat history to the LLM,
#    and parse the answer (standalone_question).
condense_question_prompt = PromptTemplate(
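    # Hedged completion: the template text below is an illustrative
    # reconstruction of a standard condense-question prompt, not the
    # gist's exact wording.
    input_variables=["chat_history", "question"],
    template="""Given the following conversation and a follow-up question,
rephrase the follow-up question to be a standalone question, in its original language.

Chat History:
{chat_history}
Follow-up question: {question}
Standalone question:""",
)

# 3. Wire the pieces into a chain (sketch; assumes an `llm` instance plus
#    `from langchain.schema import get_buffer_string` and
#    `from langchain.schema.output_parser import StrOutputParser`).
standalone_question_chain = loaded_memory | {
    "standalone_question": {
        "question": lambda x: x["question"],
        "chat_history": lambda x: get_buffer_string(x["chat_history"]),
    }
    | condense_question_prompt
    | llm
    | StrOutputParser()
}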
AlaGrine / Langchain_create_Memory.py
Created February 14, 2024 22:17
Create memory in LangChain
from langchain.memory import ConversationSummaryBufferMemory, ConversationBufferMemory

def create_memory(model_name='gpt-3.5-turbo', memory_max_token=None):
    """Creates a ConversationSummaryBufferMemory for gpt-3.5-turbo.
    Creates a ConversationBufferMemory for the other models."""
    if model_name == "gpt-3.5-turbo":
        if memory_max_token is None:
            memory_max_token = 1024  # max_tokens for 'gpt-3.5-turbo' = 4096
        memory = ConversationSummaryBufferMemory(
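            # Hedged completion (illustrative): standard
            # ConversationSummaryBufferMemory arguments; assumes
            # `from langchain_openai import ChatOpenAI` for the model
            # that summarizes older turns.
            llm=ChatOpenAI(model_name=model_name, temperature=0.1),
            max_token_limit=memory_max_token,
            return_messages=True,
            memory_key='chat_history',
        )
    else:
        memory = ConversationBufferMemory(
            return_messages=True,
            memory_key='chat_history',
        )
    return memory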
AlaGrine / get_environment_varaibles.py
Created February 14, 2024 22:15
Get environment variables
import os
from getpass import getpass

def get_environment_variable(key):
    """Return the value of an environment variable, prompting for it if it is missing."""
    if key in os.environ:
        value = os.environ.get(key)
        print(f"\n[INFO]: {key} retrieved successfully.")
    else:
        print(f"\n[ERROR]: {key} was not found in your environment variables.")
        value = getpass(f"Insert your {key}: ")
    return value

openai_api_key = get_environment_variable("OPENAI_API_KEY")
AlaGrine / Langchain_instantiate_LLM.py
Created February 14, 2024 22:12
Instantiate an LLM in LangChain.
def instantiate_LLM(LLM_provider, api_key, temperature=0.5, top_p=0.95, model_name=None):
    """Instantiate an LLM in LangChain.

    Parameters:
        LLM_provider (str): the LLM provider, in ["OpenAI", "Google", "HuggingFace"].
        model_name (str): in ["gpt-3.5-turbo", "gpt-3.5-turbo-0125", "gpt-4-turbo-preview",
            "gemini-pro", "mistralai/Mistral-7B-Instruct-v0.2"].
        api_key (str): google_api_key, openai_api_key, or huggingfacehub_api_token.
        temperature (float): range 0.0 - 1.0; default = 0.5.
        top_p (float): range 0.0 - 1.0; default = 0.95.
    """
AlaGrine / Langchain_CohereRerank_retriever.py
Created February 14, 2024 22:07
Build a ContextualCompressionRetriever using the Cohere Rerank endpoint to reorder the results based on relevance.
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CohereRerank
from langchain_community.llms import Cohere

def CohereRerank_retriever(
    base_retriever,
    cohere_api_key, cohere_model="rerank-multilingual-v2.0", top_n=8
):
    """Build a ContextualCompressionRetriever using the Cohere Rerank endpoint
    to reorder the results based on relevance.

    Parameters:
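        base_retriever: the base (vectorstore-backed) retriever.
        cohere_api_key (str): the Cohere API key.
        cohere_model (str): the Cohere rerank model name.
        top_n (int): the number of top-ranked documents to return.
    """
    # Hedged completion: the parameter descriptions above and the body below
    # are an illustrative reconstruction built on the standard CohereRerank API.
    compressor = CohereRerank(
        cohere_api_key=cohere_api_key,
        model=cohere_model,
        top_n=top_n,
    )
    retriever_contextual_compression = ContextualCompressionRetriever(
        base_compressor=compressor,
        base_retriever=base_retriever,
    )
    return retriever_contextual_compression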
AlaGrine / Langchain_create_compression_retriever.py
Created February 14, 2024 22:05
Build a ContextualCompressionRetriever in LangChain
from langchain.retrievers.document_compressors import DocumentCompressorPipeline
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_transformers import EmbeddingsRedundantFilter, LongContextReorder
from langchain.retrievers.document_compressors import EmbeddingsFilter
from langchain.retrievers import ContextualCompressionRetriever

def create_compression_retriever(embeddings, base_retriever, chunk_size=500, k=16, similarity_threshold=None):
    """Build a ContextualCompressionRetriever.
    We wrap the base_retriever (a vectorstore-backed retriever) in a ContextualCompressionRetriever.
    The compressor here is a DocumentCompressorPipeline, which splits documents
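    into smaller chunks, removes redundant documents, filters out the least
    relevant documents, and reorders them so the most relevant come first.
    """
    # Hedged completion: an illustrative reconstruction using the standard
    # components named in the imports above.

    # 1. Split documents into smaller chunks.
    splitter = CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=0, separator=". ")

    # 2. Drop documents that are redundant with each other.
    redundant_filter = EmbeddingsRedundantFilter(embeddings=embeddings)

    # 3. Keep only the documents most relevant to the query.
    relevant_filter = EmbeddingsFilter(
        embeddings=embeddings, k=k, similarity_threshold=similarity_threshold
    )

    # 4. Reorder so the most relevant documents sit at the edges of the
    #    context window (mitigates the "lost in the middle" effect).
    reordering = LongContextReorder()

    pipeline_compressor = DocumentCompressorPipeline(
        transformers=[splitter, redundant_filter, relevant_filter, reordering]
    )
    return ContextualCompressionRetriever(
        base_compressor=pipeline_compressor, base_retriever=base_retriever
    )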
AlaGrine / Langchain_Vectorstore_backed_retriever.py
Created February 14, 2024 22:01
Create a vectorstore-backed retriever in LangChain
def Vectorstore_backed_retriever(
    vectorstore, search_type="similarity", k=4, score_threshold=None
):
    """Create a vectorstore-backed retriever.

    Parameters:
        search_type: the type of search the retriever should perform.
            One of "similarity" (default), "mmr", or "similarity_score_threshold".
        k: number of documents to return (default: 4).
        score_threshold: minimum relevance threshold for "similarity_score_threshold" (default: None).
    """
AlaGrine / Langchain_Documment_Loader.py
Created February 14, 2024 21:47
Load documents in LangChain using DirectoryLoader
from langchain_community.document_loaders import DirectoryLoader, TextLoader

def langchain_document_loader(TMP_DIR):
    """
    Load documents from the temporary directory (TMP_DIR).
    Files can be in txt, pdf, CSV or docx format.
    """
    documents = []

    txt_loader = DirectoryLoader(
        TMP_DIR.as_posix(), glob="**/*.txt", loader_cls=TextLoader, show_progress=True
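    )
    documents.extend(txt_loader.load())

    # Hedged completion: the remaining loaders follow the same DirectoryLoader
    # pattern; assumes PyPDFLoader, CSVLoader and Docx2txtLoader are imported
    # from langchain_community.document_loaders (loader choices illustrative).
    pdf_loader = DirectoryLoader(
        TMP_DIR.as_posix(), glob="**/*.pdf", loader_cls=PyPDFLoader, show_progress=True
    )
    documents.extend(pdf_loader.load())

    csv_loader = DirectoryLoader(
        TMP_DIR.as_posix(), glob="**/*.csv", loader_cls=CSVLoader, show_progress=True,
        loader_kwargs={"encoding": "utf8"},
    )
    documents.extend(csv_loader.load())

    doc_loader = DirectoryLoader(
        TMP_DIR.as_posix(), glob="**/*.docx", loader_cls=Docx2txtLoader, show_progress=True
    )
    documents.extend(doc_loader.load())

    return documents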
AlaGrine / PyTorch_train_test_loop.py
Created December 24, 2023 17:14
Combine the train and test steps into a single train/test loop function
import torch
from tqdm.auto import tqdm

def train_and_evaluate(model: torch.nn.Module,
                       train_dataloader: torch.utils.data.DataLoader,
                       test_dataloader: torch.utils.data.DataLoader,
                       epochs: int = 5,
                       learning_rate: float = 0.001):
    """Train the model for a number of epochs, evaluate it, and track the results."""
    # Create an empty results dictionary
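    results = {"train_loss": [], "train_acc": [], "test_loss": [], "test_acc": []}

    # Hedged sketch of the loop: assumes helper functions train_step() and
    # test_step() (hypothetical names) that run one epoch of training or
    # evaluation and return (loss, accuracy); the optimizer and loss choices
    # here are illustrative.
    loss_fn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    for epoch in tqdm(range(epochs)):
        train_loss, train_acc = train_step(model, train_dataloader, loss_fn, optimizer)
        test_loss, test_acc = test_step(model, test_dataloader, loss_fn)

        print(f"Epoch: {epoch + 1} | "
              f"train_loss: {train_loss:.4f} | train_acc: {train_acc:.4f} | "
              f"test_loss: {test_loss:.4f} | test_acc: {test_acc:.4f}")

        results["train_loss"].append(train_loss)
        results["train_acc"].append(train_acc)
        results["test_loss"].append(test_loss)
        results["test_acc"].append(test_acc)

    return results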