@tamerxkilinc
Last active March 26, 2024 16:30
Simple RAG demo with LangChain
faiss-cpu==1.7.4
langchain==0.1.5
langchain-community==0.0.17
langchain-openai==0.0.6
transformers~=4.37.2
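# Note (assumption, not pinned in the original gist): PyPDFLoader additionally needs the pypdf
# package, and the transformers text-generation pipeline needs a backend such as torch; install
# both alongside the packages above.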
from langchain.chains.question_answering import load_qa_chain
from langchain.embeddings import CacheBackedEmbeddings
from langchain.llms.huggingface_pipeline import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.storage import LocalFileStore
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# This function stores the embeddings of the documents. It uses the OpenAIEmbeddings model to
# generate the embeddings and stores them in a FAISS vector store. You can use other models and
# vector stores as well (a local alternative is sketched below).
def store_embeddings(store, documents):
    underlying_embeddings = OpenAIEmbeddings(
        model="text-embedding-3-small",
        openai_api_key="...",
        dimensions=1024,
    )
    # Cache the computed embeddings in the given byte store so repeated runs do not re-embed the documents.
    cached_embedder = CacheBackedEmbeddings.from_bytes_store(
        underlying_embeddings, store, namespace=underlying_embeddings.model
    )
    db = FAISS.from_documents(documents, cached_embedder)
    return db
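
# Illustrative alternative (not part of the original gist): the same caching setup with a local
# embedding model instead of the OpenAI API. Assumes the sentence-transformers package is
# installed; the model name below is only an example.
def store_embeddings_local(store, documents):
    from langchain_community.embeddings import HuggingFaceEmbeddings

    local_embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    cached_embedder = CacheBackedEmbeddings.from_bytes_store(
        local_embeddings, store, namespace=local_embeddings.model_name
    )
    return FAISS.from_documents(documents, cached_embedder)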

# This function creates a chain for question answering. The prompt template contains the variables
# {context} and {question}, which will be replaced with the retrieved context and the user's question.
def create_chain(llm):
    qna_prompt_template = """### [INST] Task: You will be asked a question and are given relevant data for answering it. Your task is to generate an answer to the question with the help of the relevant data. If the data you receive is not sufficient to answer the question, tell the user that you do not have enough information to answer it.
{context}
### Question: {question} [/INST]"""
    PROMPT = PromptTemplate(
        template=qna_prompt_template, input_variables=["context", "question"]
    )
    return load_qa_chain(llm, chain_type="stuff", prompt=PROMPT)
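
# Illustration (not part of the original gist): chain_type="stuff" concatenates the page contents
# of all retrieved documents into {context} before formatting the prompt, so the model sees roughly:
#
#   example = PROMPT.format(
#       context="First chunk text...\n\nSecond chunk text...",
#       question="What does the document say about X?",
#   )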

# This function uses a similarity search to find the documents most relevant to the question, fills
# the {context} and {question} variables in the prompt, and generates an answer using the chain.
def ask(chain, db, question):
    retriever = db.as_retriever(search_type="similarity")
    context = retriever.get_relevant_documents(question)
    answer = chain({"input_documents": context, "question": question}, return_only_outputs=True)["output_text"]
    return answer
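
# Note (assumption, not part of the original gist): the number of retrieved chunks can be tuned
# via search_kwargs, e.g.
#
#   retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 4})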

def main():
    # Local byte store used to cache the document embeddings between runs.
    store = LocalFileStore("./cache/")

    # There are other models and tokenizers available as well. You can use any of them.
    # The instruction fine-tuned models work best with the LangChain pipeline.
    model_id = "mistralai/Mistral-7B-Instruct-v0.2"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id)
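    # Note (assumption, not part of the original gist): loading the 7B model in full precision
    # needs roughly 28 GB of RAM. With torch and accelerate installed, half precision and automatic
    # device placement reduce that, e.g.:
    #
    #   model = AutoModelForCausalLM.from_pretrained(
    #       model_id, torch_dtype=torch.float16, device_map="auto"
    #   )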
    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=150)
    llm = HuggingFacePipeline(pipeline=pipe)

    # Load the PDF, split it into chunks, embed them, and build the FAISS index.
    loader = PyPDFLoader("my_document.pdf")
    documents = loader.load_and_split()
    db = store_embeddings(store, documents)
    chain = create_chain(llm)

    # Answer a single question from the user.
    user_question = input("User: ")
    answer = ask(chain, db, user_question)
    print("Answer:", answer)


if __name__ == "__main__":
    main()