Skip to content

Instantly share code, notes, and snippets.

Last active March 26, 2024 16:30
Show Gist options
  • Save tamerxkilinc/8f70c02c1b45f67b75e47d209b7e4ee5 to your computer and use it in GitHub Desktop.
Save tamerxkilinc/8f70c02c1b45f67b75e47d209b7e4ee5 to your computer and use it in GitHub Desktop.
# Simple RAG demo with langchain
from langchain.chains.question_answering import load_qa_chain
from langchain.embeddings import CacheBackedEmbeddings
from langchain.llms.huggingface_pipeline import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.storage import LocalFileStore
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
# This function stores the embeddings of the documents. It uses the OpenAIEmbeddings model to generate the embeddings and stores them in a FAISS vector store. You can use other models and vector stores as well.
def store_embeddings(store, documents):
    """Embed ``documents`` and index them in a FAISS vector store.

    Embeddings are produced by OpenAI's ``text-embedding-3-small`` model,
    wrapped in a ``CacheBackedEmbeddings`` backed by ``store``.

    Args:
        store: Byte store used as the embedding cache (``main`` passes a
            ``LocalFileStore``).
        documents: Documents to embed and index.

    Returns:
        The FAISS vector store built from ``documents``.
    """
    # No API key is passed explicitly; OpenAIEmbeddings is expected to read
    # it from the OPENAI_API_KEY environment variable.
    underlying_embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

    # The model name is used as the cache namespace so entries written for
    # one embedding model are kept apart from those of another.
    cached_embedder = CacheBackedEmbeddings.from_bytes_store(
        underlying_embeddings, store, namespace=underlying_embeddings.model
    )

    return FAISS.from_documents(documents, cached_embedder)
# This function creates a chain for question answering. It has the variables {context} and {question} in the prompt template, which will be replaced with the context and question respectively.
def create_chain(llm):
    """Build a "stuff" question-answering chain around ``llm``.

    The (German) prompt tells the model to answer the question using only
    the supplied data and to say so when the data is insufficient. It uses
    two placeholders: ``{context}`` for the retrieved documents and
    ``{question}`` for the user's question.

    Args:
        llm: The language model the chain should call.

    Returns:
        The chain returned by ``load_qa_chain`` with ``chain_type="stuff"``.
    """
    # The {context} placeholder must be present in the template: it is
    # declared in input_variables below and is where the "stuff" chain
    # inserts the documents passed as "input_documents".
    qna_prompt_template = """### [INST] Aufgabe: Dir wird eine Frage gestellt. Du erhältst relevante Daten für die Beantwortung der Frage. Deine Aufgabe ist es eine Antwort für die Frage mit Hilfe der relevanten Daten zu generieren. Wenn die Daten, die du erhältst, nicht ausreichen um die Frage zu beantworten, teile dem User mit, dass du nicht genügend Informationen hast um die Frage zu beantworten.'
### Daten: {context}
### Frage: {question} [/INST]"""

    prompt = PromptTemplate(
        template=qna_prompt_template, input_variables=["context", "question"]
    )
    return load_qa_chain(llm, chain_type="stuff", prompt=prompt)
# This function uses a similarity search to find the most relevant documents to the question, replaces the variables {context} and {question} in the prompt with the context and question respectively, and generates an answer using the chain.
def ask(chain, db, question):
    """Answer ``question`` using documents retrieved from ``db``.

    Args:
        chain: Question-answering chain; it is invoked with the keys
            ``"input_documents"`` and ``"question"`` and must return a
            mapping containing ``"output_text"``.
        db: Vector store exposing ``as_retriever``.
        question: The user's question.

    Returns:
        The ``"output_text"`` entry of the chain's result.
    """
    retriever = db.as_retriever(search_type="similarity")

    # ``invoke`` is the Runnable entry point; it replaces the deprecated
    # ``get_relevant_documents`` and direct ``chain(...)`` call styles.
    relevant_documents = retriever.invoke(question)
    result = chain.invoke(
        {"input_documents": relevant_documents, "question": question},
        return_only_outputs=True,
    )
    return result["output_text"]
def main():
    """Run the demo: index a PDF, ask one question, and print the answer.

    Steps:
      1. Load the Mistral-7B-Instruct model and wrap it in a LangChain LLM.
      2. Load and split ``my_document.pdf`` and embed the pieces into a
         FAISS store, caching embeddings under ``./cache/``.
      3. Read one question from standard input, answer it, and print it.
    """
    store = LocalFileStore("./cache/")

    # There are other models and tokenizers available as well. You can use any of them.
    # The instruction fine-tuned models work best with the langchain pipeline.
    model_id = "mistralai/Mistral-7B-Instruct-v0.2"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id)
    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=150)
    llm = HuggingFacePipeline(pipeline=pipe)

    loader = PyPDFLoader("my_document.pdf")
    documents = loader.load_and_split()

    db = store_embeddings(store, documents)
    chain = create_chain(llm)

    user_question = input("User: ")
    answer = ask(chain, db, user_question)
    print("Answer:", answer)


# Without this guard nothing ever calls main(), so running the script would
# do nothing; the guard also keeps the module import-safe.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment