@decagondev
Created May 24, 2024 16:18
RAG example with LlamaIndex
import os

import fitz  # PyMuPDF
# Targets the llama-index 0.9.x API: GPTSimpleVectorIndex was renamed to
# VectorStoreIndex, LLMPredictor is no longer needed (pass the LLM to the
# ServiceContext directly), and the Ollama client ships with llama-index
# itself rather than a separate `ollama_llama` package.
from llama_index import Document, ServiceContext, VectorStoreIndex
from llama_index.llms import Ollama
from llama_index.prompts import PromptTemplate
# A plain class is enough here: subclassing SimpleDirectoryReader added nothing,
# since both __init__ and load_data were overridden.
class PDFDirectoryReader:
    """Read every PDF in a directory into llama_index Documents using PyMuPDF."""

    def __init__(self, directory_path):
        self.directory_path = directory_path

    def load_data(self):
        documents = []
        for filename in os.listdir(self.directory_path):
            if filename.endswith('.pdf'):
                file_path = os.path.join(self.directory_path, filename)
                text = self._extract_text_from_pdf(file_path)
                # llama_index Documents take `text`, not LangChain's `page_content`
                documents.append(Document(text=text, metadata={'source': filename}))
        return documents

    def _extract_text_from_pdf(self, file_path):
        text = ""
        with fitz.open(file_path) as pdf:
            for page in pdf:
                text += page.get_text()
        return text
# Define your prompt
prompt = "How has Berkshire Hathaway's investment in Coca-Cola grown?"

# Set up the Llama 3 LLM served by a local Ollama instance (Ollama needs no API key)
llm = Ollama(model="llama3", request_timeout=120.0)

# Load your documents from a directory containing PDFs
documents = PDFDirectoryReader('path/to/your/documents').load_data()

# Create the service context; embed_model="local" uses a small HuggingFace model
# so no OpenAI key is required (it does need sentence-transformers installed)
service_context = ServiceContext.from_defaults(llm=llm, embed_model="local")

# Build the vector index over the documents
index = VectorStoreIndex.from_documents(documents, service_context=service_context)

# Querying the vector index for "relevant" chunks
retriever = index.as_retriever(similarity_top_k=3)
relevant_nodes = retriever.retrieve(prompt)
context = "\n".join(node.node.get_content() for node in relevant_nodes)
for node in relevant_nodes:
    print(f"Source: {node.node.metadata.get('source', 'unknown')}\nContent: {node.node.get_content()}\n\n")
    print("__________________________")

# Adding the retrieved context to our prompt
template = PromptTemplate("{query} Context: {context}")
prompt_with_context = template.format(query=prompt, context=context)

# Asking the LLM for a response to the prompt with the provided context
result = llm.complete(prompt_with_context)
print(result.text)
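
# ---------------------------------------------------------------------------
# Usage sketch (assumed setup, not part of the original gist): this expects
# llama-index 0.9.x, PyMuPDF, sentence-transformers (for the "local"
# embed_model), and a local Ollama server with the llama3 model pulled:
#
#   pip install "llama-index>=0.9,<0.10" pymupdf sentence-transformers
#   ollama pull llama3
#
# Point 'path/to/your/documents' at a folder of PDFs before running the script.
# ---------------------------------------------------------------------------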