@decagondev
Created May 24, 2024 16:18
RAG example with LlamaIndex
import os

import fitz  # PyMuPDF
# Targets the llama-index 0.9.x API: GPTSimpleVectorIndex was renamed to
# VectorStoreIndex, LLMPredictor is no longer needed (pass the LLM to the
# ServiceContext directly), and the Ollama client ships with llama-index
# itself rather than a separate `ollama_llama` package.
from llama_index import Document, ServiceContext, VectorStoreIndex
from llama_index.llms import Ollama
from llama_index.prompts import PromptTemplate
# A plain class is enough here: subclassing SimpleDirectoryReader added nothing,
# since both __init__ and load_data were overridden.
class PDFDirectoryReader:
    """Read every PDF in a directory into llama_index Documents using PyMuPDF."""

    def __init__(self, directory_path):
        self.directory_path = directory_path

    def load_data(self):
        documents = []
        for filename in os.listdir(self.directory_path):
            if filename.endswith('.pdf'):
                file_path = os.path.join(self.directory_path, filename)
                text = self._extract_text_from_pdf(file_path)
                # llama_index Documents take `text`, not LangChain's `page_content`
                documents.append(Document(text=text, metadata={'source': filename}))
        return documents

    def _extract_text_from_pdf(self, file_path):
        text = ""
        with fitz.open(file_path) as pdf:
            for page in pdf:
                text += page.get_text()
        return text
# Define your prompt
prompt = "How has Berkshire Hathaway's investment in Coca-Cola grown?"

# Set up the Llama 3 LLM served by a local Ollama instance (Ollama needs no API key)
llm = Ollama(model="llama3", request_timeout=120.0)

# Load your documents from a directory containing PDFs
documents = PDFDirectoryReader('path/to/your/documents').load_data()

# Create the service context; embed_model="local" uses a small HuggingFace model
# so no OpenAI key is required (it does need sentence-transformers installed)
service_context = ServiceContext.from_defaults(llm=llm, embed_model="local")

# Build the vector index over the documents
index = VectorStoreIndex.from_documents(documents, service_context=service_context)

# Querying the vector index for "relevant" chunks
retriever = index.as_retriever(similarity_top_k=3)
relevant_nodes = retriever.retrieve(prompt)
context = "\n".join(node.node.get_content() for node in relevant_nodes)
for node in relevant_nodes:
    print(f"Source: {node.node.metadata.get('source', 'unknown')}\nContent: {node.node.get_content()}\n\n")
    print("__________________________")

# Adding the retrieved context to our prompt
template = PromptTemplate("{query} Context: {context}")
prompt_with_context = template.format(query=prompt, context=context)

# Asking the LLM for a response to the prompt with the provided context
result = llm.complete(prompt_with_context)
print(result.text)
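
# ---------------------------------------------------------------------------
# Usage sketch (assumed setup, not part of the original gist): this expects
# llama-index 0.9.x, PyMuPDF, sentence-transformers (for the "local"
# embed_model), and a local Ollama server with the llama3 model pulled:
#
#   pip install "llama-index>=0.9,<0.10" pymupdf sentence-transformers
#   ollama pull llama3
#
# Point 'path/to/your/documents' at a folder of PDFs before running the script.
# ---------------------------------------------------------------------------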