Skip to content

Instantly share code, notes, and snippets.

@gautamborgohain
Last active August 22, 2024 08:10
Show Gist options
  • Save gautamborgohain/8fa1120c1cf8b76d14fdaaee249c261c to your computer and use it in GitHub Desktop.
Simple script implementing a command line interface to run ReAct agent on PDFs and CSV using local LLM or OpenAI
"""
This script creates and interacts with ReActAgents for PDF and CSV data.
Dependencies (install in your virtual environment) :
python = "3.11.1"
llama-index-agent-openai = "^0.2.5"
openai = "^1.30.1"
pandas = "^2.2.2"
fitz = "^0.0.1.dev2"
pymupdf = "^1.24.4"
llama-index-llms-openai = "^0.1.19"
llama-index = "^0.10.37"
llama-index-experimental = "^0.1.3"
python-dotenv = "^1.0.1"
"""
# Standard-library imports
import argparse
import datetime
from glob import glob

# Third-party imports
import pandas as pd
from dotenv import load_dotenv
from llama_index.agent.openai import OpenAIAgent
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.core.agent import ReActAgent
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.experimental.query_engine import PandasQueryEngine
from llama_index.llms.openai import OpenAI

# Load environment variables containing the OpenAI API key.
# Put OPENAI_API_KEY in a .env file, or export it as an environment variable.
load_dotenv()

# Initialize the two chat models: hosted GPT-4o, and a local
# OpenAI-compatible server (e.g. LM Studio listening on localhost:1234).
gpt4 = OpenAI(model="gpt-4o")
local_llm = OpenAI(api_base="http://localhost:1234/v1", api_key="lm-studio")

# Directory where the PDF vector indexes are persisted between runs.
vector_indexes_path = "data/vector_store_indexes"
def create_pdf_gpt_agent(
    pdfs_path, embedding_llm, agent_llm, agent_name, agent_description, verbose
):
    """
    Create a ReActAgent that answers questions over a collection of PDFs.

    Args:
        pdfs_path (str): Glob pattern matching the PDF files (e.g. "docs/*.pdf").
        embedding_llm (OpenAI): LLM used by the vector-index query engine to
            synthesize answers from the retrieved chunks.
        agent_llm (OpenAI): LLM that drives the ReAct reasoning loop.
        agent_name (str): Name of the agent (also names the tool and the
            persisted index directory).
        agent_description (str): Description the agent sees when selecting the tool.
        verbose (bool): If True, print the agent's intermediate reasoning steps.

    Returns:
        ReActAgent: The created ReActAgent.

    Raises:
        FileNotFoundError: If the glob pattern matches no files.
    """
    # Read PDF files; fail fast with a clear message rather than letting the
    # reader crash on an empty input list.
    pdf_files = glob(pdfs_path)
    if not pdf_files:
        raise FileNotFoundError(f"No PDF files found matching: {pdfs_path}")
    pdf_content_list = SimpleDirectoryReader(input_files=pdf_files).load_data()

    # Build the vector index and persist it so it can be reloaded later.
    vector_index = VectorStoreIndex.from_documents(pdf_content_list)
    vector_index.storage_context.persist(
        persist_dir=f"./{vector_indexes_path}/{agent_name}"
    )

    # Wrap the index's query engine as a tool the agent can call.
    pdf_contents_tool = QueryEngineTool(
        query_engine=vector_index.as_query_engine(
            similarity_top_k=3, llm=embedding_llm
        ),
        metadata=ToolMetadata(name=agent_name, description=agent_description),
    )

    # Create the ReActAgent over the single PDF tool.
    react_agent = ReActAgent.from_tools(
        [pdf_contents_tool], llm=agent_llm, verbose=verbose
    )
    return react_agent
def create_csv_gpt_agent(
    csv_path, pandas_llm, agent_llm, agent_name, agent_description, verbose
):
    """
    Create a ReActAgent that answers questions over a CSV file via pandas.

    Args:
        csv_path (str): Path to the CSV file.
        pandas_llm (OpenAI): LLM used by the PandasQueryEngine to translate
            questions into pandas operations and synthesize responses.
        agent_llm (OpenAI): LLM that drives the ReAct reasoning loop.
        agent_name (str): Name of the agent / query-engine tool.
        agent_description (str): Description the agent sees when selecting the tool.
        verbose (bool): If True, print the query engine's and agent's
            intermediate steps.

    Returns:
        ReActAgent: The created ReActAgent.
    """
    # Load the CSV into a DataFrame (pd.read_csv raises FileNotFoundError
    # on a bad path, which is a clear enough failure mode here).
    csv_data = pd.read_csv(csv_path)

    # Wrap a pandas query engine over the DataFrame as a tool for the agent.
    csv_data_tool = QueryEngineTool(
        query_engine=PandasQueryEngine(
            df=csv_data, verbose=verbose, synthesize_response=True, llm=pandas_llm
        ),
        metadata=ToolMetadata(name=agent_name, description=agent_description),
    )

    # Create the ReActAgent over the single CSV tool.
    react_agent = ReActAgent.from_tools([csv_data_tool], llm=agent_llm, verbose=verbose)
    return react_agent
def chat_with_agent(agent, prompt):
    """
    Send one prompt to the agent and stream its reply to stdout.

    Args:
        agent (ReActAgent): The ReActAgent to chat with.
        prompt (str): The chat prompt.
    """
    # Stream the response tokens as they arrive rather than waiting
    # for the full completion.
    agent.stream_chat(prompt).print_response_stream()
def main():
    """
    Command-line entry point: build the chosen agent and start a chat REPL.

    Flags:
        --agent {pdf,csv}  (required) which data source to chat with
        --verbose          print the agent's intermediate reasoning
        --local            use the local LLM server instead of GPT-4o
        --pdfs_path PATH   folder containing PDFs (required with --agent pdf)
        --csv_path PATH    CSV file path (required with --agent csv)

    Reads prompts from stdin in a loop until the user types 'exit'.
    """
    parser = argparse.ArgumentParser(description="Chat with an agent")
    parser.add_argument(
        "--agent",
        choices=["pdf", "csv"],
        # required=True: previously omitting --agent fell through to a
        # dead-end "Invalid agent choice" print; argparse now reports
        # the missing flag with usage instead.
        required=True,
        help="Choose the agent to chat with",
    )
    parser.add_argument(
        "--verbose",
        action="store_true",
        default=False,
        help="Toggle verbose mode",
    )
    parser.add_argument(
        "--local",
        action="store_true",
        default=False,
        help="Toggle between local and remote OpenAI models",
    )
    parser.add_argument(
        "--pdfs_path",
        type=str,
        help="Path to folder containing the PDF files",
    )
    parser.add_argument(
        "--csv_path",
        type=str,
        help="Path to the CSV file",
    )
    args = parser.parse_args()

    llm = local_llm if args.local else gpt4
    verbose = args.verbose

    if args.agent == "pdf":
        if not args.pdfs_path:
            # Without this check the glob became the bogus pattern "None/*.pdf".
            parser.error("--pdfs_path is required when --agent pdf")
        agent = create_pdf_gpt_agent(
            f"{args.pdfs_path}/*.pdf",
            embedding_llm=llm,
            agent_llm=llm,
            agent_name="pdf_gpt_agent",
            agent_description="Provides the full scientific papers in the artificial intelligence domain",
            verbose=verbose,
        )
    elif args.agent == "csv":
        if not args.csv_path:
            parser.error("--csv_path is required when --agent csv")
        agent = create_csv_gpt_agent(
            args.csv_path,
            pandas_llm=llm,
            agent_llm=llm,
            agent_name="csv_gpt_agent",
            # Typo fixed in the tool description: "datasate" -> "dataset".
            agent_description="Accepts pandas instructions on the dataset of resale prices of HDB flats in Singapore.",
            verbose=verbose,
        )
    else:
        # Unreachable with required=True + choices; kept as a safety net.
        print("Invalid agent choice")
        return

    # Simple REPL: keep chatting until the user asks to quit.
    while True:
        prompt = input("\nEnter your chat prompt (or 'exit' to quit): ")
        # Accept 'exit' regardless of surrounding whitespace or case.
        if prompt.strip().lower() == "exit":
            break
        chat_with_agent(agent, prompt)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment