Skip to content

Instantly share code, notes, and snippets.

@gautamborgohain
Last active August 22, 2024 08:10
Show Gist options
  • Save gautamborgohain/8fa1120c1cf8b76d14fdaaee249c261c to your computer and use it in GitHub Desktop.
Simple script implementing a command line interface to run ReAct agent on PDFs and CSV using local LLM or OpenAI
"""
This script creates and interacts with ReActAgents for PDF and CSV data.
Dependencies (install in your virtual environment) :
python = "3.11.1"
llama-index-agent-openai = "^0.2.5"
openai = "^1.30.1"
pandas = "^2.2.2"
fitz = "^0.0.1.dev2"
pymupdf = "^1.24.4"
llama-index-llms-openai = "^0.1.19"
llama-index = "^0.10.37"
llama-index-experimental = "^0.1.3"
python-dotenv = "^1.0.1"
"""
# Standard-library imports
import argparse
import datetime
from glob import glob

# Third-party imports
import pandas as pd
from dotenv import load_dotenv
from llama_index.agent.openai import OpenAIAgent
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.core.agent import ReActAgent
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.experimental.query_engine import PandasQueryEngine
from llama_index.llms.openai import OpenAI

# Load environment variables containing the OpenAI API key.
# Put OPENAI_API_KEY in a .env file, or export it as an environment variable.
load_dotenv()

# Initialize the two chat models: hosted GPT-4o, and a local
# OpenAI-compatible server (e.g. LM Studio listening on localhost:1234).
gpt4 = OpenAI(model="gpt-4o")
local_llm = OpenAI(api_base="http://localhost:1234/v1", api_key="lm-studio")

# Directory where the PDF vector indexes are persisted between runs.
vector_indexes_path = "data/vector_store_indexes"
def create_pdf_gpt_agent(
    pdfs_path, embedding_llm, agent_llm, agent_name, agent_description, verbose
):
    """
    Create a ReActAgent that answers questions over a collection of PDFs.

    Args:
        pdfs_path (str): Glob pattern matching the PDF files (e.g. "docs/*.pdf").
        embedding_llm (OpenAI): LLM used by the vector-index query engine to
            synthesize answers from the retrieved chunks.
        agent_llm (OpenAI): LLM that drives the ReAct reasoning loop.
        agent_name (str): Name of the agent (also names the tool and the
            persisted index directory).
        agent_description (str): Description the agent sees when selecting the tool.
        verbose (bool): If True, print the agent's intermediate reasoning steps.

    Returns:
        ReActAgent: The created ReActAgent.

    Raises:
        FileNotFoundError: If the glob pattern matches no files.
    """
    # Read PDF files; fail fast with a clear message rather than letting the
    # reader crash on an empty input list.
    pdf_files = glob(pdfs_path)
    if not pdf_files:
        raise FileNotFoundError(f"No PDF files found matching: {pdfs_path}")
    pdf_content_list = SimpleDirectoryReader(input_files=pdf_files).load_data()

    # Build the vector index and persist it so it can be reloaded later.
    vector_index = VectorStoreIndex.from_documents(pdf_content_list)
    vector_index.storage_context.persist(
        persist_dir=f"./{vector_indexes_path}/{agent_name}"
    )

    # Wrap the index's query engine as a tool the agent can call.
    pdf_contents_tool = QueryEngineTool(
        query_engine=vector_index.as_query_engine(
            similarity_top_k=3, llm=embedding_llm
        ),
        metadata=ToolMetadata(name=agent_name, description=agent_description),
    )

    # Create the ReActAgent over the single PDF tool.
    react_agent = ReActAgent.from_tools(
        [pdf_contents_tool], llm=agent_llm, verbose=verbose
    )
    return react_agent
def create_csv_gpt_agent(
    csv_path, pandas_llm, agent_llm, agent_name, agent_description, verbose
):
    """
    Create a ReActAgent that answers questions over a CSV file via pandas.

    Args:
        csv_path (str): Path to the CSV file.
        pandas_llm (OpenAI): LLM used by the PandasQueryEngine to translate
            questions into pandas operations and synthesize responses.
        agent_llm (OpenAI): LLM that drives the ReAct reasoning loop.
        agent_name (str): Name of the agent / query-engine tool.
        agent_description (str): Description the agent sees when selecting the tool.
        verbose (bool): If True, print the query engine's and agent's
            intermediate steps.

    Returns:
        ReActAgent: The created ReActAgent.
    """
    # Load the CSV into a DataFrame (pd.read_csv raises FileNotFoundError
    # on a bad path, which is a clear enough failure mode here).
    csv_data = pd.read_csv(csv_path)

    # Wrap a pandas query engine over the DataFrame as a tool for the agent.
    csv_data_tool = QueryEngineTool(
        query_engine=PandasQueryEngine(
            df=csv_data, verbose=verbose, synthesize_response=True, llm=pandas_llm
        ),
        metadata=ToolMetadata(name=agent_name, description=agent_description),
    )

    # Create the ReActAgent over the single CSV tool.
    react_agent = ReActAgent.from_tools([csv_data_tool], llm=agent_llm, verbose=verbose)
    return react_agent
def chat_with_agent(agent, prompt):
    """
    Send one prompt to the agent and stream its reply to stdout.

    Args:
        agent (ReActAgent): The ReActAgent to chat with.
        prompt (str): The chat prompt.
    """
    # Stream the response tokens as they arrive rather than waiting
    # for the full completion.
    agent.stream_chat(prompt).print_response_stream()
def main():
    """
    Command-line entry point: build the chosen agent and start a chat REPL.

    Flags:
        --agent {pdf,csv}  (required) which data source to chat with
        --verbose          print the agent's intermediate reasoning
        --local            use the local LLM server instead of GPT-4o
        --pdfs_path PATH   folder containing PDFs (required with --agent pdf)
        --csv_path PATH    CSV file path (required with --agent csv)

    Reads prompts from stdin in a loop until the user types 'exit'.
    """
    parser = argparse.ArgumentParser(description="Chat with an agent")
    parser.add_argument(
        "--agent",
        choices=["pdf", "csv"],
        # required=True: previously omitting --agent fell through to a
        # dead-end "Invalid agent choice" print; argparse now reports
        # the missing flag with usage instead.
        required=True,
        help="Choose the agent to chat with",
    )
    parser.add_argument(
        "--verbose",
        action="store_true",
        default=False,
        help="Toggle verbose mode",
    )
    parser.add_argument(
        "--local",
        action="store_true",
        default=False,
        help="Toggle between local and remote OpenAI models",
    )
    parser.add_argument(
        "--pdfs_path",
        type=str,
        help="Path to folder containing the PDF files",
    )
    parser.add_argument(
        "--csv_path",
        type=str,
        help="Path to the CSV file",
    )
    args = parser.parse_args()

    llm = local_llm if args.local else gpt4
    verbose = args.verbose

    if args.agent == "pdf":
        if not args.pdfs_path:
            # Without this check the glob became the bogus pattern "None/*.pdf".
            parser.error("--pdfs_path is required when --agent pdf")
        agent = create_pdf_gpt_agent(
            f"{args.pdfs_path}/*.pdf",
            embedding_llm=llm,
            agent_llm=llm,
            agent_name="pdf_gpt_agent",
            agent_description="Provides the full scientific papers in the artificial intelligence domain",
            verbose=verbose,
        )
    elif args.agent == "csv":
        if not args.csv_path:
            parser.error("--csv_path is required when --agent csv")
        agent = create_csv_gpt_agent(
            args.csv_path,
            pandas_llm=llm,
            agent_llm=llm,
            agent_name="csv_gpt_agent",
            # Typo fixed in the tool description: "datasate" -> "dataset".
            agent_description="Accepts pandas instructions on the dataset of resale prices of HDB flats in Singapore.",
            verbose=verbose,
        )
    else:
        # Unreachable with required=True + choices; kept as a safety net.
        print("Invalid agent choice")
        return

    # Simple REPL: keep chatting until the user asks to quit.
    while True:
        prompt = input("\nEnter your chat prompt (or 'exit' to quit): ")
        # Accept 'exit' regardless of surrounding whitespace or case.
        if prompt.strip().lower() == "exit":
            break
        chat_with_agent(agent, prompt)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment