Last active
August 22, 2024 08:10
-
-
Save gautamborgohain/8fa1120c1cf8b76d14fdaaee249c261c to your computer and use it in GitHub Desktop.
Simple script implementing a command-line interface to run a ReAct agent on PDFs and CSVs using a local LLM or OpenAI
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
This script creates and interacts with ReActAgents for PDF and CSV data.
Dependencies (install in your virtual environment):
python = "3.11.1"
llama-index-agent-openai = "^0.2.5"
openai = "^1.30.1"
pandas = "^2.2.2"
fitz = "^0.0.1.dev2"
pymupdf = "^1.24.4"
llama-index-llms-openai = "^0.1.19"
llama-index = "^0.10.37"
llama-index-experimental = "^0.1.3"
python-dotenv = "^1.0.1"
""" | |
import argparse | |
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex | |
from llama_index.llms.openai import OpenAI | |
from llama_index.core.tools import QueryEngineTool, ToolMetadata | |
from glob import glob | |
from dotenv import load_dotenv | |
import pandas as pd | |
import datetime | |
from llama_index.experimental.query_engine import PandasQueryEngine | |
from llama_index.agent.openai import OpenAIAgent | |
from llama_index.core.agent import ReActAgent | |
import argparse | |
# Directory under which each agent's vector index is persisted
vector_indexes_path = "data/vector_store_indexes"

# Read the .env file (if any) into the environment before the LLM clients
# are constructed. The OpenAI API key may live there, or be exported
# directly as the OPENAI_API_KEY environment variable.
load_dotenv()

# LLM handles: the hosted OpenAI model and a local OpenAI-compatible server
gpt4 = OpenAI(model="gpt-4o")
local_llm = OpenAI(
    api_base="http://localhost:1234/v1",
    api_key="lm-studio",
)
def create_pdf_gpt_agent(
    pdfs_path, embedding_llm, agent_llm, agent_name, agent_description, verbose
):
    """
    Create a ReActAgent that answers questions over a set of PDF files.

    Args:
        pdfs_path (str): Glob pattern selecting the PDF files (e.g. "papers/*.pdf").
        embedding_llm (OpenAI): LLM handed to the vector index's query engine.
            Despite the name, this function does not pass it to the index
            construction step; it is only used as the query engine's `llm`.
        agent_llm (OpenAI): LLM that drives the ReAct agent.
        agent_name (str): Name of the query tool; also used as the sub-directory
            under `vector_indexes_path` where the index is persisted.
        agent_description (str): Description of the query tool.
        verbose (bool): Forwarded to `ReActAgent.from_tools`.
    Returns:
        ReActAgent: The created ReActAgent.
    Raises:
        ValueError: If `pdfs_path` does not match any file.
    """
    # Resolve the pattern up front; sorting makes the document order
    # independent of the file system's directory listing order.
    pdf_files = sorted(glob(pdfs_path))
    if not pdf_files:
        # Fail here with the real cause instead of handing an empty list
        # to the reader and surfacing an unrelated error message.
        raise ValueError(f"No PDF files found matching pattern: {pdfs_path!r}")

    # Read PDF files
    pdf_content_list = SimpleDirectoryReader(input_files=pdf_files).load_data()

    # Build vector index and write it to disk.
    # NOTE: the persisted copy is not read back by this function; the index
    # is rebuilt from the PDFs on every call.
    vector_index = VectorStoreIndex.from_documents(pdf_content_list)
    vector_index.storage_context.persist(
        persist_dir=f"./{vector_indexes_path}/{agent_name}"
    )

    # Create PDF contents tool
    pdf_contents_tool = QueryEngineTool(
        query_engine=vector_index.as_query_engine(
            similarity_top_k=3, llm=embedding_llm
        ),
        metadata=ToolMetadata(name=agent_name, description=agent_description),
    )

    # Create ReActAgent
    react_agent = ReActAgent.from_tools(
        [pdf_contents_tool], llm=agent_llm, verbose=verbose
    )
    return react_agent
def create_csv_gpt_agent(
    csv_path, pandas_llm, agent_llm, agent_name, agent_description, verbose
):
    """
    Build a ReActAgent whose single tool queries a CSV file through pandas.

    Args:
        csv_path (str): Path to the CSV file.
        pandas_llm (OpenAI): LLM given to the PandasQueryEngine.
        agent_llm (OpenAI): LLM given to the ReActAgent.
        agent_name (str): Name of the query tool.
        agent_description (str): Description of the query tool.
        verbose (bool): Forwarded to both the PandasQueryEngine and the agent.
    Returns:
        ReActAgent: The created ReActAgent.
    """
    # Load the CSV into a DataFrame
    frame = pd.read_csv(csv_path)

    # Query engine operating on the DataFrame
    pandas_engine = PandasQueryEngine(
        df=frame, verbose=verbose, synthesize_response=True, llm=pandas_llm
    )

    # Wrap the engine as a named tool
    tool_info = ToolMetadata(name=agent_name, description=agent_description)
    frame_tool = QueryEngineTool(query_engine=pandas_engine, metadata=tool_info)

    # Agent with that tool as its only capability
    return ReActAgent.from_tools([frame_tool], llm=agent_llm, verbose=verbose)
def chat_with_agent(agent, prompt):
    """
    Send one prompt to the agent and print its streamed reply.

    Args:
        agent (ReActAgent): The ReActAgent to chat with.
        prompt (str): The chat prompt.
    """
    # stream_chat returns a streaming response object that prints itself
    agent.stream_chat(prompt).print_response_stream()
def main():
    """
    Command-line entry point: build the selected agent and run a chat loop.

    Parses `--agent`, `--verbose`, `--local`, `--pdfs_path` and `--csv_path`,
    creates the PDF or CSV agent, then reads prompts from stdin until the
    user types 'exit' or closes the input (Ctrl-D / Ctrl-C).

    Exits through argparse's error path (status 2) when the path argument
    required by the chosen agent is missing.
    """
    # Local import: only needed to assemble the PDF glob pattern.
    import os

    parser = argparse.ArgumentParser(description="Chat with an agent")
    parser.add_argument(
        "--agent", choices=["pdf", "csv"], help="Choose the agent to chat with"
    )
    parser.add_argument(
        "--verbose",
        action="store_true",
        default=False,
        help="Toggle verbose mode",
    )
    parser.add_argument(
        "--local",
        action="store_true",
        default=False,
        help="Toggle between local and remote OpenAI models",
    )
    parser.add_argument(
        "--pdfs_path",
        type=str,
        help="Path to folder containing the PDF files",
    )
    parser.add_argument(
        "--csv_path",
        type=str,
        help="Path to the CSV file",
    )
    args = parser.parse_args()

    # Validate before any model or agent is touched, so that a bad
    # invocation fails fast with a usage message.
    if args.agent is None:
        # `choices` already rejects unknown values; None means the flag
        # was omitted altogether.
        print("Invalid agent choice")
        return
    if args.agent == "pdf" and not args.pdfs_path:
        parser.error("--pdfs_path is required when --agent is 'pdf'")
    if args.agent == "csv" and not args.csv_path:
        parser.error("--csv_path is required when --agent is 'csv'")

    llm = local_llm if args.local else gpt4
    verbose = args.verbose

    if args.agent == "pdf":
        # os.path.join copes with a trailing separator on the folder path.
        pdfs_path = os.path.join(args.pdfs_path, "*.pdf")
        agent = create_pdf_gpt_agent(
            pdfs_path,
            embedding_llm=llm,
            agent_llm=llm,
            agent_name="pdf_gpt_agent",
            agent_description="Provides the full scientific papers in the artificial intelligence domain",
            verbose=verbose,
        )
    else:  # "csv" is the only other value `choices` lets through
        agent = create_csv_gpt_agent(
            args.csv_path,
            pandas_llm=llm,
            agent_llm=llm,
            agent_name="csv_gpt_agent",
            agent_description="Accepts pandas instructions on the datasate of resale prices of HDB flats in Singapore.",
            verbose=verbose,
        )

    while True:
        try:
            prompt = input("\nEnter your chat prompt (or 'exit' to quit): ")
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C ends the session without a traceback.
            print()
            break
        prompt = prompt.strip()
        if prompt == "exit":
            break
        if not prompt:
            # Nothing to ask; prompt again instead of calling the model.
            continue
        chat_with_agent(agent, prompt)
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment