Install the dependencies:

pip install openai-agents requests beautifulsoup4
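The script below is built on the OpenAI Agents SDK, which reads your API key from the OPENAI_API_KEY environment variable by default, so set that before running it.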
import asyncio

import requests
from bs4 import BeautifulSoup

from agents import Agent, Runner, function_tool


@function_tool
def fetch_webpage(url: str) -> str:
    """Fetches content from a webpage.

    Args:
        url: The URL of the webpage to fetch.

    Returns:
        The text content of the webpage.
    """
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()  # Raise an exception for 4XX/5XX responses

        # Parse with BeautifulSoup to get cleaner text
        soup = BeautifulSoup(response.text, 'html.parser')

        # Remove script and style elements that contain JavaScript/CSS
        for script in soup(["script", "style"]):
            script.extract()

        # Get text and clean up whitespace
        text = soup.get_text(separator='\n')
        lines = (line.strip() for line in text.splitlines())
        text = '\n'.join(line for line in lines if line)

        # Truncate if too long (LLMs have token limits)
        if len(text) > 10000:
            text = text[:10000] + "...\n[Content truncated due to length]"

        return text
    except Exception as e:
        return f"Error fetching webpage: {str(e)}"
@function_tool
def extract_information(url: str, query: str) -> str:
    """Extracts specific information from a webpage based on a query.

    Args:
        url: The URL of the webpage to extract information from.
        query: The specific information to look for (e.g., "main headline", "price of product").

    Returns:
        The extracted information or an error message.
    """
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        # This is a simple implementation - in a real application, you might want
        # to implement more sophisticated extraction based on the query
        # (see the sketch after the script).
        # Pull the text out first: a backslash inside an f-string expression is a
        # syntax error before Python 3.12.
        page_text = soup.get_text(separator='\n')[:5000]
        return f"Query: '{query}'\nWebpage content to analyze:\n\n{page_text}"
    except Exception as e:
        return f"Error extracting information: {str(e)}"
# Create the agent with our webpage fetching tools
webpage_agent = Agent(
    name="Webpage Information Agent",
    instructions="""You are an agent that helps fetch and analyze information from webpages.
    You have access to tools that can fetch content from websites and extract specific information.

    When a user asks about content from a webpage:
    1. Use the fetch_webpage tool to retrieve the content
    2. If they want specific information, use the extract_information tool
    3. Analyze the information and provide a helpful response

    Always check that URLs are properly formatted before fetching them.""",
    tools=[fetch_webpage, extract_information],
)


async def main():
    # Get user input
    user_query = input("What webpage would you like me to analyze? ")

    # Run the agent
    result = await Runner.run(webpage_agent, user_query)

    # Print the result
    print("\nAgent response:")
    print(result.final_output)


if __name__ == "__main__":
    asyncio.run(main())
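The comment inside extract_information notes that a real application would want smarter extraction than dumping raw page text. As one possible direction, here is a minimal sketch that scores landmark elements (headings, paragraphs, list items) by keyword overlap with the query; the function name and the scoring heuristic are illustrative assumptions, not part of the original gist.

import requests
from bs4 import BeautifulSoup


def extract_information_targeted(url: str, query: str) -> str:
    """Hypothetical variant: return the page elements that best match the query."""
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # Elements that usually carry the answer: headings, paragraphs, list items
        candidates = soup.find_all(['h1', 'h2', 'h3', 'p', 'li'])
        keywords = {word.lower() for word in query.split()}

        # Score each element by how many query keywords appear in its text
        scored = []
        for element in candidates:
            text = element.get_text(strip=True)
            if not text:
                continue
            hits = sum(1 for word in keywords if word in text.lower())
            if hits:
                scored.append((hits, text))

        # Return the top five matches, best first
        scored.sort(key=lambda pair: pair[0], reverse=True)
        best = [text for _, text in scored[:5]]
        return '\n'.join(best) if best else "No matching content found."
    except Exception as e:
        return f"Error extracting information: {str(e)}"

Decorated with @function_tool, this drops into the agent's tools list in place of (or alongside) extract_information.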
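For quick non-interactive tests you don't need the input() prompt: the Agents SDK also provides Runner.run_sync, which runs the agent to completion from synchronous code. The question and URL below are placeholders.

# One-off synchronous run; assumes webpage_agent from the script above is in
# scope, with example.com standing in for a real URL.
result = Runner.run_sync(webpage_agent, "What is the main heading of https://example.com?")
print(result.final_output)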