@juananpe · Created March 18, 2025 22:01
pip install openai-agents requests beautifulsoup4
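The agent itself also needs an OpenAI API key. Assuming the default openai-agents configuration, the SDK reads the key from the OPENAI_API_KEY environment variable (the key value below is a placeholder):

export OPENAI_API_KEY=sk-...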
import asyncio

import requests
from bs4 import BeautifulSoup

from agents import Agent, Runner, function_tool

@function_tool
def fetch_webpage(url: str) -> str:
    """Fetches content from a webpage.

    Args:
        url: The URL of the webpage to fetch.

    Returns:
        The text content of the webpage.
    """
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()  # Raise an exception for 4XX/5XX responses

        # Parse with BeautifulSoup to get cleaner text
        soup = BeautifulSoup(response.text, 'html.parser')

        # Remove script and style elements that contain JavaScript/CSS
        for script in soup(["script", "style"]):
            script.extract()

        # Get text and clean up whitespace
        text = soup.get_text(separator='\n')
        lines = (line.strip() for line in text.splitlines())
        text = '\n'.join(line for line in lines if line)

        # Truncate if too long (LLMs have token limits)
        if len(text) > 10000:
            text = text[:10000] + "...\n[Content truncated due to length]"

        return text
    except Exception as e:
        return f"Error fetching webpage: {str(e)}"

@function_tool
def extract_information(url: str, query: str) -> str:
    """Extracts specific information from a webpage based on a query.

    Args:
        url: The URL of the webpage to extract information from.
        query: The specific information to look for (e.g., "main headline", "price of product").

    Returns:
        The extracted information or an error message.
    """
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        # This is a simple implementation - in a real application, you might want
        # to implement more sophisticated extraction based on the query.
        # The text is pulled into a variable first: backslashes inside f-string
        # expressions are a SyntaxError on Python versions before 3.12.
        content = soup.get_text(separator='\n')[:5000]
        return f"Query: '{query}'\nWebpage content to analyze:\n\n{content}"
    except Exception as e:
        return f"Error extracting information: {str(e)}"

# Create the agent with our webpage fetching tools
webpage_agent = Agent(
    name="Webpage Information Agent",
    instructions="""You are an agent that helps fetch and analyze information from webpages.
    You have access to tools that can fetch content from websites and extract specific information.
    When a user asks about content from a webpage:
    1. Use the fetch_webpage tool to retrieve the content
    2. If they want specific information, use the extract_information tool
    3. Analyze the information and provide a helpful response
    Always check that URLs are properly formatted before fetching them.""",
    tools=[fetch_webpage, extract_information],
)

async def main():
    # Get user input
    user_query = input("What webpage would you like me to analyze? ")

    # Run the agent
    result = await Runner.run(webpage_agent, user_query)

    # Print the result
    print("\nAgent response:")
    print(result.final_output)


if __name__ == "__main__":
    asyncio.run(main())
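For non-interactive use (for example in a test or batch job), the SDK also exposes a synchronous entry point. A minimal sketch, assuming the openai-agents Runner.run_sync API and reusing the webpage_agent and Runner names from the script above; the URL and prompt are illustrative only:

# Hypothetical one-shot run with a fixed query instead of input().
result = Runner.run_sync(webpage_agent, "Summarize the main content of https://example.com")
print(result.final_output)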