Created
May 13, 2024 05:16
-
-
Save gbertb/c04216260cfa70583462cbf2f3a0260b to your computer and use it in GitHub Desktop.
CrewAI + Spider Stock Research Assistant
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from dotenv import load_dotenv | |
from crewai import Task, Process | |
from crewai import Agent, Crew | |
from crewai_tools import SerperDevTool, tool | |
from langchain_community.document_loaders import SpiderLoader | |
from langchain_core.prompts import PromptTemplate | |
from langchain_openai import ChatOpenAI | |
from langchain.chains.combine_documents.stuff import StuffDocumentsChain, LLMChain | |
from typing import List | |
# Load API keys (OpenAI, Serper, Spider) from a local .env file.
load_dotenv()

# Google-search tool (Serper API) shared by both agents below.
search_tool = SerperDevTool()
def load_markdown_from_urls(url):
    """Crawl *url* with Spider and return the loaded documents as markdown.

    Args:
        url: A URL, or several URLs joined with commas (as the caller does).

    Returns:
        The list of loaded ``Document`` objects, or ``None`` when the crawl
        produced no documents, so callers can detect an empty result.
    """
    loader = SpiderLoader(
        url=url,
        mode="crawl",
        params={
            "return_format": "markdown",
            "proxy_enabled": True,
            "request": "http",
            "request_timeout": 60,
            "limit": 1,  # one page per URL; keeps the summarize step small
            "cache": False,
        },
    )
    data = loader.load()
    # Collapse an empty result to None — same outcome as the original
    # if/else branch, stated directly.
    return data or None
@tool("scrape_and_summarize")
def scrape_and_summarize(urls: List[str]) -> str:
    """Scrape website content based on one or more urls and summarize each based on the objective of the goal. Scrape up to 5 URLs at a time. Do not scrape or summarize PDF content types."""
    # Spider accepts multiple URLs as one comma-separated string.
    url_join_str = ",".join(urls)
    content_docs = load_markdown_from_urls(url_join_str)
    if content_docs is None:
        # Guard: the loader returns None on an empty crawl; previously this
        # fell through and crashed inside StuffDocumentsChain.invoke(None).
        return "No content could be scraped from the provided URLs."
    llm = ChatOpenAI(model="gpt-4-turbo")
    # Pass each document's raw page content straight into the summary prompt.
    document_prompt = PromptTemplate(
        input_variables=["page_content"], template="{page_content}"
    )
    document_variable_name = "context"
    prompt = PromptTemplate.from_template(
        "Objective: Summarize this content in bullet points highlighting important insights. Be comprehensive, yet concise: {context}"
    )
    llm_chain = LLMChain(llm=llm, prompt=prompt)
    # "Stuff" strategy: concatenate all docs into one prompt and summarize once.
    stuff_chain = StuffDocumentsChain(
        llm_chain=llm_chain,
        document_prompt=document_prompt,
        document_variable_name=document_variable_name,
    )
    output = stuff_chain.invoke(content_docs)["output_text"]
    return output
# Research agent: gathers stock information for {company}; the placeholder is
# filled from the inputs dict passed to crew.kickoff at the bottom of the file.
researcher = Agent(
    role="Senior Stock Researcher",
    goal="Stock researcher for company or ticker: {company}",
    verbose=True,  # log the agent's reasoning to stdout
    memory=True,   # retain context across its tasks
    backstory="Driven by researching the next upcoming company stock that would make a good purchase",
    tools=[search_tool, scrape_and_summarize],  # web search + page summarizer
    allow_delegation=True,  # may hand work to other agents in the crew
)
# Writing agent: turns the research output into a blog article on {company}.
writer = Agent(
    role="Writer",
    goal="Blog writer for company stock {company}",
    verbose=True,
    memory=True,
    backstory=(
        "A writer for many popular business magazines and journals covering companies and business."
    ),
    tools=[search_tool, scrape_and_summarize],
    allow_delegation=False,  # terminal agent — writes the article itself
)
# Task 1: fundamental analysis (strengths/weaknesses, buy/sell reasoning).
research_fundamentals_task = Task(
    description="Research stock for {company} based on the fundamentals of the company for 2024 and beyond."
    "Focus on identifying the strengths and weaknesses for the given company and provide reasons for why the stock is a good or bad buy. Scrape search results by passing in urls to the scrape_and_summarize tool."
    "Based on the scraped content, your final report should clearly articulate the key points,"
    "its market opportunities, and potential risks of buying the stock. ONLY use scraped content from our search results for the report.",
    expected_output="A comprehensive 4-6 paragraphs long report on company stock.",
    tools=[search_tool, scrape_and_summarize],
    agent=researcher,
)
# Task 2: technical (chart/price) analysis, same researcher agent.
research_technicals_task = Task(
    description="Research the technicals of stock chart for {company} for 2024 and beyond."
    "Focus on identifying the strengths and weaknesses of what the charts and price are saying for the given company and provide reasons for why the stock is a good or bad buy based on this perspective. Scrape search results by passing in urls to the scrape_and_summarize tool."
    "Based on the scraped content, your final report should clearly articulate the key points. ONLY use scraped content from our search results for the report.",
    expected_output="A comprehensive 4-6 paragraphs long report on company stock.",
    tools=[search_tool, scrape_and_summarize],
    agent=researcher,
)
# Task 3: compose the final article from the two research reports.
write_task = Task(
    description=(
        "Compose an insightful article on {company}."
        "Focus on recent news about the company, fundamental analysis like strengths and weaknesses, it's overall market outlook and the industry in which the company operates. Also, write an overview of it's stock from a technical analysis perspective."
        "This article should be easy to understand, engaging, and positive. ONLY use scraped content from our search results for the report."
    ),
    expected_output="A 6-8 paragraph article on {company}, formatted as markdown.",
    tools=[search_tool, scrape_and_summarize],
    agent=writer,
    async_execution=False,  # run inline within the sequential process
    # NOTE(review): filename hard-codes Costco (COST) while {company} is
    # parameterized — change this when running for a different company.
    output_file="COST-post.md",
    human_input=True,  # pause for human review before finalizing the article
)
# Assemble the crew: tasks run in order — fundamentals, technicals, then write.
crew = Crew(
    agents=[researcher, writer],
    tasks=[research_fundamentals_task, research_technicals_task, write_task],
    process=Process.sequential,  # Optional: Sequential task execution is default
    memory=True,     # shared memory across tasks
    cache=True,      # cache tool results between calls
    max_rpm=100,     # rate-limit LLM requests per minute
    share_crew=False,
    output_log_file="crewai_spider.log",  # execution log written to disk
)
# Run the pipeline for one company; this inputs dict fills every {company}
# placeholder in the agent goals and task descriptions above.
result = crew.kickoff(inputs={"company": "Costco Wholesale"})
print(result)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment