ohmeow/main.py

## main.py
import json

import requests

from bs4 import BeautifulSoup
from dotenv import load_dotenv
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.retrievers import ArxivRetriever
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda
from langchain.utilities.duckduckgo_search import DuckDuckGoSearchAPIWrapper

# Using the prompts from the gpt-researcher repo
# see: https://github.com/assafelovic/gpt-researcher/blob/47482c35578d671c45c2ef74e0d56c29131a3c4e/gpt_researcher/master/prompts.py

# Load variables from a local .env file into the process environment before the
# client objects below are constructed.
# NOTE(review): presumably this is where the OpenAI API key comes from — confirm.
load_dotenv()

# Default value of `arxiv_search`'s `num_results` parameter.
RESULTS_PER_QUESTION = 3

# ddg_search = DuckDuckGoSearchAPIWrapper()

# Module-level retriever instance that `arxiv_search` queries.
retriever = ArxivRetriever()
# docs = retriever.get_summaries_as_docs(query="What are the best parameter efficient fine-tuning techniques to use in LLMs?")
# print(docs)

# ====================
# Scrape and Summarize Chain
# ====================

# Prompt for condensing one retrieved document with respect to one question.
# Placeholders: {text} (document body) and {question}.
SUMMARY_TEMPLATE = """\
{text}

----------

Using the above text, answer in short the following question:

> {question}

----------
If the question cannot be answered using the text, simply summarize the text. Include all factual information, numbers, stats, etc..."""

SUMMARY_PROMPT = ChatPromptTemplate.from_template(SUMMARY_TEMPLATE)


def _format_summary(d: dict) -> str:
    """Render one summarised document as a ``TITLE: ... / SUMMARY: ...`` text block."""
    return f"TITLE: {d['title']}\n\nSUMMARY: {d['summary']}"


# Input: a dict with "question", "title" and "text".
# `assign` keeps those keys and adds "summary" (the model reply parsed to a plain string);
# the formatter then collapses the dict into a single string.
summarize_chain = (
    RunnablePassthrough.assign(summary=(SUMMARY_PROMPT | ChatOpenAI(model="gpt-3.5-turbo-1106") | StrOutputParser()))
    | _format_summary
)

# rsp = summarize_chain.invoke(
#     {
#         "question": "What is the difference between LangChain and LangSmith?",
#         "title": "What is LangChain",
#         "text": "Langchain abstracts at a high level working with large language models",
#     }
# )
# print(rsp)


# # ====================
# # Arxiv Search Chain
# # ====================


def arxiv_search(query: str, num_results: int = RESULTS_PER_QUESTION) -> list:
    """Fetch arXiv documents relevant to *query*.

    Args:
        query: Free-text search string handed to the module-level ``retriever``.
        num_results: Maximum number of documents to return; values below zero
            are treated as zero.

    Returns:
        At most ``num_results`` documents, in the order the retriever produced them.
    """
    docs = retriever.get_relevant_documents(query)
    # `num_results` used to be accepted but ignored; apply it as an upper bound.
    return docs[: max(num_results, 0)]


def _lookup_docs(inputs_d):
    """Run the arXiv search for the question carried in the input dict."""
    return arxiv_search(inputs_d["question"])


def _to_summary_inputs(inputs_d):
    """Build one ``summarize_chain`` payload per retrieved document."""
    question = inputs_d["question"]
    return [
        {"question": question, "text": paper.page_content, "title": paper.metadata["Title"]}
        for paper in inputs_d["docs"]
    ]


# {"question": str} -> list of "TITLE/SUMMARY" strings, one per retrieved document.
# Step 1 adds "docs" next to "question"; step 2 fans the dict out into per-document
# payloads; step 3 applies `summarize_chain` to every payload.
arxiv_search_chain = RunnablePassthrough.assign(docs=_lookup_docs) | _to_summary_inputs | summarize_chain.map()

# rsp = arxiv_search_chain.invoke({"question": "What are the best parameter efficient fine-tuning techniques to use in LLMs?"})
# print(rsp)

# ====================
# Query Generation Chain
# ====================

# User message asking the model for three search queries as a JSON list of strings.
# Placeholder: {question}.
SEARCH_USER_MSG = """\
Write 3 google search queries to search online that form an objective opinion from the following: {question}
You must respond with a list of strings in the following format: ["query 1", "query 2", "query 3"]

The response must be valid JSON!"""

SEARCH_PROMPT = ChatPromptTemplate.from_messages(
    [
        # ("system", "{agent_prompt}"),
        ("user", SEARCH_USER_MSG)
    ]
)

# {"question": str} -> whatever `json.loads` decodes from the model reply
# (a list of query strings when the model follows the prompt).
# `json.loads` raises `json.JSONDecodeError` if the reply is not bare, valid JSON.
search_question_chain = SEARCH_PROMPT | ChatOpenAI(model="gpt-3.5-turbo-1106") | StrOutputParser() | json.loads

# rsp = search_question_chain.invoke({"question": "What are the best parameter efficient fine-tuning techniques to use in LLMs?"})
# print(rsp)

# ====================
# Full Research Chain
# ====================

def _as_question_dicts(queries):
    """Wrap each generated query in the ``{"question": ...}`` dict `arxiv_search_chain` takes."""
    return [{"question": query} for query in queries]


# {"question": str} -> list (one entry per generated query) of lists of summary strings.
full_research_chain = search_question_chain | _as_question_dicts | arxiv_search_chain.map()

# rsp = full_research_chain.invoke({"question": "What are the best parameter efficient fine-tuning techniques to use in LLMs?"})
# print(rsp)

# ====================
# Final Report Chain
# ====================

# System message for the final report-writing call.
RESEARCH_REPORT_SYSTEM_TEMPLATE = """\
You are an experienced AI business analyst assistant. Your main objective is to produce comprehensive, insightful, impartial, and systematically structured business reports based on provided business data, market trends, and strategic analysis."""

# Human message for the final report-writing call.
# Placeholders: {research_summary} (collected summaries) and {question}.
# The trailing backslash after "--" is a line continuation inside the literal, so that
# line and the next one form a single line of prompt text.
RESEARCH_REPORT_HUMAN_TEMPLATE = """\
Information:
----------
{research_summary}
----------


Using the above information, answer the following query or task: "{question}" in a detailed report -- \
The report should focus on the answer to the query, should be well structured, informative, in depth and comprehensive, with facts and numbers if available and a minimum of 1,200 words.

You should strive to write the report as long as you can using all relevant and necessary information provided.
You must write the report with markdown syntax.
Use an unbiased and journalistic tone.
You MUST determine your own concrete and valid opinion based on the given information. Do NOT defer to general and meaningless conclusions.
You MUST write all used source titles at the end of the report as references, and make sure to not add duplicated sources, but only one reference for each.
You MUST write the report in markdown format.
Cite search results using inline notations. Only cite the most relevant results that answer the query accurately. Place these citations at the end of the sentence or paragraph that reference them.
Please do your best, this is very important to my career."""

RESEARCH_PROMPT = ChatPromptTemplate.from_messages(
    [
        ("system", RESEARCH_REPORT_SYSTEM_TEMPLATE),
        ("human", RESEARCH_REPORT_HUMAN_TEMPLATE),
    ]
)


def collect_results(list_of_lists):
    """Merge nested groups of strings into one text blob.

    The strings of each inner group are joined with a blank line, and the
    resulting group texts are joined with a blank line as well.
    """
    separator = "\n\n"
    return separator.join(separator.join(group) for group in list_of_lists)


# End-to-end pipeline: {"question": str} -> report text.
# `assign` keeps "question" and adds "research_summary" (the output of
# `full_research_chain` passed through `collect_results`); those two keys fill the
# placeholders of RESEARCH_PROMPT, whose model reply is parsed to a plain string.
chain = (
    RunnablePassthrough.assign(research_summary=full_research_chain | collect_results)
    | RESEARCH_PROMPT
    | ChatOpenAI(model="gpt-3.5-turbo-1106")
    | StrOutputParser()
)

# NOTE(review): these two statements sit at module level, so the whole pipeline is
# invoked whenever this module is loaded, not only when it is run as a script.
# Consider an `if __name__ == "__main__":` guard before enabling the server code below.
rsp = chain.invoke({"question": "What are the best parameter efficient fine-tuning techniques to use in LLMs?"})
print(rsp)


# #!/usr/bin/env python
# from fastapi import FastAPI
# from langserve import add_routes


# app = FastAPI(
#     title="LangChain Server",
#     version="1.0",
#     description="A simple api server using Langchain's Runnable interfaces",
# )

# add_routes(
#     app,
#     chain,
#     path="/research-assistant",
# )

# if __name__ == "__main__":
#     import uvicorn

#     uvicorn.run(app, host="localhost", port=8000)
	import json

	import requests

	from bs4 import BeautifulSoup
	from dotenv import load_dotenv
	from langchain.chat_models import ChatOpenAI
	from langchain.prompts import ChatPromptTemplate
	from langchain.retrievers import ArxivRetriever
	from langchain.schema.output_parser import StrOutputParser
	from langchain.schema.runnable import RunnablePassthrough, RunnableLambda
	from langchain.utilities.duckduckgo_search import DuckDuckGoSearchAPIWrapper

	# Using the prompts from the gpt-research repo
	# see: https://github.com/assafelovic/gpt-researcher/blob/47482c35578d671c45c2ef74e0d56c29131a3c4e/gpt_researcher/master/prompts.py

	# Load variables from a local .env file into the process environment.
	load_dotenv()

	# Default value of `arxiv_search`'s `num_results` parameter.
	RESULTS_PER_QUESTION = 3

	# ddg_search = DuckDuckGoSearchAPIWrapper()

	# Module-level retriever instance that `arxiv_search` queries.
	retriever = ArxivRetriever()
	# docs = retriever.get_summaries_as_docs(query="What are the best parameter efficient fine-tuning techniques to use in LLMs?")
	# print(docs)

	# ====================
	# Scrape and Summarize Chain
	# ====================

	# Prompt for condensing one document with respect to one question ({text}, {question}).
	# NOTE(review): in this tab-indented copy every line of the literal starts with a tab,
	# and that tab is part of the prompt text.
	SUMMARY_TEMPLATE = """\
	{text}

	----------

	Using the above text, answer in short the following question:

	> {question}

	----------
	If the question cannot be answered using the text, simply summarize the text. Include all factual information, numbers, stats, etc..."""

	SUMMARY_PROMPT = ChatPromptTemplate.from_template(SUMMARY_TEMPLATE)

	# Input: a dict with "question", "title" and "text"; output: one "TITLE/SUMMARY" string.
	# The runnables are composed with plain `|` (an escaped `\|` is not valid Python).
	summarize_chain = RunnablePassthrough.assign(summary=(SUMMARY_PROMPT | ChatOpenAI(model="gpt-3.5-turbo-1106") | StrOutputParser())) | (
	    lambda d: f"TITLE: {d['title']}\n\nSUMMARY: {d['summary']}"
	)

	# rsp = summarize_chain.invoke(
	# {
	# "question": "What is the difference between LangChain and LangSmith?",
	# "title": "What is LangChain",
	# "text": "Langchain abstracts at a high level working with large language models",
	# }
	# )
	# print(rsp)


	# # ====================
	# # Arxiv Search Chain
	# # ====================


	def arxiv_search(query: str, num_results: int = RESULTS_PER_QUESTION) -> list:
	    """Fetch arXiv documents relevant to *query*.

	    Args:
	        query: Free-text search string handed to the module-level ``retriever``.
	        num_results: Maximum number of documents to return; values below zero
	            are treated as zero.

	    Returns:
	        At most ``num_results`` documents, in the order the retriever produced them.
	    """
	    docs = retriever.get_relevant_documents(query)
	    # Apply the cap that the signature advertises.
	    return docs[: max(num_results, 0)]


	# {"question": str} -> list of "TITLE/SUMMARY" strings, one per retrieved document.
	# Step 1 adds "docs" next to "question"; step 2 builds one payload per document;
	# step 3 applies `summarize_chain` to every payload.
	arxiv_search_chain = (
	    RunnablePassthrough.assign(docs=lambda inputs_d: arxiv_search(inputs_d["question"]))
	    | (
	        lambda inputs_d: [
	            {"question": inputs_d["question"], "text": doc.page_content, "title": doc.metadata["Title"]} for doc in inputs_d["docs"]
	        ]
	    )
	    | summarize_chain.map()
	)

	# rsp = arxiv_search_chain.invoke({"question": "What are the best parameter efficient fine-tuning techniques to use in LLMs?"})
	# print(rsp)

	# ====================
	# Query Generation Chain
	# ====================

	# User message asking the model for three search queries as a JSON list of strings.
	# Placeholder: {question}.
	# NOTE(review): in this tab-indented copy every line of the literal starts with a tab,
	# and that tab is part of the prompt text.
	SEARCH_USER_MSG = """\
	Write 3 google search queries to search online that form an objective opinion from the following: {question}
	You must respond with a list of strings in the following format: ["query 1", "query 2", "query 3"]

	The response must be valid JSON!"""

	SEARCH_PROMPT = ChatPromptTemplate.from_messages(
	    [
	        # ("system", "{agent_prompt}"),
	        ("user", SEARCH_USER_MSG)
	    ]
	)

	# {"question": str} -> whatever `json.loads` decodes from the model reply;
	# `json.loads` raises `json.JSONDecodeError` if the reply is not bare, valid JSON.
	search_question_chain = SEARCH_PROMPT | ChatOpenAI(model="gpt-3.5-turbo-1106") | StrOutputParser() | json.loads

	# rsp = search_question_chain.invoke({"question": "What are the best parameter efficient fine-tuning techniques to use in LLMs?"})
	# print(rsp)

	# ====================
	# Full Research Chain
	# ====================

	# {"question": str} -> list (one entry per generated query) of lists of summary strings.
	# Each generated query is wrapped as {"question": q} and sent through `arxiv_search_chain`.
	full_research_chain = search_question_chain | (lambda queries: [{"question": q} for q in queries]) | arxiv_search_chain.map()

	# rsp = full_research_chain.invoke({"question": "What are the best parameter efficient fine-tuning techniques to use in LLMs?"})
	# print(rsp)

	# ====================
	# Final Report Chain
	# ====================

	# System message for the final report-writing call.
	# NOTE(review): in this tab-indented copy every line of both literals starts with a tab,
	# and that tab is part of the prompt text.
	RESEARCH_REPORT_SYSTEM_TEMPLATE = """\
	You are an experienced AI business analyst assistant. Your main objective is to produce comprehensive, insightful, impartial, and systematically structured business reports based on provided business data, market trends, and strategic analysis."""

	# Human message for the final report-writing call.
	# Placeholders: {research_summary} and {question}. The backslash after "--" is a line
	# continuation inside the literal, joining that line with the next one.
	RESEARCH_REPORT_HUMAN_TEMPLATE = """\
	Information:
	----------
	{research_summary}
	----------


	Using the above information, answer the following query or task: "{question}" in a detailed report -- \
	The report should focus on the answer to the query, should be well structured, informative, in depth and comprehensive, with facts and numbers if available and a minimum of 1,200 words.

	You should strive to write the report as long as you can using all relevant and necessary information provided.
	You must write the report with markdown syntax.
	Use an unbiased and journalistic tone.
	You MUST determine your own concrete and valid opinion based on the given information. Do NOT defer to general and meaningless conclusions.
	You MUST write all used source titles at the end of the report as references, and make sure to not add duplicated sources, but only one reference for each.
	You MUST write the report in markdown format.
	Cite search results using inline notations. Only cite the most relevant results that answer the query accurately. Place these citations at the end of the sentence or paragraph that reference them.
	Please do your best, this is very important to my career."""

	RESEARCH_PROMPT = ChatPromptTemplate.from_messages(
	    [
	        ("system", RESEARCH_REPORT_SYSTEM_TEMPLATE),
	        ("human", RESEARCH_REPORT_HUMAN_TEMPLATE),
	    ]
	)


	def collect_results(list_of_lists):
	    """Merge nested groups of strings into one text blob.

	    The strings of each inner group are joined with a blank line, and the
	    resulting group texts are joined with a blank line as well.
	    """
	    separator = "\n\n"
	    return separator.join(separator.join(group) for group in list_of_lists)


	# End-to-end pipeline: {"question": str} -> report text.
	# `assign` keeps "question" and adds "research_summary"; both fill RESEARCH_PROMPT.
	chain = (
	    RunnablePassthrough.assign(research_summary=full_research_chain | collect_results)
	    | RESEARCH_PROMPT
	    | ChatOpenAI(model="gpt-3.5-turbo-1106")
	    | StrOutputParser()
	)

	# NOTE(review): executed at module level, i.e. whenever this code is loaded.
	rsp = chain.invoke({"question": "What are the best parameter efficient fine-tuning techniques to use in LLMs?"})
	print(rsp)


	# #!/usr/bin/env python
	# from fastapi import FastAPI
	# from langserve import add_routes


	# app = FastAPI(
	# title="LangChain Server",
	# version="1.0",
	# description="A simple api server using Langchain's Runnable interfaces",
	# )

	# add_routes(
	# app,
	# chain,
	# path="/research-assistant",
	# )

	# if __name__ == "__main__":
	# import uvicorn

	# uvicorn.run(app, host="localhost", port=8000)