Skip to content

Instantly share code, notes, and snippets.

@mtanco
Last active January 10, 2024 00:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mtanco/a003fb8ed0794c39ce74921de4c45f7a to your computer and use it in GitHub Desktop.
This script serves as a foundation for creating interactive chatbot applications with streaming responses powered by H2O.ai's language models within the H2O Wave framework.
import os
import asyncio
from h2o_wave import main, app, Q, ui, data, run_on, on
from h2ogpte import H2OGPTE
from h2ogpte.types import ChatMessage, PartialChatMessage
SYSTEM_PROMPT = "Hello, I am a bot that only talks about dogs!"
@app("/")
async def serve(q: Q):
"""Route the end user based on how they interacted with the app."""
if not q.client.initialized: # Setup the application for a new browser tab, if not done yet
q.page["chatbot_card"] = ui.chatbot_card(
box="1 1 4 -1",
name="chatbot",
data=data(
fields="content from_user",
t="list",
rows=[
[SYSTEM_PROMPT, False],
],
),
)
q.client.initialized = True
await run_on(q) # Route user to the appropriate "on" function
await q.page.save() # Update the UI
@on()
async def chatbot(q: Q):
    """Forward the user's message to the LLM and stream the reply into the chat card."""
    interaction = ChatBotInteraction(user_message=q.args.chatbot)
    q.client.chatbot_interaction = interaction

    # Echo the user's message, then append a placeholder row that the
    # streaming loop will keep overwriting with partial LLM output.
    q.page["chatbot_card"].data += [q.args.chatbot, True]
    q.page["chatbot_card"].data += [interaction.content_to_show, False]

    # Run the UI refresher concurrently while the blocking LLM call
    # executes off the event loop via q.run.
    ui_refresher = asyncio.ensure_future(stream_updates_to_ui(q))
    await q.run(chat, interaction)
    await ui_refresher
async def stream_updates_to_ui(q: Q):
    """
    Update the app's UI every 1/10th of a second with values from our chatbot
    interaction, and paint the final response once streaming ends.

    The original version duplicated the "overwrite last row + save" block both
    inside and after the loop; this do-while shape performs the identical
    sequence (k sleeps, k+1 updates) with the code written once.

    :param q: The query object stored by H2O Wave with information about the app and user behavior.
    """
    while True:
        # Overwrite the placeholder row with the latest partial (or final) content.
        q.page["chatbot_card"].data[-1] = [
            q.client.chatbot_interaction.content_to_show,
            False,
        ]
        await q.page.save()
        if not q.client.chatbot_interaction.responding:
            # Final content has just been painted; stop refreshing.
            break
        await q.sleep(0.1)
def chat(chatbot_interaction):
    """
    Send the user's message to the LLM and save the response.

    This performs blocking H2OGPTE calls, so it is run off the event loop
    (via q.run). A throwaway collection and chat session are created for the
    single query; try/finally guarantees both are deleted even when the query
    times out or raises — the original leaked them on any failure.

    :param chatbot_interaction: Details about the interaction between the user and the LLM
    """

    def stream_response(message):
        """
        This function is called by the blocking H2OGPTE function periodically
        :param message: response from the LLM, this is either a partial or completed response
        """
        chatbot_interaction.update_response(message)

    client = H2OGPTE(
        address=os.getenv("H2OGPTE_URL"), api_key=os.getenv("H2OGPTE_API_TOKEN")
    )
    collection_id = client.create_collection("temp", "")
    try:
        chat_session_id = client.create_chat_session(collection_id)
        try:
            with client.connect(chat_session_id) as session:
                session.query(
                    system_prompt=SYSTEM_PROMPT,
                    message=chatbot_interaction.user_message,
                    timeout=60,
                    callback=stream_response,
                    rag_config={"rag_type": "llm_only"},
                )
        finally:
            client.delete_chat_sessions([chat_session_id])
    finally:
        client.delete_collections([collection_id])
class ChatBotInteraction:
    """Tracks one user-to-LLM exchange and the text currently shown in the UI."""

    def __init__(self, user_message) -> None:
        # The message the user typed; sent verbatim to the LLM.
        self.user_message = user_message
        # True until the LLM delivers its final ChatMessage.
        self.responding = True
        # Accumulated partial-response text so far.
        self.llm_response = ""
        # What the chat card displays; the yellow dot signals "still thinking".
        self.content_to_show = "🟡"

    def update_response(self, message):
        """Fold one streaming-callback payload into the interaction state."""
        if isinstance(message, ChatMessage):
            # Completed response: show it verbatim and stop the UI refresher.
            self.content_to_show = message.content
            self.responding = False
        elif isinstance(message, PartialChatMessage):
            # Skip this exact chunk — presumably a stream header the UI
            # shouldn't show (TODO confirm against h2ogpte output).
            if message.content != "#### LLM Only (no RAG):\n":
                self.llm_response = self.llm_response + message.content
                self.content_to_show = self.llm_response + " 🟡"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment