Model: gemini-2.5-pro-exp-03-25
-- none --
llm -f github:simonw/llm -f issue:simonw/llm/938
- f81e77f64971bae443e3367d6c9216aee0d8f2a4f03fd5db5137d22ee7b85b52
- c71d239df91726fc519c6eb72d318ec65820627232b2f796219e87dcf35d0ab4
- 82b28f6f074641dff51fd8e037b67215cc4e480b84145c3a62bcf169729b3b00
- 1fafa3b21198affe944065163a8eeedf2abab9d33368be9d5663dfc689b10ee9
- 3f59e0699854cbf6b86451b35eb62fe16b3d9a191fd0f508c0743399221c48af
- 213bb0ae16d6fad28b730599286eb85bfd968127da101913755ccc57a90cee7f
- 46e94c9beadbe7a4a1a960723d4f39937cbda973d7fbe1beda3aacbe25b96485
- 1feb2e3d2256ca79f2817934e49db3d1c7f05f83961c0e6ce0c566e08355a56b
- 3525808aee3b82d17a9a8aa64f324bf10fc8083c42d269499178a8e66a449090
- 900b89ac1d940bd0abc3dddadbe79b5309e2eccf928f2434d63818e638f2a19e
- 33a5705c8eaab89f37334aece6f177aaec3e2fb0aaf8ea698a267ad829f68f9b
- e69a46596e50a0dd606c82fe0c9409a12b280f050389ba52e45a9510725de20b
- 1d42063ca922e036840da4da0c2074baa19d0313db4b8553d1af0890a3a7aa38
- 1466107adf32ea90be62ada75ff21b992ff09dc093035f8a49ff830b1e84ee66
- 1707c0560032ea61ecd807d92e46f6a622e90db033bd0567226be773b0ecce6b
- 69b94a5fe79bb777b0b5fdf6f9ed53e5579308e8fdb7a9d650b28d1e19f3e3fa
- 461c0cef31e4a445d8e7f2d61d703a1f44e058e69387d14ed83ee1c1abd5bded
- cafcd4676c03b4495a8d3241bd44f69d88d37ae05b9ff90753989605968fd3f0
- aa7dfbf1a991ff8f903f6ad15a479d622fa041ce9b0346974a3a5b72aa5fc41a
- 520d292f33d4bc0f40ebe4f785635ffa95e0049cd96faeb7925c6f099c0f4804
- e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
- c6f47a1426a735601c9a8e01d391e8d574349ca03f8681615b4f0f233474fc75
- f210ed5a568564adb82a17cdabf64a82d5ea607b034037adba4382325b1b5b44
- e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
- 1850994d449635bf02739cda473029ecdb795a6ab0aabde253a4990005a481bd
- 6b1831241ba34a2d1c7e0e6653d3ea157d7ae1af57c7ddb0caacfefe4d0a7bd7
- 443c551de0f720ad2bb0da591a6b562cf6294e517e82f5eba041d59c048e486f
- e0fb0c70da679cbd102300a5665f96330a03acccd172bd2f38e3da176d17e7e7
- 478ea76ca169e493ad9e4451da104af9bf798212c2723cabb2175f20235a8b51
- c185e066ffd27c97ff653819e93d5c380960d68407b8a949e9a158eff0d24cea
- b3ccb31cb942a4ebb1253ecd7d2b8c5b35e26791acfaa64c7f05251fe6b4d69b
- aa7bd00359c420f939f97ec41894b5eb342e88eb8e309f25158374e421bb83d0
- 4ee8f9138131298bf6724fbe017eb5097e2d2709829f93783c99471b8a7e566b
- 79abc41cfc8917094f117a3b0d350dc22bd156409a4e237ab2a22230c53a1c7a
- 8a73ee544a8f36bbf868b864d56089ff7601e1609a91a2af66641c5e4b46517a
- fe2482a4c39826ac28be995416025d3adfb1cdb7a788b84ab78c0e8eb18dc438
- b656a12b481c6ef8a6354584eb8bf94423a3aac54ff2291687679f0c52d19ed7
- 0a7bdaee40f7fd97ca6f2132d6be8a8d2fc3e45c7079303083a9ed44d227ac8d
- d83bd027c07108f02eca676721bf00f4667b9c7e12ee2c395e1ae9f851d52504
- dd6c9de742c8f2d08777430d46b475de47f6343d9effcce291c79d7c1e8c72fc
- 438cc2acb833782a9c17e5d7b2ae42bc95ad86c674ef9853cd727bff28621abe
- 779f23377885bca5f8ddae4736f380b9ecd28a5cdaadde2ad7e6539d3d123c94
- b5e5f6fa8f47c343d4ba5ec80395a7b2dd6c00b2a18a224d87c76c398997b994
- 740eda164f6501d88410c059c5f545ccd0ea5f7f9f48bbf1993a92fc74d44500
- 82027875df5233212f79efe501232808035df1ce9a6853150669f8e290dded01
- a2e315a8cb6b804ff5d63269cc6f671656d689b61d9fd046e88ae273e5158914
- 889abcb03d6c631d99c2e8a528d5b6587ed9b4c34c14d5e1c83cc3f0f287b153
- ad3d836b19c45d0ccc75df51b932ce38f005e385abd29e6417eacdf41f1aff36
- 150a31f17d9a69fea410d02979fbdf3c916a196d6a1ec9f31831f4c8644ff1aa
- 392944fdb6c86454e01d096aba2178cb4ecdf0514c4de5029d44259b2c840344
- 95f7355a5a9938b289e6d084132a157fb86ee048db0eb12faa6652bf5b1f518d
- b4d2ae4f9963d546a1a666e9cc094fc6fa16b0994509e49393d68d0268d52e2c
- 7c0c4ceaa382112ab91cbdbd56a2eb7db951d6d311ac86f433258b6ae5ed524f
- 08d8789ff486e94876595c801932745100d6f52140a5f7c144f332d8ae632330
- 68c1726e3dc842999ed1fc52dc363b031002790ab471b763fdba9911410b2b64
- a22b5ea0459915eaddb1552bc097148d95152b5ca1ec5e09d9ec3962f4d4cd42
- 7f1e16da099541d7948944366e418905bd5e39591b5e009ef17b9be9d4bfdf30
- fde6e46547307ef7e718427de6b7e3d65c6804d77a104f59bed39cec5b1b7c51
- f8a7f206f61ad0e7c6fafcdf542a81b14d9386f5859272480b1da1ed5db2cb90
- 10039406a53b55ba7b24403202f6462df3cd97590120aded789d3c1c600b44d2
- 5c0a1b67f1733f8ec7c02ecef204e6f31bb2f8bd4c0330930bce94037ad8038b
- dbf5de14ddb489ca4cd5eaf32f71f10a0c8f76959cd4a53cb449495c59a86dfa
- afddcd243091c60091ddc77d69acf34d55ecacd9216489735c0dd27fcb978137
- 96bf244518b07ba0bc82373ef838c5fe7c3ca717241aca9865e5483ef4c9f98a
- 185d93f3a0e33103a9b52b2365236b70814d74650be8de99e1a98176b4bbdbe8
- 7e64ee2a8227a536b8a5f9d9afa5b3790a5ed880fc0097254c7b78c26bb3949a
- 69cb1dbbd68c71172253a123dce9289765cb2a7809c3b42148c54c4472171b4c
- 6448de731a566420d4bf732ba9990fa0d66c6ec5ae1dbb5d86f5c3c131a39324
- 24a6dce80da6f898cec4fb4fda3f5ee0f7a6303700e7dbd675346ab85f2838c5
- aa8d49adaf8e9a8cd4834580f1e0b276ca2a6d5b45ed491092f65a53fd6e5d96
- 51f8597fec5e16fbff339b24dbafffe942d7ab0db68b74841d722d26da7c8fb8
- 28af846cdb9be4319784fbef529b78e6041e7affc82abfdfb226450d0e081325
- 307394a2cbacac9f7e73b5cdc01126e78fc54e6419cc6e5e0243f256b4a36145
- a29000c40c840de752c30478362693a237ae086dcb927c6a62ec03af43cc2422
- c27af1ab52980f551a2a4dadbbe9d4c617d3f7bb53c635c7b1d08f3a14cfa059
- 15ef0e69dcdea5b86f4a351807ad138979b9068689932e6fe8a54c7330ee2140
- 2dee83d8e6eff73de7ab1eaf7945aaf0572b4135ef7205506e9642e1fe2b28bc
- f71638adfd2d91b8a24368a213809e095dbc8a2730cde88296af7c56231fbd99
- a8c770eeaf9233da213632d58551d3ccf676c22de60fe5ee9450261d5054000c
- 52e4f82fb2713c0a1616b8595afec9ea7311017db29f7d7e12d96d030f43a299
- bc8f9338be2fe096f6acf9d8dda0c10c3960efd658d77c0d847d2adea3a47947
- 2ca7c5673f173c0137315d4a7de14912e29f95da24f010cb6ef889b5f2aa8458
- 2588765294af2ad7ae51cefd08cf08b097c89a3e03c3c78d466b8bb9ef4763dc
- 59a4f1e58a7cf16cb9df3f0a16da96ddf87895f3cec36fe6e84b50700612072e
muse on this issue, then propose a whole bunch of code to help implement it
Okay, let's muse on this.
Musing on Replacing `Conversation` with `reply_to_id`

The core idea here is shifting from an explicit `Conversation` object that holds a linear list of `Response` objects to a more implicit, potentially tree-like structure where each `Response` can point to the `Response` it's replying to using a `reply_to_id`.
Pros:

- More Natural Structure: Real conversations often branch. A user might ask a follow-up to an earlier point, not just the immediately preceding one. A tree structure (`reply_to_id`) models this much more naturally than a strictly linear list (`Conversation.responses`).
- Tool Integration: As Simon notes, this seems much cleaner for implementing tools. An LLM might make a "tool call" request (a `Response`), the system executes the tool and generates results, and then provides those results back as a reply to that specific tool call request. Linking them directly via `reply_to_id` makes tracking this request/response pairing explicit and straightforward.
- Simpler In-Memory API: For users of the Python library who aren't using the SQLite logging, creating follow-up prompts becomes simpler. Instead of managing a `Conversation` object, they can just call `response.reply(...)` on the previous response object (see the usage sketch after this list).
- Potential Caching/Efficiency Gains: If a large document is processed in the first turn (e.g., using fragments), subsequent replies that branch off might reuse cached intermediate states or embeddings associated with that initial response, potentially saving computation (though this depends heavily on the model implementation).
- Database Normalization: It feels slightly more normalized. Instead of a separate `conversations` table primarily for grouping, the relationship is inherent in the `responses` table itself via the foreign key.
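A minimal sketch of what that in-memory API could feel like, assuming the `reply()` method proposed further down; the model name and prompts are placeholders:

```python
import llm

# Hypothetical usage of the proposed reply() API - reply() does not exist yet.
model = llm.get_model("gpt-4o-mini")

first = model.prompt("Summarise this design document in three bullet points")
print(first.text())

# Branch directly off any response instead of managing a Conversation object:
follow_up = first.reply("Expand on the second bullet point")
print(follow_up.text())

# A second reply to the *same* parent creates a sibling branch, not another linear turn:
alternative = first.reply("Now restate the summary for a non-technical audience")
print(alternative.text())
```
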
Cons:

- Retrieving Linear History: Getting the full linear history that led to a specific response now requires traversing the `reply_to_id` chain backwards, potentially using recursive SQL queries (like SQLite CTEs) or Python loops (a CTE sketch follows this list). This is slightly more complex than just iterating `conversation.responses`.
- Breaking Change for Plugins: This is the big one. The `execute()` signature for all models currently accepts `conversation`. Removing this or changing its meaning will break existing plugins unless a compatibility layer is introduced. Building the prompt history (like the `messages` list for OpenAI) currently relies on iterating `conversation.responses`. This logic needs to shift to traversing the `reply_to` chain.
- Defining a "Conversation": How do we define a "conversation" in the UI or for grouping logs? Is it everything reachable by traversing `reply_to_id` back from a leaf node? What's the root? The current `conversation_id` provides a simple grouping mechanism which might still be useful, even if it represents a tree rather than a line. Maybe the `conversation_id` becomes the ID of the root response?
- Migration Complexity: Requires adding a column and foreign key to the `responses` table. Existing data won't have `reply_to_id` populated, though maybe the linear history in `conversation.responses` could be used to backfill it for existing logs upon migration?
- UI/UX: How does the CLI represent this? `-c/--continue` currently continues the last conversation. The proposed `-r/--reply` would reply to the last response. This seems sensible. `--cid` would need clarification - does it mean reply to the latest response within that old conversation ID's tree, or reply to the root response of that conversation? `--reply-to ID` is clear.
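To make that first con concrete, here is a sketch of the history lookup as a recursive SQLite CTE wrapped in a small Python helper; it assumes the `reply_to_id` column added by the migration below, and the helper name is illustrative only:

```python
import sqlite_utils


def linear_history(db: sqlite_utils.Database, response_id: str):
    """Walk reply_to_id backwards from a leaf response to the root,
    then return the rows oldest-first."""
    sql = """
    WITH RECURSIVE thread(id, reply_to_id, prompt, response, depth) AS (
        SELECT id, reply_to_id, prompt, response, 0
        FROM responses
        WHERE id = :response_id
        UNION ALL
        SELECT r.id, r.reply_to_id, r.prompt, r.response, thread.depth + 1
        FROM responses r
        JOIN thread ON thread.reply_to_id = r.id
    )
    SELECT id, prompt, response FROM thread ORDER BY depth DESC
    """
    return list(db.query(sql, {"response_id": response_id}))
```
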
Compatibility Strategy for Plugins:

The least disruptive path would be:

- Introduce the `reply_to` mechanism internally.
- Modify `llm` core to build the history by traversing the `reply_to` chain if available, falling back to the old `conversation.responses` if needed (e.g., for older log entries).
- When calling a plugin's `execute` method, check its signature (a sketch of this dispatch follows the list).
  - If it doesn't accept `conversation`, call it without that argument and expect it to use the `prompt.reply_to` chain (the new way).
  - If it does accept `conversation`, construct a temporary `Conversation` object containing the linear history derived from the `reply_to` chain and pass that in (the old way).
- Deprecate passing `conversation` and encourage plugin authors to update to the new signature and use the `prompt.reply_to` chain.
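A rough sketch of what that dispatch could look like, reusing the `Conversation` class and the `get_history()` method proposed below; `call_execute` is an illustrative helper name, not part of the actual plan:

```python
import inspect


def call_execute(model, prompt, stream, response):
    """Illustrative shim: call a plugin's execute() with or without the
    legacy conversation argument, depending on its signature."""
    params = inspect.signature(model.execute).parameters
    kwargs = {"prompt": prompt, "stream": stream, "response": response}
    if "conversation" in params:
        # Old-style plugin: rebuild a linear Conversation from the reply_to chain
        conversation = None
        if prompt.reply_to is not None:
            conversation = Conversation(model=model)
            conversation.responses.extend(prompt.reply_to.get_history())
        kwargs["conversation"] = conversation
    # New-style plugin: no conversation argument, it walks prompt.reply_to itself
    return model.execute(**kwargs)
```
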
Decision:
The benefits, especially for tool integration and a more natural conversation flow representation, seem significant. The main hurdle is the breaking change for plugins. A compatibility strategy makes this feasible. The added complexity of retrieving linear history seems manageable. Let's proceed with outlining the code changes needed.
Here's a bunch of code snippets illustrating the required changes across different files.

1. Database Migration (`migrations.py`)

```python
# llm/migrations.py
# ... (other migrations)


@migration
def m017_replies(db):
    """Add reply_to_id for response threading."""
    if "reply_to_id" not in db["responses"].columns_dict:
        db["responses"].add_column("reply_to_id", str, fk="responses", fk_col="id")

    # Add a default column order including the new column near the start.
    # Adjust columns based on the *actual* current schema before this migration.
    existing_columns = [c.name for c in db["responses"].columns]
    # Define desired order (example, needs refinement based on actual order)
    desired_order = [
        "id", "reply_to_id", "model", "prompt", "system",
        "prompt_json", "options_json", "response", "response_json",
        "conversation_id",  # Keep for grouping/backward compat?
        "duration_ms", "datetime_utc", "input_tokens", "output_tokens",
        "token_details", "schema_id",
    ]
    # Ensure all existing columns are included, handling potential additions/removals
    current_set = set(existing_columns)
    final_order = [col for col in desired_order if col in current_set]
    # Add any unexpected columns to the end
    final_order.extend([col for col in existing_columns if col not in desired_order])
    db["responses"].transform(column_order=tuple(final_order))

    # Consider backfilling reply_to_id based on conversation_id and datetime_utc
    # for existing linear logs, but this could be complex and might be skipped.
    # Example (conceptual, needs refinement):
    # try:
    #     db.execute("""
    #         WITH NumberedResponses AS (
    #             SELECT
    #                 id,
    #                 conversation_id,
    #                 datetime_utc,
    #                 LAG(id) OVER (PARTITION BY conversation_id ORDER BY datetime_utc, id) AS prev_id
    #             FROM responses
    #         )
    #         UPDATE responses
    #         SET reply_to_id = (SELECT prev_id FROM NumberedResponses WHERE NumberedResponses.id = responses.id)
    #         WHERE responses.reply_to_id IS NULL;
    #     """)
    # except Exception as e:
    #     print(f"Could not backfill reply_to_id: {e}")
```

2. Core Model Changes (`models.py`)

```python
# llm/models.py
import asyncio
import base64
from dataclasses import dataclass, field
import datetime
# ... other imports ...
from typing import (
    Any, AsyncGenerator, Callable, Dict, Iterable, Iterator, List, Optional, Set, Union
)


# --- Prompt Class ---
@dataclass
class Prompt:
    _prompt: Optional[str]
    model: "Model"
    fragments: Optional[List[str]]
    attachments: Optional[List[Attachment]]
    _system: Optional[str]
    system_fragments: Optional[List[str]]
    prompt_json: Optional[str]
    schema: Optional[Union[Dict, type[BaseModel]]]
    options: "Options"
    reply_to: Optional["_BaseResponse"] = None  # ADDED: reference to the response being replied to

    def __init__(
        self,
        prompt,
        model,
        *,
        fragments=None,
        attachments=None,
        system=None,
        system_fragments=None,
        prompt_json=None,
        options=None,
        schema=None,
        reply_to: Optional["_BaseResponse"] = None,  # ADDED
    ):
        self._prompt = prompt
        self.model = model
        self.attachments = list(attachments or [])
        self.fragments = fragments or []
        self._system = system
        self.system_fragments = system_fragments or []
        self.prompt_json = prompt_json
        if schema and not isinstance(schema, dict) and issubclass(schema, BaseModel):
            schema = schema.model_json_schema()
        self.schema = schema
        self.options = options or {}
        self.reply_to = reply_to  # ADDED

    # ... (existing properties like prompt, system) ...


# --- _BaseResponse Class ---
class _BaseResponse:
    # ... (existing attributes) ...
    prompt: "Prompt"  # This prompt will now have the reply_to attribute
    # Keep conversation for now for grouping / backward compat? Or phase out?
    # Let's keep it for now but maybe don't pass it to execute()
    conversation: Optional["_BaseConversation"] = None

    def __init__(
        self,
        prompt: Prompt,
        model: "_BaseModel",
        stream: bool,
        conversation: Optional[_BaseConversation] = None,  # Keep param for now
        key: Optional[str] = None,
    ):
        # ... (existing init code) ...
        self.prompt = prompt
        self.model = model
        self.stream = stream
        self._key = key
        # ... (rest of init) ...
        # If conversation is None but prompt.reply_to has one, maybe inherit it?
        if conversation is None and prompt.reply_to and prompt.reply_to.conversation:
            self.conversation = prompt.reply_to.conversation
        else:
            self.conversation = conversation

    # ... (existing methods like set_usage, from_row, token_usage) ...

    def get_history(self) -> List["_BaseResponse"]:
        """Traverses the reply_to chain to get the linear history."""
        history = []
        current = self
        while current is not None:
            history.append(current)
            # Access the prompt that LED TO this response,
            # then the response that prompt replied to
            current = current.prompt.reply_to
        return history[::-1]  # Reverse to get chronological order

    def log_to_db(self, db):
        # ... (logic to ensure conversation exists or create one) ...
        # Fetch or create conversation record (maybe use root response ID?)
        root_response = self.get_history()[0] if self.prompt.reply_to else self
        # For now, keep using the existing conversation mechanism or create one
        conversation = self.conversation
        if not conversation:
            # If no conversation explicitly set, create one based on root response
            conversation = Conversation(model=self.model, id=root_response.id)  # Tentative ID choice
            self.conversation = conversation
        # ... (rest of conversation insert logic) ...

        # Ensure schema is logged
        # ... (schema handling) ...

        response_id = str(ULID()).lower()
        self.id = response_id  # Store the generated ID on the response object

        # Persist fragments (no change needed here)
        # ... (fragment logging) ...

        # Determine reply_to_id
        reply_to_id_val = None
        if self.prompt.reply_to and hasattr(self.prompt.reply_to, "id") and self.prompt.reply_to.id:
            reply_to_id_val = self.prompt.reply_to.id

        response_data = {
            "id": response_id,
            "reply_to_id": reply_to_id_val,  # ADDED
            "model": self.model.model_id,
            "prompt": self.prompt._prompt,
            "system": self.prompt._system,
            # ... (rest of the fields: prompt_json, options_json, response, etc.) ...
            "conversation_id": conversation.id,  # Still log conversation for grouping
            # ... (duration, datetime, tokens, schema_id) ...
        }
        db["responses"].insert(response_data)

        # Persist attachments (no change needed here)
        # ... (attachment logging) ...


# --- Response Class ---
class Response(_BaseResponse):
    # ... (existing methods) ...

    def reply(
        self,
        prompt: Optional[str] = None,
        *,
        fragments: Optional[List[str]] = None,
        attachments: Optional[List[Attachment]] = None,
        system: Optional[str] = None,  # System prompt likely inherited or ignored in replies? Decide policy.
        schema: Optional[Union[dict, type[BaseModel]]] = None,
        system_fragments: Optional[List[str]] = None,
        stream: bool = True,
        key: Optional[str] = None,
        **options,
    ) -> "Response":
        """Creates a new prompt that replies to this response."""
        # Ensure the current response is finished before replying
        self._force()
        # Decide how to handle system prompts in replies - maybe inherit?
        # For now, let's assume a reply doesn't usually set a new system prompt.
        # Or maybe it replaces the system prompt context for that branch? Needs thought.
        # Let's allow overriding it for now.
        # Ensure options are validated against the model's options
        prompt_options = self.model.Options(**options)
        new_prompt = Prompt(
            prompt,
            model=self.model,
            fragments=fragments,
            attachments=attachments,
            system=system,  # Pass system if provided
            schema=schema,
            system_fragments=system_fragments,
            options=prompt_options,
            reply_to=self,  # Link back to this response
        )
        # Create the new Response object, potentially inheriting the conversation.
        # The model's prompt method handles creating the Response object.
        return self.model.prompt(
            prompt=new_prompt._prompt,  # Pass components to model.prompt
            fragments=new_prompt.fragments,
            attachments=new_prompt.attachments,
            system=new_prompt._system,
            schema=new_prompt.schema,
            system_fragments=new_prompt.system_fragments,
            stream=stream,
            key=key,
            conversation=self.conversation,  # Pass conversation along for grouping
            reply_to=self,  # Explicitly pass reply_to here too
            **dict(prompt_options),  # Pass validated options
        )

    def __iter__(self) -> Iterator[str]:
        # ... (start timer etc) ...

        # --- Compatibility Layer ---
        # Check the signature of the plugin's execute method
        execute_sig = inspect.signature(self.model.execute)
        accepts_conversation = "conversation" in execute_sig.parameters

        # Prepare args for execute
        execute_args = {
            "prompt": self.prompt,
            "stream": self.stream,
            "response": self,
        }
        if isinstance(self.model, KeyModel):
            execute_args["key"] = self.model.get_key(self._key)
        if accepts_conversation:
            # Old way: construct a Conversation object (might need refinement).
            # For now, pass the existing conversation object if it exists.
            execute_args["conversation"] = self.conversation
        else:
            # New way: plugin expected to use prompt.reply_to
            pass  # No conversation arg needed

        model_iterator = self.model.execute(**execute_args)
        # --- End Compatibility Layer ---

        for chunk in model_iterator:
            yield chunk
            self._chunks.append(chunk)

        # If no conversation exists AND we weren't replying, create one
        if self.conversation is None and self.prompt.reply_to is None:
            self.conversation = Conversation(model=self.model, id=self.id)  # Use response ID as conv ID for root
        # Append to conversation *responses* list for backward compat/grouping?
        # Or rely solely on reply_to chain? For now, let's keep appending.
        if self.conversation and self not in self.conversation.responses:
            self.conversation.responses.append(self)

        self._end = time.monotonic()
        self._done = True
        self._on_done()
        # ...


# --- AsyncResponse Class ---
# Similar changes: add reply() method, adjust __anext__ for compatibility layer
class AsyncResponse(_BaseResponse):
    async def reply(
        self,
        prompt: Optional[str] = None,
        *,
        fragments: Optional[List[str]] = None,
        attachments: Optional[List[Attachment]] = None,
        system: Optional[str] = None,
        schema: Optional[Union[dict, type[BaseModel]]] = None,
        system_fragments: Optional[List[str]] = None,
        stream: bool = True,
        key: Optional[str] = None,
        **options,
    ) -> "AsyncResponse":
        """Creates a new prompt that replies to this response."""
        await self._force()  # Ensure current response is finished
        # (Similar logic as sync reply for system prompt, options etc.)
        prompt_options = self.model.Options(**options)
        new_prompt = Prompt(
            prompt,
            model=self.model,
            fragments=fragments,
            attachments=attachments,
            system=system,
            schema=schema,
            system_fragments=system_fragments,
            options=prompt_options,
            reply_to=self,
        )
        # Call the async model's prompt method
        return self.model.prompt(
            prompt=new_prompt._prompt,
            fragments=new_prompt.fragments,
            attachments=new_prompt.attachments,
            system=new_prompt._system,
            schema=new_prompt.schema,
            system_fragments=new_prompt.system_fragments,
            stream=stream,
            key=key,
            conversation=self.conversation,  # Pass conversation along
            reply_to=self,  # Pass reply_to
            **dict(prompt_options),
        )

    async def __anext__(self) -> str:
        # ... (check if done, etc) ...
        if not hasattr(self, "_generator"):
            # --- Compatibility Layer ---
            execute_sig = inspect.signature(self.model.execute)
            accepts_conversation = "conversation" in execute_sig.parameters
            execute_args = {
                "prompt": self.prompt,
                "stream": self.stream,
                "response": self,
            }
            if isinstance(self.model, AsyncKeyModel):
                execute_args["key"] = self.model.get_key(self._key)
            if accepts_conversation:
                execute_args["conversation"] = self.conversation
            else:
                pass  # New way
            self._generator = self.model.execute(**execute_args)
            # --- End Compatibility Layer ---
        # ... (rest of __anext__ logic) ...
        # Update conversation handling as in sync __iter__


# --- Model / KeyModel / AsyncModel / AsyncKeyModel ---
# Need to update the base .prompt() methods to accept reply_to
class _Model(_BaseModel):
    def prompt(
        self,
        prompt: Optional[str] = None,
        *,
        fragments: Optional[List[str]] = None,
        attachments: Optional[List[Attachment]] = None,
        system: Optional[str] = None,
        system_fragments: Optional[List[str]] = None,
        stream: bool = True,
        schema: Optional[Union[dict, type[BaseModel]]] = None,
        conversation: Optional[Conversation] = None,  # Keep accepting for now
        reply_to: Optional[Response] = None,  # ADDED
        **options,
    ) -> Response:
        key = options.pop("key", None)
        self._validate_attachments(attachments)
        # Create the Prompt object, including reply_to
        prompt_obj = Prompt(
            prompt,
            fragments=fragments,
            attachments=attachments,
            system=system,
            schema=schema,
            system_fragments=system_fragments,
            model=self,
            options=self.Options(**options),
            reply_to=reply_to,  # Pass it here
        )
        # Create the Response object
        return Response(
            prompt_obj,
            self,
            stream,
            conversation=conversation,  # Pass conversation if provided
            key=key,
        )

    # Redefine execute with new signature (breaking change for plugins)
    # @abstractmethod
    # def execute(
    #     self,
    #     prompt: Prompt,
    #     stream: bool,
    #     response: Response,
    #     # REMOVED: conversation: Optional[Conversation],
    #     # ADDED (for KeyModel): key: Optional[str] = None
    # ) -> Iterator[str]:
    #     pass


class Model(_Model):
    # Define execute with NEW signature if not using compat layer
    @abstractmethod
    def execute(
        self,
        prompt: Prompt,
        stream: bool,
        response: Response,
        # REMOVED: conversation: Optional[Conversation],
    ) -> Iterator[str]:
        pass


class KeyModel(_Model):
    # Define execute with NEW signature if not using compat layer
    @abstractmethod
    def execute(
        self,
        prompt: Prompt,
        stream: bool,
        response: Response,
        # REMOVED: conversation: Optional[Conversation],
        key: Optional[str],
    ) -> Iterator[str]:
        pass


# Similar changes for _AsyncModel, AsyncModel, AsyncKeyModel, updating their
# .prompt() methods and defining execute() with the new signature.
```

3. CLI Changes (`cli.py`)

```python
# llm/cli.py
import inspect  # Need this for compatibility layer
# ... other imports ...


def get_response_by_id(db: sqlite_utils.Database, response_id: str, async_=False) -> Optional[_BaseResponse]:
    """Helper to fetch a specific response object by ID."""
    try:
        row = db["responses"].get(response_id)
        # Need to reconstruct the *full* response object, including potentially
        # its prompt and reply_to chain. This might get complex or we might
        # just return a simplified object if only the ID is needed by the caller.
        # For now, let's assume we can reconstruct it if needed.
        response_class = AsyncResponse if async_ else Response
        # This from_row needs enhancing to handle the reply_to chain reconstruction
        # if we want the full object. For now, it might be partial.
        return response_class.from_row(db, row, _async=async_)
    except sqlite_utils.db.NotFoundError:
        return None


@cli.command(name="prompt")
@click.argument("prompt", required=False)
# ... other options ...
@click.option(
    "_continue",  # RENAME this variable internally if needed due to keyword clash
    "-c",
    "--continue",
    is_flag=True,
    flag_value=-1,  # Keep flag_value for now, logic changes below
    help="Continue the most recent conversation (reply to last response).",  # Updated help
)
# --cid is now ambiguous. Let's rename it to --conversation-group or similar
# and maybe deprecate it later, or clarify its meaning (e.g., reply to latest in that group)
@click.option(
    "conversation_id",  # Keep variable name, but behavior might change
    "--cid",
    "--conversation",
    help="Continue conversation with root ID (replies to latest in thread).",  # Clarify help
)
@click.option(  # NEW
    "reply_to_id",
    "-r",
    "--reply-to",
    help="Reply to the specified response ID.",
)
@click.option(  # NEW alias for -c
    "--reply-to-latest",
    is_flag=True,
    help="Reply to the latest response overall.",
)
# ... rest of options ...
def prompt(
    prompt,
    system,
    model_id,
    database,
    # ... other params ...
    _continue,  # Keep internal name for now
    conversation_id,
    reply_to_id,  # NEW
    reply_to_latest,  # NEW
    key,
    save,
    async_,
    usage,
    extract,
    extract_last,
):
    # ... (db setup) ...

    # --- Determine reply_to_response ---
    reply_to_response: Optional[_BaseResponse] = None
    # Precedence: --reply-to > --reply-to-latest > -c > --cid
    if reply_to_id:
        reply_to_response = get_response_by_id(db, reply_to_id, async_=async_)
        if not reply_to_response:
            raise click.ClickException(f"Response ID not found: {reply_to_id}")
    elif reply_to_latest or _continue:  # Treat -c as alias for --reply-to-latest now
        # Find the absolute latest response ID
        latest_rows = list(db["responses"].rows_where(order_by="rowid desc", limit=1))
        if latest_rows:
            latest_id = latest_rows[0]["id"]
            reply_to_response = get_response_by_id(db, latest_id, async_=async_)
            # Need error handling if get_response_by_id fails somehow
        elif _continue:  # Only error if -c was explicitly used and no logs exist
            raise click.ClickException("No previous response to continue from.")
    elif conversation_id:
        # Find latest response within the specified conversation group
        latest_in_conv = list(db["responses"].rows_where(
            "conversation_id = ?", [conversation_id], order_by="rowid desc", limit=1
        ))
        if latest_in_conv:
            latest_id = latest_in_conv[0]["id"]
            reply_to_response = get_response_by_id(db, latest_id, async_=async_)
            if not reply_to_response:
                raise click.ClickException(f"Could not load latest response for conversation {conversation_id}")
        else:
            raise click.ClickException(f"No responses found for conversation ID: {conversation_id}")

    # --- Conversation Handling (Backward Compat / Grouping) ---
    # If we are replying, inherit the conversation ID for grouping
    conversation_obj = None
    if reply_to_response and reply_to_response.conversation:
        conversation_obj = reply_to_response.conversation
    # (Need to decide if a new Conversation object should ever be created here,
    # or if it's purely handled during logging based on root node)

    # --- Model Selection ---
    if model_id is None:
        if reply_to_response:
            model_id = reply_to_response.model.model_id  # Inherit model from response being replied to
        # ... (else use template model or default model logic) ...
    # ... (resolve model object) ...

    # --- Template Handling ---
    # ... (template loading, applying params, potentially overriding system/options) ...
    # Be careful how templates interact with replies - does a template override the context?

    # --- Execute Prompt ---
    # Pass reply_to_response to the model's prompt method
    prompt_method = model.prompt
    # The conversation_obj is passed mainly for logging/grouping now,
    # not necessarily for history reconstruction by the execute method itself.
    # However, the compat layer in Response.__iter__ might still use it.
    response = prompt_method(
        prompt,
        # ... fragments, attachments, system, schema ...
        reply_to=reply_to_response,  # Pass the actual response object
        conversation=conversation_obj,  # Pass for grouping/compat
        key=key,
        **kwargs,  # validated_options etc.
    )

    # --- Process Response ---
    # (Streaming/non-streaming logic, logging call)
    # log_to_db now handles storing reply_to_id
    if (logs_on() or log) and not no_log:
        # For async, need to await first or log in a callback
        if async_:
            async def log_async_response(res):
                sync_res = await res.to_sync_response()
                sync_res.log_to_db(db)

            response.on_done(log_async_response)
        else:
            response.log_to_db(db)
    # ... (rest of command) ...


@cli.command()
# ... (chat options - similar changes needed for -c, --cid) ...
# Remove --continue? Or make it an alias for --reply-to-latest?
# Let's keep -c for now as alias for replying to latest overall response.
def chat(
    system,
    model_id,
    _continue,
    conversation_id,
    template,
    param,
    options,
    no_stream,
    key,
):
    # ... (db setup, model resolution) ...
    current_response = None  # Track the last response object

    # Initial setup based on -c or --cid
    if _continue:
        # Find latest response overall
        latest_rows = list(db["responses"].rows_where(order_by="rowid desc", limit=1))
        if latest_rows:
            current_response = get_response_by_id(db, latest_rows[0]["id"])
            model_id = current_response.model.model_id  # Use model from last response
            click.echo(f"Continuing conversation {current_response.conversation.id} (last response {current_response.id})")
        else:
            click.echo("No previous conversation found to continue.")
    elif conversation_id:
        # Find latest response in specified conversation group
        latest_in_conv = list(db["responses"].rows_where(
            "conversation_id = ?", [conversation_id], order_by="rowid desc", limit=1
        ))
        if latest_in_conv:
            current_response = get_response_by_id(db, latest_in_conv[0]["id"])
            model_id = current_response.model.model_id
            click.echo(f"Continuing conversation {conversation_id} (last response {current_response.id})")
        else:
            raise click.ClickException(f"No responses found for conversation ID: {conversation_id}")

    # Resolve model if not determined by conversation continuation
    if not current_response:
        if model_id is None:
            # ... (handle template model or default model) ...
            model_id = get_default_model()
    try:
        model = get_model(model_id)
    except UnknownModelError as ex:
        raise click.ClickException(str(ex))

    # ... (validate options, setup kwargs) ...
    click.echo(f"Chatting with {model.model_id}")
    # ... (instructions) ...

    initial_system = system  # Store initial system prompt

    while True:
        # ... (handle multiline input) ...
        prompt_text = click.prompt(...)
        if prompt_text.strip() in ("exit", "quit"):
            break

        # Apply template logic if needed (careful with context)
        # ...

        current_system = initial_system if not current_response else None  # Only use system for first message

        if current_response:
            # Reply to the previous response
            response = current_response.reply(
                prompt=prompt_text, system=current_system, **kwargs
            )
        else:
            # Start a new conversation thread
            response = model.prompt(
                prompt=prompt_text, system=current_system, **kwargs
            )

        # Process the response (streaming, logging)
        accumulated_response_text = ""
        for chunk in response:
            print(chunk, end="")
            sys.stdout.flush()
            accumulated_response_text += chunk
        print("")  # Newline after response

        response.log_to_db(db)  # Log after streaming complete
        current_response = response  # Update for the next loop
        initial_system = None  # Don't apply initial system prompt again
```

4. Plugin Example Changes (e.g., `openai_models.py`)

```python
# llm/default_plugins/openai_models.py
import inspect  # Add this
# ... other imports ...


class _Shared:
    # ... (existing init) ...

    def build_messages(self, prompt: Prompt) -> List[Dict[str, Any]]:
        """Builds messages list from the prompt's reply chain."""
        # Traverse the reply chain, collecting Prompt and Response objects
        history = []
        current_prompt = prompt
        while current_prompt is not None:
            history.append(current_prompt)
            current_response = current_prompt.reply_to
            if current_response:
                # Record the response itself, then continue with the prompt
                # that *generated* that response
                history.append(current_response)
                current_prompt = current_response.prompt
            else:
                current_prompt = None  # Reached the root
        history.reverse()  # Chronological order

        messages = []
        current_system = None
        for item in history:
            # item is either a Prompt or a Response object (derived from _BaseResponse)
            if isinstance(item, Prompt):
                # This is a 'user' turn (or the final prompt)
                if item.system and item.system != current_system:
                    messages.append({"role": "system", "content": item.system})
                    current_system = item.system
                # Handle attachments for this user prompt
                if not item.attachments:
                    messages.append({"role": "user", "content": item.prompt or ""})
                else:
                    attachment_message = []
                    if item.prompt:
                        attachment_message.append({"type": "text", "text": item.prompt})
                    for attachment in item.attachments:
                        attachment_message.append(_attachment(attachment))
                    messages.append({"role": "user", "content": attachment_message})
            elif isinstance(item, _BaseResponse):
                # This is an 'assistant' turn.
                # Need to ensure response text is available. This might be tricky if
                # called *during* execution of an earlier part of the chain.
                # Assume text_or_raise() works or adapt logic.
                messages.append({"role": "assistant", "content": item.text_or_raise()})
                # We also need to apply the system prompt associated *with this response's prompt*
                if item.prompt.system and item.prompt.system != current_system:
                    # Insert system prompt *before* the assistant message if it changed.
                    # Find where the preceding user message was added.
                    for i in range(len(messages) - 2, -1, -1):
                        if messages[i]["role"] == "user":
                            messages.insert(i, {"role": "system", "content": item.prompt.system})
                            break
                    current_system = item.prompt.system
        # Filter out empty user messages if needed (e.g. initial system prompt only)
        messages = [m for m in messages if m.get("content")]
        return messages

    # ... rest of _Shared class ...


class Chat(_Shared, KeyModel):
    # ... (Options definition) ...

    # Use NEW execute signature (without conversation)
    def execute(self, prompt: Prompt, stream: bool, response: Response, key=None):
        # Build messages using the new method that traverses prompt.reply_to
        messages = self.build_messages(prompt)
        if not messages:  # Handle cases like system-prompt only initial prompts?
            # Decide policy - maybe raise error or return empty?
            return iter([])
        # ... (rest of the method is similar, using self.build_kwargs, self.get_client) ...
        # It NO LONGER needs the 'conversation' argument.


# --- AsyncChat ---
class AsyncChat(_Shared, AsyncKeyModel):
    # ... (Options definition) ...

    # Use NEW async execute signature (without conversation)
    async def execute(
        self, prompt: Prompt, stream: bool, response: AsyncResponse, key=None
    ) -> AsyncGenerator[str, None]:
        # Build messages using the new method
        messages = self.build_messages(prompt)
        if not messages:
            yield ""  # Need to yield something for async generator
            return
        # ... (rest of the method is similar, using self.build_kwargs, self.get_client) ...
        # It NO LONGER needs the 'conversation' argument.
```
This is a substantial refactoring, touching core data structures, database interactions, the CLI, and plugin interfaces. The backward compatibility layer for plugins adds complexity but is likely necessary for a smoother transition. Careful testing, especially around edge cases (first prompt, replies with system prompts, error handling) would be crucial.
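
For example, a test along these lines could pin down the reply-chain behaviour, assuming the proposed `reply()` and `get_history()` APIs plus some registered test model; everything here is illustrative:

```python
import llm


def test_reply_chain_builds_linear_history():
    model = llm.get_model("echo")  # hypothetical test model registered by the test suite
    first = model.prompt("first turn")
    first.text()  # force completion
    second = first.reply("second turn")
    second.text()

    history = second.get_history()
    assert [r.prompt._prompt for r in history] == ["first turn", "second turn"]
    assert second.prompt.reply_to is first

    # A sibling reply branches off the same parent rather than extending the chain
    sibling = first.reply("alternative second turn")
    sibling.text()
    assert sibling.prompt.reply_to is first
```
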
202,680 input, 10,460 output, {"promptTokensDetails": [{"modality": "TEXT", "tokenCount": 202680}], "thoughtsTokenCount": 1859}
62.989 cents: https://www.llm-prices.com/#it=202680&ot=12319&ic=2.5&oc=10