Skip to content

Commit c9c6fac

Browse files
committed
feat: add dummy tool result for aborted tool calls
1 parent faee3f1 commit c9c6fac

File tree

4 files changed

+144
-3
lines changed

4 files changed

+144
-3
lines changed

dive_mcp_host/host/agents/chat_agent.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
from pydantic import BaseModel
3232

3333
from dive_mcp_host.host.agents.agent_factory import AgentFactory, initial_messages
34+
from dive_mcp_host.host.agents.message_order import tool_call_order
3435
from dive_mcp_host.host.agents.tools_in_prompt import (
3536
convert_messages,
3637
extract_tool_calls,
@@ -160,7 +161,6 @@ def _check_more_steps_needed(
160161
)
161162

162163
def _call_model(self, state: AgentState, config: RunnableConfig) -> AgentState:
163-
# TODO: _validate_chat_history
164164
if not self._tools_in_prompt:
165165
model = self._model
166166
if self._tool_classes:
@@ -204,8 +204,13 @@ def _before_agent(self, state: AgentState, config: RunnableConfig) -> AgentState
204204
configurable = config.get("configurable", {})
205205
max_input_tokens: int | None = configurable.get("max_input_tokens")
206206
oversize_policy: Literal["window"] | None = configurable.get("oversize_policy")
207+
208+
new_messages: list[BaseMessage] = []
209+
new_messages.extend(tool_call_order(state["messages"]))
210+
207211
if max_input_tokens is None or oversize_policy is None:
208-
return cast(AgentState, {"messages": []})
212+
return cast(AgentState, {"messages": new_messages})
213+
209214
if oversize_policy == "window":
210215
messages: list[BaseMessage] = trim_messages(
211216
state["messages"],
@@ -217,7 +222,8 @@ def _before_agent(self, state: AgentState, config: RunnableConfig) -> AgentState
217222
for m in state["messages"]
218223
if m not in messages
219224
]
220-
return cast(AgentState, {"messages": remove_messages})
225+
new_messages.extend(remove_messages)
226+
return cast(AgentState, {"messages": new_messages})
221227

222228
return cast(AgentState, {"messages": []})
223229

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
from logging import getLogger
2+
from uuid import uuid4
3+
4+
from langchain_core.messages import (
5+
AIMessage,
6+
AnyMessage,
7+
BaseMessage,
8+
RemoveMessage,
9+
ToolMessage,
10+
)
11+
12+
from dive_mcp_host.log import TRACE
13+
14+
logger = getLogger(__name__)
15+
16+
17+
FAKE_TOOL_RESPONSE = "FAKE_TOOL_RESPONSE"
18+
19+
20+
def _has_tool_call(msg: AnyMessage | None) -> bool:
    """Return True when *msg* is an AIMessage carrying at least one tool call."""
    # isinstance() already rejects None, so no separate None check is needed.
    return isinstance(msg, AIMessage) and bool(msg.tool_calls)
22+
23+
24+
def _not_tool_result(msg: AnyMessage) -> bool:
    """Return True when *msg* is anything other than a ToolMessage."""
    if isinstance(msg, ToolMessage):
        return False
    return True
26+
27+
28+
def tool_call_order(messages: list[AnyMessage]) -> list[BaseMessage]:
    """Guarantee tool call tool result pair.

    Providers like Anthropic requires each tool call to have their
    corresponding tool result.

    Scans the history for an AIMessage whose tool calls are not immediately
    followed by a ToolMessage.  When one is found, fake ToolMessages (tagged
    with FAKE_TOOL_RESPONSE in response_metadata) are inserted for every
    dangling tool call, and every message from the violation onward is
    replaced: the originals are scheduled for removal via RemoveMessage and
    re-appended as fresh copies with new ids so the rebuilt ordering sticks.

    Returns the RemoveMessage batch followed by the patched tail; returns an
    empty list when the history is already well-formed.

    NOTE(review): only adjacent pairs are inspected — a dangling tool call on
    the very last message (nothing after it yet) is not patched here, and an
    AIMessage with several tool calls answered by only some ToolMessages is
    not detected.  Confirm callers always append the next user message before
    invoking this.
    """
    logger.log(TRACE, "Examine tool call order. msgs: %s", messages)

    rebuilt: list[BaseMessage] = []
    removals: list[RemoveMessage] = []
    needs_rewrite = False
    previous: BaseMessage | None = None

    for pos, message in enumerate(messages):
        if _has_tool_call(previous) and _not_tool_result(message):
            # _has_tool_call already proved this; assert narrows the type.
            assert isinstance(previous, AIMessage), "Could only be AIMessage"
            logger.warning(
                "Found tool call that doesn't have tool result as next message: %s",
                previous.model_dump_json(),
            )

            # Synthesize one placeholder result per dangling tool call.
            rebuilt.extend(
                ToolMessage(
                    content="Previous tool call was not processed",
                    tool_call_id=tool_call["id"],
                    response_metadata={FAKE_TOOL_RESPONSE: True},
                    id=uuid4().hex,
                )
                for tool_call in previous.tool_calls
            )

            # Everything from the first violation onward gets rebuilt, so
            # schedule the originals (only those with ids) for removal once.
            if not needs_rewrite:
                needs_rewrite = True
                removals = [
                    RemoveMessage(id=m.id) for m in messages[pos:] if m.id
                ]

        if needs_rewrite:
            # Clone the original message under a fresh id and re-append it
            # after the fake tool results.
            clone = type(message)(**message.model_dump())
            clone.id = uuid4().hex
            rebuilt.append(clone)

        previous = message

    result = removals + rebuilt
    logger.log(TRACE, "Tool call order result: %s", result)
    logger.debug(
        "tool call order result, fake tool result needed: %s"
        ", new_msgs: %s, remove_msgs: %s",
        needs_rewrite,
        len(rebuilt),
        len(removals),
    )
    return result

dive_mcp_host/httpd/routers/utils.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from pydantic import BaseModel
1616
from starlette.datastructures import State
1717

18+
from dive_mcp_host.host.agents.message_order import FAKE_TOOL_RESPONSE
1819
from dive_mcp_host.host.errors import LogBufferNotFoundError
1920
from dive_mcp_host.host.tools.log import LogEvent, LogManager, LogMsg
2021
from dive_mcp_host.host.tools.model_types import ClientState
@@ -489,6 +490,13 @@ async def _handle_response( # noqa: C901, PLR0912
489490
await self._stream_text_msg(message)
490491
elif isinstance(message, ToolMessage):
491492
logger.log(TRACE, "got tool message: %s", message.model_dump_json())
493+
if message.response_metadata.get(FAKE_TOOL_RESPONSE, False):
494+
logger.log(
495+
TRACE,
496+
"ignore fake tool response: %s",
497+
message.model_dump_json(),
498+
)
499+
continue
492500
await self._stream_tool_result_msg(message)
493501
else:
494502
# idk what is this

tests/test_message_order.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
from langchain_core.messages import AIMessage, HumanMessage, RemoveMessage, ToolMessage
2+
3+
from dive_mcp_host.host.agents.message_order import FAKE_TOOL_RESPONSE, tool_call_order
4+
5+
6+
def test_msg_order():
    """Test if message order correction is successful."""
    tool_call_id = "toolu_012N5cw28KM9QfRLeYdik5V6"
    messages = [
        HumanMessage(content="Hi, please generate a image of xxx for me.", id="1"),
        AIMessage(
            content="Sure, I will us xxx to generate and image of xxx for you.",
            tool_calls=[
                {
                    "name": "xxx",
                    "args": {
                        "prompt": "A xxx",
                    },
                    # Use the shared constant instead of repeating the literal,
                    # so the assertion below cannot silently drift.
                    "id": tool_call_id,
                    "type": "tool_call",
                }
            ],
            id="2",
        ),
        HumanMessage(content="Hi, again", id="3"),
    ]
    result = tool_call_order(messages)
    assert len(result) == 3

    # Remove messages after the tool call — only the message that followed
    # the dangling tool call (id "3") is scheduled for removal.
    assert isinstance(result[0], RemoveMessage)
    assert result[0].id == "3"

    # Insert ToolMessage
    assert isinstance(result[1], ToolMessage)
    assert result[1].tool_call_id == tool_call_id
    assert result[1].id
    assert result[1].response_metadata[FAKE_TOOL_RESPONSE]

    # Other messages behind ToolMessage are re-appended as copies with a
    # freshly generated id (not the original "3").
    assert isinstance(result[2], HumanMessage)
    assert result[2].content == messages[2].content
    assert result[2].id
    assert result[2].id != messages[2].id

0 commit comments

Comments
 (0)