fix: trace tool forwarding decisions

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-29 06:12:21 +08:00
parent 3c9d419726
commit d9fec3fd74
4 changed files with 168 additions and 0 deletions
--- a/app/http/execution_core.py
+++ b/app/http/execution_core.py
@@ -45,6 +45,31 @@ def _resolve_ask_mode(model: str, has_tooling_context: bool, *, default_ask_mode
    return default_ask_mode
 def _tool_config_summary(tool_config: dict[str, Any] | None) -> dict[str, Any]:
    if not isinstance(tool_config, dict):
        return {"present": False, "provider": None, "tool_names": [], "tool_choice": None}
    tools = tool_config.get("tools")
    tool_names: list[str] = []
    if isinstance(tools, list):
        for tool in tools:
            if not isinstance(tool, dict):
                continue
            if tool.get("type") == "function":
                fn = tool.get("function")
                if isinstance(fn, dict) and isinstance(fn.get("name"), str) and fn.get("name").strip():
                    tool_names.append(fn.get("name").strip())
                    continue
            name = tool.get("name")
            if isinstance(name, str) and name.strip():
                tool_names.append(name.strip())
    return {
        "present": True,
        "provider": tool_config.get("provider"),
        "tool_names": tool_names,
        "tool_choice": tool_config.get("tool_choice"),
    }
 async def _apply_cached_instance_or_invalidate(
    *,
    protocol: str,
@@ -91,6 +116,14 @@ async def prepare_execution_context(
        has_tooling_context,
        default_ask_mode=default_ask_mode,
    )
    logger.info(
        "%s.prepare requested_model=%s ask_mode=%s tooling=%s tool_config=%s",
        protocol,
        requested_model,
        ask_mode,
        has_tooling_context,
        _tool_config_summary(tool_config),
    )
    reuse_eligible = (
        session_cache.enabled
@@ -152,6 +185,17 @@ async def prepare_execution_context(
        prompt = messages_to_prompt(messages_dump)
        is_reply = False
    logger.info(
        "%s.context inst=%s model=%s ask_mode=%s reuse_eligible=%s reused_session=%s affinity=%s",
        protocol,
        inst.name,
        model,
        ask_mode,
        reuse_eligible,
        bool(cached_session_id),
        affinity,
    )
    return ExecutionContext(
        ask_mode=ask_mode,
        lookup_key=lookup_key,
@@ -219,6 +263,13 @@ async def complete_execution(
    estimate_tokens: Callable[[str], int],
 ) -> CompletedExecution:
    try:
        logger.info(
            "%s.complete inst=%s ask_mode=%s tool_config=%s",
            protocol,
            execution.inst.name,
            execution.ask_mode,
            _tool_config_summary(tool_config),
        )
        result = await execution.inst.client.chat_complete(
            execution.prompt,
            execution.model,
--- a/app/lingma_client.py
+++ b/app/lingma_client.py
@@ -19,6 +19,31 @@ from .logging_config import get_logger
 logger = get_logger("lingma_gateway.client")
 def _tool_config_summary(tool_config: dict[str, Any] | None) -> dict[str, Any]:
    if not isinstance(tool_config, dict):
        return {"present": False, "provider": None, "tool_names": [], "tool_choice": None}
    tools = tool_config.get("tools")
    tool_names: list[str] = []
    if isinstance(tools, list):
        for tool in tools:
            if not isinstance(tool, dict):
                continue
            if tool.get("type") == "function":
                fn = tool.get("function")
                if isinstance(fn, dict) and isinstance(fn.get("name"), str) and fn.get("name").strip():
                    tool_names.append(fn.get("name").strip())
                    continue
            name = tool.get("name")
            if isinstance(name, str) and name.strip():
                tool_names.append(name.strip())
    return {
        "present": True,
        "provider": tool_config.get("provider"),
        "tool_names": tool_names,
        "tool_choice": tool_config.get("tool_choice"),
    }
 # Some callers live on Python 3.10 where asyncio.TimeoutError is a distinct class,
 # while 3.11+ unifies it with the builtin TimeoutError. Always catch both.
 TIMEOUT_EXCEPTIONS: tuple[type[BaseException], ...] = (
@@ -407,6 +432,12 @@ class LspWsRpcClient:
        if method in {"tool/call/sync", "tool/invoke", "tool/call/approve", "tool/invokeResult"}:
            tool_event = self._extract_tool_event(params)
            logger.info(
                "lingma tool event method=%s request_id=%s tool=%s",
                method,
                params.get("requestId"),
                tool_event,
            )
            stream = self._resolve_tool_stream(method, params, tool_event)
            if stream is not None and tool_event is not None:
@@ -433,6 +464,11 @@ class LspWsRpcClient:
        if method == "chat/finish":
            logger.info(
                "lingma finish request_id=%s session_id=%s",
                params.get("requestId"),
                params.get("sessionId"),
            )
            req_id = params.get("requestId")
            stream = self._chat_streams.get(req_id)
            if stream is not None and not stream["done"].is_set():
@@ -936,6 +972,13 @@ class LingmaGatewayClient:
        }
        if tool_config is not None:
            payload["toolConfig"] = tool_config
        logger.info(
            "lingma payload request_id=%s session_id=%s mode=%s tool_config=%s",
            request_id,
            session_id,
            ask_mode,
            _tool_config_summary(tool_config),
        )
        return payload
    async def _kick_chat_ask(self, payload: dict) -> None:
--- a/app/main.py
+++ b/app/main.py
@@ -28,6 +28,7 @@ from .config import Settings, load_settings
 from .http.execution_core import (
    _apply_cached_instance_or_invalidate as _shared_apply_cached_instance_or_invalidate,
    _resolve_ask_mode as _shared_resolve_ask_mode,
    _tool_config_summary,
    UpstreamExecutionError,
    complete_execution,
    finalize_stream_execution,
@@ -459,6 +460,12 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
    #   3. Stick the request to the pool instance that originally served it.
    tool_config = _openai_tool_config(req, settings=settings)
    has_tooling_context = _openai_has_tooling_context(req, messages_dump)
    logger.info(
        "chat.request stream=%s tooling=%s tool_config=%s",
        req.stream,
        has_tooling_context,
        _tool_config_summary(tool_config),
    )
    execution = await prepare_execution_context(
        protocol="chat",
        requested_model=req.model,
@@ -892,6 +899,12 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
        message = error.get("message") or str(detail) or "invalid tool configuration"
        return _anthropic_error(exc.status_code, "invalid_request_error", message)
    has_tooling_context = _anthropic_has_tooling_context(req)
    logger.info(
        "anthropic.request stream=%s tooling=%s tool_config=%s",
        req.stream,
        has_tooling_context,
        _tool_config_summary(tool_config),
    )
    try:
        execution = await prepare_execution_context(
            protocol="anthropic",
--- a/tests/test_tool_call_bridge.py
+++ b/tests/test_tool_call_bridge.py
@@ -860,6 +860,33 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
        self.assertIsInstance(cfg["tool_choice"], dict)
        self.assertEqual(spy_client.last_complete_args[2], "agent")
    async def test_openai_stream_forwards_tool_config_when_enabled(self) -> None:
        spy_client = _SpyClient(stream_events=[{"type": "text", "text": "ok"}], complete_result={})
        req = ChatCompletionsRequest(
            model="org_auto",
            messages=[{"role": "user", "content": "hi"}],
            stream=True,
            tools=[{"type": "function", "function": {"name": "lookup", "parameters": {}}}],
            tool_choice={"type": "function", "function": {"name": "lookup"}},
        )
        with (
            patch.object(main, "pool", _FakePool(_FakeInstance(spy_client))),
            patch.object(main, "chat_guard", _FakeGuard()),
            patch.object(main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})),
            patch.object(main.stats_collector, "record_chat", AsyncMock(return_value=None)),
            _SettingsPatch(tool_forward_enabled=True),
        ):
            response = await main.v1_chat_completions(req, _make_request("/v1/chat/completions"))
            await _collect_stream(response)
        self.assertIn("tool_config", spy_client.last_stream_kwargs)
        cfg = spy_client.last_stream_kwargs["tool_config"]
        self.assertEqual(cfg["provider"], "openai")
        self.assertEqual(len(cfg["tools"]), 1)
        self.assertIsInstance(cfg["tool_choice"], dict)
        self.assertEqual(spy_client.last_stream_args[2], "agent")
    async def test_openai_non_stream_does_not_forward_tool_config_when_disabled(self) -> None:
        spy_client = _SpyClient(stream_events=[], complete_result={"text": "ok", "toolEvents": []})
        req = ChatCompletionsRequest(
@@ -1054,6 +1081,40 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
        self.assertEqual(openai_spy.last_complete_args[2], "chat")
        self.assertEqual(anthropic_spy.last_complete_args[2], "chat")
    async def test_anthropic_stream_forwards_tool_config_when_enabled(self) -> None:
        spy_client = _SpyClient(stream_events=[{"type": "text", "text": "ok"}], complete_result={})
        req = AnthropicMessagesRequest(
            model="claude-3-5-sonnet-20241022",
            max_tokens=128,
            messages=[{"role": "user", "content": "hi"}],
            stream=True,
            tools=[{"name": "lookup", "input_schema": {"type": "object", "properties": {}}}],
            tool_choice={"type": "tool", "name": "lookup"},
        )
        with (
            patch.object(main, "pool", _FakePool(_FakeInstance(spy_client))),
            patch.object(main, "chat_guard", _FakeGuard()),
            patch.object(main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})),
            patch.object(main.stats_collector, "record_chat", AsyncMock(return_value=None)),
            patch.object(main.settings, "api_keys", ["test-key"]),
            _SettingsPatch(tool_forward_enabled=True, default_ask_mode="chat"),
        ):
            response = await main.v1_messages(
                req,
                _make_request(
                    "/v1/messages",
                    headers={"x-api-key": "test-key", "anthropic-version": "2023-06-01"},
                ),
            )
            await _collect_stream(response)
        self.assertIn("tool_config", spy_client.last_stream_kwargs)
        cfg = spy_client.last_stream_kwargs["tool_config"]
        self.assertEqual(cfg["provider"], "anthropic")
        self.assertEqual(len(cfg["tools"]), 1)
        self.assertEqual(spy_client.last_stream_args[2], "agent")
    async def test_anthropic_non_stream_does_not_forward_tool_config_when_disabled(self) -> None:
        spy_client = _SpyClient(stream_events=[], complete_result={"text": "ok", "toolEvents": []})
        req = AnthropicMessagesRequest(