fix: harden responses streaming and tool-call fallback

Ensure /v1/responses streams always terminate with response.completed and normalize Lingma tool_code fallbacks into structured tool calls, including single-argument forms.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-20 19:24:02 +08:00
parent 866a212573
commit d0df089282
6 changed files with 927 additions and 18 deletions
--- a/tests/test_pool_stats_config.py
+++ b/tests/test_pool_stats_config.py
@@ -187,6 +187,17 @@ class ConfigParsingTests(unittest.TestCase):
            settings_without_accounts = load_settings()

        self.assertEqual(settings_without_accounts.instance_count, 1)
+    def test_load_settings_parses_tool_allowlist_csv(self) -> None:
+        with patch.dict(os.environ, {"TOOL_ALLOWLIST": " lookup , write_file ,,search_docs "}, clear=True):
+            settings = load_settings()
+
+        self.assertEqual(settings.tool_allowlist, ["lookup", "write_file", "search_docs"])
+
+    def test_load_settings_empty_tool_allowlist(self) -> None:
+        with patch.dict(os.environ, {"TOOL_ALLOWLIST": "  , ,  "}, clear=True):
+            settings = load_settings()
+
+        self.assertEqual(settings.tool_allowlist, [])


 if __name__ == "__main__":
--- a/tests/test_tool_call_bridge.py
+++ b/tests/test_tool_call_bridge.py
@@ -263,6 +263,120 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
            {"query": "gateway"},
        )

+    async def test_openai_non_stream_fallbacks_to_tool_code_structured_tool_call_for_forced_tool(self) -> None:
+        fake_client = _FakeClient(
+            stream_events=[],
+            complete_result={
+                "text": "```tool_code\nlookup(query=\"gateway\")\n```",
+                "toolEvents": [],
+                "sessionId": "sess-fallback-tool-code-openai",
+            },
+        )
+        req = ChatCompletionsRequest(
+            model="org_auto",
+            messages=[{"role": "user", "content": "hi"}],
+            stream=False,
+            tools=[{"type": "function", "function": {"name": "lookup", "parameters": {}}}],
+            tool_choice={"type": "function", "function": {"name": "lookup"}},
+        )
+
+        with (
+            patch.object(main, "pool", _FakePool(_FakeInstance(fake_client))),
+            patch.object(main, "chat_guard", _FakeGuard()),
+            patch.object(main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})),
+            patch.object(main.stats_collector, "record_chat", AsyncMock(return_value=None)),
+        ):
+            response = await main.v1_chat_completions(req, _make_request("/v1/chat/completions"))
+
+        payload = json.loads(response.body)
+        message = payload["choices"][0]["message"]
+        self.assertEqual(payload["choices"][0]["finish_reason"], "tool_calls")
+        self.assertEqual(message["content"], "")
+        self.assertEqual(message["tool_calls"][0]["function"]["name"], "lookup")
+        self.assertEqual(
+            json.loads(message["tool_calls"][0]["function"]["arguments"]),
+            {"query": "gateway"},
+        )
+
+    async def test_openai_non_stream_fallbacks_to_tool_code_structured_tool_call_for_forced_tool_with_positional_arg(self) -> None:
+        fake_client = _FakeClient(
+            stream_events=[],
+            complete_result={
+                "text": "```tool_code\nlookup(\"gateway\")\n```",
+                "toolEvents": [],
+                "sessionId": "sess-fallback-tool-code-openai-positional",
+            },
+        )
+        req = ChatCompletionsRequest(
+            model="org_auto",
+            messages=[{"role": "user", "content": "hi"}],
+            stream=False,
+            tools=[
+                {
+                    "type": "function",
+                    "function": {
+                        "name": "lookup",
+                        "parameters": {
+                            "type": "object",
+                            "properties": {"query": {"type": "string"}},
+                            "required": ["query"],
+                        },
+                    },
+                }
+            ],
+            tool_choice={"type": "function", "function": {"name": "lookup"}},
+        )
+
+        with (
+            patch.object(main, "pool", _FakePool(_FakeInstance(fake_client))),
+            patch.object(main, "chat_guard", _FakeGuard()),
+            patch.object(main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})),
+            patch.object(main.stats_collector, "record_chat", AsyncMock(return_value=None)),
+        ):
+            response = await main.v1_chat_completions(req, _make_request("/v1/chat/completions"))
+
+        payload = json.loads(response.body)
+        message = payload["choices"][0]["message"]
+        self.assertEqual(payload["choices"][0]["finish_reason"], "tool_calls")
+        self.assertEqual(message["content"], "")
+        self.assertEqual(message["tool_calls"][0]["function"]["name"], "lookup")
+        self.assertEqual(
+            json.loads(message["tool_calls"][0]["function"]["arguments"]),
+            {"query": "gateway"},
+        )
+
+    async def test_openai_stream_fallbacks_to_tool_code_structured_tool_call_for_forced_tool(self) -> None:
+        fake_client = _FakeClient(
+            stream_events=[
+                {"type": "text", "text": "```tool_code\n"},
+                {"type": "text", "text": 'lookup(query=\"gateway\")\n'},
+                {"type": "text", "text": "```"},
+            ],
+            complete_result={},
+        )
+        req = ChatCompletionsRequest(
+            model="org_auto",
+            messages=[{"role": "user", "content": "hi"}],
+            stream=True,
+            tools=[{"type": "function", "function": {"name": "lookup", "parameters": {}}}],
+            tool_choice={"type": "function", "function": {"name": "lookup"}},
+        )
+
+        with (
+            patch.object(main, "pool", _FakePool(_FakeInstance(fake_client))),
+            patch.object(main, "chat_guard", _FakeGuard()),
+            patch.object(main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})),
+            patch.object(main.stats_collector, "record_chat", AsyncMock(return_value=None)),
+        ):
+            response = await main.v1_chat_completions(req, _make_request("/v1/chat/completions"))
+            body = await _collect_stream(response)
+
+        self.assertIn('"tool_calls"', body)
+        self.assertIn('"name": "lookup"', body)
+        self.assertIn('{"query": "gateway"}', body)
+        self.assertIn('"finish_reason": "tool_calls"', body)
+        self.assertIn('data: [DONE]', body)
+
    async def test_openai_stream_bridges_tool_and_text_events(self) -> None:
        fake_client = _FakeClient(
            stream_events=[
@@ -300,6 +414,7 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
        self.assertIn('"usage"', body)
        self.assertIn("data: [DONE]", body)

+
    async def test_anthropic_non_stream_bridges_tool_blocks(self) -> None:
        fake_client = _FakeClient(
            stream_events=[],
@@ -605,6 +720,57 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
        self.assertEqual(spy_client.last_complete_args[2], "agent")


+
+    async def test_openai_non_stream_filters_tools_by_allowlist(self) -> None:
+        spy_client = _SpyClient(stream_events=[], complete_result={"text": "ok", "toolEvents": []})
+        req = ChatCompletionsRequest(
+            model="org_auto",
+            messages=[{"role": "user", "content": "hi"}],
+            stream=False,
+            tools=[
+                {"type": "function", "function": {"name": "lookup", "parameters": {}}},
+                {"type": "function", "function": {"name": "write_file", "parameters": {}}},
+            ],
+            tool_choice={"type": "function", "function": {"name": "lookup"}},
+        )
+
+        with (
+            patch.object(main, "pool", _FakePool(_FakeInstance(spy_client))),
+            patch.object(main, "chat_guard", _FakeGuard()),
+            patch.object(main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})),
+            patch.object(main.stats_collector, "record_chat", AsyncMock(return_value=None)),
+            _SettingsPatch(tool_forward_enabled=True, tool_allowlist=["lookup"]),
+        ):
+            await main.v1_chat_completions(req, _make_request("/v1/chat/completions"))
+
+        cfg = spy_client.last_complete_kwargs["tool_config"]
+        self.assertEqual([tool["function"]["name"] for tool in cfg["tools"]], ["lookup"])
+        self.assertEqual(cfg["tool_choice"], req.tool_choice)
+
+    async def test_openai_non_stream_rejects_forced_tool_outside_allowlist(self) -> None:
+        spy_client = _SpyClient(stream_events=[], complete_result={"text": "ok", "toolEvents": []})
+        req = ChatCompletionsRequest(
+            model="org_auto",
+            messages=[{"role": "user", "content": "hi"}],
+            stream=False,
+            tools=[{"type": "function", "function": {"name": "lookup", "parameters": {}}}],
+            tool_choice={"type": "function", "function": {"name": "write_file"}},
+        )
+
+        with (
+            patch.object(main, "pool", _FakePool(_FakeInstance(spy_client))),
+            patch.object(main, "chat_guard", _FakeGuard()),
+            patch.object(main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})),
+            patch.object(main.stats_collector, "record_chat", AsyncMock(return_value=None)),
+            _SettingsPatch(tool_forward_enabled=True, tool_allowlist=["lookup"]),
+        ):
+            with self.assertRaises(main.HTTPException) as cm:
+                await main.v1_chat_completions(req, _make_request("/v1/chat/completions"))
+
+        self.assertEqual(cm.exception.status_code, 400)
+        self.assertEqual(cm.exception.detail["error"]["type"], "invalid_request_error")
+        self.assertIn("write_file", cm.exception.detail["error"]["message"])
+
    async def test_openai_tooling_context_disables_session_reuse_cache(self) -> None:
        fake_cache = _FakeSessionCache()
        fake_client = _FakeClient(
@@ -757,6 +923,74 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
        self.assertEqual(len(cfg["tools"]), 1)
        self.assertEqual(spy_client.last_complete_args[2], "agent")

+
+    async def test_anthropic_non_stream_filters_tools_by_allowlist(self) -> None:
+        spy_client = _SpyClient(stream_events=[], complete_result={"text": "ok", "toolEvents": []})
+        req = AnthropicMessagesRequest(
+            model="claude-3-5-sonnet-20241022",
+            max_tokens=128,
+            messages=[{"role": "user", "content": "hi"}],
+            stream=False,
+            tools=[
+                {"name": "lookup", "input_schema": {"type": "object", "properties": {}}},
+                {"name": "write_file", "input_schema": {"type": "object", "properties": {}}},
+            ],
+            tool_choice={"type": "tool", "name": "lookup"},
+        )
+
+        with (
+            patch.object(main, "pool", _FakePool(_FakeInstance(spy_client))),
+            patch.object(main, "chat_guard", _FakeGuard()),
+            patch.object(main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})),
+            patch.object(main.stats_collector, "record_chat", AsyncMock(return_value=None)),
+            patch.object(main.settings, "api_keys", ["test-key"]),
+            _SettingsPatch(tool_forward_enabled=True, tool_allowlist=["lookup"]),
+        ):
+            await main.v1_messages(
+                req,
+                _make_request(
+                    "/v1/messages",
+                    headers={"x-api-key": "test-key", "anthropic-version": "2023-06-01"},
+                ),
+            )
+
+        cfg = spy_client.last_complete_kwargs["tool_config"]
+        self.assertEqual([tool["name"] for tool in cfg["tools"]], ["lookup"])
+        self.assertEqual(cfg["tool_choice"], req.tool_choice)
+
+    async def test_anthropic_non_stream_rejects_forced_tool_outside_allowlist(self) -> None:
+        spy_client = _SpyClient(stream_events=[], complete_result={"text": "ok", "toolEvents": []})
+        req = AnthropicMessagesRequest(
+            model="claude-3-5-sonnet-20241022",
+            max_tokens=128,
+            messages=[{"role": "user", "content": "hi"}],
+            stream=False,
+            tools=[{"name": "lookup", "input_schema": {"type": "object", "properties": {}}}],
+            tool_choice={"type": "tool", "name": "write_file"},
+        )
+
+        with (
+            patch.object(main, "pool", _FakePool(_FakeInstance(spy_client))),
+            patch.object(main, "chat_guard", _FakeGuard()),
+            patch.object(main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})),
+            patch.object(main.stats_collector, "record_chat", AsyncMock(return_value=None)),
+            patch.object(main.settings, "api_keys", ["test-key"]),
+            _SettingsPatch(tool_forward_enabled=True, tool_allowlist=["lookup"]),
+        ):
+            response = await main.v1_messages(
+                req,
+                _make_request(
+                    "/v1/messages",
+                    headers={"x-api-key": "test-key", "anthropic-version": "2023-06-01"},
+                ),
+            )
+
+        self.assertEqual(response.status_code, 400)
+        payload = json.loads(response.body)
+        self.assertEqual(payload["type"], "error")
+        self.assertEqual(payload["error"]["type"], "invalid_request_error")
+        self.assertIn("write_file", payload["error"]["message"])
+
    async def test_anthropic_tooling_context_disables_session_reuse_cache(self) -> None:
        fake_cache = _FakeSessionCache()
        fake_client = _FakeClient(
@@ -833,6 +1067,54 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
        messages_dump = [m.model_dump() for m in chat_req.messages]
        self.assertEqual(messages_dump, [{"role": "user", "content": "hello from responses", "name": None, "tool_call_id": None, "tool_calls": None}])

+    async def test_responses_non_stream_maps_chat_tool_calls_to_function_call_output(self) -> None:
+        req = ResponsesRequest(
+            model="org_auto",
+            input="tool please",
+            stream=False,
+        )
+        chat_payload = {
+            "id": "chatcmpl-tools1",
+            "created": 234,
+            "model": "org_auto",
+            "choices": [
+                {
+                    "index": 0,
+                    "finish_reason": "tool_calls",
+                    "message": {
+                        "role": "assistant",
+                        "content": "",
+                        "tool_calls": [
+                            {
+                                "id": "call_1",
+                                "type": "function",
+                                "function": {
+                                    "name": "lookup",
+                                    "arguments": "{\"q\":\"gateway\"}",
+                                },
+                            }
+                        ],
+                    },
+                }
+            ],
+            "usage": {"prompt_tokens": 8, "completion_tokens": 3, "total_tokens": 11},
+        }
+
+        mock_chat = AsyncMock(return_value=JSONResponse(content=chat_payload))
+        with patch.object(main, "v1_chat_completions", mock_chat):
+            response = await main.v1_responses(req, _make_request("/v1/responses"))
+
+        payload = json.loads(response.body)
+        self.assertEqual(payload["status"], "completed")
+        self.assertEqual(payload["output_text"], "")
+        self.assertEqual(payload["usage"], {"input_tokens": 8, "output_tokens": 3, "total_tokens": 11})
+        self.assertEqual(len(payload["output"]), 1)
+        self.assertEqual(payload["output"][0]["type"], "function_call")
+        self.assertEqual(payload["output"][0]["call_id"], "call_1")
+        self.assertEqual(payload["output"][0]["id"], "call_1")
+        self.assertEqual(payload["output"][0]["name"], "lookup")
+        self.assertEqual(payload["output"][0]["arguments"], "{\"q\":\"gateway\"}")
+
    async def test_responses_forwards_input_tools_and_tool_choice_to_chat_request(self) -> None:
        req = ResponsesRequest(
            model="org_auto",
@@ -883,17 +1165,70 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
            response = await main.v1_responses(req, _make_request("/v1/responses"))
            body = await _collect_stream(response)

-        self.assertIn('"type": "response.created"', body)
+        self.assertIn('"type": "response.output_item.added"', body)
        self.assertIn('"type": "response.output_text.delta"', body)
        self.assertIn('"delta": "hello"', body)
-        self.assertIn('"type": "response.function_call.delta"', body)
+        self.assertIn('"type": "response.function_call_arguments.delta"', body)
        self.assertIn('"item_id": "call_1"', body)
+        self.assertIn('"output_index": 1', body)
+        self.assertIn('"delta": "{\\"q\\": \\"x\\"}"', body)
+        self.assertIn('"type": "response.function_call_arguments.done"', body)
+        self.assertIn('"arguments": "{\\"q\\": \\"x\\"}"', body)
+        self.assertIn('"type": "response.output_item.done"', body)
+        self.assertIn('"type": "function_call"', body)
        self.assertIn('"name": "lookup"', body)
+        self.assertIn('"arguments": "{\\"q\\": \\"x\\"}"', body)
        self.assertIn('"type": "response.completed"', body)
        self.assertIn('"input_tokens": 3', body)
        self.assertIn('"output_tokens": 2', body)
        self.assertIn('data: [DONE]', body)

+    async def test_responses_stream_accumulates_fragmented_tool_arguments(self) -> None:
+        async def _chat_sse():
+            yield b'data: {"choices": [{"delta": {"tool_calls": [{"id": "call_1", "function": {"name": "lookup", "arguments": "{\\"q\\":"}}]}}]}\n\n'
+            yield b'data: {"choices": [{"delta": {"tool_calls": [{"id": "call_1", "function": {"name": "lookup", "arguments": " \\\"x\\\"}"}}]}}]}\n\n'
+            yield b"data: [DONE]\n\n"
+
+        req = ResponsesRequest(model="org_auto", input="hi", stream=True)
+        mock_chat = AsyncMock(
+            return_value=StreamingResponse(_chat_sse(), media_type="text/event-stream")
+        )
+
+        with patch.object(main, "v1_chat_completions", mock_chat):
+            response = await main.v1_responses(req, _make_request("/v1/responses"))
+            body = await _collect_stream(response)
+
+        self.assertIn('"type": "response.function_call_arguments.delta"', body)
+        self.assertIn('"delta": "{\\"q\\":"', body)
+        self.assertIn('"delta": " \\\"x\\\"}"', body)
+        self.assertIn('"type": "response.function_call_arguments.done"', body)
+        self.assertIn('"arguments": "{\\"q\\": \\\"x\\\"}"', body)
+        self.assertIn('"type": "response.output_item.done"', body)
+        self.assertIn('"arguments": "{\\"q\\": \\\"x\\\"}"', body)
+        self.assertIn('data: [DONE]', body)
+
+    async def test_responses_stream_accumulates_fragmented_tool_arguments_without_repeated_id_or_name(self) -> None:
+        async def _chat_sse():
+            yield b'data: {"choices": [{"delta": {"tool_calls": [{"index": 0, "id": "call_1", "function": {"name": "lookup", "arguments": "{\\"q\\":"}}]}}]}\n\n'
+            yield b'data: {"choices": [{"delta": {"tool_calls": [{"index": 0, "function": {"arguments": " \\\"x\\\"}"}}]}}]}\n\n'
+            yield b"data: [DONE]\n\n"
+
+        req = ResponsesRequest(model="org_auto", input="hi", stream=True)
+        mock_chat = AsyncMock(
+            return_value=StreamingResponse(_chat_sse(), media_type="text/event-stream")
+        )
+
+        with patch.object(main, "v1_chat_completions", mock_chat):
+            response = await main.v1_responses(req, _make_request("/v1/responses"))
+            body = await _collect_stream(response)
+
+        self.assertEqual(body.count('"item_id": "call_1"'), 3)
+        self.assertIn('"name": "lookup"', body)
+        self.assertIn('"delta": "{\\"q\\":"', body)
+        self.assertIn('"delta": " \\\"x\\\"}"', body)
+        self.assertIn('"arguments": "{\\"q\\": \\\"x\\\"}"', body)
+        self.assertIn('data: [DONE]', body)
+
    async def test_responses_stream_emits_completed_when_upstream_closes_without_done(self) -> None:
        async def _chat_sse_without_done():
            yield b'data: {"choices": [{"delta": {"content": "partial"}}]}\n\n'
@@ -954,7 +1289,31 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
        self.assertIn('data: [DONE]', body)


-    async def test_responses_non_stream_returns_502_on_invalid_upstream_json(self) -> None:
+    async def test_responses_alias_matches_v1_responses_behavior(self) -> None:
+        req = ResponsesRequest(model="org_auto", input="hello", stream=False)
+        chat_payload = {
+            "id": "chatcmpl-alias1",
+            "created": 123,
+            "model": "org_auto",
+            "choices": [
+                {
+                    "index": 0,
+                    "finish_reason": "stop",
+                    "message": {"role": "assistant", "content": "done"},
+                }
+            ],
+            "usage": {"prompt_tokens": 4, "completion_tokens": 2, "total_tokens": 6},
+        }
+
+        mock_chat = AsyncMock(return_value=JSONResponse(content=chat_payload))
+        with patch.object(main, "v1_chat_completions", mock_chat):
+            response = await main.v1_responses(req, _make_request("/responses"))
+
+        payload = json.loads(response.body)
+        self.assertEqual(payload["id"], "resp_alias1")
+        self.assertEqual(payload["status"], "completed")
+        mock_chat.assert_awaited_once()
+
        req = ResponsesRequest(model="org_auto", input="hi", stream=False)
        mock_chat = AsyncMock(return_value=Response(content="not-json", media_type="text/plain"))