fix: harden responses streaming and tool-call fallback

Ensure /v1/responses streams always terminate with response.completed and normalize Lingma tool_code fallbacks into structured tool calls, including single-argument forms.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
mmc
2026-04-20 19:24:02 +08:00
parent 866a212573
commit d0df089282
6 changed files with 927 additions and 18 deletions

View File

@@ -187,6 +187,17 @@ class ConfigParsingTests(unittest.TestCase):
settings_without_accounts = load_settings()
self.assertEqual(settings_without_accounts.instance_count, 1)
def test_load_settings_parses_tool_allowlist_csv(self) -> None:
    # TOOL_ALLOWLIST is supplied as a comma-separated string with padding
    # around names and one empty entry; the expected list holds the three
    # names stripped of whitespace, with the empty entry dropped.
    env = {"TOOL_ALLOWLIST": " lookup , write_file ,,search_docs "}
    expected_names = ["lookup", "write_file", "search_docs"]
    # clear=True removes every other environment variable for the duration.
    with patch.dict(os.environ, env, clear=True):
        loaded = load_settings()
    self.assertEqual(loaded.tool_allowlist, expected_names)
def test_load_settings_empty_tool_allowlist(self) -> None:
    # A TOOL_ALLOWLIST made only of commas and spaces is expected to load
    # as an empty list.
    env = {"TOOL_ALLOWLIST": " , , "}
    # clear=True removes every other environment variable for the duration.
    with patch.dict(os.environ, env, clear=True):
        loaded = load_settings()
    self.assertEqual(loaded.tool_allowlist, [])
if __name__ == "__main__":

View File

@@ -263,6 +263,120 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
{"query": "gateway"},
)
async def test_openai_non_stream_fallbacks_to_tool_code_structured_tool_call_for_forced_tool(self) -> None:
    # The fake upstream reports no structured tool events; its text is a
    # ```tool_code fence holding a keyword-argument call to "lookup",
    # which is also the tool forced through tool_choice.
    upstream_result = {
        "text": "```tool_code\nlookup(query=\"gateway\")\n```",
        "toolEvents": [],
        "sessionId": "sess-fallback-tool-code-openai",
    }
    client = _FakeClient(stream_events=[], complete_result=upstream_result)
    lookup_tool = {"type": "function", "function": {"name": "lookup", "parameters": {}}}
    chat_req = ChatCompletionsRequest(
        model="org_auto",
        messages=[{"role": "user", "content": "hi"}],
        stream=False,
        tools=[lookup_tool],
        tool_choice={"type": "function", "function": {"name": "lookup"}},
    )
    with (
        patch.object(main, "pool", _FakePool(_FakeInstance(client))),
        patch.object(main, "chat_guard", _FakeGuard()),
        patch.object(main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})),
        patch.object(main.stats_collector, "record_chat", AsyncMock(return_value=None)),
    ):
        resp = await main.v1_chat_completions(chat_req, _make_request("/v1/chat/completions"))

    first_choice = json.loads(resp.body)["choices"][0]
    assistant_msg = first_choice["message"]
    # The reply must be a structured tool call with empty text content.
    self.assertEqual(first_choice["finish_reason"], "tool_calls")
    self.assertEqual(assistant_msg["content"], "")
    self.assertEqual(assistant_msg["tool_calls"][0]["function"]["name"], "lookup")
    parsed_args = json.loads(assistant_msg["tool_calls"][0]["function"]["arguments"])
    self.assertEqual(parsed_args, {"query": "gateway"})
async def test_openai_non_stream_fallbacks_to_tool_code_structured_tool_call_for_forced_tool_with_positional_arg(self) -> None:
    # Same fallback as the keyword form, but the fenced call passes a single
    # positional argument. The tool schema declares one required string
    # property, "query", and the expected arguments map the value onto it.
    upstream_result = {
        "text": "```tool_code\nlookup(\"gateway\")\n```",
        "toolEvents": [],
        "sessionId": "sess-fallback-tool-code-openai-positional",
    }
    client = _FakeClient(stream_events=[], complete_result=upstream_result)
    lookup_schema = {
        "type": "object",
        "properties": {"query": {"type": "string"}},
        "required": ["query"],
    }
    lookup_tool = {
        "type": "function",
        "function": {"name": "lookup", "parameters": lookup_schema},
    }
    chat_req = ChatCompletionsRequest(
        model="org_auto",
        messages=[{"role": "user", "content": "hi"}],
        stream=False,
        tools=[lookup_tool],
        tool_choice={"type": "function", "function": {"name": "lookup"}},
    )
    with (
        patch.object(main, "pool", _FakePool(_FakeInstance(client))),
        patch.object(main, "chat_guard", _FakeGuard()),
        patch.object(main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})),
        patch.object(main.stats_collector, "record_chat", AsyncMock(return_value=None)),
    ):
        resp = await main.v1_chat_completions(chat_req, _make_request("/v1/chat/completions"))

    first_choice = json.loads(resp.body)["choices"][0]
    assistant_msg = first_choice["message"]
    self.assertEqual(first_choice["finish_reason"], "tool_calls")
    self.assertEqual(assistant_msg["content"], "")
    self.assertEqual(assistant_msg["tool_calls"][0]["function"]["name"], "lookup")
    parsed_args = json.loads(assistant_msg["tool_calls"][0]["function"]["arguments"])
    self.assertEqual(parsed_args, {"query": "gateway"})
async def test_openai_stream_fallbacks_to_tool_code_structured_tool_call_for_forced_tool(self) -> None:
    # The fenced tool_code call arrives split across three text events.
    text_chunks = ["```tool_code\n", 'lookup(query=\"gateway\")\n', "```"]
    client = _FakeClient(
        stream_events=[{"type": "text", "text": chunk} for chunk in text_chunks],
        complete_result={},
    )
    lookup_tool = {"type": "function", "function": {"name": "lookup", "parameters": {}}}
    chat_req = ChatCompletionsRequest(
        model="org_auto",
        messages=[{"role": "user", "content": "hi"}],
        stream=True,
        tools=[lookup_tool],
        tool_choice={"type": "function", "function": {"name": "lookup"}},
    )
    with (
        patch.object(main, "pool", _FakePool(_FakeInstance(client))),
        patch.object(main, "chat_guard", _FakeGuard()),
        patch.object(main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})),
        patch.object(main.stats_collector, "record_chat", AsyncMock(return_value=None)),
    ):
        resp = await main.v1_chat_completions(chat_req, _make_request("/v1/chat/completions"))
        # Drain the stream while the patches are still active.
        sse_body = await _collect_stream(resp)

    # Substrings the collected SSE text must contain, checked in this order.
    expected_fragments = (
        '"tool_calls"',
        '"name": "lookup"',
        '{"query": "gateway"}',
        '"finish_reason": "tool_calls"',
        'data: [DONE]',
    )
    for fragment in expected_fragments:
        self.assertIn(fragment, sse_body)
async def test_openai_stream_bridges_tool_and_text_events(self) -> None:
fake_client = _FakeClient(
stream_events=[
@@ -300,6 +414,7 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
self.assertIn('"usage"', body)
self.assertIn("data: [DONE]", body)
async def test_anthropic_non_stream_bridges_tool_blocks(self) -> None:
fake_client = _FakeClient(
stream_events=[],
@@ -605,6 +720,57 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
self.assertEqual(spy_client.last_complete_args[2], "agent")
async def test_openai_non_stream_filters_tools_by_allowlist(self) -> None:
    # Two tools are offered while the patched allowlist names only "lookup";
    # the tool_config handed to the client must contain just that tool and
    # carry the request's tool_choice through unchanged.
    spy = _SpyClient(stream_events=[], complete_result={"text": "ok", "toolEvents": []})
    offered_tools = [
        {"type": "function", "function": {"name": "lookup", "parameters": {}}},
        {"type": "function", "function": {"name": "write_file", "parameters": {}}},
    ]
    chat_req = ChatCompletionsRequest(
        model="org_auto",
        messages=[{"role": "user", "content": "hi"}],
        stream=False,
        tools=offered_tools,
        tool_choice={"type": "function", "function": {"name": "lookup"}},
    )
    with (
        patch.object(main, "pool", _FakePool(_FakeInstance(spy))),
        patch.object(main, "chat_guard", _FakeGuard()),
        patch.object(main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})),
        patch.object(main.stats_collector, "record_chat", AsyncMock(return_value=None)),
        _SettingsPatch(tool_forward_enabled=True, tool_allowlist=["lookup"]),
    ):
        await main.v1_chat_completions(chat_req, _make_request("/v1/chat/completions"))

    forwarded = spy.last_complete_kwargs["tool_config"]
    forwarded_names = [entry["function"]["name"] for entry in forwarded["tools"]]
    self.assertEqual(forwarded_names, ["lookup"])
    self.assertEqual(forwarded["tool_choice"], chat_req.tool_choice)
async def test_openai_non_stream_rejects_forced_tool_outside_allowlist(self) -> None:
    # tool_choice forces "write_file" while the patched allowlist contains
    # only "lookup"; the endpoint is expected to raise a 400 HTTPException
    # whose message names the rejected tool.
    spy = _SpyClient(stream_events=[], complete_result={"text": "ok", "toolEvents": []})
    lookup_tool = {"type": "function", "function": {"name": "lookup", "parameters": {}}}
    chat_req = ChatCompletionsRequest(
        model="org_auto",
        messages=[{"role": "user", "content": "hi"}],
        stream=False,
        tools=[lookup_tool],
        tool_choice={"type": "function", "function": {"name": "write_file"}},
    )
    # assertRaises is listed last so it is the innermost context manager.
    with (
        patch.object(main, "pool", _FakePool(_FakeInstance(spy))),
        patch.object(main, "chat_guard", _FakeGuard()),
        patch.object(main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})),
        patch.object(main.stats_collector, "record_chat", AsyncMock(return_value=None)),
        _SettingsPatch(tool_forward_enabled=True, tool_allowlist=["lookup"]),
        self.assertRaises(main.HTTPException) as raised,
    ):
        await main.v1_chat_completions(chat_req, _make_request("/v1/chat/completions"))

    self.assertEqual(raised.exception.status_code, 400)
    self.assertEqual(raised.exception.detail["error"]["type"], "invalid_request_error")
    self.assertIn("write_file", raised.exception.detail["error"]["message"])
async def test_openai_tooling_context_disables_session_reuse_cache(self) -> None:
fake_cache = _FakeSessionCache()
fake_client = _FakeClient(
@@ -757,6 +923,74 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
self.assertEqual(len(cfg["tools"]), 1)
self.assertEqual(spy_client.last_complete_args[2], "agent")
async def test_anthropic_non_stream_filters_tools_by_allowlist(self) -> None:
    # Anthropic-shaped request offering two tools while the patched
    # allowlist names only "lookup"; the forwarded tool_config must hold
    # just that tool and the request's tool_choice unchanged.
    spy = _SpyClient(stream_events=[], complete_result={"text": "ok", "toolEvents": []})
    offered_tools = [
        {"name": "lookup", "input_schema": {"type": "object", "properties": {}}},
        {"name": "write_file", "input_schema": {"type": "object", "properties": {}}},
    ]
    messages_req = AnthropicMessagesRequest(
        model="claude-3-5-sonnet-20241022",
        max_tokens=128,
        messages=[{"role": "user", "content": "hi"}],
        stream=False,
        tools=offered_tools,
        tool_choice={"type": "tool", "name": "lookup"},
    )
    auth_headers = {"x-api-key": "test-key", "anthropic-version": "2023-06-01"}
    with (
        patch.object(main, "pool", _FakePool(_FakeInstance(spy))),
        patch.object(main, "chat_guard", _FakeGuard()),
        patch.object(main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})),
        patch.object(main.stats_collector, "record_chat", AsyncMock(return_value=None)),
        patch.object(main.settings, "api_keys", ["test-key"]),
        _SettingsPatch(tool_forward_enabled=True, tool_allowlist=["lookup"]),
    ):
        await main.v1_messages(
            messages_req,
            _make_request("/v1/messages", headers=auth_headers),
        )

    forwarded = spy.last_complete_kwargs["tool_config"]
    forwarded_names = [entry["name"] for entry in forwarded["tools"]]
    self.assertEqual(forwarded_names, ["lookup"])
    self.assertEqual(forwarded["tool_choice"], messages_req.tool_choice)
async def test_anthropic_non_stream_rejects_forced_tool_outside_allowlist(self) -> None:
    # tool_choice forces "write_file" while the patched allowlist contains
    # only "lookup". This test reads the rejection from the returned
    # response: status 400 and an error payload naming the rejected tool.
    spy = _SpyClient(stream_events=[], complete_result={"text": "ok", "toolEvents": []})
    lookup_tool = {"name": "lookup", "input_schema": {"type": "object", "properties": {}}}
    messages_req = AnthropicMessagesRequest(
        model="claude-3-5-sonnet-20241022",
        max_tokens=128,
        messages=[{"role": "user", "content": "hi"}],
        stream=False,
        tools=[lookup_tool],
        tool_choice={"type": "tool", "name": "write_file"},
    )
    auth_headers = {"x-api-key": "test-key", "anthropic-version": "2023-06-01"}
    with (
        patch.object(main, "pool", _FakePool(_FakeInstance(spy))),
        patch.object(main, "chat_guard", _FakeGuard()),
        patch.object(main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})),
        patch.object(main.stats_collector, "record_chat", AsyncMock(return_value=None)),
        patch.object(main.settings, "api_keys", ["test-key"]),
        _SettingsPatch(tool_forward_enabled=True, tool_allowlist=["lookup"]),
    ):
        resp = await main.v1_messages(
            messages_req,
            _make_request("/v1/messages", headers=auth_headers),
        )

    self.assertEqual(resp.status_code, 400)
    error_payload = json.loads(resp.body)
    self.assertEqual(error_payload["type"], "error")
    self.assertEqual(error_payload["error"]["type"], "invalid_request_error")
    self.assertIn("write_file", error_payload["error"]["message"])
async def test_anthropic_tooling_context_disables_session_reuse_cache(self) -> None:
fake_cache = _FakeSessionCache()
fake_client = _FakeClient(
@@ -833,6 +1067,54 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
messages_dump = [m.model_dump() for m in chat_req.messages]
self.assertEqual(messages_dump, [{"role": "user", "content": "hello from responses", "name": None, "tool_call_id": None, "tool_calls": None}])
async def test_responses_non_stream_maps_chat_tool_calls_to_function_call_output(self) -> None:
    # v1_chat_completions is replaced by a mock returning a chat completion
    # whose only choice is a single "lookup" tool call; the responses
    # payload must expose it as one function_call output item.
    responses_req = ResponsesRequest(
        model="org_auto",
        input="tool please",
        stream=False,
    )
    raw_arguments = "{\"q\":\"gateway\"}"
    tool_call = {
        "id": "call_1",
        "type": "function",
        "function": {"name": "lookup", "arguments": raw_arguments},
    }
    assistant_message = {"role": "assistant", "content": "", "tool_calls": [tool_call]}
    chat_payload = {
        "id": "chatcmpl-tools1",
        "created": 234,
        "model": "org_auto",
        "choices": [
            {"index": 0, "finish_reason": "tool_calls", "message": assistant_message},
        ],
        "usage": {"prompt_tokens": 8, "completion_tokens": 3, "total_tokens": 11},
    }
    chat_mock = AsyncMock(return_value=JSONResponse(content=chat_payload))
    with patch.object(main, "v1_chat_completions", chat_mock):
        resp = await main.v1_responses(responses_req, _make_request("/v1/responses"))

    result = json.loads(resp.body)
    self.assertEqual(result["status"], "completed")
    self.assertEqual(result["output_text"], "")
    # Usage is expected under the input_tokens/output_tokens key names.
    self.assertEqual(result["usage"], {"input_tokens": 8, "output_tokens": 3, "total_tokens": 11})
    self.assertEqual(len(result["output"]), 1)
    self.assertEqual(result["output"][0]["type"], "function_call")
    # Both "call_id" and "id" are expected to carry the chat tool-call id.
    self.assertEqual(result["output"][0]["call_id"], "call_1")
    self.assertEqual(result["output"][0]["id"], "call_1")
    self.assertEqual(result["output"][0]["name"], "lookup")
    self.assertEqual(result["output"][0]["arguments"], raw_arguments)
async def test_responses_forwards_input_tools_and_tool_choice_to_chat_request(self) -> None:
req = ResponsesRequest(
model="org_auto",
@@ -883,17 +1165,70 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
response = await main.v1_responses(req, _make_request("/v1/responses"))
body = await _collect_stream(response)
self.assertIn('"type": "response.created"', body)
self.assertIn('"type": "response.output_item.added"', body)
self.assertIn('"type": "response.output_text.delta"', body)
self.assertIn('"delta": "hello"', body)
self.assertIn('"type": "response.function_call.delta"', body)
self.assertIn('"type": "response.function_call_arguments.delta"', body)
self.assertIn('"item_id": "call_1"', body)
self.assertIn('"output_index": 1', body)
self.assertIn('"delta": "{\\"q\\": \\"x\\"}"', body)
self.assertIn('"type": "response.function_call_arguments.done"', body)
self.assertIn('"arguments": "{\\"q\\": \\"x\\"}"', body)
self.assertIn('"type": "response.output_item.done"', body)
self.assertIn('"type": "function_call"', body)
self.assertIn('"name": "lookup"', body)
self.assertIn('"arguments": "{\\"q\\": \\"x\\"}"', body)
self.assertIn('"type": "response.completed"', body)
self.assertIn('"input_tokens": 3', body)
self.assertIn('"output_tokens": 2', body)
self.assertIn('data: [DONE]', body)
async def test_responses_stream_accumulates_fragmented_tool_arguments(self) -> None:
# The mocked chat stream delivers one tool call's JSON arguments in two
# chunks; both chunks repeat the call id "call_1" and the name "lookup".
async def _chat_sse():
# First fragment: the opening of the arguments object, {"q":
yield b'data: {"choices": [{"delta": {"tool_calls": [{"id": "call_1", "function": {"name": "lookup", "arguments": "{\\"q\\":"}}]}}]}\n\n'
# Second fragment: the remainder,  "x"}
yield b'data: {"choices": [{"delta": {"tool_calls": [{"id": "call_1", "function": {"name": "lookup", "arguments": " \\\"x\\\"}"}}]}}]}\n\n'
yield b"data: [DONE]\n\n"
req = ResponsesRequest(model="org_auto", input="hi", stream=True)
# v1_chat_completions is replaced so v1_responses consumes the canned SSE above.
mock_chat = AsyncMock(
return_value=StreamingResponse(_chat_sse(), media_type="text/event-stream")
)
with patch.object(main, "v1_chat_completions", mock_chat):
response = await main.v1_responses(req, _make_request("/v1/responses"))
body = await _collect_stream(response)
# Each upstream fragment must appear as its own arguments delta.
self.assertIn('"type": "response.function_call_arguments.delta"', body)
self.assertIn('"delta": "{\\"q\\":"', body)
self.assertIn('"delta": " \\\"x\\\"}"', body)
# The concatenated arguments string must appear in the output.
self.assertIn('"type": "response.function_call_arguments.done"', body)
self.assertIn('"arguments": "{\\"q\\": \\\"x\\\"}"', body)
self.assertIn('"type": "response.output_item.done"', body)
# NOTE(review): identical to the assertion two lines up; assertIn over the
# whole body cannot tell which event carried the arguments.
self.assertIn('"arguments": "{\\"q\\": \\\"x\\\"}"', body)
self.assertIn('data: [DONE]', body)
async def test_responses_stream_accumulates_fragmented_tool_arguments_without_repeated_id_or_name(self) -> None:
# Variant of the fragmented-arguments case: only the first chunk carries the
# call id and function name; the second has just "index": 0 and an
# arguments fragment.
async def _chat_sse():
yield b'data: {"choices": [{"delta": {"tool_calls": [{"index": 0, "id": "call_1", "function": {"name": "lookup", "arguments": "{\\"q\\":"}}]}}]}\n\n'
yield b'data: {"choices": [{"delta": {"tool_calls": [{"index": 0, "function": {"arguments": " \\\"x\\\"}"}}]}}]}\n\n'
yield b"data: [DONE]\n\n"
req = ResponsesRequest(model="org_auto", input="hi", stream=True)
# v1_chat_completions is replaced so v1_responses consumes the canned SSE above.
mock_chat = AsyncMock(
return_value=StreamingResponse(_chat_sse(), media_type="text/event-stream")
)
with patch.object(main, "v1_chat_completions", mock_chat):
response = await main.v1_responses(req, _make_request("/v1/responses"))
body = await _collect_stream(response)
# Exactly three occurrences of the item id are required.
# NOTE(review): presumably two argument deltas plus one done event - confirm
# against the event shapes emitted by v1_responses.
self.assertEqual(body.count('"item_id": "call_1"'), 3)
self.assertIn('"name": "lookup"', body)
self.assertIn('"delta": "{\\"q\\":"', body)
self.assertIn('"delta": " \\\"x\\\"}"', body)
# The concatenated arguments string must appear in the output.
self.assertIn('"arguments": "{\\"q\\": \\\"x\\\"}"', body)
self.assertIn('data: [DONE]', body)
async def test_responses_stream_emits_completed_when_upstream_closes_without_done(self) -> None:
async def _chat_sse_without_done():
yield b'data: {"choices": [{"delta": {"content": "partial"}}]}\n\n'
@@ -954,7 +1289,31 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
self.assertIn('data: [DONE]', body)
async def test_responses_non_stream_returns_502_on_invalid_upstream_json(self) -> None:
async def test_responses_alias_matches_v1_responses_behavior(self) -> None:
    # Calls main.v1_responses with a request built for the "/responses"
    # path while v1_chat_completions is mocked to return a plain text
    # completion.
    responses_req = ResponsesRequest(model="org_auto", input="hello", stream=False)
    only_choice = {
        "index": 0,
        "finish_reason": "stop",
        "message": {"role": "assistant", "content": "done"},
    }
    chat_payload = {
        "id": "chatcmpl-alias1",
        "created": 123,
        "model": "org_auto",
        "choices": [only_choice],
        "usage": {"prompt_tokens": 4, "completion_tokens": 2, "total_tokens": 6},
    }
    chat_mock = AsyncMock(return_value=JSONResponse(content=chat_payload))
    with patch.object(main, "v1_chat_completions", chat_mock):
        resp = await main.v1_responses(responses_req, _make_request("/responses"))

    result = json.loads(resp.body)
    # Upstream id "chatcmpl-alias1" is expected to surface as "resp_alias1".
    self.assertEqual(result["id"], "resp_alias1")
    self.assertEqual(result["status"], "completed")
    chat_mock.assert_awaited_once()
req = ResponsesRequest(model="org_auto", input="hi", stream=False)
mock_chat = AsyncMock(return_value=Response(content="not-json", media_type="text/plain"))