fix: harden responses streaming and tool-call fallback
Ensure /v1/responses streams always terminate with response.completed and normalize Lingma tool_code fallbacks into structured tool calls, including single-argument forms.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -263,6 +263,120 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
|
||||
{"query": "gateway"},
|
||||
)
|
||||
|
||||
async def test_openai_non_stream_fallbacks_to_tool_code_structured_tool_call_for_forced_tool(self) -> None:
    """Non-stream chat: a fenced ``tool_code`` reply becomes a structured call to the forced tool.

    The fake client reports no tool events and only returns text holding
    ``lookup(query="gateway")`` inside a ``tool_code`` fence. The response is
    expected to finish with ``tool_calls``, carry empty content, and expose
    the call as ``lookup`` with ``{"query": "gateway"}`` as its arguments.
    """
    upstream_result = {
        "text": "```tool_code\nlookup(query=\"gateway\")\n```",
        "toolEvents": [],
        "sessionId": "sess-fallback-tool-code-openai",
    }
    client = _FakeClient(stream_events=[], complete_result=upstream_result)

    lookup_tool = {"type": "function", "function": {"name": "lookup", "parameters": {}}}
    forced_choice = {"type": "function", "function": {"name": "lookup"}}
    chat_request = ChatCompletionsRequest(
        model="org_auto",
        messages=[{"role": "user", "content": "hi"}],
        stream=False,
        tools=[lookup_tool],
        tool_choice=forced_choice,
    )

    with (
        patch.object(main, "pool", _FakePool(_FakeInstance(client))),
        patch.object(main, "chat_guard", _FakeGuard()),
        patch.object(main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})),
        patch.object(main.stats_collector, "record_chat", AsyncMock(return_value=None)),
    ):
        http_request = _make_request("/v1/chat/completions")
        response = await main.v1_chat_completions(chat_request, http_request)

    first_choice = json.loads(response.body)["choices"][0]
    assistant_message = first_choice["message"]
    self.assertEqual(first_choice["finish_reason"], "tool_calls")
    self.assertEqual(assistant_message["content"], "")

    # Index into the tool call only after the content check so the
    # assertions fire in the same order as before.
    called_function = assistant_message["tool_calls"][0]["function"]
    self.assertEqual(called_function["name"], "lookup")
    self.assertEqual(json.loads(called_function["arguments"]), {"query": "gateway"})
|
||||
|
||||
async def test_openai_non_stream_fallbacks_to_tool_code_structured_tool_call_for_forced_tool_with_positional_arg(self) -> None:
    """Non-stream chat: a ``tool_code`` call with one positional argument maps onto the tool's parameter.

    The fake client returns ``lookup("gateway")`` inside a ``tool_code`` fence
    and no tool events. The forced tool declares a single required string
    property ``query``; the expected structured arguments are
    ``{"query": "gateway"}``.
    """
    upstream_result = {
        "text": "```tool_code\nlookup(\"gateway\")\n```",
        "toolEvents": [],
        "sessionId": "sess-fallback-tool-code-openai-positional",
    }
    client = _FakeClient(stream_events=[], complete_result=upstream_result)

    lookup_schema = {
        "type": "object",
        "properties": {"query": {"type": "string"}},
        "required": ["query"],
    }
    lookup_tool = {
        "type": "function",
        "function": {"name": "lookup", "parameters": lookup_schema},
    }
    chat_request = ChatCompletionsRequest(
        model="org_auto",
        messages=[{"role": "user", "content": "hi"}],
        stream=False,
        tools=[lookup_tool],
        tool_choice={"type": "function", "function": {"name": "lookup"}},
    )

    with (
        patch.object(main, "pool", _FakePool(_FakeInstance(client))),
        patch.object(main, "chat_guard", _FakeGuard()),
        patch.object(main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})),
        patch.object(main.stats_collector, "record_chat", AsyncMock(return_value=None)),
    ):
        http_request = _make_request("/v1/chat/completions")
        response = await main.v1_chat_completions(chat_request, http_request)

    first_choice = json.loads(response.body)["choices"][0]
    assistant_message = first_choice["message"]
    self.assertEqual(first_choice["finish_reason"], "tool_calls")
    self.assertEqual(assistant_message["content"], "")

    # Index into the tool call only after the content check so the
    # assertions fire in the same order as before.
    called_function = assistant_message["tool_calls"][0]["function"]
    self.assertEqual(called_function["name"], "lookup")
    self.assertEqual(json.loads(called_function["arguments"]), {"query": "gateway"})
|
||||
|
||||
async def test_openai_stream_fallbacks_to_tool_code_structured_tool_call_for_forced_tool(self) -> None:
    """Streaming chat: a ``tool_code`` fence split over text events is surfaced as a tool call.

    The fake client streams three text events that together form a fenced
    ``lookup(query="gateway")`` call. The collected SSE body is expected to
    contain a ``tool_calls`` entry for ``lookup``, the JSON arguments, a
    ``tool_calls`` finish reason, and the ``[DONE]`` terminator.
    """
    fenced_call_events = [
        {"type": "text", "text": "```tool_code\n"},
        {"type": "text", "text": 'lookup(query=\"gateway\")\n'},
        {"type": "text", "text": "```"},
    ]
    client = _FakeClient(stream_events=fenced_call_events, complete_result={})

    chat_request = ChatCompletionsRequest(
        model="org_auto",
        messages=[{"role": "user", "content": "hi"}],
        stream=True,
        tools=[{"type": "function", "function": {"name": "lookup", "parameters": {}}}],
        tool_choice={"type": "function", "function": {"name": "lookup"}},
    )

    with (
        patch.object(main, "pool", _FakePool(_FakeInstance(client))),
        patch.object(main, "chat_guard", _FakeGuard()),
        patch.object(main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})),
        patch.object(main.stats_collector, "record_chat", AsyncMock(return_value=None)),
    ):
        http_request = _make_request("/v1/chat/completions")
        response = await main.v1_chat_completions(chat_request, http_request)
        # The stream is drained while the patches are still active.
        body = await _collect_stream(response)

    expected_fragments = (
        '"tool_calls"',
        '"name": "lookup"',
        '{"query": "gateway"}',
        '"finish_reason": "tool_calls"',
        'data: [DONE]',
    )
    for fragment in expected_fragments:
        self.assertIn(fragment, body)
|
||||
|
||||
async def test_openai_stream_bridges_tool_and_text_events(self) -> None:
|
||||
fake_client = _FakeClient(
|
||||
stream_events=[
|
||||
@@ -300,6 +414,7 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
|
||||
self.assertIn('"usage"', body)
|
||||
self.assertIn("data: [DONE]", body)
|
||||
|
||||
|
||||
async def test_anthropic_non_stream_bridges_tool_blocks(self) -> None:
|
||||
fake_client = _FakeClient(
|
||||
stream_events=[],
|
||||
@@ -605,6 +720,57 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
|
||||
self.assertEqual(spy_client.last_complete_args[2], "agent")
|
||||
|
||||
|
||||
|
||||
async def test_openai_non_stream_filters_tools_by_allowlist(self) -> None:
    """Non-stream chat: only allow-listed tools appear in the ``tool_config`` given to the client.

    Two tools are requested (``lookup`` and ``write_file``) while the patched
    settings allow only ``lookup``. The spy client's recorded ``tool_config``
    must list just ``lookup`` and carry the request's ``tool_choice``.
    """
    spy = _SpyClient(stream_events=[], complete_result={"text": "ok", "toolEvents": []})

    requested_tools = [
        {"type": "function", "function": {"name": "lookup", "parameters": {}}},
        {"type": "function", "function": {"name": "write_file", "parameters": {}}},
    ]
    chat_request = ChatCompletionsRequest(
        model="org_auto",
        messages=[{"role": "user", "content": "hi"}],
        stream=False,
        tools=requested_tools,
        tool_choice={"type": "function", "function": {"name": "lookup"}},
    )

    with (
        patch.object(main, "pool", _FakePool(_FakeInstance(spy))),
        patch.object(main, "chat_guard", _FakeGuard()),
        patch.object(main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})),
        patch.object(main.stats_collector, "record_chat", AsyncMock(return_value=None)),
        _SettingsPatch(tool_forward_enabled=True, tool_allowlist=["lookup"]),
    ):
        http_request = _make_request("/v1/chat/completions")
        await main.v1_chat_completions(chat_request, http_request)

    forwarded = spy.last_complete_kwargs["tool_config"]
    forwarded_names = [tool["function"]["name"] for tool in forwarded["tools"]]
    self.assertEqual(forwarded_names, ["lookup"])
    self.assertEqual(forwarded["tool_choice"], chat_request.tool_choice)
|
||||
|
||||
async def test_openai_non_stream_rejects_forced_tool_outside_allowlist(self) -> None:
    """Non-stream chat: forcing a tool that is not allow-listed raises a 400 ``HTTPException``.

    The request forces ``write_file`` while the patched settings allow only
    ``lookup``. The raised exception must have status 400, an
    ``invalid_request_error`` type, and a message that names ``write_file``.
    """
    spy = _SpyClient(stream_events=[], complete_result={"text": "ok", "toolEvents": []})

    chat_request = ChatCompletionsRequest(
        model="org_auto",
        messages=[{"role": "user", "content": "hi"}],
        stream=False,
        tools=[{"type": "function", "function": {"name": "lookup", "parameters": {}}}],
        tool_choice={"type": "function", "function": {"name": "write_file"}},
    )

    with (
        patch.object(main, "pool", _FakePool(_FakeInstance(spy))),
        patch.object(main, "chat_guard", _FakeGuard()),
        patch.object(main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})),
        patch.object(main.stats_collector, "record_chat", AsyncMock(return_value=None)),
        _SettingsPatch(tool_forward_enabled=True, tool_allowlist=["lookup"]),
    ):
        with self.assertRaises(main.HTTPException) as raised:
            await main.v1_chat_completions(chat_request, _make_request("/v1/chat/completions"))

    rejection = raised.exception
    self.assertEqual(rejection.status_code, 400)
    self.assertEqual(rejection.detail["error"]["type"], "invalid_request_error")
    self.assertIn("write_file", rejection.detail["error"]["message"])
|
||||
|
||||
async def test_openai_tooling_context_disables_session_reuse_cache(self) -> None:
|
||||
fake_cache = _FakeSessionCache()
|
||||
fake_client = _FakeClient(
|
||||
@@ -757,6 +923,74 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
|
||||
self.assertEqual(len(cfg["tools"]), 1)
|
||||
self.assertEqual(spy_client.last_complete_args[2], "agent")
|
||||
|
||||
|
||||
async def test_anthropic_non_stream_filters_tools_by_allowlist(self) -> None:
    """Non-stream messages: only allow-listed tools appear in the ``tool_config`` given to the client.

    The request offers ``lookup`` and ``write_file`` while the patched settings
    allow only ``lookup``. The spy client's recorded ``tool_config`` must list
    just ``lookup`` and carry the request's ``tool_choice``.
    """
    spy = _SpyClient(stream_events=[], complete_result={"text": "ok", "toolEvents": []})

    empty_schema_tools = [
        {"name": "lookup", "input_schema": {"type": "object", "properties": {}}},
        {"name": "write_file", "input_schema": {"type": "object", "properties": {}}},
    ]
    messages_request = AnthropicMessagesRequest(
        model="claude-3-5-sonnet-20241022",
        max_tokens=128,
        messages=[{"role": "user", "content": "hi"}],
        stream=False,
        tools=empty_schema_tools,
        tool_choice={"type": "tool", "name": "lookup"},
    )

    with (
        patch.object(main, "pool", _FakePool(_FakeInstance(spy))),
        patch.object(main, "chat_guard", _FakeGuard()),
        patch.object(main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})),
        patch.object(main.stats_collector, "record_chat", AsyncMock(return_value=None)),
        patch.object(main.settings, "api_keys", ["test-key"]),
        _SettingsPatch(tool_forward_enabled=True, tool_allowlist=["lookup"]),
    ):
        http_request = _make_request(
            "/v1/messages",
            headers={"x-api-key": "test-key", "anthropic-version": "2023-06-01"},
        )
        await main.v1_messages(messages_request, http_request)

    forwarded = spy.last_complete_kwargs["tool_config"]
    forwarded_names = [tool["name"] for tool in forwarded["tools"]]
    self.assertEqual(forwarded_names, ["lookup"])
    self.assertEqual(forwarded["tool_choice"], messages_request.tool_choice)
|
||||
|
||||
async def test_anthropic_non_stream_rejects_forced_tool_outside_allowlist(self) -> None:
    """Non-stream messages: forcing a tool that is not allow-listed returns a 400 error payload.

    The request forces ``write_file`` while the patched settings allow only
    ``lookup``. This endpoint is expected to return a response object (not
    raise) with status 400 and an ``invalid_request_error`` body that names
    ``write_file``.
    """
    spy = _SpyClient(stream_events=[], complete_result={"text": "ok", "toolEvents": []})

    messages_request = AnthropicMessagesRequest(
        model="claude-3-5-sonnet-20241022",
        max_tokens=128,
        messages=[{"role": "user", "content": "hi"}],
        stream=False,
        tools=[{"name": "lookup", "input_schema": {"type": "object", "properties": {}}}],
        tool_choice={"type": "tool", "name": "write_file"},
    )

    with (
        patch.object(main, "pool", _FakePool(_FakeInstance(spy))),
        patch.object(main, "chat_guard", _FakeGuard()),
        patch.object(main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})),
        patch.object(main.stats_collector, "record_chat", AsyncMock(return_value=None)),
        patch.object(main.settings, "api_keys", ["test-key"]),
        _SettingsPatch(tool_forward_enabled=True, tool_allowlist=["lookup"]),
    ):
        http_request = _make_request(
            "/v1/messages",
            headers={"x-api-key": "test-key", "anthropic-version": "2023-06-01"},
        )
        response = await main.v1_messages(messages_request, http_request)

    self.assertEqual(response.status_code, 400)

    error_payload = json.loads(response.body)
    self.assertEqual(error_payload["type"], "error")
    self.assertEqual(error_payload["error"]["type"], "invalid_request_error")
    self.assertIn("write_file", error_payload["error"]["message"])
|
||||
|
||||
async def test_anthropic_tooling_context_disables_session_reuse_cache(self) -> None:
|
||||
fake_cache = _FakeSessionCache()
|
||||
fake_client = _FakeClient(
|
||||
@@ -833,6 +1067,54 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
|
||||
messages_dump = [m.model_dump() for m in chat_req.messages]
|
||||
self.assertEqual(messages_dump, [{"role": "user", "content": "hello from responses", "name": None, "tool_call_id": None, "tool_calls": None}])
|
||||
|
||||
async def test_responses_non_stream_maps_chat_tool_calls_to_function_call_output(self) -> None:
    """Non-stream responses: a chat ``tool_calls`` message is mapped to one ``function_call`` output item.

    ``main.v1_chat_completions`` is replaced by a mock returning a chat payload
    with a single ``lookup`` tool call. The responses payload must be
    ``completed``, have empty ``output_text``, translated usage counters, and
    exactly one output item that reuses the call id, name, and raw arguments.
    """
    responses_request = ResponsesRequest(
        model="org_auto",
        input="tool please",
        stream=False,
    )

    raw_arguments = "{\"q\":\"gateway\"}"
    assistant_message = {
        "role": "assistant",
        "content": "",
        "tool_calls": [
            {
                "id": "call_1",
                "type": "function",
                "function": {
                    "name": "lookup",
                    "arguments": raw_arguments,
                },
            }
        ],
    }
    upstream_chat = {
        "id": "chatcmpl-tools1",
        "created": 234,
        "model": "org_auto",
        "choices": [
            {
                "index": 0,
                "finish_reason": "tool_calls",
                "message": assistant_message,
            }
        ],
        "usage": {"prompt_tokens": 8, "completion_tokens": 3, "total_tokens": 11},
    }

    chat_stub = AsyncMock(return_value=JSONResponse(content=upstream_chat))
    with patch.object(main, "v1_chat_completions", chat_stub):
        response = await main.v1_responses(responses_request, _make_request("/v1/responses"))

    result = json.loads(response.body)
    self.assertEqual(result["status"], "completed")
    self.assertEqual(result["output_text"], "")
    self.assertEqual(result["usage"], {"input_tokens": 8, "output_tokens": 3, "total_tokens": 11})
    self.assertEqual(len(result["output"]), 1)

    # Index the single item only after the length check so the assertions
    # fire in the same order as before.
    call_item = result["output"][0]
    self.assertEqual(call_item["type"], "function_call")
    self.assertEqual(call_item["call_id"], "call_1")
    self.assertEqual(call_item["id"], "call_1")
    self.assertEqual(call_item["name"], "lookup")
    self.assertEqual(call_item["arguments"], raw_arguments)
|
||||
|
||||
async def test_responses_forwards_input_tools_and_tool_choice_to_chat_request(self) -> None:
|
||||
req = ResponsesRequest(
|
||||
model="org_auto",
|
||||
@@ -883,17 +1165,70 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
|
||||
response = await main.v1_responses(req, _make_request("/v1/responses"))
|
||||
body = await _collect_stream(response)
|
||||
|
||||
self.assertIn('"type": "response.created"', body)
|
||||
self.assertIn('"type": "response.output_item.added"', body)
|
||||
self.assertIn('"type": "response.output_text.delta"', body)
|
||||
self.assertIn('"delta": "hello"', body)
|
||||
self.assertIn('"type": "response.function_call.delta"', body)
|
||||
self.assertIn('"type": "response.function_call_arguments.delta"', body)
|
||||
self.assertIn('"item_id": "call_1"', body)
|
||||
self.assertIn('"output_index": 1', body)
|
||||
self.assertIn('"delta": "{\\"q\\": \\"x\\"}"', body)
|
||||
self.assertIn('"type": "response.function_call_arguments.done"', body)
|
||||
self.assertIn('"arguments": "{\\"q\\": \\"x\\"}"', body)
|
||||
self.assertIn('"type": "response.output_item.done"', body)
|
||||
self.assertIn('"type": "function_call"', body)
|
||||
self.assertIn('"name": "lookup"', body)
|
||||
self.assertIn('"arguments": "{\\"q\\": \\"x\\"}"', body)
|
||||
self.assertIn('"type": "response.completed"', body)
|
||||
self.assertIn('"input_tokens": 3', body)
|
||||
self.assertIn('"output_tokens": 2', body)
|
||||
self.assertIn('data: [DONE]', body)
|
||||
|
||||
async def test_responses_stream_accumulates_fragmented_tool_arguments(self) -> None:
    """Streaming responses: tool-call argument fragments are relayed as deltas and joined at the end.

    The mocked chat stream sends the ``lookup`` arguments in two chunks, each
    repeating the call id and name. The responses stream must relay both
    deltas and report the concatenated arguments in the ``done`` events
    before ``[DONE]``.
    """
    upstream_chunks = (
        b'data: {"choices": [{"delta": {"tool_calls": [{"id": "call_1", "function": {"name": "lookup", "arguments": "{\\"q\\":"}}]}}]}\n\n',
        b'data: {"choices": [{"delta": {"tool_calls": [{"id": "call_1", "function": {"name": "lookup", "arguments": " \\\"x\\\"}"}}]}}]}\n\n',
        b"data: [DONE]\n\n",
    )

    async def _replay_chat_sse():
        for chunk in upstream_chunks:
            yield chunk

    responses_request = ResponsesRequest(model="org_auto", input="hi", stream=True)
    chat_stub = AsyncMock(
        return_value=StreamingResponse(_replay_chat_sse(), media_type="text/event-stream")
    )

    with patch.object(main, "v1_chat_completions", chat_stub):
        response = await main.v1_responses(responses_request, _make_request("/v1/responses"))
        body = await _collect_stream(response)

    # The joined-arguments fragment is listed twice on purpose, once after
    # each ``done`` event type, as in the original assertion sequence.
    expected_fragments = (
        '"type": "response.function_call_arguments.delta"',
        '"delta": "{\\"q\\":"',
        '"delta": " \\\"x\\\"}"',
        '"type": "response.function_call_arguments.done"',
        '"arguments": "{\\"q\\": \\\"x\\\"}"',
        '"type": "response.output_item.done"',
        '"arguments": "{\\"q\\": \\\"x\\\"}"',
        'data: [DONE]',
    )
    for fragment in expected_fragments:
        self.assertIn(fragment, body)
|
||||
|
||||
async def test_responses_stream_accumulates_fragmented_tool_arguments_without_repeated_id_or_name(self) -> None:
    """Streaming responses: later fragments that carry only ``index`` still join the first call.

    The first mocked chat chunk carries the index, id, and name; the second
    carries only the index and more argument text. The output must reference
    ``call_1`` exactly three times and contain both deltas plus the joined
    arguments before ``[DONE]``.
    """
    upstream_chunks = (
        b'data: {"choices": [{"delta": {"tool_calls": [{"index": 0, "id": "call_1", "function": {"name": "lookup", "arguments": "{\\"q\\":"}}]}}]}\n\n',
        b'data: {"choices": [{"delta": {"tool_calls": [{"index": 0, "function": {"arguments": " \\\"x\\\"}"}}]}}]}\n\n',
        b"data: [DONE]\n\n",
    )

    async def _replay_chat_sse():
        for chunk in upstream_chunks:
            yield chunk

    responses_request = ResponsesRequest(model="org_auto", input="hi", stream=True)
    chat_stub = AsyncMock(
        return_value=StreamingResponse(_replay_chat_sse(), media_type="text/event-stream")
    )

    with patch.object(main, "v1_chat_completions", chat_stub):
        response = await main.v1_responses(responses_request, _make_request("/v1/responses"))
        body = await _collect_stream(response)

    item_id_mentions = body.count('"item_id": "call_1"')
    self.assertEqual(item_id_mentions, 3)

    expected_fragments = (
        '"name": "lookup"',
        '"delta": "{\\"q\\":"',
        '"delta": " \\\"x\\\"}"',
        '"arguments": "{\\"q\\": \\\"x\\\"}"',
        'data: [DONE]',
    )
    for fragment in expected_fragments:
        self.assertIn(fragment, body)
|
||||
|
||||
async def test_responses_stream_emits_completed_when_upstream_closes_without_done(self) -> None:
|
||||
async def _chat_sse_without_done():
|
||||
yield b'data: {"choices": [{"delta": {"content": "partial"}}]}\n\n'
|
||||
@@ -954,7 +1289,31 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
|
||||
self.assertIn('data: [DONE]', body)
|
||||
|
||||
|
||||
async def test_responses_non_stream_returns_502_on_invalid_upstream_json(self) -> None:
|
||||
async def test_responses_alias_matches_v1_responses_behavior(self) -> None:
    """Non-stream responses requested on the ``/responses`` path produce the usual completed payload.

    ``main.v1_chat_completions`` is mocked to return a plain ``stop``
    completion with id ``chatcmpl-alias1``. The result must have id
    ``resp_alias1`` and status ``completed``, and the chat mock must have been
    awaited exactly once.
    """
    responses_request = ResponsesRequest(model="org_auto", input="hello", stream=False)

    only_choice = {
        "index": 0,
        "finish_reason": "stop",
        "message": {"role": "assistant", "content": "done"},
    }
    upstream_chat = {
        "id": "chatcmpl-alias1",
        "created": 123,
        "model": "org_auto",
        "choices": [only_choice],
        "usage": {"prompt_tokens": 4, "completion_tokens": 2, "total_tokens": 6},
    }

    chat_stub = AsyncMock(return_value=JSONResponse(content=upstream_chat))
    with patch.object(main, "v1_chat_completions", chat_stub):
        response = await main.v1_responses(responses_request, _make_request("/responses"))

    result = json.loads(response.body)
    self.assertEqual(result["id"], "resp_alias1")
    self.assertEqual(result["status"], "completed")
    chat_stub.assert_awaited_once()
|
||||
|
||||
req = ResponsesRequest(model="org_auto", input="hi", stream=False)
|
||||
mock_chat = AsyncMock(return_value=Response(content="not-json", media_type="text/plain"))
|
||||
|
||||
|
||||
Reference in New Issue
Block a user