fix: stop replaying OpenAI stream text

Avoid replaying buffered text at the end of OpenAI streams so text-only responses are emitted once while forced tool fallback behavior stays intact.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
mmc
2026-04-25 15:20:13 +08:00
parent 109c34a8dc
commit 3c9d419726
2 changed files with 28 additions and 4 deletions

View File

@@ -650,9 +650,7 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
buffered_text_parts.clear() buffered_text_parts.clear()
yield f"data: {json.dumps(payload, ensure_ascii=False)}\n\n" yield f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"
if buffered_text_parts: if buffered_text_parts and forced_tool_name and saw_tool_call:
for buffered_text in buffered_text_parts:
yield _text_payload(buffered_text)
buffered_text_parts.clear() buffered_text_parts.clear()
done_payload = { done_payload = {

View File

@@ -414,12 +414,38 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
body = await _collect_stream(response) body = await _collect_stream(response)
self.assertIn('"tool_calls"', body) self.assertIn('"tool_calls"', body)
self.assertIn('"content": "hello"', body) self.assertEqual(body.count('"content": "hello"'), 1)
self.assertIn('"finish_reason": "tool_calls"', body) self.assertIn('"finish_reason": "tool_calls"', body)
self.assertIn('"usage"', body) self.assertIn('"usage"', body)
self.assertIn("data: [DONE]", body) self.assertIn("data: [DONE]", body)
# Regression test: a streaming chat completion whose request carries no tools
# and whose upstream stream yields a single text event must surface that text
# in the response body exactly once.
async def test_openai_stream_emits_text_delta_only_once_without_tools(self) -> None:
# Fake upstream client whose stream produces one text event and nothing else.
fake_client = _FakeClient(
stream_events=[
{"type": "text", "text": "你好"},
],
complete_result={},
)
# Streaming request; no tool-related fields are set on it.
req = ChatCompletionsRequest(
model="org_auto",
messages=[{"role": "user", "content": "hi"}],
stream=True,
)
# Swap the handler's module-level collaborators (instance pool, chat guard,
# login check, stats recording) for fakes/mocks while the endpoint runs.
with (
patch.object(main, "pool", _FakePool(_FakeInstance(fake_client))),
patch.object(main, "chat_guard", _FakeGuard()),
patch.object(main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})),
patch.object(main.stats_collector, "record_chat", AsyncMock(return_value=None)),
):
response = await main.v1_chat_completions(req, _make_request("/v1/chat/completions"))
# Gather the whole streamed response into `body` for substring checks.
body = await _collect_stream(response)
# count(...) == 1 rather than assertIn: a duplicated text delta must fail.
self.assertEqual(body.count('"content": "你好"'), 1)
# The stream is expected to finish with reason "stop" and the [DONE] sentinel.
self.assertIn('"finish_reason": "stop"', body)
self.assertIn("data: [DONE]", body)
async def test_openai_stream_filters_tool_events_by_allowlist(self) -> None: async def test_openai_stream_filters_tool_events_by_allowlist(self) -> None:
fake_client = _FakeClient( fake_client = _FakeClient(
stream_events=[ stream_events=[