fix: stop replaying OpenAI stream text

Avoid replaying buffered text at the end of OpenAI streams so that text-only responses are emitted exactly once, while the forced-tool fallback behavior stays intact.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-25 15:20:13 +08:00
parent 109c34a8dc
commit 3c9d419726
2 changed files with 28 additions and 4 deletions
--- a/app/main.py
+++ b/app/main.py
@@ -650,9 +650,7 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
                            buffered_text_parts.clear()
                            yield f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"
-                    if buffered_text_parts:
+                    if buffered_text_parts and forced_tool_name and saw_tool_call:
                        for buffered_text in buffered_text_parts:
                            yield _text_payload(buffered_text)
                        buffered_text_parts.clear()
                    done_payload = {
--- a/tests/test_tool_call_bridge.py
+++ b/tests/test_tool_call_bridge.py
@@ -414,12 +414,38 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
            body = await _collect_stream(response)
        self.assertIn('"tool_calls"', body)
-        self.assertIn('"content": "hello"', body)
+        self.assertEqual(body.count('"content": "hello"'), 1)
        self.assertIn('"finish_reason": "tool_calls"', body)
        self.assertIn('"usage"', body)
        self.assertIn("data: [DONE]", body)
    async def test_openai_stream_emits_text_delta_only_once_without_tools(self) -> None:
        """Stream one text event with no tools and expect a single text delta.

        The request carries no tool definitions, so the collected stream body
        must contain the text content exactly once (no replay of buffered
        text at the end of the stream), finish with ``stop``, and end with
        the ``[DONE]`` sentinel.
        """
        # Serialized fragment that must appear exactly once in the stream body.
        expected_delta = '"content": "你好"'
        text_only_client = _FakeClient(
            stream_events=[{"type": "text", "text": "你好"}],
            complete_result={},
        )
        chat_request = ChatCompletionsRequest(
            model="org_auto",
            messages=[{"role": "user", "content": "hi"}],
            stream=True,
        )
        fake_pool = _FakePool(_FakeInstance(text_only_client))
        with (
            patch.object(main, "pool", fake_pool),
            patch.object(main, "chat_guard", _FakeGuard()),
            patch.object(main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})),
            patch.object(main.stats_collector, "record_chat", AsyncMock(return_value=None)),
        ):
            stream_response = await main.v1_chat_completions(
                chat_request,
                _make_request("/v1/chat/completions"),
            )
            sse_body = await _collect_stream(stream_response)
        self.assertEqual(sse_body.count(expected_delta), 1)
        self.assertIn('"finish_reason": "stop"', sse_body)
        self.assertIn("data: [DONE]", sse_body)
    async def test_openai_stream_filters_tool_events_by_allowlist(self) -> None:
        fake_client = _FakeClient(
            stream_events=[