fix: harden responses stream termination

Ensure /v1/responses streaming always emits completion frames on upstream EOF, errors, and cancellation, and add targeted diagnostics for interrupted Lingma streams.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-20 14:55:32 +08:00
parent 12a4d9584e
commit 5e6c1c1a63
3 changed files with 139 additions and 73 deletions
--- a/tests/test_tool_call_bridge.py
+++ b/tests/test_tool_call_bridge.py
@@ -4,6 +4,7 @@ import json
 import sys
 import types
 import unittest
+import asyncio
 from unittest.mock import AsyncMock, patch


@@ -914,6 +915,44 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
        self.assertIn('"output_tokens": 1', body)
        self.assertIn('data: [DONE]', body)

+    async def test_responses_stream_emits_completed_when_upstream_iterator_errors(self) -> None:
+        async def _chat_sse_error():  # fake upstream SSE body: one content delta, then a failure
+            yield b'data: {"choices": [{"delta": {"content": "partial"}}]}\n\n'
+            raise RuntimeError("boom")  # iterator errors out right after the single delta chunk
+
+        req = ResponsesRequest(model="org_auto", input="hi", stream=True)
+        mock_chat = AsyncMock(
+            return_value=StreamingResponse(_chat_sse_error(), media_type="text/event-stream")
+        )
+
+        with patch.object(main, "v1_chat_completions", mock_chat):  # swap in the mock while the endpoint runs
+            response = await main.v1_responses(req, _make_request("/v1/responses"))
+            body = await _collect_stream(response)
+
+        self.assertIn('"type": "response.output_text.delta"', body)
+        self.assertIn('"delta": "partial"', body)  # text yielded before the error must still be in the output
+        self.assertIn('"type": "response.completed"', body)  # completion event expected despite the RuntimeError
+        self.assertIn('data: [DONE]', body)  # stream terminator expected as well
+
+    async def test_responses_stream_emits_completed_when_upstream_cancels(self) -> None:
+        async def _chat_sse_cancelled():  # fake upstream SSE body: one content delta, then cancellation
+            yield b'data: {"choices": [{"delta": {"content": "partial"}}]}\n\n'
+            raise asyncio.CancelledError()  # raised from inside the upstream iterator itself
+
+        req = ResponsesRequest(model="org_auto", input="hi", stream=True)
+        mock_chat = AsyncMock(
+            return_value=StreamingResponse(_chat_sse_cancelled(), media_type="text/event-stream")
+        )
+
+        with patch.object(main, "v1_chat_completions", mock_chat):  # swap in the mock while the endpoint runs
+            response = await main.v1_responses(req, _make_request("/v1/responses"))
+            body = await _collect_stream(response)
+
+        self.assertIn('"type": "response.output_text.delta"', body)
+        self.assertIn('"delta": "partial"', body)  # text yielded before the cancellation must still be in the output
+        self.assertIn('"type": "response.completed"', body)  # completion event expected despite the CancelledError
+        self.assertIn('data: [DONE]', body)  # stream terminator expected as well
+

    async def test_responses_non_stream_returns_502_on_invalid_upstream_json(self) -> None:
        req = ResponsesRequest(model="org_auto", input="hi", stream=False)