diff --git a/app/main.py b/app/main.py index fbe27e4..9e00920 100644 --- a/app/main.py +++ b/app/main.py @@ -1193,6 +1193,21 @@ async def _responses_stream_from_chat_stream( } ) + yield _sse_data( + { + "type": "response.completed", + "response": { + "id": response_id, + "object": "response", + "created_at": created_at, + "status": "completed", + "model": model, + "usage": usage, + }, + } + ) + yield "data: [DONE]\\n\\n" + @app.post("/v1/responses", dependencies=[Depends(auth_guard)]) diff --git a/tests/test_tool_call_bridge.py b/tests/test_tool_call_bridge.py index 7fc60c6..57119fe 100644 --- a/tests/test_tool_call_bridge.py +++ b/tests/test_tool_call_bridge.py @@ -805,6 +805,27 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase): self.assertIn('"output_tokens": 2', body) self.assertIn('data: [DONE]', body) + async def test_responses_stream_emits_completed_when_upstream_closes_without_done(self) -> None: + async def _chat_sse_without_done(): + yield b'data: {"choices": [{"delta": {"content": "partial"}}]}\n\n' + yield b'data: {"usage": {"prompt_tokens": 7, "completion_tokens": 1, "total_tokens": 8}, "choices": [{"delta": {}}]}\n\n' + + req = ResponsesRequest(model="org_auto", input="hi", stream=True) + mock_chat = AsyncMock( + return_value=StreamingResponse(_chat_sse_without_done(), media_type="text/event-stream") + ) + + with patch.object(main, "v1_chat_completions", mock_chat): + response = await main.v1_responses(req, _make_request("/v1/responses")) + body = await _collect_stream(response) + + self.assertIn('"type": "response.output_text.delta"', body) + self.assertIn('"delta": "partial"', body) + self.assertIn('"type": "response.completed"', body) + self.assertIn('"input_tokens": 7', body) + self.assertIn('"output_tokens": 1', body) + self.assertIn('data: [DONE]', body) + async def test_responses_non_stream_returns_502_on_invalid_upstream_json(self) -> None: req = ResponsesRequest(model="org_auto", input="hi", stream=False)