fix: harden responses stream termination

Ensure /v1/responses streaming always emits completion frames on upstream EOF, errors, and cancellation, and add targeted diagnostics for interrupted Lingma streams.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-20 14:55:32 +08:00
parent 12a4d9584e
commit 5e6c1c1a63
3 changed files with 139 additions and 73 deletions
--- a/app/lingma_client.py
+++ b/app/lingma_client.py
@@ -495,13 +495,21 @@ class LspWsRpcClient:
        if stream is None:
            return
        start = time.monotonic()
+        last_chunk_at = start
        while True:
            remain = timeout - (time.monotonic() - start)
            if remain <= 0:
-                raise TimeoutError("chat stream timeout")
+                first_chunk_at = stream.get("first_chunk_at")
+                raise TimeoutError(
+                    "chat stream timeout "
+                    f"request_id={request_id} timeout={timeout:.1f}s "
+                    f"first_chunk_at={None if first_chunk_at is None else round(first_chunk_at - start, 3)}s "
+                    f"last_chunk_at={round(last_chunk_at - start, 3)}s"
+                )
            chunk = await asyncio.wait_for(stream["chunks"].get(), timeout=remain)
            if chunk is None:
                break
+            last_chunk_at = time.monotonic()
            yield chunk

    def get_stream_result(self, request_id: str) -> dict: