fix: harden responses stream termination

Ensure /v1/responses streaming always emits completion frames on upstream EOF, errors, and cancellation, and add targeted diagnostics for interrupted Lingma streams.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
mmc
2026-04-20 14:55:32 +08:00
parent 12a4d9584e
commit 5e6c1c1a63
3 changed files with 139 additions and 73 deletions

View File

@@ -495,13 +495,21 @@ class LspWsRpcClient:
if stream is None:
return
start = time.monotonic()
last_chunk_at = start
while True:
remain = timeout - (time.monotonic() - start)
if remain <= 0:
raise TimeoutError("chat stream timeout")
first_chunk_at = stream.get("first_chunk_at")
raise TimeoutError(
"chat stream timeout "
f"request_id={request_id} timeout={timeout:.1f}s "
f"first_chunk_at={None if first_chunk_at is None else round(first_chunk_at - start, 3)}s "
f"last_chunk_at={round(last_chunk_at - start, 3)}s"
)
chunk = await asyncio.wait_for(stream["chunks"].get(), timeout=remain)
if chunk is None:
break
last_chunk_at = time.monotonic()
yield chunk
def get_stream_result(self, request_id: str) -> dict: