fix: harden responses stream termination
Ensure /v1/responses streaming always emits completion frames on upstream EOF, errors, and cancellation, and add targeted diagnostics for interrupted Lingma streams.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -495,13 +495,21 @@ class LspWsRpcClient:
|
||||
if stream is None:
|
||||
return
|
||||
start = time.monotonic()
|
||||
last_chunk_at = start
|
||||
while True:
|
||||
remain = timeout - (time.monotonic() - start)
|
||||
if remain <= 0:
|
||||
raise TimeoutError("chat stream timeout")
|
||||
first_chunk_at = stream.get("first_chunk_at")
|
||||
raise TimeoutError(
|
||||
"chat stream timeout "
|
||||
f"request_id={request_id} timeout={timeout:.1f}s "
|
||||
f"first_chunk_at={None if first_chunk_at is None else round(first_chunk_at - start, 3)}s "
|
||||
f"last_chunk_at={round(last_chunk_at - start, 3)}s"
|
||||
)
|
||||
chunk = await asyncio.wait_for(stream["chunks"].get(), timeout=remain)
|
||||
if chunk is None:
|
||||
break
|
||||
last_chunk_at = time.monotonic()
|
||||
yield chunk
|
||||
|
||||
def get_stream_result(self, request_id: str) -> dict:
|
||||
|
||||
163
app/main.py
163
app/main.py
@@ -859,10 +859,21 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
|
||||
yield "data: [DONE]\n\n"
|
||||
success = True
|
||||
except asyncio.CancelledError:
|
||||
logger.info("chat.stream cancelled by client (inst=%s)", _inst.name)
|
||||
logger.info(
|
||||
"chat.stream cancelled by client (inst=%s, session_id=%s)",
|
||||
_inst.name,
|
||||
cached_session_id,
|
||||
)
|
||||
raise
|
||||
except Exception as exc:
|
||||
logger.warning("chat.stream error (inst=%s): %s", _inst.name, exc)
|
||||
logger.warning(
|
||||
"chat.stream error (inst=%s, session_id=%s, prompt_tokens=%s, completion_tokens=%s): %s",
|
||||
_inst.name,
|
||||
cached_session_id,
|
||||
prompt_tokens,
|
||||
completion_tokens_holder["n"],
|
||||
exc,
|
||||
)
|
||||
finally:
|
||||
# Persist upstream sessionId only on a clean chat/finish.
|
||||
# Partial streams (cancelled, timed out) leave Lingma's
|
||||
@@ -1157,6 +1168,23 @@ async def _responses_stream_from_chat_stream(
|
||||
):
|
||||
created_at = int(time.time())
|
||||
usage: dict[str, int] = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
|
||||
completed_sent = False
|
||||
|
||||
def _completed_frame() -> str:
|
||||
return _sse_data(
|
||||
{
|
||||
"type": "response.completed",
|
||||
"response": {
|
||||
"id": response_id,
|
||||
"object": "response",
|
||||
"created_at": created_at,
|
||||
"status": "completed",
|
||||
"model": model,
|
||||
"usage": usage,
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
yield _sse_data(
|
||||
{
|
||||
"type": "response.created",
|
||||
@@ -1170,86 +1198,77 @@ async def _responses_stream_from_chat_stream(
|
||||
}
|
||||
)
|
||||
|
||||
async for part in chat_stream.body_iterator:
|
||||
chunk = part.decode("utf-8") if isinstance(part, bytes) else str(part)
|
||||
for frame in chunk.split("\n\n"):
|
||||
frame = frame.strip()
|
||||
if not frame or not frame.startswith("data:"):
|
||||
continue
|
||||
body = frame[len("data:") :].strip()
|
||||
if body == "[DONE]":
|
||||
yield _sse_data(
|
||||
{
|
||||
"type": "response.completed",
|
||||
"response": {
|
||||
"id": response_id,
|
||||
"object": "response",
|
||||
"created_at": created_at,
|
||||
"status": "completed",
|
||||
"model": model,
|
||||
"usage": usage,
|
||||
},
|
||||
}
|
||||
)
|
||||
yield "data: [DONE]\\n\\n"
|
||||
return
|
||||
try:
|
||||
async for part in chat_stream.body_iterator:
|
||||
chunk = part.decode("utf-8") if isinstance(part, bytes) else str(part)
|
||||
for frame in chunk.split("\n\n"):
|
||||
frame = frame.strip()
|
||||
if not frame or not frame.startswith("data:"):
|
||||
continue
|
||||
body = frame[len("data:") :].strip()
|
||||
if body == "[DONE]":
|
||||
yield _completed_frame()
|
||||
yield "data: [DONE]\n\n"
|
||||
completed_sent = True
|
||||
return
|
||||
|
||||
try:
|
||||
payload = json.loads(body)
|
||||
except Exception:
|
||||
continue
|
||||
try:
|
||||
payload = json.loads(body)
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
frame_usage = _responses_usage_from_chat(payload.get("usage"))
|
||||
if any(frame_usage.values()):
|
||||
usage = frame_usage
|
||||
frame_usage = _responses_usage_from_chat(payload.get("usage"))
|
||||
if any(frame_usage.values()):
|
||||
usage = frame_usage
|
||||
|
||||
choices = payload.get("choices")
|
||||
if not isinstance(choices, list) or not choices:
|
||||
continue
|
||||
choice = choices[0] if isinstance(choices[0], dict) else {}
|
||||
delta = choice.get("delta") if isinstance(choice.get("delta"), dict) else {}
|
||||
choices = payload.get("choices")
|
||||
if not isinstance(choices, list) or not choices:
|
||||
continue
|
||||
choice = choices[0] if isinstance(choices[0], dict) else {}
|
||||
delta = choice.get("delta") if isinstance(choice.get("delta"), dict) else {}
|
||||
|
||||
text = delta.get("content")
|
||||
if isinstance(text, str) and text:
|
||||
yield _sse_data(
|
||||
{
|
||||
"type": "response.output_text.delta",
|
||||
"response_id": response_id,
|
||||
"delta": text,
|
||||
}
|
||||
)
|
||||
|
||||
tool_calls = delta.get("tool_calls")
|
||||
if isinstance(tool_calls, list):
|
||||
for idx, tool_call in enumerate(tool_calls):
|
||||
if not isinstance(tool_call, dict):
|
||||
continue
|
||||
fn = tool_call.get("function") if isinstance(tool_call.get("function"), dict) else {}
|
||||
call_id = str(tool_call.get("id") or f"call_{idx}")
|
||||
text = delta.get("content")
|
||||
if isinstance(text, str) and text:
|
||||
yield _sse_data(
|
||||
{
|
||||
"type": "response.function_call.delta",
|
||||
"type": "response.output_text.delta",
|
||||
"response_id": response_id,
|
||||
"item_id": call_id,
|
||||
"name": str(fn.get("name") or "tool"),
|
||||
"arguments": str(fn.get("arguments") or "{}"),
|
||||
"delta": text,
|
||||
}
|
||||
)
|
||||
|
||||
yield _sse_data(
|
||||
{
|
||||
"type": "response.completed",
|
||||
"response": {
|
||||
"id": response_id,
|
||||
"object": "response",
|
||||
"created_at": created_at,
|
||||
"status": "completed",
|
||||
"model": model,
|
||||
"usage": usage,
|
||||
},
|
||||
}
|
||||
)
|
||||
yield "data: [DONE]\\n\\n"
|
||||
tool_calls = delta.get("tool_calls")
|
||||
if isinstance(tool_calls, list):
|
||||
for idx, tool_call in enumerate(tool_calls):
|
||||
if not isinstance(tool_call, dict):
|
||||
continue
|
||||
fn = tool_call.get("function") if isinstance(tool_call.get("function"), dict) else {}
|
||||
call_id = str(tool_call.get("id") or f"call_{idx}")
|
||||
yield _sse_data(
|
||||
{
|
||||
"type": "response.function_call.delta",
|
||||
"response_id": response_id,
|
||||
"item_id": call_id,
|
||||
"name": str(fn.get("name") or "tool"),
|
||||
"arguments": str(fn.get("arguments") or "{}"),
|
||||
}
|
||||
)
|
||||
except asyncio.CancelledError:
|
||||
if not completed_sent:
|
||||
yield _completed_frame()
|
||||
yield "data: [DONE]\n\n"
|
||||
completed_sent = True
|
||||
return
|
||||
except Exception:
|
||||
if not completed_sent:
|
||||
yield _completed_frame()
|
||||
yield "data: [DONE]\n\n"
|
||||
completed_sent = True
|
||||
return
|
||||
|
||||
if not completed_sent:
|
||||
yield _completed_frame()
|
||||
yield "data: [DONE]\n\n"
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user