refactor: extract Phase 1 gateway helpers

Move tool bridge and responses adapter helpers out of app.main so the main entrypoint can shrink without changing route orchestration behavior.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-21 08:05:09 +08:00
parent d0df089282
commit 0e146e60d9
6 changed files with 962 additions and 409 deletions
--- a/app/http/__init__.py
+++ b/app/http/__init__.py
--- a/app/http/responses_adapter.py
+++ b/app/http/responses_adapter.py
@@ -0,0 +1,176 @@
+from __future__ import annotations
+
+import json
+import time
+import uuid
+from typing import Any
+
+from fastapi import HTTPException
+
+from ..openai_schema import ChatCompletionsRequest, ResponsesRequest, flatten_content
+
+
+def _responses_input_to_messages(req: ResponsesRequest) -> list[dict[str, Any]]:
+    messages: list[dict[str, Any]] = []
+    if req.instructions:
+        messages.append({"role": "system", "content": req.instructions})
+
+    raw_input = req.input
+    if raw_input is None:
+        return messages
+
+    valid_roles = {"system", "user", "assistant", "tool", "developer", "function"}
+
+    def _append(role: str, content: Any, *, tool_call_id: str | None = None) -> None:
+        msg: dict[str, Any] = {"role": role, "content": flatten_content(content)}
+        if role == "tool" and tool_call_id:
+            msg["tool_call_id"] = tool_call_id
+        messages.append(msg)
+
+    if isinstance(raw_input, str):
+        _append("user", raw_input)
+        return messages
+
+    raw_items: list[Any]
+    if isinstance(raw_input, dict):
+        raw_items = [raw_input]
+    elif isinstance(raw_input, list):
+        raw_items = list(raw_input)
+    else:
+        _append("user", str(raw_input))
+        return messages
+
+    for item in raw_items:
+        if isinstance(item, str):
+            _append("user", item)
+            continue
+        if not isinstance(item, dict):
+            _append("user", str(item))
+            continue
+
+        role = item.get("role")
+        if isinstance(role, str) and role in valid_roles:
+            tool_call_id = item.get("tool_call_id") or item.get("call_id")
+            _append(role, item.get("content"), tool_call_id=str(tool_call_id) if tool_call_id else None)
+            continue
+
+        if item.get("type") == "function_call_output":
+            output = item.get("output")
+            if isinstance(output, (dict, list)):
+                output = json.dumps(output, ensure_ascii=False)
+            tool_call_id = item.get("call_id")
+            _append("tool", output, tool_call_id=str(tool_call_id) if tool_call_id else None)
+            continue
+
+        if "content" in item:
+            text = flatten_content(item.get("content"))
+        else:
+            text = flatten_content([item])
+        if text:
+            _append("user", text)
+
+    return messages
+
+
+def _responses_to_chat_request(req: ResponsesRequest) -> ChatCompletionsRequest:
+    return ChatCompletionsRequest(
+        model=req.model,
+        messages=_responses_input_to_messages(req),
+        stream=req.stream,
+        temperature=req.temperature,
+        top_p=req.top_p,
+        max_tokens=req.max_output_tokens,
+        user=req.user,
+        tools=req.tools,
+        tool_choice=req.tool_choice,
+    )
+
+
+def _responses_id_from_chat_id(chat_id: Any) -> str:
+    if isinstance(chat_id, str) and chat_id:
+        suffix = chat_id.removeprefix("chatcmpl-")
+        return f"resp_{suffix}"
+    return f"resp_{uuid.uuid4().hex}"
+
+
+def _responses_usage_from_chat(usage: Any) -> dict[str, int]:
+    if not isinstance(usage, dict):
+        return {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
+    input_tokens = int(usage.get("prompt_tokens") or 0)
+    output_tokens = int(usage.get("completion_tokens") or 0)
+    return {
+        "input_tokens": input_tokens,
+        "output_tokens": output_tokens,
+        "total_tokens": int(usage.get("total_tokens") or (input_tokens + output_tokens)),
+    }
+
+
+def _responses_non_stream_from_chat_payload(chat_payload: Any) -> dict[str, Any]:
+    if not isinstance(chat_payload, dict):
+        raise HTTPException(
+            status_code=502,
+            detail={"error": {"message": "invalid upstream response", "type": "upstream_error"}},
+        )
+    choice = {}
+    choices = chat_payload.get("choices")
+    if isinstance(choices, list) and choices:
+        choice = choices[0] if isinstance(choices[0], dict) else {}
+    message = choice.get("message") if isinstance(choice.get("message"), dict) else {}
+
+    output: list[dict[str, Any]] = []
+    content = message.get("content")
+    if isinstance(content, str) and content:
+        output.append(
+            {
+                "type": "message",
+                "id": f"msg_{uuid.uuid4().hex}",
+                "status": "completed",
+                "role": "assistant",
+                "content": [{"type": "output_text", "text": content}],
+            }
+        )
+
+    tool_calls = message.get("tool_calls")
+    if isinstance(tool_calls, list):
+        for idx, tool_call in enumerate(tool_calls):
+            if not isinstance(tool_call, dict):
+                continue
+            fn = tool_call.get("function") if isinstance(tool_call.get("function"), dict) else {}
+            call_id = str(tool_call.get("id") or f"call_{idx}")
+            output.append(
+                {
+                    "type": "function_call",
+                    "id": call_id,
+                    "call_id": call_id,
+                    "name": str(fn.get("name") or "tool"),
+                    "arguments": str(fn.get("arguments") or "{}"),
+                }
+            )
+
+    output_text_parts: list[str] = []
+    for item in output:
+        if item.get("type") == "message":
+            blocks = item.get("content")
+            if isinstance(blocks, list):
+                for block in blocks:
+                    if isinstance(block, dict) and block.get("type") == "output_text":
+                        text = block.get("text")
+                        if isinstance(text, str) and text:
+                            output_text_parts.append(text)
+
+    return {
+        "id": _responses_id_from_chat_id(chat_payload.get("id")),
+        "object": "response",
+        "created_at": int(chat_payload.get("created") or time.time()),
+        "status": "completed",
+        "error": None,
+        "incomplete_details": None,
+        "model": chat_payload.get("model"),
+        "output": output,
+        "output_text": "".join(output_text_parts),
+        "usage": _responses_usage_from_chat(chat_payload.get("usage")),
+    }
+
+
+def _sse_data(payload: dict[str, Any]) -> str:
+    return f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"
--- a/app/http/tool_bridge.py
+++ b/app/http/tool_bridge.py
@@ -0,0 +1,218 @@
+from __future__ import annotations
+
+import ast
+import json
+import uuid
+from typing import Any
+
+
+def _json_string(value: Any) -> str:
+    if isinstance(value, str):
+        return value
+    try:
+        return json.dumps(value if value is not None else {}, ensure_ascii=False)
+    except Exception:
+        return "{}"
+
+
+def _openai_forced_tool_name(tool_choice: Any) -> str | None:
+    if not isinstance(tool_choice, dict):
+        return None
+    fn = tool_choice.get("function")
+    if isinstance(fn, dict):
+        name = fn.get("name")
+        if isinstance(name, str) and name.strip():
+            return name.strip()
+    return None
+
+
+def _anthropic_forced_tool_name(tool_choice: Any) -> str | None:
+    if not isinstance(tool_choice, dict):
+        return None
+    if tool_choice.get("type") == "tool":
+        name = tool_choice.get("name")
+        if isinstance(name, str) and name.strip():
+            return name.strip()
+    fn = tool_choice.get("function")
+    if isinstance(fn, dict):
+        name = fn.get("name")
+        if isinstance(name, str) and name.strip():
+            return name.strip()
+    return None
+
+
+def _json_object_from_text(text: str) -> dict[str, Any] | None:
+    raw = text.strip()
+    if not raw:
+        return None
+    if raw.startswith("```") and raw.endswith("```"):
+        raw = raw[3:-3].strip()
+        if raw.lower().startswith("json"):
+            raw = raw[4:].strip()
+    try:
+        parsed = json.loads(raw)
+    except Exception:
+        return None
+    return parsed if isinstance(parsed, dict) else None
+
+
+def _tool_code_single_arg_name(tools: list[dict[str, Any]] | None, forced_tool_name: str) -> str | None:
+    if not isinstance(tools, list):
+        return None
+    for tool in tools:
+        if not isinstance(tool, dict):
+            continue
+        schema: dict[str, Any] | None = None
+        if tool.get("type") == "function":
+            fn = tool.get("function")
+            if isinstance(fn, dict) and fn.get("name") == forced_tool_name:
+                params = fn.get("parameters")
+                if isinstance(params, dict):
+                    schema = params
+        elif tool.get("name") == forced_tool_name:
+            input_schema = tool.get("input_schema")
+            if isinstance(input_schema, dict):
+                schema = input_schema
+        if not isinstance(schema, dict):
+            continue
+        properties = schema.get("properties")
+        if not isinstance(properties, dict) or len(properties) != 1:
+            return None
+        only_name = next(iter(properties.keys()), None)
+        if isinstance(only_name, str) and only_name.strip():
+            return only_name
+        return None
+    return None
+
+
+def _tool_code_object_from_text(
+    text: str,
+    forced_tool_name: str,
+    *,
+    single_arg_name: str | None = None,
+) -> dict[str, Any] | None:
+    raw = text.strip()
+    if not raw.startswith("```tool_code") or not raw.endswith("```"):
+        return None
+    lines = raw.splitlines()
+    if len(lines) < 2:
+        return None
+    body = "\n".join(lines[1:-1]).strip()
+    try:
+        parsed = ast.parse(body, mode="eval")
+    except Exception:
+        return None
+    call = parsed.body
+    if not isinstance(call, ast.Call):
+        return None
+    if not isinstance(call.func, ast.Name) or call.func.id != forced_tool_name:
+        return None
+    arguments: dict[str, Any] = {}
+    if call.args:
+        if len(call.args) != 1 or call.keywords or not single_arg_name:
+            return None
+        try:
+            arguments[single_arg_name] = ast.literal_eval(call.args[0])
+        except Exception:
+            return None
+        return {"arguments": arguments}
+    for kw in call.keywords:
+        if kw.arg is None:
+            return None
+        try:
+            arguments[kw.arg] = ast.literal_eval(kw.value)
+        except Exception:
+            return None
+    return {"arguments": arguments}
+
+
+def _forced_tool_event_from_text(
+    text: str,
+    forced_tool_name: str,
+    *,
+    single_arg_name: str | None = None,
+) -> dict[str, Any] | None:
+    parsed = _json_object_from_text(text)
+    if parsed is None:
+        parsed = _tool_code_object_from_text(text, forced_tool_name, single_arg_name=single_arg_name)
+    if parsed is None:
+        return None
+
+    explicit_name: Any = parsed.get("name") or parsed.get("tool")
+    fn = parsed.get("function")
+    if explicit_name is None and isinstance(fn, dict):
+        explicit_name = fn.get("name")
+    if explicit_name is not None and str(explicit_name) != forced_tool_name:
+        return None
+
+    tool_input: Any = None
+    if "input" in parsed:
+        tool_input = parsed.get("input")
+    elif "arguments" in parsed:
+        args = parsed.get("arguments")
+        if isinstance(args, str):
+            try:
+                tool_input = json.loads(args)
+            except Exception:
+                return None
+        else:
+            tool_input = args
+    elif isinstance(fn, dict) and "arguments" in fn:
+        args = fn.get("arguments")
+        if isinstance(args, str):
+            try:
+                tool_input = json.loads(args)
+            except Exception:
+                return None
+        else:
+            tool_input = args
+    else:
+        reserved = {"name", "tool", "function", "arguments", "input", "result"}
+        tool_input = {k: v for k, v in parsed.items() if k not in reserved}
+
+    event: dict[str, Any] = {
+        "name": forced_tool_name,
+        "input": tool_input if tool_input is not None else {},
+    }
+    if "result" in parsed:
+        event["result"] = parsed.get("result")
+    return event
+
+
+def _openai_tool_call(tool: dict[str, Any], *, forced_id: str | None = None) -> dict[str, Any]:
+    return {
+        "id": str(tool.get("id") or forced_id or f"call_{uuid.uuid4().hex}"),
+        "type": "function",
+        "function": {
+            "name": str(tool.get("name") or "tool"),
+            "arguments": _json_string(tool.get("input")),
+        },
+    }
+
+
+def _anthropic_tool_use_block(
+    tool: dict[str, Any], *, forced_id: str | None = None
+) -> dict[str, Any]:
+    return {
+        "type": "tool_use",
+        "id": str(tool.get("id") or forced_id or f"toolu_{uuid.uuid4().hex}"),
+        "name": str(tool.get("name") or "tool"),
+        "input": tool.get("input") if tool.get("input") is not None else {},
+    }
+
+
+def _anthropic_tool_result_block(
+    tool: dict[str, Any], *, forced_id: str | None = None
+) -> dict[str, Any] | None:
+    if "result" not in tool:
+        return None
+    result = tool.get("result")
+    if isinstance(result, str):
+        content: Any = result
+    else:
+        content = _json_string(result)
+    return {
+        "type": "tool_result",
+        "tool_use_id": str(tool.get("id") or forced_id or ""),
+        "content": content,
+    }
--- a/app/main.py
+++ b/app/main.py
@@ -1,6 +1,5 @@
 from __future__ import annotations

-import ast
 import asyncio
 import hashlib
 import json
@@ -26,6 +25,26 @@ from .auth import (
 )
 from .concurrency import BackpressureRejected, InFlightGuard
 from .config import Settings, load_settings
+from .http.responses_adapter import (
+    _responses_id_from_chat_id,
+    _responses_input_to_messages,
+    _responses_non_stream_from_chat_payload,
+    _responses_to_chat_request,
+    _responses_usage_from_chat,
+    _sse_data,
+)
+from .http.tool_bridge import (
+    _anthropic_forced_tool_name,
+    _anthropic_tool_result_block,
+    _anthropic_tool_use_block,
+    _forced_tool_event_from_text,
+    _json_object_from_text,
+    _json_string,
+    _openai_forced_tool_name,
+    _openai_tool_call,
+    _tool_code_object_from_text,
+    _tool_code_single_arg_name,
+)
 from .lingma_pool import LingmaPool, PoolInstance
 from .logging_config import configure_logging, get_logger, request_id_var
 from .model_map import build_model_name_map, flatten_model_keys, resolve_model
@@ -554,218 +573,6 @@ def _stream_tool_event(event: Any) -> dict[str, Any] | None:
    return None


-def _json_string(value: Any) -> str:
-    if isinstance(value, str):
-        return value
-    try:
-        return json.dumps(value if value is not None else {}, ensure_ascii=False)
-    except Exception:
-        return "{}"
-
-
-def _openai_forced_tool_name(tool_choice: Any) -> str | None:
-    if not isinstance(tool_choice, dict):
-        return None
-    fn = tool_choice.get("function")
-    if isinstance(fn, dict):
-        name = fn.get("name")
-        if isinstance(name, str) and name.strip():
-            return name.strip()
-    return None
-
-
-def _anthropic_forced_tool_name(tool_choice: Any) -> str | None:
-    if not isinstance(tool_choice, dict):
-        return None
-    if tool_choice.get("type") == "tool":
-        name = tool_choice.get("name")
-        if isinstance(name, str) and name.strip():
-            return name.strip()
-    fn = tool_choice.get("function")
-    if isinstance(fn, dict):
-        name = fn.get("name")
-        if isinstance(name, str) and name.strip():
-            return name.strip()
-    return None
-
-
-def _json_object_from_text(text: str) -> dict[str, Any] | None:
-    raw = text.strip()
-    if not raw:
-        return None
-    if raw.startswith("```") and raw.endswith("```"):
-        raw = raw[3:-3].strip()
-        if raw.lower().startswith("json"):
-            raw = raw[4:].strip()
-    try:
-        parsed = json.loads(raw)
-    except Exception:
-        return None
-    return parsed if isinstance(parsed, dict) else None
-
-
-def _tool_code_single_arg_name(tools: list[dict[str, Any]] | None, forced_tool_name: str) -> str | None:
-    if not isinstance(tools, list):
-        return None
-    for tool in tools:
-        if not isinstance(tool, dict):
-            continue
-        schema: dict[str, Any] | None = None
-        if tool.get("type") == "function":
-            fn = tool.get("function")
-            if isinstance(fn, dict) and fn.get("name") == forced_tool_name:
-                params = fn.get("parameters")
-                if isinstance(params, dict):
-                    schema = params
-        elif tool.get("name") == forced_tool_name:
-            input_schema = tool.get("input_schema")
-            if isinstance(input_schema, dict):
-                schema = input_schema
-        if not isinstance(schema, dict):
-            continue
-        properties = schema.get("properties")
-        if not isinstance(properties, dict) or len(properties) != 1:
-            return None
-        only_name = next(iter(properties.keys()), None)
-        if isinstance(only_name, str) and only_name.strip():
-            return only_name
-        return None
-    return None
-
-
-def _tool_code_object_from_text(
-    text: str,
-    forced_tool_name: str,
-    *,
-    single_arg_name: str | None = None,
-) -> dict[str, Any] | None:
-    raw = text.strip()
-    if not raw.startswith("```tool_code") or not raw.endswith("```"):
-        return None
-    lines = raw.splitlines()
-    if len(lines) < 2:
-        return None
-    body = "\n".join(lines[1:-1]).strip()
-    try:
-        parsed = ast.parse(body, mode="eval")
-    except Exception:
-        return None
-    call = parsed.body
-    if not isinstance(call, ast.Call):
-        return None
-    if not isinstance(call.func, ast.Name) or call.func.id != forced_tool_name:
-        return None
-    arguments: dict[str, Any] = {}
-    if call.args:
-        if len(call.args) != 1 or call.keywords or not single_arg_name:
-            return None
-        try:
-            arguments[single_arg_name] = ast.literal_eval(call.args[0])
-        except Exception:
-            return None
-        return {"arguments": arguments}
-    for kw in call.keywords:
-        if kw.arg is None:
-            return None
-        try:
-            arguments[kw.arg] = ast.literal_eval(kw.value)
-        except Exception:
-            return None
-    return {"arguments": arguments}
-
-
-def _forced_tool_event_from_text(
-    text: str,
-    forced_tool_name: str,
-    *,
-    single_arg_name: str | None = None,
-) -> dict[str, Any] | None:
-    parsed = _json_object_from_text(text)
-    if parsed is None:
-        parsed = _tool_code_object_from_text(text, forced_tool_name, single_arg_name=single_arg_name)
-    if parsed is None:
-        return None
-
-    explicit_name: Any = parsed.get("name") or parsed.get("tool")
-    fn = parsed.get("function")
-    if explicit_name is None and isinstance(fn, dict):
-        explicit_name = fn.get("name")
-    if explicit_name is not None and str(explicit_name) != forced_tool_name:
-        return None
-
-    tool_input: Any = None
-    if "input" in parsed:
-        tool_input = parsed.get("input")
-    elif "arguments" in parsed:
-        args = parsed.get("arguments")
-        if isinstance(args, str):
-            try:
-                tool_input = json.loads(args)
-            except Exception:
-                return None
-        else:
-            tool_input = args
-    elif isinstance(fn, dict) and "arguments" in fn:
-        args = fn.get("arguments")
-        if isinstance(args, str):
-            try:
-                tool_input = json.loads(args)
-            except Exception:
-                return None
-        else:
-            tool_input = args
-    else:
-        reserved = {"name", "tool", "function", "arguments", "input", "result"}
-        tool_input = {k: v for k, v in parsed.items() if k not in reserved}
-
-    event: dict[str, Any] = {
-        "name": forced_tool_name,
-        "input": tool_input if tool_input is not None else {},
-    }
-    if "result" in parsed:
-        event["result"] = parsed.get("result")
-    return event
-
-
-def _openai_tool_call(tool: dict[str, Any], *, forced_id: str | None = None) -> dict[str, Any]:
-    return {
-        "id": str(tool.get("id") or forced_id or f"call_{uuid.uuid4().hex}"),
-        "type": "function",
-        "function": {
-            "name": str(tool.get("name") or "tool"),
-            "arguments": _json_string(tool.get("input")),
-        },
-    }
-
-
-def _anthropic_tool_use_block(
-    tool: dict[str, Any], *, forced_id: str | None = None
-) -> dict[str, Any]:
-    return {
-        "type": "tool_use",
-        "id": str(tool.get("id") or forced_id or f"toolu_{uuid.uuid4().hex}"),
-        "name": str(tool.get("name") or "tool"),
-        "input": tool.get("input") if tool.get("input") is not None else {},
-    }
-
-
-def _anthropic_tool_result_block(
-    tool: dict[str, Any], *, forced_id: str | None = None
-) -> dict[str, Any] | None:
-    if "result" not in tool:
-        return None
-    result = tool.get("result")
-    if isinstance(result, str):
-        content: Any = result
-    else:
-        content = _json_string(result)
-    return {
-        "type": "tool_result",
-        "tool_use_id": str(tool.get("id") or forced_id or ""),
-        "content": content,
-    }
-
-
@app.post("/v1/chat/completions", dependencies=[Depends(auth_guard)])
 async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
    p = _require_pool()
@@ -908,6 +715,23 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
                tool_call_indexes: dict[str, int] = {}
                saw_tool_call = False
                buffered_text_parts: list[str] = []
+
+                def _text_payload(text: str) -> str:
+                    payload = {
+                        "id": completion_id,
+                        "object": "chat.completion.chunk",
+                        "created": created,
+                        "model": model,
+                        "choices": [
+                            {
+                                "index": 0,
+                                "delta": {"content": text},
+                                "finish_reason": None,
+                            }
+                        ],
+                    }
+                    return f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"
+
                try:
                    async for chunk in _inst.client.chat_stream(
                        prompt,
@@ -922,6 +746,25 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
                            tool = _stream_tool_event(chunk)
                            if not tool:
                                continue
+
+                            tool_name = str(tool.get("name") or "")
+                            allowed = True
+                            if tool_config and isinstance(tool_config.get("tools"), list) and tool_config.get("tools"):
+                                allowed = False
+                                for t in tool_config.get("tools"):
+                                    if tool_name == _anthropic_tool_name(t) or tool_name == _openai_tool_name(t):
+                                        allowed = True
+                                        break
+                                if not allowed and forced_tool_name and tool_name == forced_tool_name:
+                                    allowed = True
+                            if not allowed:
+                                continue
+
+                            if buffered_text_parts:
+                                for buffered_text in buffered_text_parts:
+                                    yield _text_payload(buffered_text)
+                                buffered_text_parts.clear()
+
                            tool_id = str(tool.get("id") or "")
                            if not tool_id:
                                tool_id = f"call_{len(tool_call_indexes)}"
@@ -958,22 +801,11 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
                            continue
                        buffered_text_parts.append(text)
                        completion_tokens_holder["n"] += estimate_tokens(text)
-                        payload = {
-                            "id": completion_id,
-                            "object": "chat.completion.chunk",
-                            "created": created,
-                            "model": model,
-                            "choices": [
-                                {
-                                    "index": 0,
-                                    "delta": {"content": text},
-                                    "finish_reason": None,
-                                }
-                            ],
-                        }
-                        yield f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"
+                        if forced_tool_name and not saw_tool_call:
+                            continue
+                        yield _text_payload(text)

-                    if not saw_tool_call and forced_tool_name:
+                    if buffered_text_parts and not saw_tool_call and forced_tool_name:
                        fallback_event = _forced_tool_event_from_text(
                            "".join(buffered_text_parts),
                            forced_tool_name,
@@ -984,6 +816,7 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
                            tool_id = "call_fallback_0"
                            idx = 0
                            tool_call_indexes[tool_id] = idx
+                            fallback_tool_call = _openai_tool_call(fallback_event, forced_id=tool_id)
                            payload = {
                                "id": completion_id,
                                "object": "chat.completion.chunk",
@@ -996,7 +829,7 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
                                            "tool_calls": [
                                                {
                                                    "index": idx,
-                                                    **_openai_tool_call(fallback_event, forced_id=tool_id),
+                                                    **fallback_tool_call,
                                                }
                                            ]
                                        },
@@ -1004,8 +837,14 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
                                    }
                                ],
                            }
+                            buffered_text_parts.clear()
                            yield f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"

+                    if buffered_text_parts:
+                        for buffered_text in buffered_text_parts:
+                            yield _text_payload(buffered_text)
+                        buffered_text_parts.clear()
+
                    done_payload = {
                        "id": completion_id,
                        "object": "chat.completion.chunk",
@@ -1021,7 +860,6 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
                    }
                    yield f"data: {json.dumps(done_payload, ensure_ascii=False)}\n\n"

-
                    if include_usage:
                        usage_payload = {
                            "id": completion_id,
@@ -1056,9 +894,6 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
                        exc,
                    )
                finally:
-                    # Persist upstream sessionId only on a clean chat/finish.
-                    # Partial streams (cancelled, timed out) leave Lingma's
-                    # session in an indeterminate state, so we must not reuse.
                    if success and write_key:
                        sid = _meta.get("session_id")
                        if sid:
@@ -1075,7 +910,6 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
            ticket_transferred = True
            return _streaming_response(event_stream())

-
        try:
            result = await inst.client.chat_complete(
                prompt,
@@ -1117,14 +951,27 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
        message_content = result.get("text") or ""
        tool_calls: list[dict[str, Any]] = []
        saw_tool_call = False
+        forced_tool_name = _openai_forced_tool_name(req.tool_choice)
        if isinstance(tool_events, list):
            for idx, item in enumerate(tool_events):
                if isinstance(item, dict):
+                    tool_name = str(item.get("name") or "")
+                    allowed = True
+                    if tool_config and isinstance(tool_config.get("tools"), list) and tool_config.get("tools"):
+                        allowed = False
+                        for t in tool_config.get("tools"):
+                            if tool_name == _anthropic_tool_name(t) or tool_name == _openai_tool_name(t):
+                                allowed = True
+                                break
+                        if not allowed and forced_tool_name and tool_name == forced_tool_name:
+                            allowed = True
+                    if not allowed:
+                        continue
+
                    tool_id = str(item.get("id") or f"call_{idx}")
                    tool_calls.append(_openai_tool_call(item, forced_id=tool_id))
                    saw_tool_call = True
        if not saw_tool_call:
-            forced_tool_name = _openai_forced_tool_name(req.tool_choice)
            if forced_tool_name:
                fallback_event = _forced_tool_event_from_text(
                    message_content,
@@ -1173,178 +1020,6 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):



-def _responses_input_to_messages(req: ResponsesRequest) -> list[dict[str, Any]]:
-    messages: list[dict[str, Any]] = []
-    if req.instructions:
-        messages.append({"role": "system", "content": req.instructions})
-
-    raw_input = req.input
-    if raw_input is None:
-        return messages
-
-    valid_roles = {"system", "user", "assistant", "tool", "developer", "function"}
-
-    def _append(role: str, content: Any, *, tool_call_id: str | None = None) -> None:
-        msg: dict[str, Any] = {"role": role, "content": flatten_content(content)}
-        if role == "tool" and tool_call_id:
-            msg["tool_call_id"] = tool_call_id
-        messages.append(msg)
-
-    if isinstance(raw_input, str):
-        _append("user", raw_input)
-        return messages
-
-    raw_items: list[Any]
-    if isinstance(raw_input, dict):
-        raw_items = [raw_input]
-    elif isinstance(raw_input, list):
-        raw_items = list(raw_input)
-    else:
-        _append("user", str(raw_input))
-        return messages
-
-    for item in raw_items:
-        if isinstance(item, str):
-            _append("user", item)
-            continue
-        if not isinstance(item, dict):
-            _append("user", str(item))
-            continue
-
-        role = item.get("role")
-        if isinstance(role, str) and role in valid_roles:
-            tool_call_id = item.get("tool_call_id") or item.get("call_id")
-            _append(role, item.get("content"), tool_call_id=str(tool_call_id) if tool_call_id else None)
-            continue
-
-        if item.get("type") == "function_call_output":
-            output = item.get("output")
-            if isinstance(output, (dict, list)):
-                output = json.dumps(output, ensure_ascii=False)
-            tool_call_id = item.get("call_id")
-            _append("tool", output, tool_call_id=str(tool_call_id) if tool_call_id else None)
-            continue
-
-        if "content" in item:
-            text = flatten_content(item.get("content"))
-        else:
-            text = flatten_content([item])
-        if text:
-            _append("user", text)
-
-    return messages
-
-
-
-def _responses_to_chat_request(req: ResponsesRequest) -> ChatCompletionsRequest:
-    return ChatCompletionsRequest(
-        model=req.model,
-        messages=_responses_input_to_messages(req),
-        stream=req.stream,
-        temperature=req.temperature,
-        top_p=req.top_p,
-        max_tokens=req.max_output_tokens,
-        user=req.user,
-        tools=req.tools,
-        tool_choice=req.tool_choice,
-    )
-
-
-
-def _responses_id_from_chat_id(chat_id: Any) -> str:
-    if isinstance(chat_id, str) and chat_id:
-        suffix = chat_id.removeprefix("chatcmpl-")
-        return f"resp_{suffix}"
-    return f"resp_{uuid.uuid4().hex}"
-
-
-
-def _responses_usage_from_chat(usage: Any) -> dict[str, int]:
-    if not isinstance(usage, dict):
-        return {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
-    input_tokens = int(usage.get("prompt_tokens") or 0)
-    output_tokens = int(usage.get("completion_tokens") or 0)
-    return {
-        "input_tokens": input_tokens,
-        "output_tokens": output_tokens,
-        "total_tokens": int(usage.get("total_tokens") or (input_tokens + output_tokens)),
-    }
-
-
-
-def _responses_non_stream_from_chat_payload(chat_payload: Any) -> dict[str, Any]:
-    if not isinstance(chat_payload, dict):
-        raise HTTPException(
-            status_code=502,
-            detail={"error": {"message": "invalid upstream response", "type": "upstream_error"}},
-        )
-    choice = {}
-    choices = chat_payload.get("choices")
-    if isinstance(choices, list) and choices:
-        choice = choices[0] if isinstance(choices[0], dict) else {}
-    message = choice.get("message") if isinstance(choice.get("message"), dict) else {}
-
-    output: list[dict[str, Any]] = []
-    content = message.get("content")
-    if isinstance(content, str) and content:
-        output.append(
-            {
-                "type": "message",
-                "id": f"msg_{uuid.uuid4().hex}",
-                "status": "completed",
-                "role": "assistant",
-                "content": [{"type": "output_text", "text": content}],
-            }
-        )
-
-    tool_calls = message.get("tool_calls")
-    if isinstance(tool_calls, list):
-        for idx, tool_call in enumerate(tool_calls):
-            if not isinstance(tool_call, dict):
-                continue
-            fn = tool_call.get("function") if isinstance(tool_call.get("function"), dict) else {}
-            call_id = str(tool_call.get("id") or f"call_{idx}")
-            output.append(
-                {
-                    "type": "function_call",
-                    "id": call_id,
-                    "call_id": call_id,
-                    "name": str(fn.get("name") or "tool"),
-                    "arguments": str(fn.get("arguments") or "{}"),
-                }
-            )
-
-    output_text_parts: list[str] = []
-    for item in output:
-        if item.get("type") == "message":
-            blocks = item.get("content")
-            if isinstance(blocks, list):
-                for block in blocks:
-                    if isinstance(block, dict) and block.get("type") == "output_text":
-                        text = block.get("text")
-                        if isinstance(text, str) and text:
-                            output_text_parts.append(text)
-
-    return {
-        "id": _responses_id_from_chat_id(chat_payload.get("id")),
-        "object": "response",
-        "created_at": int(chat_payload.get("created") or time.time()),
-        "status": "completed",
-        "error": None,
-        "incomplete_details": None,
-        "model": chat_payload.get("model"),
-        "output": output,
-        "output_text": "".join(output_text_parts),
-        "usage": _responses_usage_from_chat(chat_payload.get("usage")),
-    }
-
-
-
-def _sse_data(payload: dict[str, Any]) -> str:
-    return f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"
-
-
-
 async def _responses_stream_from_chat_stream(
    chat_stream: StreamingResponse,
    *,
@@ -1911,6 +1586,21 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
                            tool = _stream_tool_event(chunk)
                            if not tool:
                                continue
+                            # Skip tool events not named in tool_config["tools"] (when non-empty) and not the forced tool name.
+                            tool_name = str(tool.get("name") or "")
+                            allowed = True
+                            if tool_config and isinstance(tool_config.get("tools"), list) and tool_config.get("tools"):
+                                allowed = False
+                                for t in tool_config.get("tools"):
+                                    if tool_name == _anthropic_tool_name(t) or tool_name == _openai_tool_name(t):
+                                        allowed = True
+                                        break
+                                forced_tool_name = _anthropic_forced_tool_name(req.tool_choice)
+                                if not allowed and forced_tool_name and tool_name == forced_tool_name:
+                                    allowed = True
+                            if not allowed:
+                                continue
+
                            tool_id = str(tool.get("id") or f"toolu_stream_{block_index}")

                            tool_use_block = _anthropic_tool_use_block(tool, forced_id=tool_id)
@@ -2086,6 +1776,21 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
            for idx, item in enumerate(tool_events):
                if not isinstance(item, dict):
                    continue
+                # Skip tool events not named in tool_config["tools"] (when non-empty) and not the forced tool name.
+                tool_name = str(item.get("name") or "")
+                allowed = True
+                if tool_config and isinstance(tool_config.get("tools"), list) and tool_config.get("tools"):
+                    allowed = False
+                    for t in tool_config.get("tools"):
+                        if tool_name == _anthropic_tool_name(t) or tool_name == _openai_tool_name(t):
+                            allowed = True
+                            break
+                    forced_tool_name = _anthropic_forced_tool_name(req.tool_choice)
+                    if not allowed and forced_tool_name and tool_name == forced_tool_name:
+                        allowed = True
+                if not allowed:
+                    continue
+
                saw_tool_event = True
                tool_id = str(item.get("id") or f"toolu_nonstream_{idx}")
                content_blocks.append(_anthropic_tool_use_block(item, forced_id=tool_id))