feat: add emulated tool-calling bridge for Lingma

Add a proxy-side tool emulation layer so Lingma requests can surface stable OpenAI tool_calls and Anthropic tool_use blocks even when upstream tool events are missing or inconsistent. Constraint: Keep native Lingma tool event bridging as the first path and layer emulation as a fallback. Rejected: Depend exclusively on Lingma native tool/invoke events | tool visibility remains inconsistent across models and transports. Confidence: high. Scope-risk: moderate.
2026-05-07 18:10:01 +08:00
parent 5911e4322e
commit 94a8025ae5
11 changed files with 1808 additions and 4 deletions
--- a/README.md
+++ b/README.md
@@ -6,6 +6,7 @@
 - Anthropic：`/v1/messages`、`/v1/messages/count_tokens`（含 stream）
 - 内置：多实例池、会话复用、Prometheus 指标、登录态 bundle 注入
 - 工具事件桥接：Lingma 上游返回 `tool` 事件时，网关会输出为 OpenAI `tool_calls`（stream/non-stream）和 Anthropic `tool_use` / `tool_result`（stream/non-stream）；请求侧 `tools` / `tool_choice` 仅在 `TOOL_FORWARD_ENABLED=true` 时透传（默认开启，可显式关闭）
 - 工具模拟回退：当 Lingma 未稳定外显原生 `tool/*` 事件时，网关会把注入后的 `json action` / `#Tool Call` 等动作文本归一化为 OpenAI `tool_calls`，并支持 tool result continuation
 - 多模态降级：OpenAI `image_url` / `input_image` 转 `[image]`，`input_audio` 转 `[audio]`；Anthropic `image` 转 `[image]`
 > 架构设计与二开细节请看 [`DESIGN.md`](./DESIGN.md)。
@@ -85,6 +86,9 @@ python3 -m unittest tests/test_tool_call_bridge.py
 # 全量 unittest
 python3 -m unittest discover -s tests -p "test_*.py"
 # Docker 端到端工具调用冒烟
 bash scripts/smoke_tool_calls.sh
 ```
 ---
@@ -201,6 +205,7 @@ curl -s "http://127.0.0.1:${PORT}/healthz"
 | `healthz` 正常但请求失败 | 用错端口 | 以 `.env` 的 `PORT` 为准，`docker compose ps` 再确认 |
 | `git pull` 提示 not on a branch | 处于 detached HEAD | 执行 `git checkout -B main origin/main` |
 | 自动登录不稳定 | 浏览器流程波动 | 优先使用 `LINGMA_SESSION_BUNDLE(_FILE)` |
 | 日志出现 `extension main js path not found` / `ExtensionApi executor not inited` | Lingma 扩展运行时未完整提取，MCP/工具执行器未初始化 | 重启容器触发 bootstrap 自愈；确认 `data/bin/<version>/extension/main.js` 已存在 |
 | 工具调用未触发 | 模型未选择工具或当前协议路径不支持合成回退 | OpenAI 可配合 `tool_choice` 强制并约束输出 JSON；Anthropic 当前仅 non-stream 支持合成 `tool_use` / `tool_result` 回退 |
 ---
--- a/app/bootstrap_lingma.py
+++ b/app/bootstrap_lingma.py
@@ -3,6 +3,7 @@ from __future__ import annotations
 import io
 import json
 import os
 import shutil
 import time
 import urllib.request
 import zipfile
@@ -70,6 +71,15 @@ def _extract_release_tree(
            dst.write(src.read())
 def _release_dir_for_binary(lingma_bin: Path, release_root: str | None) -> Path:
    return lingma_bin.parent / ((release_root or "").strip() or "2.5.20")
 def _release_has_required_assets(release_dir: Path) -> bool:
    extension_main = release_dir / "extension" / "main.js"
    return extension_main.exists() and extension_main.is_file()
 def _query_marketplace_latest_vsix(
    publisher: str, extension: str
 ) -> tuple[str, str, dict]:
@@ -168,8 +178,19 @@ def bootstrap_from_vsix() -> None:
            )
            resolved_url = vsix_url
    current_release_dir = _release_dir_for_binary(
        lingma_bin, old_marker.get("release_root") if isinstance(old_marker, dict) else None
    )
    release_ready = _release_has_required_assets(current_release_dir)
    if lingma_bin.exists() and not release_ready:
        print(
            "[bootstrap] existing Lingma binary found but extension assets are incomplete; "
            f"refreshing install under {current_release_dir}"
        )
    if (
        lingma_bin.exists()
        and release_ready
        and not force_refresh
        and (
            (not always_refresh)
@@ -215,7 +236,8 @@ def bootstrap_from_vsix() -> None:
            lingma_bytes = inner_zip.read(lingma_member)
            release_root = _infer_release_root(lingma_member)
            lingma_bin.parent.mkdir(parents=True, exist_ok=True)
-            release_dir = lingma_bin.parent / (release_root or "2.5.20")
+            release_dir = _release_dir_for_binary(lingma_bin, release_root)
            shutil.rmtree(release_dir, ignore_errors=True)
            _extract_release_tree(inner_zip, release_root, release_dir)
        lingma_bin.write_bytes(lingma_bytes)
@@ -224,7 +246,9 @@ def bootstrap_from_vsix() -> None:
        if extension_main.exists():
            print(f"[bootstrap] extension ready: {extension_main}")
        else:
-            print(f"[bootstrap] extension missing under: {release_dir}")
+            raise RuntimeError(
                f"extension assets missing after extraction under: {release_dir}"
            )
        marker = {
            "source": source_type,
--- a/app/http/tool_bridge.py
+++ b/app/http/tool_bridge.py
@@ -196,6 +196,33 @@ def _extract_tool_calls_from_text(text: str) -> list[dict[str, Any]] | None:
    return None
 def _extract_hash_tool_call_event_from_text(
    text: str,
    *,
    forced_tool_name: str | None = None,
 ) -> dict[str, Any] | None:
    raw = (text or "").strip()
    if not raw:
        return None
    match = re.search(
        r"#Tool Call\s*```([A-Za-z0-9_\-.]+)\s*(\{.*?\})\s*```",
        raw,
        flags=re.S,
    )
    if not match:
        return None
    name = match.group(1).strip()
    if forced_tool_name and name != forced_tool_name:
        return None
    try:
        arguments = json.loads(match.group(2))
    except Exception:
        return None
    if not isinstance(arguments, dict):
        return None
    return {"name": name, "input": arguments}
 def _tool_code_single_arg_name(
    tools: list[dict[str, Any]] | None, forced_tool_name: str
 ) -> str | None:
@@ -342,6 +369,45 @@ def _forced_tool_fallback_event(
    )
 def _declared_tool_names(tools: list[dict[str, Any]] | None) -> list[str]:
    if not isinstance(tools, list):
        return []
    out: list[str] = []
    for tool in tools:
        name = _openai_tool_name(tool) or _anthropic_tool_name(tool)
        if name and name not in out:
            out.append(name)
    return out
 def _infer_tool_event_from_declared_tools(
    text: str,
    *,
    tools: list[dict[str, Any]] | None,
 ) -> dict[str, Any] | None:
    """Try to recover a tool event from ``text`` for any declared tool.

    For each declared tool name, in declaration order, the text is run through
    the ``<function_calls>`` extractor, then the ``#Tool Call`` extractor, then
    the forced-tool fallback. The first non-``None`` event wins; ``None`` is
    returned when no strategy matches for any tool.
    """
    strategies = (
        lambda name: _extract_function_call_event_from_text(
            text, forced_tool_name=name
        ),
        lambda name: _extract_hash_tool_call_event_from_text(
            text, forced_tool_name=name
        ),
        lambda name: _forced_tool_fallback_event(
            text, forced_tool_name=name, tools=tools
        ),
    )
    for tool_name in _declared_tool_names(tools):
        for strategy in strategies:
            event = strategy(tool_name)
            if event is not None:
                return event
    return None
 def _openai_tool_call(
    tool: dict[str, Any], *, forced_id: str | None = None
 ) -> dict[str, Any]:
--- a/app/http/tool_emulation.py
+++ b/app/http/tool_emulation.py
@@ -0,0 +1,657 @@
 from __future__ import annotations
 import json
 import re
 import uuid
 from dataclasses import dataclass
 from typing import Any
# Protocol-neutral tool declaration, built by extract_openai_tools() and
# extract_anthropic_tools() from the request's ``tools`` array.
@dataclass
 class EmulatedToolDef:
    name: str  # tool name, stripped of surrounding whitespace by the extractors
    description: str  # tool description; "" when the request omits it
    input_schema: dict[str, Any]  # copy of the declared parameter schema; {} when absent
# Protocol-neutral tool_choice. The extractors in this module produce the
# modes "auto", "none", "any" and "tool".
@dataclass
 class EmulatedToolChoice:
    mode: str  # one of the modes listed above
    name: str = ""  # tool to force; the extractors set it only for mode "tool"
# A tool call recovered from model text (see parse_action_blocks() and
# infer_declared_tool_call_from_text()).
@dataclass
 class EmulatedToolCall:
    id: str  # call identifier; the parsers derive it via _stable_call_id()
    name: str  # tool name after normalization against the declared tools
    arguments: dict[str, Any]  # decoded call arguments
 def extract_openai_tools(raw: Any) -> list[EmulatedToolDef]:
    """Convert an OpenAI-style ``tools`` array into ``EmulatedToolDef`` items.

    Entries that are not dicts, lack a ``function`` dict, or have a blank
    function name are skipped. A non-dict ``parameters`` value becomes an
    empty schema. A non-list ``raw`` yields an empty list.
    """
    tools: list[EmulatedToolDef] = []
    if not isinstance(raw, list):
        return tools
    for entry in raw:
        fn = entry.get("function") if isinstance(entry, dict) else None
        if not isinstance(fn, dict):
            continue
        tool_name = str(fn.get("name") or "").strip()
        if not tool_name:
            continue
        params = fn.get("parameters")
        tools.append(
            EmulatedToolDef(
                name=tool_name,
                description=str(fn.get("description") or "").strip(),
                # Shallow copy so the request payload is not shared.
                input_schema=dict(params) if isinstance(params, dict) else {},
            )
        )
    return tools
 def extract_anthropic_tools(raw: Any) -> list[EmulatedToolDef]:
    """Convert an Anthropic-style ``tools`` array into ``EmulatedToolDef`` items.

    Non-dict entries, entries whose ``type`` starts with ``web_search_``, and
    entries with a blank ``name`` are skipped. A non-dict ``input_schema``
    becomes an empty schema. A non-list ``raw`` yields an empty list.
    """
    tools: list[EmulatedToolDef] = []
    if not isinstance(raw, list):
        return tools
    for entry in raw:
        if not isinstance(entry, dict):
            continue
        if str(entry.get("type") or "").strip().startswith("web_search_"):
            continue
        tool_name = str(entry.get("name") or "").strip()
        if not tool_name:
            continue
        declared = entry.get("input_schema")
        tools.append(
            EmulatedToolDef(
                name=tool_name,
                description=str(entry.get("description") or "").strip(),
                # Shallow copy so the request payload is not shared.
                input_schema=dict(declared) if isinstance(declared, dict) else {},
            )
        )
    return tools
 def extract_openai_tool_choice(raw: Any) -> EmulatedToolChoice:
    """Normalize an OpenAI-style ``tool_choice`` value.

    Strings: blank/``auto`` -> auto, ``none`` -> none, ``required``/``any`` ->
    any, anything else is taken as a tool name. Dicts are dispatched on
    ``type``; ``function``/``tool`` read the name from ``function.name`` first
    and the top-level ``name`` second. Everything else resolves to auto.
    """
    if isinstance(raw, str):
        token = raw.strip()
        if token == "none":
            return EmulatedToolChoice(mode="none")
        if token in ("required", "any"):
            return EmulatedToolChoice(mode="any")
        if token in ("", "auto"):
            return EmulatedToolChoice(mode="auto")
        return EmulatedToolChoice(mode="tool", name=token)
    if not isinstance(raw, dict):
        # Covers ``None`` as well as unsupported value types.
        return EmulatedToolChoice(mode="auto")
    kind = str(raw.get("type") or "").strip()
    if kind in ("required", "any"):
        return EmulatedToolChoice(mode="any")
    if kind == "none":
        return EmulatedToolChoice(mode="none")
    if kind in ("function", "tool"):
        fn = raw.get("function")
        nested_name = fn.get("name") if isinstance(fn, dict) else None
        for candidate in (nested_name, raw.get("name")):
            picked = str(candidate or "").strip()
            if picked:
                return EmulatedToolChoice(mode="tool", name=picked)
    return EmulatedToolChoice(mode="auto")
 def extract_anthropic_tool_choice(raw: Any) -> EmulatedToolChoice:
    """Normalize an Anthropic-style ``tool_choice`` value.

    ``None`` means auto. Non-dict values are delegated to
    ``extract_openai_tool_choice``. Dicts are dispatched on ``type``; a
    ``tool`` entry without a usable ``name`` falls back to auto.
    """
    if raw is None:
        return EmulatedToolChoice(mode="auto")
    if not isinstance(raw, dict):
        return extract_openai_tool_choice(raw)
    kind = str(raw.get("type") or "").strip()
    simple_modes = {
        "": "auto",
        "auto": "auto",
        "none": "none",
        "any": "any",
        "required": "any",
    }
    if kind in simple_modes:
        return EmulatedToolChoice(mode=simple_modes[kind])
    if kind == "tool":
        picked = str(raw.get("name") or "").strip()
        if picked:
            return EmulatedToolChoice(mode="tool", name=picked)
    return EmulatedToolChoice(mode="auto")
 def has_tool_request(tools: list[EmulatedToolDef], choice: EmulatedToolChoice) -> bool:
    """Return True when tools are declared or the choice mode is not blank/``auto``."""
    if tools:
        return True
    return choice.mode not in {"", "auto"}
 def inject_tooling(system: str, tools: list[EmulatedToolDef], choice: EmulatedToolChoice) -> str:
    """Append tool-usage instructions to the system prompt.

    Without tools the stripped system prompt is returned unchanged. Otherwise
    a block is built that lists every tool as ``name(signature) - description``
    and asks the model to emit ``json action`` fenced blocks, plus a forcing
    rule derived from ``choice``. The block follows the system prompt,
    separated by ``---``, or stands alone when the system prompt is empty.
    """
    base = system.strip()
    if not tools:
        return base
    catalog: list[str] = []
    for tool in tools:
        entry = f"{tool.name}({_compact_schema(tool.input_schema)})"
        if tool.description:
            entry = f"{entry} - {_truncate(tool.description, 120)}"
        catalog.append(entry)
    sections = [
        "You are an AI assistant with DIRECT tool access.",
        "When a request needs local files, terminal state, browser state, current web data, or another external result, use the proxy tools listed below.",
        "Do not claim tools are unavailable.",
        "When you need to use a tool, output exactly one or more structured action blocks in this format:",
        '```json action\n{"tool":"NAME","parameters":{"key":"value"}}\n```',
        "Available tools:",
        "\n".join(catalog),
        "Rules:",
        "- Use ```json action``` blocks for tool calls.",
        "- If a tool is needed, do not explain first; emit the action block directly.",
        "- If no tool is needed, answer normally.",
        "- Never say tools are unavailable.",
        _force_constraint(choice),
    ]
    # Empty sections (e.g. no forcing rule) are dropped before joining.
    tooling = "\n\n".join(filter(None, sections))
    if base:
        return f"{base}\n\n---\n\n{tooling}"
    return tooling
 def action_output_prompt(tool_call_id: str | None, output: str) -> str:
    """Render a tool result as a follow-up prompt for the model.

    Returns ``""`` for blank output. Otherwise the result is prefixed with
    ``Tool result`` (including the call id when one is given) and followed by
    an instruction to answer from the result where possible.
    """
    body = (output or "").strip()
    if not body:
        return ""
    guidance = (
        "Based on the tool result above, answer the user's request directly if you have enough information. "
        "Only use another tool call if a specific missing fact still requires it."
    )
    has_id = bool(tool_call_id and tool_call_id.strip())
    header = f"Tool result for {tool_call_id.strip()}:" if has_id else "Tool result:"
    return f"{header}\n{body}\n\n{guidance}"
 def parse_action_blocks(
    text: str,
    tools: list[EmulatedToolDef],
    *,
    max_scan_bytes: int = 0,
    max_tool_calls: int = 8,
 ) -> tuple[list[EmulatedToolCall], str]:
    """Extract tool calls from ``json`` / ``json action`` fenced blocks.

    Returns ``(calls, remaining_text)``. Blocks that parse into a tool call
    are removed from the returned text; duplicates (same tool and arguments)
    are removed but reported only once. Blocks that fail to parse, or that
    miss arguments required by the tool's schema, are left in the text.
    When ``max_scan_bytes`` is positive the text is first cut to that many
    characters. Scanning stops once ``max_tool_calls`` calls are collected.
    """
    if not text or not text.strip():
        return [], ""
    if 0 < max_scan_bytes < len(text):
        text = text[:max_scan_bytes]
    declared = [tool for tool in tools if tool.name.strip()]
    canonical_names = {tool.name.lower(): tool.name for tool in declared}
    schemas = {tool.name: tool.input_schema for tool in declared}
    found: list[EmulatedToolCall] = []
    removed: list[tuple[int, int]] = []
    seen_keys: set[str] = set()
    fence = re.compile(r"```json(?:\s+action)?\s*(.*?)```", flags=re.S | re.I)
    for block in fence.finditer(text):
        body = (block.group(1) or "").strip()
        parsed = _parse_tool_call_json(body) if body else None
        if parsed is None:
            continue
        raw_name, args = parsed
        tool_name = _normalize_tool_name(raw_name, canonical_names)
        schema = schemas.get(tool_name)
        if schema:
            args = _filter_args_by_schema(args, schema)
            if not _has_required_args(args, schema):
                continue
        dedup_key = _tool_call_key(tool_name, args)
        # Accepted and duplicate blocks are both stripped from the text.
        removed.append(block.span())
        if dedup_key in seen_keys:
            continue
        seen_keys.add(dedup_key)
        found.append(
            EmulatedToolCall(
                id=_stable_call_id(tool_name, args),
                name=tool_name,
                arguments=args,
            )
        )
        if len(found) >= max_tool_calls:
            break
    if not found:
        return [], text.strip()
    # Spans come from finditer, so they are ordered and non-overlapping;
    # keep everything that lies between them.
    kept: list[str] = []
    cursor = 0
    for start, end in removed:
        kept.append(text[cursor:start])
        cursor = end
    kept.append(text[cursor:])
    return found, "".join(kept).strip()
 def looks_like_refusal(text: str) -> bool:
    """Return True when ``text`` contains a known "tools unavailable" phrase.

    Matching is a case-insensitive substring search over a fixed list of
    English and Chinese phrases; blank input is never a refusal.
    """
    haystack = (text or "").strip().lower()
    if not haystack:
        return False
    markers = (
        "tools are unavailable",
        "cannot call tools",
        "can't call tools",
        "cannot execute",
        "can't execute",
        "没有可用的工具",
        "工具不可用",
        "不能调用工具",
        "无法直接执行",
    )
    for marker in markers:
        if marker in haystack:
            return True
    return False
 def looks_like_missed_tool_use(text: str) -> bool:
    """Return True when ``text`` announces a tool action without emitting one.

    Matching is a case-insensitive substring search over a fixed list of
    English and Chinese phrases (plus the ``#Tool Call`` marker); blank input
    never matches.
    """
    haystack = (text or "").strip().lower()
    if not haystack:
        return False
    markers = (
        "let me use",
        "i need to use",
        "i will use",
        "i need to run",
        "i will run",
        "我需要使用",
        "让我使用",
        "执行命令",
        "读取文件",
        "查看文件",
        "查询天气",
        "#tool call",
    )
    for marker in markers:
        if marker in haystack:
            return True
    return False
 def infer_tool_calls_from_text(
    text: str,
    tools: list[EmulatedToolDef],
 ) -> list[EmulatedToolCall]:
    """Infer at most one tool call from a refusal or an announced-but-missing call.

    Inference is attempted only when ``text`` looks like a refusal or like a
    missed tool use; the result is a list with zero or one call.
    """
    if not looks_like_refusal(text) and not looks_like_missed_tool_use(text):
        return []
    inferred = infer_declared_tool_call_from_text(text, tools)
    if inferred is None:
        return []
    return [inferred]
 def force_tooling_prompt(choice: EmulatedToolChoice) -> str:
    """Build the retry prompt that demands a ``json action`` block.

    When ``choice`` names a specific tool, a sentence requiring that tool is
    appended.
    """
    sentences = [
        "Your last response did not include any ```json action``` block. "
        "You must respond with at least one valid action block now. "
        "Select the single most appropriate available tool for the user request. "
        "Do not explain. Do not say tools are unavailable. Output the action block directly."
    ]
    forced = choice.name.strip() if choice.mode == "tool" else ""
    if forced:
        sentences.append(f' You must call "{forced}".')
    return "".join(sentences)
 def infer_declared_tool_call_from_text(
    text: str,
    tools: list[EmulatedToolDef],
 ) -> EmulatedToolCall | None:
    """Recover a call to one of the declared tools from free-form model text.

    Tools are tried in declaration order. For each tool the extractors run in
    this order: fenced JSON, ``#Tool Call``, ``<function_calls>``, then the
    forced-tool fallback. A candidate is dropped when, after filtering by the
    tool's schema, a required argument is missing. Returns the first
    surviving call or ``None``.
    """
    for tool in tools:
        extractors = (
            _extract_fenced_json_tool_call_event_from_text,
            _extract_hash_tool_call_event_from_text,
            _extract_function_call_event_from_text,
        )
        event = None
        for extract in extractors:
            event = extract(text, forced_tool_name=tool.name)
            if event is not None:
                break
        if event is None:
            event = _forced_tool_fallback_event(
                text, forced_tool_name=tool.name, tools=tools
            )
        if event is None:
            continue
        call_args = dict(event.get("input") or {})
        schema = tool.input_schema
        if schema:
            call_args = _filter_args_by_schema(call_args, schema)
            if not _has_required_args(call_args, schema):
                continue
        return EmulatedToolCall(
            id=_stable_call_id(tool.name, call_args),
            name=tool.name,
            arguments=call_args,
        )
    return None
 def openai_tool_call_from_emulated(call: EmulatedToolCall) -> dict[str, Any]:
    """Render an emulated call as an OpenAI ``tool_calls`` entry.

    The arguments are serialized to a JSON string with non-ASCII characters
    kept as-is.
    """
    serialized_args = json.dumps(call.arguments, ensure_ascii=False)
    function_part = {"name": call.name, "arguments": serialized_args}
    return {"id": call.id, "type": "function", "function": function_part}
 def _extract_hash_tool_call_event_from_text(
    text: str,
    *,
    forced_tool_name: str | None = None,
 ) -> dict[str, Any] | None:
    raw = (text or "").strip()
    match = re.search(
        r"#Tool Call\s*```([A-Za-z0-9_\-.]+)\s*(\{.*?\})\s*```",
        raw,
        flags=re.S,
    )
    if not match:
        return None
    name = match.group(1).strip()
    if forced_tool_name and name != forced_tool_name:
        return None
    try:
        arguments = json.loads(match.group(2))
    except Exception:
        return None
    if not isinstance(arguments, dict):
        return None
    return {"name": name, "input": arguments}
 def _extract_fenced_json_tool_call_event_from_text(
    text: str,
    *,
    forced_tool_name: str | None = None,
 ) -> dict[str, Any] | None:
    raw = (text or "").strip()
    match = re.search(r"```json(?:\s+action)?\s*(\{.*?\})\s*```", raw, flags=re.S | re.I)
    if not match:
        return None
    try:
        payload = json.loads(match.group(1))
    except Exception:
        return None
    if not isinstance(payload, dict):
        return None
    name = str(payload.get("tool") or payload.get("name") or "").strip()
    fn = payload.get("function")
    if not name and isinstance(fn, dict):
        name = str(fn.get("name") or "").strip()
    if not name:
        return None
    if forced_tool_name and name != forced_tool_name:
        return None
    arguments = payload.get("parameters")
    if arguments is None:
        arguments = payload.get("arguments")
    if arguments is None:
        arguments = payload.get("input")
    if arguments is None and isinstance(fn, dict):
        arguments = fn.get("arguments")
    if isinstance(arguments, str):
        try:
            arguments = json.loads(arguments)
        except Exception:
            return None
    if arguments is None:
        arguments = {}
    if not isinstance(arguments, dict):
        return None
    return {"name": name, "input": arguments}
 def _extract_function_call_event_from_text(
    text: str,
    *,
    forced_tool_name: str | None = None,
 ) -> dict[str, Any] | None:
    raw = (text or "").strip()
    match = re.search(r"<function_calls>\s*(\{.*?\})\s*</function_calls>", raw, flags=re.S)
    if not match:
        return None
    try:
        payload = json.loads(match.group(1))
    except Exception:
        return None
    if not isinstance(payload, dict):
        return None
    name = str(payload.get("name") or "").strip()
    if not name:
        return None
    if forced_tool_name and name != forced_tool_name:
        return None
    arguments = payload.get("arguments")
    if isinstance(arguments, str):
        try:
            arguments = json.loads(arguments)
        except Exception:
            return None
    if arguments is None:
        arguments = {}
    if not isinstance(arguments, dict):
        return None
    return {"name": name, "input": arguments}
 def _forced_tool_fallback_event(
    text: str,
    *,
    forced_tool_name: str | None,
    tools: list[EmulatedToolDef],
 ) -> dict[str, Any] | None:
    if not forced_tool_name:
        return None
    parsed = _tool_code_object_from_text(
        text,
        forced_tool_name,
        single_arg_name=_tool_code_single_arg_name(tools, forced_tool_name),
    )
    if parsed is None:
        try:
            parsed = json.loads((text or "").strip())
        except Exception:
            return None
    if not isinstance(parsed, dict):
        return None
    explicit_name = parsed.get("name") or parsed.get("tool")
    if explicit_name is not None and str(explicit_name) != forced_tool_name:
        return None
    tool_input = parsed.get("input")
    if tool_input is None and "arguments" in parsed:
        tool_input = parsed.get("arguments")
    if isinstance(tool_input, str):
        try:
            tool_input = json.loads(tool_input)
        except Exception:
            return None
    if tool_input is None:
        reserved = {"name", "tool", "function", "arguments", "input", "result"}
        tool_input = {k: v for k, v in parsed.items() if k not in reserved}
    if not isinstance(tool_input, dict):
        return None
    return {"name": forced_tool_name, "input": tool_input}
 def _tool_code_single_arg_name(
    tools: list[EmulatedToolDef], forced_tool_name: str
 ) -> str | None:
    for tool in tools:
        if tool.name != forced_tool_name:
            continue
        properties = tool.input_schema.get("properties")
        if not isinstance(properties, dict) or len(properties) != 1:
            return None
        only_name = next(iter(properties.keys()), None)
        return only_name if isinstance(only_name, str) and only_name.strip() else None
    return None
 def _tool_code_object_from_text(
    text: str,
    forced_tool_name: str,
    *,
    single_arg_name: str | None = None,
 ) -> dict[str, Any] | None:
    raw = (text or "").strip()
    if not raw.startswith("```") or not raw.endswith("```"):
        return None
    lines = raw.splitlines()
    if len(lines) < 2:
        return None
    fence = lines[0].strip().lower()
    language = fence[3:].strip()
    if language and language not in {"tool_code", "python", "py"}:
        return None
    body = "\n".join(lines[1:-1]).strip()
    call_match = re.fullmatch(rf"{re.escape(forced_tool_name)}\((.*)\)", body, flags=re.S)
    if not call_match:
        return None
    arguments_text = call_match.group(1).strip()
    if not arguments_text:
        return {"arguments": {}}
    if single_arg_name and not re.search(r"\w+\s*=", arguments_text):
        try:
            value = json.loads(arguments_text)
        except Exception:
            value = arguments_text.strip('"\'')
        return {"arguments": {single_arg_name: value}}
    arguments: dict[str, Any] = {}
    for part in [p.strip() for p in arguments_text.split(",") if p.strip()]:
        if "=" not in part:
            return None
        key, value_text = part.split("=", 1)
        key = key.strip()
        value_text = value_text.strip()
        try:
            value = json.loads(value_text)
        except Exception:
            value = value_text.strip('"\'')
        arguments[key] = value
    return {"arguments": arguments}
 def _parse_tool_call_json(raw: str) -> tuple[str, dict[str, Any]] | None:
    try:
        obj = json.loads(_normalize_json(raw))
    except Exception:
        return None
    if not isinstance(obj, dict):
        return None
    name = str(obj.get("tool") or obj.get("name") or "").strip()
    fn = obj.get("function")
    if not name and isinstance(fn, dict):
        name = str(fn.get("name") or "").strip()
    if not name:
        return None
    arguments = obj.get("parameters")
    if arguments is None:
        arguments = obj.get("arguments")
    if arguments is None:
        arguments = obj.get("input")
    if arguments is None and isinstance(fn, dict):
        arguments = fn.get("arguments")
    if isinstance(arguments, str):
        try:
            arguments = json.loads(arguments)
        except Exception:
            arguments = {}
    if arguments is None:
        arguments = {k: v for k, v in obj.items() if k not in {"tool", "name"}}
    if not isinstance(arguments, dict):
        return None
    return name, arguments
 def _normalize_tool_name(raw: str, available: dict[str, str]) -> str:
    name = raw.strip()
    if not name:
        return ""
    exact = available.get(name.lower())
    if exact:
        return exact
    key = name.lower().replace("-", "_").replace(" ", "_")
    aliases = {
        "bash": "terminal",
        "shell": "terminal",
        "read": "read_file",
        "grep": "search_files",
        "glob": "search_files",
        "edit": "patch",
        "write": "write_file",
    }
    mapped = aliases.get(key)
    if mapped and mapped in available:
        return available[mapped]
    return name
 def _filter_args_by_schema(args: dict[str, Any], schema: dict[str, Any]) -> dict[str, Any]:
    properties = schema.get("properties")
    if not isinstance(properties, dict) or not properties:
        return args
    return {k: v for k, v in args.items() if k in properties}
 def _has_required_args(args: dict[str, Any], schema: dict[str, Any]) -> bool:
    required = schema.get("required")
    if not isinstance(required, list):
        return True
    for key in required:
        if not isinstance(key, str):
            continue
        if key not in args:
            return False
        value = args.get(key)
        if isinstance(value, str) and not value.strip():
            return False
    return True
 def _compact_schema(schema: dict[str, Any]) -> str:
    properties = schema.get("properties")
    if not isinstance(properties, dict) or not properties:
        return ""
    required = {item for item in schema.get("required", []) if isinstance(item, str)}
    parts: list[str] = []
    for key in sorted(properties.keys()):
        parts.append(key if key in required else f"{key}?")
    return ", ".join(parts)
 def _truncate(text: str, max_len: int) -> str:
    text = text.strip()
    if len(text) <= max_len:
        return text
    return text[:max_len] + "..."
 def _force_constraint(choice: EmulatedToolChoice) -> str:
    if choice.mode == "any":
        return "- You must output at least one ```json action``` block in this reply."
    if choice.mode == "tool" and choice.name.strip():
        return f'- You must call "{choice.name.strip()}" in this reply.'
    return ""
 def _normalize_json(text: str) -> str:
    return (
        text.strip()
        .replace("“", '"')
        .replace("”", '"')
        .replace(",\n}", "\n}")
        .replace(",\n]", "\n]")
    )
 def _tool_call_key(name: str, arguments: dict[str, Any]) -> str:
    return f"{name.lower()}\0{json.dumps(arguments, ensure_ascii=False, sort_keys=True)}"
 def _stable_call_id(name: str, arguments: dict[str, Any]) -> str:
    """Derive a deterministic ``call_`` id from the tool name and arguments.

    The id is ``call_`` plus the first 16 hex digits of a UUIDv5 computed over
    the call key, so identical calls always get the same id.
    """
    digest = uuid.uuid5(uuid.NAMESPACE_OID, _tool_call_key(name, arguments)).hex
    return f"call_{digest[:16]}"
--- a/app/lingma_client.py
+++ b/app/lingma_client.py
@@ -419,6 +419,17 @@ class LspWsRpcClient:
        method = msg.get("method")
        params = msg.get("params") or {}
        if method and (
            method.startswith("tool/")
            or method.startswith("mcp/")
            or method in {"chat/answer", "chat/finish"}
        ):
            logger.info(
                "lingma server message method=%s params=%s",
                method,
                params,
            )
        if method == "chat/answer":
            req_id = params.get("requestId")
            stream = self._chat_streams.get(req_id)
--- a/app/main.py
+++ b/app/main.py
@@ -36,6 +36,20 @@ from .http.execution_core import (
    release_execution,
    start_execution,
 )
 from .http.tool_emulation import (
    action_output_prompt,
    extract_anthropic_tool_choice as _em_extract_anthropic_tool_choice,
    extract_anthropic_tools as _em_extract_anthropic_tools,
    extract_openai_tool_choice as _em_extract_openai_tool_choice,
    extract_openai_tools as _em_extract_openai_tools,
    force_tooling_prompt,
    has_tool_request as _em_has_tool_request,
    infer_declared_tool_call_from_text,
    infer_tool_calls_from_text,
    inject_tooling,
    openai_tool_call_from_emulated,
    parse_action_blocks,
 )
 from .http.openai_responses import handle_responses
 from .http.tool_bridge import (
    _allowed_stream_tool_event,
@@ -44,8 +58,10 @@ from .http.tool_bridge import (
    _anthropic_tool_result_block,
    _anthropic_tool_use_block,
    _extract_function_call_event_from_text,
    _extract_hash_tool_call_event_from_text,
    _extract_tool_calls_from_text,
    _forced_tool_fallback_event,
    _infer_tool_event_from_declared_tools,
    _json_string,
    _openai_forced_tool_name,
    _openai_tool_call,
@@ -417,6 +433,82 @@ def _messages_to_prompt(messages: list[dict]) -> str:
    return "\n".join(parts).strip()
 def _messages_to_emulation_prompt(
    messages: list[dict[str, Any]],
    *,
    system_text: str,
    tools: list[dict[str, Any]] | None,
    tool_choice: Any,
 ) -> str:
    filtered: list[tuple[str, str]] = []
    for message in messages:
        role = str(message.get("role") or "").strip().lower()
        if role in {"system", "developer"}:
            continue
        text = flatten_content(message.get("content"))
        if role == "tool":
            text = action_output_prompt(message.get("tool_call_id"), text)
            role = "user"
        if not text:
            continue
        if role not in {"user", "assistant"}:
            continue
        filtered.append((role, text))
    if not filtered:
        return system_text.strip()
    em_tools = _em_extract_openai_tools(tools)
    em_choice = _em_extract_openai_tool_choice(tool_choice)
    injected_system = inject_tooling(system_text, em_tools, em_choice)
    parts: list[str] = []
    for role, text in filtered:
        label = "User" if role == "user" else "Assistant"
        parts.append(f"{label}: {text}")
    if injected_system:
        parts.append(injected_system)
    parts.append("Assistant:")
    return "\n\n".join(parts).strip()
 def _anthropic_messages_to_emulation_prompt(
    messages: list[dict[str, Any]],
    *,
    system_text: str,
    tools: list[dict[str, Any]] | None,
    tool_choice: Any,
 ) -> str:
    filtered: list[tuple[str, str]] = []
    for message in messages:
        role = str(message.get("role") or "").strip().lower()
        text = str(message.get("content") or "").strip()
        if role == "tool":
            text = action_output_prompt(message.get("tool_call_id"), text)
            role = "user"
        if not text:
            continue
        if role not in {"user", "assistant"}:
            continue
        filtered.append((role, text))
    if not filtered:
        return system_text.strip()
    em_tools = _em_extract_anthropic_tools(tools)
    em_choice = _em_extract_anthropic_tool_choice(tool_choice)
    injected_system = inject_tooling(system_text, em_tools, em_choice)
    parts: list[str] = []
    for role, text in filtered:
        label = "User" if role == "user" else "Assistant"
        parts.append(f"{label}: {text}")
    if injected_system:
        parts.append(injected_system)
    parts.append("Assistant:")
    return "\n\n".join(parts).strip()
 def _include_usage(stream_options: dict | None) -> bool:
    if not isinstance(stream_options, dict):
        return False
@@ -525,6 +617,20 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
    is_reply = execution.is_reply
    include_usage = _include_usage(req.stream_options)
    em_tools = _em_extract_openai_tools(req.tools)
    em_choice = _em_extract_openai_tool_choice(req.tool_choice)
    if _em_has_tool_request(em_tools, em_choice):
        system_parts = [
            flatten_content(m.content)
            for m in req.messages
            if m.role in {"system", "developer"} and flatten_content(m.content)
        ]
        prompt = _messages_to_emulation_prompt(
            messages_dump,
            system_text="\n\n".join(system_parts),
            tools=req.tools,
            tool_choice=req.tool_choice,
        )
    try:
        started = await start_execution(
@@ -709,6 +815,11 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
                            merged_text,
                            forced_tool_name=forced_tool_name,
                        )
                        if inferred is None:
                            inferred = _extract_hash_tool_call_event_from_text(
                                merged_text,
                                forced_tool_name=forced_tool_name,
                            )
                        if inferred is None:
                            inferred = _forced_tool_fallback_event(
                                merged_text,
@@ -747,6 +858,72 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
                    if buffered_text_parts and forced_tool_name and saw_tool_call:
                        buffered_text_parts.clear()
                    if buffered_text_parts and req.tools and not saw_tool_call:
                        merged_text = "".join(buffered_text_parts)
                        inferred = _infer_tool_event_from_declared_tools(
                            merged_text,
                            tools=req.tools,
                        )
                        if inferred is not None:
                            tool_id = "call_inferred_0"
                            tool_call_indexes[tool_id] = 0
                            saw_tool_call = True
                            payload = {
                                "id": completion_id,
                                "object": "chat.completion.chunk",
                                "created": created,
                                "model": model,
                                "choices": [
                                    {
                                        "index": 0,
                                        "delta": {
                                            "tool_calls": [
                                                {
                                                    "index": 0,
                                                    **_openai_tool_call(
                                                        inferred, forced_id=tool_id
                                                    ),
                                                }
                                            ]
                                        },
                                        "finish_reason": None,
                                    }
                                ],
                            }
                            buffered_text_parts.clear()
                            yield f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"
                    if buffered_text_parts and req.tools and not saw_tool_call:
                        merged_text = "".join(buffered_text_parts)
                        parsed_calls, remaining = parse_action_blocks(merged_text, em_tools)
                        if parsed_calls:
                            saw_tool_call = True
                            for i, call in enumerate(parsed_calls):
                                tool_id = call.id or f"call_inferred_{i}"
                                tool_call_indexes[tool_id] = i
                                payload = {
                                    "id": completion_id,
                                    "object": "chat.completion.chunk",
                                    "created": created,
                                    "model": model,
                                    "choices": [
                                        {
                                            "index": 0,
                                            "delta": {
                                                "tool_calls": [
                                                    {
                                                        "index": i,
                                                        **openai_tool_call_from_emulated(call),
                                                    }
                                                ]
                                            },
                                            "finish_reason": None,
                                        }
                                    ],
                                }
                                yield f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"
                            buffered_text_parts = [remaining] if remaining else []
                    done_payload = {
                        "id": completion_id,
                        "object": "chat.completion.chunk",
@@ -866,6 +1043,11 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
                message_content,
                forced_tool_name=forced_tool_name,
            )
            if inferred is None:
                inferred = _extract_hash_tool_call_event_from_text(
                    message_content,
                    forced_tool_name=forced_tool_name,
                )
            if inferred is None:
                inferred = _forced_tool_fallback_event(
                    message_content,
@@ -878,6 +1060,59 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
                )
                saw_tool_call = True
                message_content = ""
        if not saw_tool_call and req.tools:
            inferred = _infer_tool_event_from_declared_tools(
                message_content,
                tools=req.tools,
            )
            if inferred is not None:
                tool_calls.append(
                    _openai_tool_call(inferred, forced_id="call_inferred_0")
                )
                saw_tool_call = True
                message_content = ""
        if not saw_tool_call and em_tools:
            parsed_calls, remaining = parse_action_blocks(message_content, em_tools)
            if parsed_calls:
                for call in parsed_calls:
                    tool_calls.append(openai_tool_call_from_emulated(call))
                saw_tool_call = True
                message_content = remaining
        if not saw_tool_call and em_tools:
            inferred_call = infer_declared_tool_call_from_text(message_content, em_tools)
            if inferred_call is None:
                inferred_calls = infer_tool_calls_from_text(message_content, em_tools)
                inferred_call = inferred_calls[0] if inferred_calls else None
            if inferred_call is not None:
                tool_calls.append(openai_tool_call_from_emulated(inferred_call))
                saw_tool_call = True
                message_content = ""
        if not saw_tool_call and em_tools:
            retry_prompt = f"{prompt}\n\n{force_tooling_prompt(em_choice)}"
            retry_result = await inst.client.chat_complete(
                retry_prompt,
                model,
                ask_mode,
                session_id=None,
                is_reply=False,
                tool_config=tool_config,
            )
            retry_text = retry_result.get("text") or ""
            parsed_calls, remaining = parse_action_blocks(retry_text, em_tools)
            if parsed_calls:
                for call in parsed_calls:
                    tool_calls.append(openai_tool_call_from_emulated(call))
                saw_tool_call = True
                message_content = remaining
            else:
                inferred_call = infer_declared_tool_call_from_text(retry_text, em_tools)
                if inferred_call is None:
                    inferred_calls = infer_tool_calls_from_text(retry_text, em_tools)
                    inferred_call = inferred_calls[0] if inferred_calls else None
                if inferred_call is not None:
                    tool_calls.append(openai_tool_call_from_emulated(inferred_call))
                    saw_tool_call = True
                    message_content = ""
        response = ChatCompletionResponse(
            id=f"chatcmpl-{uuid.uuid4().hex}",
            created=int(time.time()),
@@ -1049,6 +1284,16 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
    model = execution.model
    prompt = execution.prompt
    is_reply = execution.is_reply
    em_anthropic_tools = _em_extract_anthropic_tools(req.tools)
    em_anthropic_choice = _em_extract_anthropic_tool_choice(req.tool_choice)
    if _em_has_tool_request(em_anthropic_tools, em_anthropic_choice):
        system_text = flatten_anthropic_content(req.system) if req.system else ""
        prompt = _anthropic_messages_to_emulation_prompt(
            messages_dump,
            system_text=system_text,
            tools=req.tools,
            tool_choice=req.tool_choice,
        )
    try:
        started = await start_execution(
@@ -1090,12 +1335,14 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
            stream_meta: dict = {}
            max_tokens = req.max_tokens
            forced_tool_name = _anthropic_forced_tool_name(req.tool_choice)
            aggregate_emulated_tools = bool(em_anthropic_tools)
            async def event_stream(_ticket=ticket, _inst=inst, _meta=stream_meta):
                success = False
                block_index = 0
                text_block_open = False
                saw_pending_tool_use = False
                buffered_text_parts: list[str] = []
                try:
                    # 1) message_start — Anthropic SDKs read this first to get
                    #    the message envelope (id/model/initial usage).
@@ -1196,7 +1443,78 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
                        text = _stream_text(chunk)
                        if not text:
                            continue
-                        completion_tokens_holder["n"] += estimate_tokens(text)
+                        if aggregate_emulated_tools:
                            buffered_text_parts.append(text)
                            completion_tokens_holder["n"] += estimate_tokens(text)
                            continue
                        buffered_text_parts.append(text)
                        merged_text = "".join(buffered_text_parts)
                        parsed_calls, remaining = parse_action_blocks(
                            merged_text, em_anthropic_tools
                        )
                        if not parsed_calls:
                            inferred = infer_declared_tool_call_from_text(
                                merged_text,
                                em_anthropic_tools,
                            )
                            if inferred is None:
                                inferred_calls = infer_tool_calls_from_text(
                                    merged_text,
                                    em_anthropic_tools,
                                )
                                inferred = inferred_calls[0] if inferred_calls else None
                            if inferred is not None:
                                parsed_calls = [inferred]
                                remaining = ""
                        if parsed_calls:
                            if text_block_open:
                                yield _sse(
                                    "content_block_stop",
                                    {"type": "content_block_stop", "index": block_index},
                                )
                                block_index += 1
                                text_block_open = False
                            saw_pending_tool_use = True
                            for call in parsed_calls:
                                yield _sse(
                                    "content_block_start",
                                    {
                                        "type": "content_block_start",
                                        "index": block_index,
                                        "content_block": {
                                            "type": "tool_use",
                                            "id": call.id,
                                            "name": call.name,
                                            "input": {},
                                        },
                                    },
                                )
                                yield _sse(
                                    "content_block_delta",
                                    {
                                        "type": "content_block_delta",
                                        "index": block_index,
                                        "delta": {
                                            "type": "input_json_delta",
                                            "partial_json": json.dumps(call.arguments, ensure_ascii=False),
                                        },
                                    },
                                )
                                yield _sse(
                                    "content_block_stop",
                                    {"type": "content_block_stop", "index": block_index},
                                )
                                block_index += 1
                            buffered_text_parts = [remaining] if remaining else []
                            if not buffered_text_parts:
                                continue
                        text_to_emit = "".join(buffered_text_parts)
                        buffered_text_parts.clear()
                        completion_tokens_holder["n"] += estimate_tokens(text_to_emit)
                        if not text_block_open:
                            yield _sse(
                                "content_block_start",
@@ -1213,10 +1531,106 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
                            {
                                "type": "content_block_delta",
                                "index": block_index,
-                                "delta": {"type": "text_delta", "text": text},
+                                "delta": {"type": "text_delta", "text": text_to_emit},
                            },
                        )
                    if aggregate_emulated_tools:
                        merged_text = "".join(buffered_text_parts)
                        parsed_calls, remaining = parse_action_blocks(
                            merged_text, em_anthropic_tools
                        )
                        if not parsed_calls:
                            inferred = infer_declared_tool_call_from_text(
                                merged_text,
                                em_anthropic_tools,
                            )
                            if inferred is None:
                                inferred_calls = infer_tool_calls_from_text(
                                    merged_text,
                                    em_anthropic_tools,
                                )
                                inferred = inferred_calls[0] if inferred_calls else None
                            if inferred is not None:
                                parsed_calls = [inferred]
                                remaining = ""
                        if parsed_calls:
                            if remaining.strip():
                                yield _sse(
                                    "content_block_start",
                                    {
                                        "type": "content_block_start",
                                        "index": block_index,
                                        "content_block": {"type": "text", "text": ""},
                                    },
                                )
                                yield _sse(
                                    "content_block_delta",
                                    {
                                        "type": "content_block_delta",
                                        "index": block_index,
                                        "delta": {"type": "text_delta", "text": remaining},
                                    },
                                )
                                yield _sse(
                                    "content_block_stop",
                                    {"type": "content_block_stop", "index": block_index},
                                )
                                block_index += 1
                            for call in parsed_calls:
                                saw_pending_tool_use = True
                                yield _sse(
                                    "content_block_start",
                                    {
                                        "type": "content_block_start",
                                        "index": block_index,
                                        "content_block": {
                                            "type": "tool_use",
                                            "id": call.id,
                                            "name": call.name,
                                            "input": {},
                                        },
                                    },
                                )
                                yield _sse(
                                    "content_block_delta",
                                    {
                                        "type": "content_block_delta",
                                        "index": block_index,
                                        "delta": {
                                            "type": "input_json_delta",
                                            "partial_json": json.dumps(call.arguments, ensure_ascii=False),
                                        },
                                    },
                                )
                                yield _sse(
                                    "content_block_stop",
                                    {"type": "content_block_stop", "index": block_index},
                                )
                                block_index += 1
                        elif merged_text.strip():
                            yield _sse(
                                "content_block_start",
                                {
                                    "type": "content_block_start",
                                    "index": block_index,
                                    "content_block": {"type": "text", "text": ""},
                                },
                            )
                            yield _sse(
                                "content_block_delta",
                                {
                                    "type": "content_block_delta",
                                    "index": block_index,
                                    "delta": {"type": "text_delta", "text": merged_text},
                                },
                            )
                            yield _sse(
                                "content_block_stop",
                                {"type": "content_block_stop", "index": block_index},
                            )
                    if text_block_open:
                        yield _sse(
                            "content_block_stop",
@@ -1323,6 +1737,135 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
            else:
                saw_pending_tool_use = True
        if not saw_tool_event and em_anthropic_tools:
            parsed_calls, remaining = parse_action_blocks(text, em_anthropic_tools)
            if parsed_calls:
                content_blocks = []
                if remaining:
                    content_blocks.append({"type": "text", "text": remaining})
                for call in parsed_calls:
                    content_blocks.append(
                        {
                            "type": "tool_use",
                            "id": call.id,
                            "name": call.name,
                            "input": call.arguments,
                        }
                    )
                saw_tool_event = True
                saw_pending_tool_use = True
                text = remaining
        if not saw_tool_event and em_anthropic_tools:
            inferred_call = infer_declared_tool_call_from_text(text, em_anthropic_tools)
            if inferred_call is None:
                inferred_calls = infer_tool_calls_from_text(text, em_anthropic_tools)
                inferred_call = inferred_calls[0] if inferred_calls else None
            if inferred_call is not None:
                content_blocks = [
                    {
                        "type": "tool_use",
                        "id": inferred_call.id,
                        "name": inferred_call.name,
                        "input": inferred_call.arguments,
                    }
                ]
                saw_tool_event = True
                saw_pending_tool_use = True
                text = ""
        if not saw_tool_event and em_anthropic_tools:
            retry_prompt = f"{prompt}\n\n{force_tooling_prompt(em_anthropic_choice)}"
            retry_result = await inst.client.chat_complete(
                retry_prompt,
                model,
                ask_mode,
                session_id=None,
                is_reply=False,
                tool_config=tool_config,
            )
            retry_text = retry_result.get("text") or ""
            parsed_calls, remaining = parse_action_blocks(retry_text, em_anthropic_tools)
            if parsed_calls:
                content_blocks = []
                if remaining:
                    content_blocks.append({"type": "text", "text": remaining})
                for call in parsed_calls:
                    content_blocks.append(
                        {
                            "type": "tool_use",
                            "id": call.id,
                            "name": call.name,
                            "input": call.arguments,
                        }
                    )
                saw_tool_event = True
                saw_pending_tool_use = True
                text = remaining
            else:
                inferred_call = infer_declared_tool_call_from_text(retry_text, em_anthropic_tools)
                if inferred_call is None:
                    inferred_calls = infer_tool_calls_from_text(retry_text, em_anthropic_tools)
                    inferred_call = inferred_calls[0] if inferred_calls else None
                if inferred_call is not None:
                    content_blocks = [
                        {
                            "type": "tool_use",
                            "id": inferred_call.id,
                            "name": inferred_call.name,
                            "input": inferred_call.arguments,
                        }
                    ]
                    saw_tool_event = True
                    saw_pending_tool_use = True
                    text = ""
        if not saw_tool_event and em_anthropic_tools and text.strip():
            retry_prompt = f"{prompt}\n\n{force_tooling_prompt(em_anthropic_choice)}"
            retry_result = await inst.client.chat_complete(
                retry_prompt,
                model,
                ask_mode,
                session_id=None,
                is_reply=False,
                tool_config=tool_config,
            )
            retry_text = retry_result.get("text") or ""
            parsed_calls, remaining = parse_action_blocks(retry_text, em_anthropic_tools)
            if parsed_calls:
                content_blocks = []
                if remaining:
                    content_blocks.append({"type": "text", "text": remaining})
                for call in parsed_calls:
                    content_blocks.append(
                        {
                            "type": "tool_use",
                            "id": call.id,
                            "name": call.name,
                            "input": call.arguments,
                        }
                    )
                saw_tool_event = True
                saw_pending_tool_use = True
                text = remaining
            else:
                inferred_call = infer_declared_tool_call_from_text(retry_text, em_anthropic_tools)
                if inferred_call is None:
                    inferred_calls = infer_tool_calls_from_text(retry_text, em_anthropic_tools)
                    inferred_call = inferred_calls[0] if inferred_calls else None
                if inferred_call is not None:
                    content_blocks = [
                        {
                            "type": "tool_use",
                            "id": inferred_call.id,
                            "name": inferred_call.name,
                            "input": inferred_call.arguments,
                        }
                    ]
                    saw_tool_event = True
                    saw_pending_tool_use = True
                    text = ""
        if not saw_tool_event and forced_tool_name:
            inferred = _extract_function_call_event_from_text(
                text,
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,3 +3,4 @@ uvicorn[standard]==0.30.6
 websockets==13.1
 pydantic==2.9.2
 playwright==1.52.0
 mcp==1.12.4
--- a/scripts/smoke_tool_calls.sh
+++ b/scripts/smoke_tool_calls.sh
@@ -0,0 +1,117 @@
 #!/usr/bin/env bash
 # End-to-end smoke checks for the gateway's tool-calling routes.
 # This section resolves the repository's .env next to this script and
 # derives BASE_URL and API_KEY from it for the checks that follow.
 set -euo pipefail
 ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
 ENV_FILE="$ROOT_DIR/.env"
 if [[ ! -f "$ENV_FILE" ]]; then
  printf 'missing .env: %s\n' "$ENV_FILE" >&2
  exit 1
 fi
 # read_env_value KEY DEFAULT [first]
 # Print KEY's value from $ENV_FILE, or DEFAULT when KEY is absent. With the
 # optional third argument "first", print only the first comma-separated
 # item, stripped of surrounding whitespace.
 # The file path is passed as an argument so the same file that was checked
 # above is the one that gets parsed, whatever directory the repo lives in.
 read_env_value() {
  python3 - "$ENV_FILE" "$@" <<'PY'
 import sys
 from pathlib import Path
 env_path, key, default, *mode = sys.argv[1:]
 vals = {}
 for line in Path(env_path).read_text().splitlines():
    line = line.strip()
    if not line or line.startswith('#') or '=' not in line:
        continue
    k, v = line.split('=', 1)
    vals[k.strip()] = v.strip()
 value = vals.get(key, default)
 if mode == ['first']:
    value = value.split(',')[0].strip()
 print(value)
 PY
 }
 PORT="$(read_env_value PORT 13013)"
 API_KEY="$(read_env_value API_KEYS '' first)"
 BASE_URL="http://127.0.0.1:${PORT}"
 # Print the banner for step $1 (of 5) with title $2.
 announce() {
  printf '\n[%s/5] %s\n' "$1" "$2"
 }
 # POST JSON body $1 to the OpenAI-style chat completions route; any
 # remaining arguments are handed to curl as extra flags.
 post_openai_chat() {
  local payload="$1"
  shift
  curl -fsS "$@" "$BASE_URL/v1/chat/completions" \
   -H "Authorization: Bearer ${API_KEY}" \
   -H 'Content-Type: application/json' \
   -d "$payload"
 }
 # POST JSON body $1 to the Anthropic-style messages route; any remaining
 # arguments are handed to curl as extra flags.
 post_anthropic_messages() {
  local payload="$1"
  shift
  curl -fsS "$@" "$BASE_URL/v1/messages" \
   -H "x-api-key: ${API_KEY}" \
   -H 'anthropic-version: 2023-06-01' \
   -H 'Content-Type: application/json' \
   -d "$payload"
 }
 announce 1 '/v1/models'
 curl -fsS "$BASE_URL/v1/models" \
  -H "Authorization: Bearer ${API_KEY}" | python3 -m json.tool
 announce 2 'OpenAI non-stream tool call'
 post_openai_chat '{
    "model": "org_auto",
    "stream": false,
    "messages": [
      {"role": "system", "content": "Use tools when available."},
      {"role": "user", "content": "Use fetch_weather for Hangzhou and return the tool call."}
    ],
    "tools": [
      {"type": "function", "function": {"name": "fetch_weather", "description": "Get weather for a city", "parameters": {"type": "object", "properties": {"city": {"type": "string"}}, "required": ["city"]}}}
    ],
    "tool_choice": {"type": "function", "function": {"name": "fetch_weather"}}
  }' | python3 -m json.tool
 announce 3 'Anthropic non-stream tool use'
 post_anthropic_messages '{
    "model": "claude-3-5-sonnet-20241022",
    "max_tokens": 256,
    "stream": false,
    "messages": [
      {"role": "user", "content": "Use fetch_weather for Hangzhou and return the tool call."}
    ],
    "tools": [
      {"name": "fetch_weather", "description": "Get weather for a city", "input_schema": {"type": "object", "properties": {"city": {"type": "string"}}, "required": ["city"]}}
    ],
    "tool_choice": {"type": "tool", "name": "fetch_weather"}
  }' | python3 -m json.tool
 # Streaming checks pass -N so curl does not buffer the event stream.
 announce 4 'OpenAI stream tool call'
 post_openai_chat '{
    "model": "org_auto",
    "stream": true,
    "messages": [
      {"role": "system", "content": "Use tools when available."},
      {"role": "user", "content": "Use fetch_weather for Hangzhou and return the tool call."}
    ],
    "tools": [
      {"type": "function", "function": {"name": "fetch_weather", "description": "Get weather for a city", "parameters": {"type": "object", "properties": {"city": {"type": "string"}}, "required": ["city"]}}}
    ],
    "tool_choice": {"type": "function", "function": {"name": "fetch_weather"}}
  }' -N
 announce 5 'Anthropic stream tool use'
 post_anthropic_messages '{
    "model": "claude-3-5-sonnet-20241022",
    "max_tokens": 256,
    "stream": true,
    "messages": [
      {"role": "user", "content": "Use fetch_weather for Hangzhou and return the tool call."}
    ],
    "tools": [
      {"name": "fetch_weather", "description": "Get weather for a city", "input_schema": {"type": "object", "properties": {"city": {"type": "string"}}, "required": ["city"]}}
    ],
    "tool_choice": {"type": "tool", "name": "fetch_weather"}
  }' -N
 printf '\nsmoke tool-call checks completed\n'
--- a/tests/TEST_PLAN.md
+++ b/tests/TEST_PLAN.md
@@ -42,6 +42,7 @@
 1. 定点执行新增测试文件。
 2. 全量执行 `tests/` 下 `test_*.py`。
 3. 汇总通过率与失败项（若失败，给出定位与修复建议）。
 4. Docker 运行态执行 `bash scripts/smoke_tool_calls.sh`，验证 OpenAI / Anthropic 的 stream / non-stream 工具调用。
 ## 6. 执行命令
 ```bash
@@ -50,4 +51,5 @@ python3 -m unittest tests/test_session_cache_tooling.py
 python3 -m unittest tests/test_schema_normalization.py
 python3 -m unittest tests/test_tool_call_bridge.py
 python3 -m unittest discover -s tests -p "test_*.py"
 bash scripts/smoke_tool_calls.sh
 ```
--- a/tests/test_pool_stats_config.py
+++ b/tests/test_pool_stats_config.py
@@ -3,10 +3,12 @@ from __future__ import annotations
 import json
 import os
 import sys
 import tempfile
 import types
 import unittest
 from types import SimpleNamespace
 from unittest.mock import patch
 import zipfile
 # app.lingma_pool imports auto_login; tests here don't execute Playwright paths.
 # Stub module import so test environments without playwright can import pool code.
@@ -28,6 +30,7 @@ sys.modules.setdefault("playwright", _playwright)
 sys.modules.setdefault("playwright.async_api", _playwright_async)
 from app.config import _parse_accounts, load_settings
 from app.bootstrap_lingma import bootstrap_from_vsix
 from app.lingma_pool import LingmaPool
 from app.stats import StatsCollector, estimate_tokens
@@ -212,5 +215,57 @@ class ConfigParsingTests(unittest.TestCase):
        self.assertEqual(settings.tool_allowlist, [])
 class BootstrapLingmaTests(unittest.TestCase):
    """Exercise ``bootstrap_from_vsix`` against a synthetic VSIX archive."""

    def _make_test_vsix(self, root: str) -> str:
        """Create a minimal VSIX inside *root* and return its path.

        The VSIX contains a single entry,
        ``extension/dist/bin/lingma-2.5.20.zip``, which is itself a zip
        holding a ``2.5.20`` Lingma binary and ``extension/main.js``.
        The intermediate ``nested.zip`` is left in *root* as well.
        """
        inner_path = os.path.join(root, "nested.zip")
        inner_entries = (
            ("2.5.20/x86_64_linux/Lingma", b"new-binary"),
            ("2.5.20/extension/main.js", b"console.log('ok')"),
        )
        with zipfile.ZipFile(inner_path, "w") as inner_zip:
            for arcname, content in inner_entries:
                inner_zip.writestr(arcname, content)

        with open(inner_path, "rb") as handle:
            inner_bytes = handle.read()

        outer_path = os.path.join(root, "test.vsix")
        with zipfile.ZipFile(outer_path, "w") as outer_zip:
            outer_zip.writestr("extension/dist/bin/lingma-2.5.20.zip", inner_bytes)
        return outer_path

    def test_bootstrap_refreshes_when_extension_assets_missing(self) -> None:
        """Bootstrap must re-extract when the release dir lacks extension files.

        Seeds a bin directory with an old binary and a ``2.5.20`` bootstrap
        marker but no ``extension/`` assets, runs ``bootstrap_from_vsix`` with
        refresh flags disabled, and expects both the binary to be replaced and
        ``extension/main.js`` to appear under the release directory.
        """
        with tempfile.TemporaryDirectory() as workdir:
            install_dir = os.path.join(workdir, "data", "bin")
            versioned_dir = os.path.join(install_dir, "2.5.20")
            os.makedirs(versioned_dir, exist_ok=True)

            binary_path = os.path.join(install_dir, "Lingma")
            with open(binary_path, "wb") as out:
                out.write(b"old-binary")

            marker_path = os.path.join(install_dir, ".lingma-bootstrap.json")
            with open(marker_path, "w", encoding="utf-8") as out:
                json.dump({"version": "2.5.20", "release_root": "2.5.20"}, out)

            package_path = self._make_test_vsix(workdir)
            overrides = {
                "LINGMA_BIN": binary_path,
                "LINGMA_SOURCE_TYPE": "vsix",
                "LINGMA_VSIX_URL": f"file://{package_path}",
                "LINGMA_BOOTSTRAP_ALWAYS": "false",
                "LINGMA_FORCE_REFRESH": "false",
            }
            with patch.dict(os.environ, overrides, clear=False):
                bootstrap_from_vsix()

            with open(binary_path, "rb") as src:
                installed = src.read()
            self.assertEqual(installed, b"new-binary")

            main_js = os.path.join(versioned_dir, "extension", "main.js")
            self.assertTrue(os.path.exists(main_js))
 # Allow running this test module directly as a script in addition to
 # discovery via `python3 -m unittest`.
 if __name__ == "__main__":
    unittest.main()
--- a/tests/test_tool_call_bridge.py
+++ b/tests/test_tool_call_bridge.py
@@ -388,6 +388,169 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
            {"query": "gateway"},
        )
    async def test_openai_non_stream_synthesizes_tool_call_from_hash_tool_call_block(
        self,
    ) -> None:
        """Non-stream OpenAI path: a ``#Tool Call`` fenced block becomes tool_calls.

        Upstream returns only text (no tool events) with an explicit
        ``tool_choice``; the response must carry an empty content string, one
        ``fetch_weather`` call with the parsed arguments, and
        ``finish_reason == "tool_calls"``.
        """
        upstream_text = '#Tool Call\n```fetch_weather\n{"city": "Hangzhou"}\n```\n'
        fake_client = _FakeClient(
            stream_events=[],
            complete_result={
                "text": upstream_text,
                "toolEvents": [],
                "sessionId": "sess-fallback-hash-tool-call-openai",
            },
        )
        city_schema = {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        }
        weather_tool = {
            "type": "function",
            "function": {"name": "fetch_weather", "parameters": city_schema},
        }
        req = ChatCompletionsRequest(
            model="org_auto",
            messages=[{"role": "user", "content": "hi"}],
            stream=False,
            tools=[weather_tool],
            tool_choice={"type": "function", "function": {"name": "fetch_weather"}},
        )
        with (
            patch.object(main, "pool", _FakePool(_FakeInstance(fake_client))),
            patch.object(main, "chat_guard", _FakeGuard()),
            patch.object(
                main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})
            ),
            patch.object(
                main.stats_collector, "record_chat", AsyncMock(return_value=None)
            ),
        ):
            response = await main.v1_chat_completions(
                req, _make_request("/v1/chat/completions")
            )

        choice = json.loads(response.body)["choices"][0]
        message = choice["message"]
        self.assertEqual(choice["finish_reason"], "tool_calls")
        self.assertEqual(message["content"], "")

        function_call = message["tool_calls"][0]["function"]
        self.assertEqual(function_call["name"], "fetch_weather")
        self.assertEqual(json.loads(function_call["arguments"]), {"city": "Hangzhou"})
    async def test_openai_non_stream_synthesizes_tool_call_from_hash_tool_call_block_without_tool_choice(
        self,
    ) -> None:
        """Same ``#Tool Call`` synthesis as the forced variant, but with no tool_choice.

        The request only declares ``tools``; the text-only upstream answer must
        still be converted into a single ``fetch_weather`` tool call.
        """
        upstream_text = '#Tool Call\n```fetch_weather\n{"city": "Hangzhou"}\n```\n'
        fake_client = _FakeClient(
            stream_events=[],
            complete_result={
                "text": upstream_text,
                "toolEvents": [],
                "sessionId": "sess-fallback-hash-tool-call-openai-no-choice",
            },
        )
        city_schema = {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        }
        weather_tool = {
            "type": "function",
            "function": {"name": "fetch_weather", "parameters": city_schema},
        }
        req = ChatCompletionsRequest(
            model="org_auto",
            messages=[{"role": "user", "content": "hi"}],
            stream=False,
            tools=[weather_tool],
        )
        with (
            patch.object(main, "pool", _FakePool(_FakeInstance(fake_client))),
            patch.object(main, "chat_guard", _FakeGuard()),
            patch.object(
                main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})
            ),
            patch.object(
                main.stats_collector, "record_chat", AsyncMock(return_value=None)
            ),
        ):
            response = await main.v1_chat_completions(
                req, _make_request("/v1/chat/completions")
            )

        choice = json.loads(response.body)["choices"][0]
        message = choice["message"]
        self.assertEqual(choice["finish_reason"], "tool_calls")
        self.assertEqual(message["content"], "")

        function_call = message["tool_calls"][0]["function"]
        self.assertEqual(function_call["name"], "fetch_weather")
        self.assertEqual(json.loads(function_call["arguments"]), {"city": "Hangzhou"})
    async def test_openai_non_stream_synthesizes_tool_call_from_json_action_block(
        self,
    ) -> None:
        """Non-stream OpenAI path: a ``json action`` fenced block becomes tool_calls.

        The upstream text carries ``{"tool": ..., "parameters": ...}``; the
        response must expose it as a ``fetch_weather`` call whose arguments
        equal the ``parameters`` object, with empty content.
        """
        upstream_text = '```json action\n{"tool":"fetch_weather","parameters":{"city":"Hangzhou"}}\n```'
        fake_client = _FakeClient(
            stream_events=[],
            complete_result={
                "text": upstream_text,
                "toolEvents": [],
                "sessionId": "sess-action-block-openai",
            },
        )
        city_schema = {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        }
        weather_tool = {
            "type": "function",
            "function": {"name": "fetch_weather", "parameters": city_schema},
        }
        req = ChatCompletionsRequest(
            model="org_auto",
            messages=[{"role": "user", "content": "hi"}],
            stream=False,
            tools=[weather_tool],
        )
        with (
            patch.object(main, "pool", _FakePool(_FakeInstance(fake_client))),
            patch.object(main, "chat_guard", _FakeGuard()),
            patch.object(
                main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})
            ),
            patch.object(
                main.stats_collector, "record_chat", AsyncMock(return_value=None)
            ),
        ):
            response = await main.v1_chat_completions(
                req, _make_request("/v1/chat/completions")
            )

        choice = json.loads(response.body)["choices"][0]
        message = choice["message"]
        self.assertEqual(choice["finish_reason"], "tool_calls")
        self.assertEqual(message["content"], "")

        function_call = message["tool_calls"][0]["function"]
        self.assertEqual(function_call["name"], "fetch_weather")
        self.assertEqual(json.loads(function_call["arguments"]), {"city": "Hangzhou"})
    async def test_openai_stream_synthesizes_tool_call_from_tool_code(
        self,
    ) -> None:
@@ -439,6 +602,55 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
        self.assertIn('"finish_reason": "tool_calls"', body)
        self.assertIn("data: [DONE]", body)
    async def test_openai_stream_synthesizes_tool_call_from_hash_tool_call_block_without_tool_choice(
        self,
    ) -> None:
        """Streaming OpenAI path: a ``#Tool Call`` block split across chunks.

        The upstream emits the fenced block in three text events and no tool
        events; the request declares ``tools`` but no ``tool_choice``. The
        collected SSE body must contain a ``fetch_weather`` tool call, finish
        with ``finish_reason == "tool_calls"`` and end with the ``[DONE]``
        sentinel.
        """
        fake_client = _FakeClient(
            stream_events=[
                {"type": "text", "text": "#Tool Call\n```fetch_weather\n"},
                {"type": "text", "text": '{"city": "Hangzhou"}\n'},
                {"type": "text", "text": "```\n"},
            ],
            complete_result={},
        )
        req = ChatCompletionsRequest(
            model="org_auto",
            messages=[{"role": "user", "content": "hi"}],
            stream=True,
            tools=[
                {
                    "type": "function",
                    "function": {
                        "name": "fetch_weather",
                        "parameters": {
                            "type": "object",
                            "properties": {"city": {"type": "string"}},
                            "required": ["city"],
                        },
                    },
                }
            ],
        )
        with (
            patch.object(main, "pool", _FakePool(_FakeInstance(fake_client))),
            patch.object(main, "chat_guard", _FakeGuard()),
            patch.object(
                main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})
            ),
            patch.object(
                main.stats_collector, "record_chat", AsyncMock(return_value=None)
            ),
        ):
            response = await main.v1_chat_completions(
                req, _make_request("/v1/chat/completions")
            )
            # The stream is consumed while the patches are still active.
            body = await _collect_stream(response)
        self.assertIn('"tool_calls"', body)
        self.assertIn('"fetch_weather"', body)
        self.assertIn('"finish_reason": "tool_calls"', body)
        # Match the sibling tool_code stream test: a stream that emits the tool
        # call but never terminates must not pass.
        self.assertIn("data: [DONE]", body)
    async def test_openai_non_stream_synthesizes_tool_call_from_json_array(self) -> None:
        fake_client = _FakeClient(
            stream_events=[],
@@ -1918,6 +2130,117 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
        self.assertEqual(messages_dump[3]["role"], "user")
        self.assertEqual(messages_dump[3]["content"], "follow up")
    async def test_openai_tool_result_is_emulated_into_followup_prompt(self) -> None:
        """A prior tool call and its ``tool`` result are rendered into the prompt.

        Sends an assistant tool_call, the matching tool result and a user
        follow-up, then inspects the first positional argument the spy client
        received for its completion call.
        """
        spy_client = _SpyClient(
            stream_events=[],
            complete_result={
                "text": "done",
                "toolEvents": [],
                "sessionId": "sess-emulated-tool-result",
            },
        )
        prior_call = {
            "id": "call_1",
            "type": "function",
            "function": {"name": "fetch_weather", "arguments": '{"city":"Hangzhou"}'},
        }
        conversation = [
            {"role": "assistant", "content": None, "tool_calls": [prior_call]},
            {"role": "tool", "tool_call_id": "call_1", "content": '{"temperature":"22C"}'},
            {"role": "user", "content": "continue"},
        ]
        weather_tool = {
            "type": "function",
            "function": {
                "name": "fetch_weather",
                "parameters": {
                    "type": "object",
                    "properties": {"city": {"type": "string"}},
                    "required": ["city"],
                },
            },
        }
        req = ChatCompletionsRequest(
            model="org_auto",
            messages=conversation,
            stream=False,
            tools=[weather_tool],
        )
        with (
            patch.object(main, "pool", _FakePool(_FakeInstance(spy_client))),
            patch.object(main, "chat_guard", _FakeGuard()),
            patch.object(
                main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})
            ),
            patch.object(
                main.stats_collector, "record_chat", AsyncMock(return_value=None)
            ),
        ):
            await main.v1_chat_completions(req, _make_request("/v1/chat/completions"))

        prompt = spy_client.last_complete_args[0]
        expected_fragments = (
            "Tool result for call_1:",
            '{"temperature":"22C"}',
            "Assistant:",
        )
        for fragment in expected_fragments:
            self.assertIn(fragment, prompt)
    async def test_anthropic_non_stream_synthesizes_tool_use_from_json_action_block(
        self,
    ) -> None:
        """Non-stream Anthropic path: a ``json action`` block becomes ``tool_use``.

        Upstream returns only text (no tool events) and the request forces
        ``fetch_weather`` via ``tool_choice``. The response must stop with
        ``tool_use`` and contain a ``tool_use`` block whose ``input`` equals the
        action's ``parameters`` object.
        """
        fake_client = _FakeClient(
            stream_events=[],
            complete_result={
                "text": '```json action\n{"tool":"fetch_weather","parameters":{"city":"Hangzhou"}}\n```',
                "toolEvents": [],
                "sessionId": "sess-anthropic-action-block",
            },
        )
        req = AnthropicMessagesRequest(
            model="claude-3-5-sonnet-20241022",
            max_tokens=64,
            messages=[{"role": "user", "content": "weather"}],
            stream=False,
            tools=[
                {
                    "name": "fetch_weather",
                    "description": "Get weather for a city",
                    "input_schema": {
                        "type": "object",
                        "properties": {"city": {"type": "string"}},
                        "required": ["city"],
                    },
                }
            ],
            tool_choice={"type": "tool", "name": "fetch_weather"},
        )
        with (
            patch.object(main, "pool", _FakePool(_FakeInstance(fake_client))),
            patch.object(main, "chat_guard", _FakeGuard()),
            patch.object(
                main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})
            ),
            patch.object(
                main.stats_collector, "record_chat", AsyncMock(return_value=None)
            ),
            # The request below authenticates with this key via x-api-key.
            patch.object(main.settings, "api_keys", ["test-key"]),
        ):
            response = await main.v1_messages(
                req,
                _make_request(
                    "/v1/messages",
                    headers={
                        "x-api-key": "test-key",
                        "anthropic-version": "2023-06-01",
                    },
                ),
            )
        payload = json.loads(response.body)
        tool_blocks = [item for item in payload["content"] if item["type"] == "tool_use"]
        self.assertEqual(payload["stop_reason"], "tool_use")
        # Fail with a readable assertion instead of an IndexError when the
        # gateway returned no tool_use block at all.
        self.assertTrue(
            tool_blocks,
            f"expected a tool_use block in content, got: {payload['content']!r}",
        )
        self.assertEqual(tool_blocks[0]["name"], "fetch_weather")
        self.assertEqual(tool_blocks[0]["input"], {"city": "Hangzhou"})
    async def test_responses_stream_bridges_text_tool_and_completed_events(
        self,
    ) -> None: