feat: add emulated tool-calling bridge for Lingma

Add a proxy-side tool emulation layer so Lingma requests can surface stable OpenAI tool_calls and Anthropic tool_use blocks even when upstream tool events are missing or inconsistent.

Constraint: Keep native Lingma tool event bridging as the first path and layer emulation on top as a fallback.
Rejected: Depend exclusively on Lingma native tool/invoke events | tool visibility remains inconsistent across models and transports
Confidence: high
Scope-risk: moderate
2026-05-07 18:10:01 +08:00
parent 5911e4322e
commit 94a8025ae5
11 changed files with 1808 additions and 4 deletions
--- a/README.md
+++ b/README.md
@@ -6,6 +6,7 @@
 - Anthropic：`/v1/messages`、`/v1/messages/count_tokens`（含 stream）
 - 内置：多实例池、会话复用、Prometheus 指标、登录态 bundle 注入
 - 工具事件桥接：Lingma 上游返回 `tool` 事件时，网关会输出为 OpenAI `tool_calls`（stream/non-stream）和 Anthropic `tool_use` / `tool_result`（stream/non-stream）；请求侧 `tools` / `tool_choice` 仅在 `TOOL_FORWARD_ENABLED=true` 时透传（默认开启，可显式关闭）
+- 工具模拟回退：当 Lingma 未稳定外显原生 `tool/*` 事件时，网关会在提示词中注入工具说明，并把模型输出的 `json action` / `#Tool Call` 等动作文本归一化为 OpenAI `tool_calls`，同时支持 tool result continuation（工具结果回传后继续对话）
 - 多模态降级：OpenAI `image_url` / `input_image` 转 `[image]`，`input_audio` 转 `[audio]`；Anthropic `image` 转 `[image]`

 > 架构设计与二开细节请看 [`DESIGN.md`](./DESIGN.md)。
@@ -85,6 +86,9 @@ python3 -m unittest tests/test_tool_call_bridge.py

 # 全量 unittest
 python3 -m unittest discover -s tests -p "test_*.py"
+
+# Docker 端到端工具调用冒烟测试
+bash scripts/smoke_tool_calls.sh
 ```

 ---
@@ -201,6 +205,7 @@ curl -s "http://127.0.0.1:${PORT}/healthz"
 | `healthz` 正常但请求失败 | 用错端口 | 以 `.env` 的 `PORT` 为准，`docker compose ps` 再确认 |
 | `git pull` 提示 not on a branch | 处于 detached HEAD | 执行 `git checkout -B main origin/main` |
 | 自动登录不稳定 | 浏览器流程波动 | 优先使用 `LINGMA_SESSION_BUNDLE(_FILE)` |
+| 日志出现 `extension main js path not found` / `ExtensionApi executor not inited` | Lingma 扩展运行时未完整提取，MCP/工具执行器未初始化 | 重启容器触发 bootstrap 自愈；确认 `data/bin/<version>/extension/main.js` 已存在 |
 | 工具调用未触发 | 模型未选择工具或当前协议路径不支持合成回退 | OpenAI 可配合 `tool_choice` 强制并约束输出 JSON；Anthropic 当前仅 non-stream 支持合成 `tool_use` / `tool_result` 回退 |

 ---
--- a/app/bootstrap_lingma.py
+++ b/app/bootstrap_lingma.py
@@ -3,6 +3,7 @@ from __future__ import annotations
 import io
 import json
 import os
+import shutil
 import time
 import urllib.request
 import zipfile
@@ -70,6 +71,15 @@ def _extract_release_tree(
            dst.write(src.read())


+def _release_dir_for_binary(lingma_bin: Path, release_root: str | None) -> Path:
+    return lingma_bin.parent / ((release_root or "").strip() or "2.5.20")
+
+
+def _release_has_required_assets(release_dir: Path) -> bool:
+    extension_main = release_dir / "extension" / "main.js"
+    return extension_main.exists() and extension_main.is_file()
+
+
 def _query_marketplace_latest_vsix(
    publisher: str, extension: str
 ) -> tuple[str, str, dict]:
@@ -168,8 +178,19 @@ def bootstrap_from_vsix() -> None:
            )
            resolved_url = vsix_url

+    current_release_dir = _release_dir_for_binary(
+        lingma_bin, old_marker.get("release_root") if isinstance(old_marker, dict) else None
+    )
+    release_ready = _release_has_required_assets(current_release_dir)
+    if lingma_bin.exists() and not release_ready:
+        print(
+            "[bootstrap] existing Lingma binary found but extension assets are incomplete; "
+            f"refreshing install under {current_release_dir}"
+        )
+
    if (
        lingma_bin.exists()
+        and release_ready
        and not force_refresh
        and (
            (not always_refresh)
@@ -215,7 +236,8 @@ def bootstrap_from_vsix() -> None:
            lingma_bytes = inner_zip.read(lingma_member)
            release_root = _infer_release_root(lingma_member)
            lingma_bin.parent.mkdir(parents=True, exist_ok=True)
-            release_dir = lingma_bin.parent / (release_root or "2.5.20")
+            release_dir = _release_dir_for_binary(lingma_bin, release_root)
+            shutil.rmtree(release_dir, ignore_errors=True)
            _extract_release_tree(inner_zip, release_root, release_dir)

        lingma_bin.write_bytes(lingma_bytes)
@@ -224,7 +246,9 @@ def bootstrap_from_vsix() -> None:
        if extension_main.exists():
            print(f"[bootstrap] extension ready: {extension_main}")
        else:
-            print(f"[bootstrap] extension missing under: {release_dir}")
+            raise RuntimeError(
+                f"extension assets missing after extraction under: {release_dir}"
+            )

        marker = {
            "source": source_type,
--- a/app/http/tool_bridge.py
+++ b/app/http/tool_bridge.py
@@ -196,6 +196,33 @@ def _extract_tool_calls_from_text(text: str) -> list[dict[str, Any]] | None:
    return None


+def _extract_hash_tool_call_event_from_text(
+    text: str,
+    *,
+    forced_tool_name: str | None = None,
+) -> dict[str, Any] | None:
+    raw = (text or "").strip()
+    if not raw:
+        return None
+    match = re.search(
+        r"#Tool Call\s*```([A-Za-z0-9_\-.]+)\s*(\{.*?\})\s*```",
+        raw,
+        flags=re.S,
+    )
+    if not match:
+        return None
+    name = match.group(1).strip()
+    if forced_tool_name and name != forced_tool_name:
+        return None
+    try:
+        arguments = json.loads(match.group(2))
+    except Exception:
+        return None
+    if not isinstance(arguments, dict):
+        return None
+    return {"name": name, "input": arguments}
+
+
 def _tool_code_single_arg_name(
    tools: list[dict[str, Any]] | None, forced_tool_name: str
 ) -> str | None:
@@ -342,6 +369,45 @@ def _forced_tool_fallback_event(
    )


+def _declared_tool_names(tools: list[dict[str, Any]] | None) -> list[str]:
+    if not isinstance(tools, list):
+        return []
+    out: list[str] = []
+    for tool in tools:
+        name = _openai_tool_name(tool) or _anthropic_tool_name(tool)
+        if name and name not in out:
+            out.append(name)
+    return out
+
+
+def _infer_tool_event_from_declared_tools(
+    text: str,
+    *,
+    tools: list[dict[str, Any]] | None,
+) -> dict[str, Any] | None:
+    for tool_name in _declared_tool_names(tools):
+        inferred = _extract_function_call_event_from_text(
+            text,
+            forced_tool_name=tool_name,
+        )
+        if inferred is not None:
+            return inferred
+        inferred = _extract_hash_tool_call_event_from_text(
+            text,
+            forced_tool_name=tool_name,
+        )
+        if inferred is not None:
+            return inferred
+        inferred = _forced_tool_fallback_event(
+            text,
+            forced_tool_name=tool_name,
+            tools=tools,
+        )
+        if inferred is not None:
+            return inferred
+    return None
+
+
 def _openai_tool_call(
    tool: dict[str, Any], *, forced_id: str | None = None
 ) -> dict[str, Any]:
--- a/app/http/tool_emulation.py
+++ b/app/http/tool_emulation.py
@@ -0,0 +1,657 @@
+from __future__ import annotations
+
+import json
+import re
+import uuid
+from dataclasses import dataclass
+from typing import Any
+
+
+@dataclass
+class EmulatedToolDef:
+    name: str
+    description: str
+    input_schema: dict[str, Any]
+
+
+@dataclass
+class EmulatedToolChoice:
+    mode: str
+    name: str = ""
+
+
+@dataclass
+class EmulatedToolCall:
+    id: str
+    name: str
+    arguments: dict[str, Any]
+
+
+def extract_openai_tools(raw: Any) -> list[EmulatedToolDef]:
+    if not isinstance(raw, list):
+        return []
+    out: list[EmulatedToolDef] = []
+    for item in raw:
+        if not isinstance(item, dict):
+            continue
+        fn = item.get("function")
+        if not isinstance(fn, dict):
+            continue
+        name = str(fn.get("name") or "").strip()
+        if not name:
+            continue
+        schema = fn.get("parameters") if isinstance(fn.get("parameters"), dict) else {}
+        out.append(
+            EmulatedToolDef(
+                name=name,
+                description=str(fn.get("description") or "").strip(),
+                input_schema=dict(schema),
+            )
+        )
+    return out
+
+
+def extract_anthropic_tools(raw: Any) -> list[EmulatedToolDef]:
+    if not isinstance(raw, list):
+        return []
+    out: list[EmulatedToolDef] = []
+    for item in raw:
+        if not isinstance(item, dict):
+            continue
+        tool_type = str(item.get("type") or "").strip()
+        if tool_type.startswith("web_search_"):
+            continue
+        name = str(item.get("name") or "").strip()
+        if not name:
+            continue
+        schema = item.get("input_schema") if isinstance(item.get("input_schema"), dict) else {}
+        out.append(
+            EmulatedToolDef(
+                name=name,
+                description=str(item.get("description") or "").strip(),
+                input_schema=dict(schema),
+            )
+        )
+    return out
+
+
+def extract_openai_tool_choice(raw: Any) -> EmulatedToolChoice:
+    if raw is None:
+        return EmulatedToolChoice(mode="auto")
+    if isinstance(raw, str):
+        value = raw.strip()
+        if value in {"", "auto"}:
+            return EmulatedToolChoice(mode="auto")
+        if value == "none":
+            return EmulatedToolChoice(mode="none")
+        if value in {"required", "any"}:
+            return EmulatedToolChoice(mode="any")
+        return EmulatedToolChoice(mode="tool", name=value)
+    if not isinstance(raw, dict):
+        return EmulatedToolChoice(mode="auto")
+    type_name = str(raw.get("type") or "").strip()
+    if type_name in {"required", "any"}:
+        return EmulatedToolChoice(mode="any")
+    if type_name in {"none"}:
+        return EmulatedToolChoice(mode="none")
+    if type_name in {"function", "tool"}:
+        fn = raw.get("function")
+        if isinstance(fn, dict):
+            name = str(fn.get("name") or "").strip()
+            if name:
+                return EmulatedToolChoice(mode="tool", name=name)
+        name = str(raw.get("name") or "").strip()
+        if name:
+            return EmulatedToolChoice(mode="tool", name=name)
+    return EmulatedToolChoice(mode="auto")
+
+
+def extract_anthropic_tool_choice(raw: Any) -> EmulatedToolChoice:
+    if raw is None:
+        return EmulatedToolChoice(mode="auto")
+    if not isinstance(raw, dict):
+        return extract_openai_tool_choice(raw)
+    type_name = str(raw.get("type") or "").strip()
+    if type_name in {"", "auto"}:
+        return EmulatedToolChoice(mode="auto")
+    if type_name == "none":
+        return EmulatedToolChoice(mode="none")
+    if type_name in {"any", "required"}:
+        return EmulatedToolChoice(mode="any")
+    if type_name == "tool":
+        name = str(raw.get("name") or "").strip()
+        if name:
+            return EmulatedToolChoice(mode="tool", name=name)
+    return EmulatedToolChoice(mode="auto")
+
+
+def has_tool_request(tools: list[EmulatedToolDef], choice: EmulatedToolChoice) -> bool:
+    return bool(tools) or choice.mode not in {"", "auto"}
+
+
+def inject_tooling(system: str, tools: list[EmulatedToolDef], choice: EmulatedToolChoice) -> str:
+    system = system.strip()
+    if not tools:
+        return system
+
+    tool_lines: list[str] = []
+    for tool in tools:
+        signature = _compact_schema(tool.input_schema)
+        line = f"{tool.name}({signature})"
+        if tool.description:
+            line += f" - {_truncate(tool.description, 120)}"
+        tool_lines.append(line)
+
+    parts = [
+        "You are an AI assistant with DIRECT tool access.",
+        "When a request needs local files, terminal state, browser state, current web data, or another external result, use the proxy tools listed below.",
+        "Do not claim tools are unavailable.",
+        "When you need to use a tool, output exactly one or more structured action blocks in this format:",
+        '```json action\n{"tool":"NAME","parameters":{"key":"value"}}\n```',
+        "Available tools:",
+        "\n".join(tool_lines),
+        "Rules:",
+        "- Use ```json action``` blocks for tool calls.",
+        "- If a tool is needed, do not explain first; emit the action block directly.",
+        "- If no tool is needed, answer normally.",
+        "- Never say tools are unavailable.",
+        _force_constraint(choice),
+    ]
+    tooling = "\n\n".join(part for part in parts if part)
+    if not system:
+        return tooling
+    return f"{system}\n\n---\n\n{tooling}"
+
+
+def action_output_prompt(tool_call_id: str | None, output: str) -> str:
+    output = (output or "").strip()
+    if not output:
+        return ""
+    suffix = (
+        "Based on the tool result above, answer the user's request directly if you have enough information. "
+        "Only use another tool call if a specific missing fact still requires it."
+    )
+    if tool_call_id and tool_call_id.strip():
+        return f"Tool result for {tool_call_id.strip()}:\n{output}\n\n{suffix}"
+    return f"Tool result:\n{output}\n\n{suffix}"
+
+
+def parse_action_blocks(
+    text: str,
+    tools: list[EmulatedToolDef],
+    *,
+    max_scan_bytes: int = 0,
+    max_tool_calls: int = 8,
+) -> tuple[list[EmulatedToolCall], str]:
+    if not text or not text.strip():
+        return [], ""
+    if max_scan_bytes > 0 and len(text) > max_scan_bytes:
+        text = text[:max_scan_bytes]
+
+    tool_name_map = {tool.name.lower(): tool.name for tool in tools if tool.name.strip()}
+    tool_schema_map = {tool.name: tool.input_schema for tool in tools if tool.name.strip()}
+
+    calls: list[EmulatedToolCall] = []
+    spans: list[tuple[int, int]] = []
+    seen: set[str] = set()
+
+    for match in re.finditer(r"```json(?:\s+action)?\s*(.*?)```", text, flags=re.S | re.I):
+        raw = (match.group(1) or "").strip()
+        if not raw:
+            continue
+        parsed = _parse_tool_call_json(raw)
+        if parsed is None:
+            continue
+        name, arguments = parsed
+        normalized = _normalize_tool_name(name, tool_name_map)
+        schema = tool_schema_map.get(normalized)
+        if schema:
+            arguments = _filter_args_by_schema(arguments, schema)
+            if not _has_required_args(arguments, schema):
+                continue
+        key = _tool_call_key(normalized, arguments)
+        if key in seen:
+            spans.append(match.span())
+            continue
+        seen.add(key)
+        calls.append(
+            EmulatedToolCall(
+                id=_stable_call_id(normalized, arguments),
+                name=normalized,
+                arguments=arguments,
+            )
+        )
+        spans.append(match.span())
+        if len(calls) >= max_tool_calls:
+            break
+
+    if not calls:
+        return [], text.strip()
+
+    clean = text
+    for start, end in reversed(spans):
+        clean = clean[:start] + clean[end:]
+    return calls, clean.strip()
+
+
+def looks_like_refusal(text: str) -> bool:
+    lowered = (text or "").strip().lower()
+    if not lowered:
+        return False
+    needles = [
+        "tools are unavailable",
+        "cannot call tools",
+        "can't call tools",
+        "cannot execute",
+        "can't execute",
+        "没有可用的工具",
+        "工具不可用",
+        "不能调用工具",
+        "无法直接执行",
+    ]
+    return any(needle in lowered for needle in needles)
+
+
+def looks_like_missed_tool_use(text: str) -> bool:
+    lowered = (text or "").strip().lower()
+    if not lowered:
+        return False
+    needles = [
+        "let me use",
+        "i need to use",
+        "i will use",
+        "i need to run",
+        "i will run",
+        "我需要使用",
+        "让我使用",
+        "执行命令",
+        "读取文件",
+        "查看文件",
+        "查询天气",
+        "#tool call",
+    ]
+    return any(needle in lowered for needle in needles)
+
+
+def infer_tool_calls_from_text(
+    text: str,
+    tools: list[EmulatedToolDef],
+) -> list[EmulatedToolCall]:
+    if not (looks_like_refusal(text) or looks_like_missed_tool_use(text)):
+        return []
+    direct = infer_declared_tool_call_from_text(text, tools)
+    return [direct] if direct is not None else []
+
+
+def force_tooling_prompt(choice: EmulatedToolChoice) -> str:
+    prompt = (
+        "Your last response did not include any ```json action``` block. "
+        "You must respond with at least one valid action block now. "
+        "Select the single most appropriate available tool for the user request. "
+        "Do not explain. Do not say tools are unavailable. Output the action block directly."
+    )
+    if choice.mode == "tool" and choice.name.strip():
+        prompt += f' You must call "{choice.name.strip()}".'
+    return prompt
+
+
+def infer_declared_tool_call_from_text(
+    text: str,
+    tools: list[EmulatedToolDef],
+) -> EmulatedToolCall | None:
+    for tool in tools:
+        event = _extract_fenced_json_tool_call_event_from_text(
+            text, forced_tool_name=tool.name
+        )
+        if event is None:
+            event = _extract_hash_tool_call_event_from_text(text, forced_tool_name=tool.name)
+        if event is None:
+            event = _extract_function_call_event_from_text(text, forced_tool_name=tool.name)
+        if event is None:
+            event = _forced_tool_fallback_event(text, forced_tool_name=tool.name, tools=tools)
+        if event is None:
+            continue
+        schema = tool.input_schema
+        arguments = dict(event.get("input") or {})
+        if schema:
+            arguments = _filter_args_by_schema(arguments, schema)
+            if not _has_required_args(arguments, schema):
+                continue
+        return EmulatedToolCall(
+            id=_stable_call_id(tool.name, arguments),
+            name=tool.name,
+            arguments=arguments,
+        )
+    return None
+
+
+def openai_tool_call_from_emulated(call: EmulatedToolCall) -> dict[str, Any]:
+    return {
+        "id": call.id,
+        "type": "function",
+        "function": {
+            "name": call.name,
+            "arguments": json.dumps(call.arguments, ensure_ascii=False),
+        },
+    }
+
+
+def _extract_hash_tool_call_event_from_text(
+    text: str,
+    *,
+    forced_tool_name: str | None = None,
+) -> dict[str, Any] | None:
+    raw = (text or "").strip()
+    match = re.search(
+        r"#Tool Call\s*```([A-Za-z0-9_\-.]+)\s*(\{.*?\})\s*```",
+        raw,
+        flags=re.S,
+    )
+    if not match:
+        return None
+    name = match.group(1).strip()
+    if forced_tool_name and name != forced_tool_name:
+        return None
+    try:
+        arguments = json.loads(match.group(2))
+    except Exception:
+        return None
+    if not isinstance(arguments, dict):
+        return None
+    return {"name": name, "input": arguments}
+
+
+def _extract_fenced_json_tool_call_event_from_text(
+    text: str,
+    *,
+    forced_tool_name: str | None = None,
+) -> dict[str, Any] | None:
+    raw = (text or "").strip()
+    match = re.search(r"```json(?:\s+action)?\s*(\{.*?\})\s*```", raw, flags=re.S | re.I)
+    if not match:
+        return None
+    try:
+        payload = json.loads(match.group(1))
+    except Exception:
+        return None
+    if not isinstance(payload, dict):
+        return None
+
+    name = str(payload.get("tool") or payload.get("name") or "").strip()
+    fn = payload.get("function")
+    if not name and isinstance(fn, dict):
+        name = str(fn.get("name") or "").strip()
+    if not name:
+        return None
+    if forced_tool_name and name != forced_tool_name:
+        return None
+
+    arguments = payload.get("parameters")
+    if arguments is None:
+        arguments = payload.get("arguments")
+    if arguments is None:
+        arguments = payload.get("input")
+    if arguments is None and isinstance(fn, dict):
+        arguments = fn.get("arguments")
+    if isinstance(arguments, str):
+        try:
+            arguments = json.loads(arguments)
+        except Exception:
+            return None
+    if arguments is None:
+        arguments = {}
+    if not isinstance(arguments, dict):
+        return None
+    return {"name": name, "input": arguments}
+
+
+def _extract_function_call_event_from_text(
+    text: str,
+    *,
+    forced_tool_name: str | None = None,
+) -> dict[str, Any] | None:
+    raw = (text or "").strip()
+    match = re.search(r"<function_calls>\s*(\{.*?\})\s*</function_calls>", raw, flags=re.S)
+    if not match:
+        return None
+    try:
+        payload = json.loads(match.group(1))
+    except Exception:
+        return None
+    if not isinstance(payload, dict):
+        return None
+    name = str(payload.get("name") or "").strip()
+    if not name:
+        return None
+    if forced_tool_name and name != forced_tool_name:
+        return None
+    arguments = payload.get("arguments")
+    if isinstance(arguments, str):
+        try:
+            arguments = json.loads(arguments)
+        except Exception:
+            return None
+    if arguments is None:
+        arguments = {}
+    if not isinstance(arguments, dict):
+        return None
+    return {"name": name, "input": arguments}
+
+
+def _forced_tool_fallback_event(
+    text: str,
+    *,
+    forced_tool_name: str | None,
+    tools: list[EmulatedToolDef],
+) -> dict[str, Any] | None:
+    if not forced_tool_name:
+        return None
+    parsed = _tool_code_object_from_text(
+        text,
+        forced_tool_name,
+        single_arg_name=_tool_code_single_arg_name(tools, forced_tool_name),
+    )
+    if parsed is None:
+        try:
+            parsed = json.loads((text or "").strip())
+        except Exception:
+            return None
+    if not isinstance(parsed, dict):
+        return None
+    explicit_name = parsed.get("name") or parsed.get("tool")
+    if explicit_name is not None and str(explicit_name) != forced_tool_name:
+        return None
+    tool_input = parsed.get("input")
+    if tool_input is None and "arguments" in parsed:
+        tool_input = parsed.get("arguments")
+    if isinstance(tool_input, str):
+        try:
+            tool_input = json.loads(tool_input)
+        except Exception:
+            return None
+    if tool_input is None:
+        reserved = {"name", "tool", "function", "arguments", "input", "result"}
+        tool_input = {k: v for k, v in parsed.items() if k not in reserved}
+    if not isinstance(tool_input, dict):
+        return None
+    return {"name": forced_tool_name, "input": tool_input}
+
+
+def _tool_code_single_arg_name(
+    tools: list[EmulatedToolDef], forced_tool_name: str
+) -> str | None:
+    for tool in tools:
+        if tool.name != forced_tool_name:
+            continue
+        properties = tool.input_schema.get("properties")
+        if not isinstance(properties, dict) or len(properties) != 1:
+            return None
+        only_name = next(iter(properties.keys()), None)
+        return only_name if isinstance(only_name, str) and only_name.strip() else None
+    return None
+
+
+def _tool_code_object_from_text(
+    text: str,
+    forced_tool_name: str,
+    *,
+    single_arg_name: str | None = None,
+) -> dict[str, Any] | None:
+    raw = (text or "").strip()
+    if not raw.startswith("```") or not raw.endswith("```"):
+        return None
+    lines = raw.splitlines()
+    if len(lines) < 2:
+        return None
+    fence = lines[0].strip().lower()
+    language = fence[3:].strip()
+    if language and language not in {"tool_code", "python", "py"}:
+        return None
+    body = "\n".join(lines[1:-1]).strip()
+    call_match = re.fullmatch(rf"{re.escape(forced_tool_name)}\((.*)\)", body, flags=re.S)
+    if not call_match:
+        return None
+    arguments_text = call_match.group(1).strip()
+    if not arguments_text:
+        return {"arguments": {}}
+    if single_arg_name and not re.search(r"\w+\s*=", arguments_text):
+        try:
+            value = json.loads(arguments_text)
+        except Exception:
+            value = arguments_text.strip('"\'')
+        return {"arguments": {single_arg_name: value}}
+    arguments: dict[str, Any] = {}
+    for part in [p.strip() for p in arguments_text.split(",") if p.strip()]:
+        if "=" not in part:
+            return None
+        key, value_text = part.split("=", 1)
+        key = key.strip()
+        value_text = value_text.strip()
+        try:
+            value = json.loads(value_text)
+        except Exception:
+            value = value_text.strip('"\'')
+        arguments[key] = value
+    return {"arguments": arguments}
+
+
+def _parse_tool_call_json(raw: str) -> tuple[str, dict[str, Any]] | None:
+    try:
+        obj = json.loads(_normalize_json(raw))
+    except Exception:
+        return None
+    if not isinstance(obj, dict):
+        return None
+    name = str(obj.get("tool") or obj.get("name") or "").strip()
+    fn = obj.get("function")
+    if not name and isinstance(fn, dict):
+        name = str(fn.get("name") or "").strip()
+    if not name:
+        return None
+    arguments = obj.get("parameters")
+    if arguments is None:
+        arguments = obj.get("arguments")
+    if arguments is None:
+        arguments = obj.get("input")
+    if arguments is None and isinstance(fn, dict):
+        arguments = fn.get("arguments")
+    if isinstance(arguments, str):
+        try:
+            arguments = json.loads(arguments)
+        except Exception:
+            arguments = {}
+    if arguments is None:
+        arguments = {k: v for k, v in obj.items() if k not in {"tool", "name"}}
+    if not isinstance(arguments, dict):
+        return None
+    return name, arguments
+
+
+def _normalize_tool_name(raw: str, available: dict[str, str]) -> str:
+    name = raw.strip()
+    if not name:
+        return ""
+    exact = available.get(name.lower())
+    if exact:
+        return exact
+    key = name.lower().replace("-", "_").replace(" ", "_")
+    aliases = {
+        "bash": "terminal",
+        "shell": "terminal",
+        "read": "read_file",
+        "grep": "search_files",
+        "glob": "search_files",
+        "edit": "patch",
+        "write": "write_file",
+    }
+    mapped = aliases.get(key)
+    if mapped and mapped in available:
+        return available[mapped]
+    return name
+
+
+def _filter_args_by_schema(args: dict[str, Any], schema: dict[str, Any]) -> dict[str, Any]:
+    properties = schema.get("properties")
+    if not isinstance(properties, dict) or not properties:
+        return args
+    return {k: v for k, v in args.items() if k in properties}
+
+
+def _has_required_args(args: dict[str, Any], schema: dict[str, Any]) -> bool:
+    required = schema.get("required")
+    if not isinstance(required, list):
+        return True
+    for key in required:
+        if not isinstance(key, str):
+            continue
+        if key not in args:
+            return False
+        value = args.get(key)
+        if isinstance(value, str) and not value.strip():
+            return False
+    return True
+
+
+def _compact_schema(schema: dict[str, Any]) -> str:
+    properties = schema.get("properties")
+    if not isinstance(properties, dict) or not properties:
+        return ""
+    required = {item for item in schema.get("required", []) if isinstance(item, str)}
+    parts: list[str] = []
+    for key in sorted(properties.keys()):
+        parts.append(key if key in required else f"{key}?")
+    return ", ".join(parts)
+
+
+def _truncate(text: str, max_len: int) -> str:
+    text = text.strip()
+    if len(text) <= max_len:
+        return text
+    return text[:max_len] + "..."
+
+
+def _force_constraint(choice: EmulatedToolChoice) -> str:
+    if choice.mode == "any":
+        return "- You must output at least one ```json action``` block in this reply."
+    if choice.mode == "tool" and choice.name.strip():
+        return f'- You must call "{choice.name.strip()}" in this reply.'
+    return ""
+
+
+def _normalize_json(text: str) -> str:
+    return (
+        text.strip()
+        .replace("“", '"')
+        .replace("”", '"')
+        .replace(",\n}", "\n}")
+        .replace(",\n]", "\n]")
+    )
+
+
+def _tool_call_key(name: str, arguments: dict[str, Any]) -> str:
+    return f"{name.lower()}\0{json.dumps(arguments, ensure_ascii=False, sort_keys=True)}"
+
+
+def _stable_call_id(name: str, arguments: dict[str, Any]) -> str:
+    key = _tool_call_key(name, arguments)
+    return "call_" + uuid.uuid5(uuid.NAMESPACE_OID, key).hex[:16]
--- a/app/lingma_client.py
+++ b/app/lingma_client.py
@@ -419,6 +419,17 @@ class LspWsRpcClient:
        method = msg.get("method")
        params = msg.get("params") or {}

+        if method and (
+            method.startswith("tool/")
+            or method.startswith("mcp/")
+            or method in {"chat/answer", "chat/finish"}
+        ):
+            logger.info(
+                "lingma server message method=%s params=%s",
+                method,
+                params,
+            )
+
        if method == "chat/answer":
            req_id = params.get("requestId")
            stream = self._chat_streams.get(req_id)
--- a/app/main.py
+++ b/app/main.py
@@ -36,6 +36,20 @@ from .http.execution_core import (
    release_execution,
    start_execution,
 )
+from .http.tool_emulation import (
+    action_output_prompt,
+    extract_anthropic_tool_choice as _em_extract_anthropic_tool_choice,
+    extract_anthropic_tools as _em_extract_anthropic_tools,
+    extract_openai_tool_choice as _em_extract_openai_tool_choice,
+    extract_openai_tools as _em_extract_openai_tools,
+    force_tooling_prompt,
+    has_tool_request as _em_has_tool_request,
+    infer_declared_tool_call_from_text,
+    infer_tool_calls_from_text,
+    inject_tooling,
+    openai_tool_call_from_emulated,
+    parse_action_blocks,
+)
 from .http.openai_responses import handle_responses
 from .http.tool_bridge import (
    _allowed_stream_tool_event,
@@ -44,8 +58,10 @@ from .http.tool_bridge import (
    _anthropic_tool_result_block,
    _anthropic_tool_use_block,
    _extract_function_call_event_from_text,
+    _extract_hash_tool_call_event_from_text,
    _extract_tool_calls_from_text,
    _forced_tool_fallback_event,
+    _infer_tool_event_from_declared_tools,
    _json_string,
    _openai_forced_tool_name,
    _openai_tool_call,
@@ -417,6 +433,82 @@ def _messages_to_prompt(messages: list[dict]) -> str:
    return "\n".join(parts).strip()


+def _messages_to_emulation_prompt(
+    messages: list[dict[str, Any]],
+    *,
+    system_text: str,
+    tools: list[dict[str, Any]] | None,
+    tool_choice: Any,
+) -> str:
+    filtered: list[tuple[str, str]] = []
+    for message in messages:
+        role = str(message.get("role") or "").strip().lower()
+        if role in {"system", "developer"}:
+            continue
+        text = flatten_content(message.get("content"))
+        if role == "tool":
+            text = action_output_prompt(message.get("tool_call_id"), text)
+            role = "user"
+        if not text:
+            continue
+        if role not in {"user", "assistant"}:
+            continue
+        filtered.append((role, text))
+
+    if not filtered:
+        return system_text.strip()
+
+    em_tools = _em_extract_openai_tools(tools)
+    em_choice = _em_extract_openai_tool_choice(tool_choice)
+    injected_system = inject_tooling(system_text, em_tools, em_choice)
+
+    parts: list[str] = []
+    for role, text in filtered:
+        label = "User" if role == "user" else "Assistant"
+        parts.append(f"{label}: {text}")
+    if injected_system:
+        parts.append(injected_system)
+    parts.append("Assistant:")
+    return "\n\n".join(parts).strip()
+
+
+def _anthropic_messages_to_emulation_prompt(
+    messages: list[dict[str, Any]],
+    *,
+    system_text: str,
+    tools: list[dict[str, Any]] | None,
+    tool_choice: Any,
+) -> str:
+    filtered: list[tuple[str, str]] = []
+    for message in messages:
+        role = str(message.get("role") or "").strip().lower()
+        text = str(message.get("content") or "").strip()
+        if role == "tool":
+            text = action_output_prompt(message.get("tool_call_id"), text)
+            role = "user"
+        if not text:
+            continue
+        if role not in {"user", "assistant"}:
+            continue
+        filtered.append((role, text))
+
+    if not filtered:
+        return system_text.strip()
+
+    em_tools = _em_extract_anthropic_tools(tools)
+    em_choice = _em_extract_anthropic_tool_choice(tool_choice)
+    injected_system = inject_tooling(system_text, em_tools, em_choice)
+
+    parts: list[str] = []
+    for role, text in filtered:
+        label = "User" if role == "user" else "Assistant"
+        parts.append(f"{label}: {text}")
+    if injected_system:
+        parts.append(injected_system)
+    parts.append("Assistant:")
+    return "\n\n".join(parts).strip()
+
+
 def _include_usage(stream_options: dict | None) -> bool:
    if not isinstance(stream_options, dict):
        return False
@@ -525,6 +617,20 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
    is_reply = execution.is_reply

    include_usage = _include_usage(req.stream_options)
+    em_tools = _em_extract_openai_tools(req.tools)
+    em_choice = _em_extract_openai_tool_choice(req.tool_choice)
+    if _em_has_tool_request(em_tools, em_choice):
+        system_parts = [
+            flatten_content(m.content)
+            for m in req.messages
+            if m.role in {"system", "developer"} and flatten_content(m.content)
+        ]
+        prompt = _messages_to_emulation_prompt(
+            messages_dump,
+            system_text="\n\n".join(system_parts),
+            tools=req.tools,
+            tool_choice=req.tool_choice,
+        )

    try:
        started = await start_execution(
@@ -709,6 +815,11 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
                            merged_text,
                            forced_tool_name=forced_tool_name,
                        )
+                        if inferred is None:
+                            inferred = _extract_hash_tool_call_event_from_text(
+                                merged_text,
+                                forced_tool_name=forced_tool_name,
+                            )
                        if inferred is None:
                            inferred = _forced_tool_fallback_event(
                                merged_text,
@@ -747,6 +858,72 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
                    if buffered_text_parts and forced_tool_name and saw_tool_call:
                        buffered_text_parts.clear()

+                    if buffered_text_parts and req.tools and not saw_tool_call:
+                        merged_text = "".join(buffered_text_parts)
+                        inferred = _infer_tool_event_from_declared_tools(
+                            merged_text,
+                            tools=req.tools,
+                        )
+                        if inferred is not None:
+                            tool_id = "call_inferred_0"
+                            tool_call_indexes[tool_id] = 0
+                            saw_tool_call = True
+                            payload = {
+                                "id": completion_id,
+                                "object": "chat.completion.chunk",
+                                "created": created,
+                                "model": model,
+                                "choices": [
+                                    {
+                                        "index": 0,
+                                        "delta": {
+                                            "tool_calls": [
+                                                {
+                                                    "index": 0,
+                                                    **_openai_tool_call(
+                                                        inferred, forced_id=tool_id
+                                                    ),
+                                                }
+                                            ]
+                                        },
+                                        "finish_reason": None,
+                                    }
+                                ],
+                            }
+                            buffered_text_parts.clear()
+                            yield f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"
+
+                    if buffered_text_parts and req.tools and not saw_tool_call:
+                        merged_text = "".join(buffered_text_parts)
+                        parsed_calls, remaining = parse_action_blocks(merged_text, em_tools)
+                        if parsed_calls:
+                            saw_tool_call = True
+                            for i, call in enumerate(parsed_calls):
+                                tool_id = call.id or f"call_inferred_{i}"
+                                tool_call_indexes[tool_id] = i
+                                payload = {
+                                    "id": completion_id,
+                                    "object": "chat.completion.chunk",
+                                    "created": created,
+                                    "model": model,
+                                    "choices": [
+                                        {
+                                            "index": 0,
+                                            "delta": {
+                                                "tool_calls": [
+                                                    {
+                                                        "index": i,
+                                                        **openai_tool_call_from_emulated(call),
+                                                    }
+                                                ]
+                                            },
+                                            "finish_reason": None,
+                                        }
+                                    ],
+                                }
+                                yield f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"
+                            buffered_text_parts = [remaining] if remaining else []
+
                    done_payload = {
                        "id": completion_id,
                        "object": "chat.completion.chunk",
@@ -866,6 +1043,11 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
                message_content,
                forced_tool_name=forced_tool_name,
            )
+            if inferred is None:
+                inferred = _extract_hash_tool_call_event_from_text(
+                    message_content,
+                    forced_tool_name=forced_tool_name,
+                )
            if inferred is None:
                inferred = _forced_tool_fallback_event(
                    message_content,
@@ -878,6 +1060,59 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
                )
                saw_tool_call = True
                message_content = ""
+        if not saw_tool_call and req.tools:
+            inferred = _infer_tool_event_from_declared_tools(
+                message_content,
+                tools=req.tools,
+            )
+            if inferred is not None:
+                tool_calls.append(
+                    _openai_tool_call(inferred, forced_id="call_inferred_0")
+                )
+                saw_tool_call = True
+                message_content = ""
+        if not saw_tool_call and em_tools:
+            parsed_calls, remaining = parse_action_blocks(message_content, em_tools)
+            if parsed_calls:
+                for call in parsed_calls:
+                    tool_calls.append(openai_tool_call_from_emulated(call))
+                saw_tool_call = True
+                message_content = remaining
+        if not saw_tool_call and em_tools:
+            inferred_call = infer_declared_tool_call_from_text(message_content, em_tools)
+            if inferred_call is None:
+                inferred_calls = infer_tool_calls_from_text(message_content, em_tools)
+                inferred_call = inferred_calls[0] if inferred_calls else None
+            if inferred_call is not None:
+                tool_calls.append(openai_tool_call_from_emulated(inferred_call))
+                saw_tool_call = True
+                message_content = ""
+        if not saw_tool_call and em_tools:
+            retry_prompt = f"{prompt}\n\n{force_tooling_prompt(em_choice)}"
+            retry_result = await inst.client.chat_complete(
+                retry_prompt,
+                model,
+                ask_mode,
+                session_id=None,
+                is_reply=False,
+                tool_config=tool_config,
+            )
+            retry_text = retry_result.get("text") or ""
+            parsed_calls, remaining = parse_action_blocks(retry_text, em_tools)
+            if parsed_calls:
+                for call in parsed_calls:
+                    tool_calls.append(openai_tool_call_from_emulated(call))
+                saw_tool_call = True
+                message_content = remaining
+            else:
+                inferred_call = infer_declared_tool_call_from_text(retry_text, em_tools)
+                if inferred_call is None:
+                    inferred_calls = infer_tool_calls_from_text(retry_text, em_tools)
+                    inferred_call = inferred_calls[0] if inferred_calls else None
+                if inferred_call is not None:
+                    tool_calls.append(openai_tool_call_from_emulated(inferred_call))
+                    saw_tool_call = True
+                    message_content = ""
        response = ChatCompletionResponse(
            id=f"chatcmpl-{uuid.uuid4().hex}",
            created=int(time.time()),
@@ -1049,6 +1284,16 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
    model = execution.model
    prompt = execution.prompt
    is_reply = execution.is_reply
+    em_anthropic_tools = _em_extract_anthropic_tools(req.tools)
+    em_anthropic_choice = _em_extract_anthropic_tool_choice(req.tool_choice)
+    if _em_has_tool_request(em_anthropic_tools, em_anthropic_choice):
+        system_text = flatten_anthropic_content(req.system) if req.system else ""
+        prompt = _anthropic_messages_to_emulation_prompt(
+            messages_dump,
+            system_text=system_text,
+            tools=req.tools,
+            tool_choice=req.tool_choice,
+        )

    try:
        started = await start_execution(
@@ -1090,12 +1335,14 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
            stream_meta: dict = {}
            max_tokens = req.max_tokens
            forced_tool_name = _anthropic_forced_tool_name(req.tool_choice)
+            aggregate_emulated_tools = bool(em_anthropic_tools)

            async def event_stream(_ticket=ticket, _inst=inst, _meta=stream_meta):
                success = False
                block_index = 0
                text_block_open = False
                saw_pending_tool_use = False
+                buffered_text_parts: list[str] = []
                try:
                    # 1) message_start — Anthropic SDKs read this first to get
                    #    the message envelope (id/model/initial usage).
@@ -1196,7 +1443,78 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
                        text = _stream_text(chunk)
                        if not text:
                            continue
-                        completion_tokens_holder["n"] += estimate_tokens(text)
+                        if aggregate_emulated_tools:
+                            buffered_text_parts.append(text)
+                            completion_tokens_holder["n"] += estimate_tokens(text)
+                            continue
+
+                        buffered_text_parts.append(text)
+                        merged_text = "".join(buffered_text_parts)
+
+                        parsed_calls, remaining = parse_action_blocks(
+                            merged_text, em_anthropic_tools
+                        )
+                        if not parsed_calls:
+                            inferred = infer_declared_tool_call_from_text(
+                                merged_text,
+                                em_anthropic_tools,
+                            )
+                            if inferred is None:
+                                inferred_calls = infer_tool_calls_from_text(
+                                    merged_text,
+                                    em_anthropic_tools,
+                                )
+                                inferred = inferred_calls[0] if inferred_calls else None
+                            if inferred is not None:
+                                parsed_calls = [inferred]
+                                remaining = ""
+
+                        if parsed_calls:
+                            if text_block_open:
+                                yield _sse(
+                                    "content_block_stop",
+                                    {"type": "content_block_stop", "index": block_index},
+                                )
+                                block_index += 1
+                                text_block_open = False
+                            saw_pending_tool_use = True
+                            for call in parsed_calls:
+                                yield _sse(
+                                    "content_block_start",
+                                    {
+                                        "type": "content_block_start",
+                                        "index": block_index,
+                                        "content_block": {
+                                            "type": "tool_use",
+                                            "id": call.id,
+                                            "name": call.name,
+                                            "input": {},
+                                        },
+                                    },
+                                )
+                                yield _sse(
+                                    "content_block_delta",
+                                    {
+                                        "type": "content_block_delta",
+                                        "index": block_index,
+                                        "delta": {
+                                            "type": "input_json_delta",
+                                            "partial_json": json.dumps(call.arguments, ensure_ascii=False),
+                                        },
+                                    },
+                                )
+                                yield _sse(
+                                    "content_block_stop",
+                                    {"type": "content_block_stop", "index": block_index},
+                                )
+                                block_index += 1
+                            buffered_text_parts = [remaining] if remaining else []
+                            if not buffered_text_parts:
+                                continue
+
+                        text_to_emit = "".join(buffered_text_parts)
+                        buffered_text_parts.clear()
+                        completion_tokens_holder["n"] += estimate_tokens(text_to_emit)
                        if not text_block_open:
                            yield _sse(
                                "content_block_start",
@@ -1213,10 +1531,106 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
                            {
                                "type": "content_block_delta",
                                "index": block_index,
-                                "delta": {"type": "text_delta", "text": text},
+                                "delta": {"type": "text_delta", "text": text_to_emit},
                            },
                        )

+                    if aggregate_emulated_tools:
+                        merged_text = "".join(buffered_text_parts)
+                        parsed_calls, remaining = parse_action_blocks(
+                            merged_text, em_anthropic_tools
+                        )
+                        if not parsed_calls:
+                            inferred = infer_declared_tool_call_from_text(
+                                merged_text,
+                                em_anthropic_tools,
+                            )
+                            if inferred is None:
+                                inferred_calls = infer_tool_calls_from_text(
+                                    merged_text,
+                                    em_anthropic_tools,
+                                )
+                                inferred = inferred_calls[0] if inferred_calls else None
+                            if inferred is not None:
+                                parsed_calls = [inferred]
+                                remaining = ""
+
+                        if parsed_calls:
+                            if remaining.strip():
+                                yield _sse(
+                                    "content_block_start",
+                                    {
+                                        "type": "content_block_start",
+                                        "index": block_index,
+                                        "content_block": {"type": "text", "text": ""},
+                                    },
+                                )
+                                yield _sse(
+                                    "content_block_delta",
+                                    {
+                                        "type": "content_block_delta",
+                                        "index": block_index,
+                                        "delta": {"type": "text_delta", "text": remaining},
+                                    },
+                                )
+                                yield _sse(
+                                    "content_block_stop",
+                                    {"type": "content_block_stop", "index": block_index},
+                                )
+                                block_index += 1
+                            for call in parsed_calls:
+                                saw_pending_tool_use = True
+                                yield _sse(
+                                    "content_block_start",
+                                    {
+                                        "type": "content_block_start",
+                                        "index": block_index,
+                                        "content_block": {
+                                            "type": "tool_use",
+                                            "id": call.id,
+                                            "name": call.name,
+                                            "input": {},
+                                        },
+                                    },
+                                )
+                                yield _sse(
+                                    "content_block_delta",
+                                    {
+                                        "type": "content_block_delta",
+                                        "index": block_index,
+                                        "delta": {
+                                            "type": "input_json_delta",
+                                            "partial_json": json.dumps(call.arguments, ensure_ascii=False),
+                                        },
+                                    },
+                                )
+                                yield _sse(
+                                    "content_block_stop",
+                                    {"type": "content_block_stop", "index": block_index},
+                                )
+                                block_index += 1
+                        elif merged_text.strip():
+                            yield _sse(
+                                "content_block_start",
+                                {
+                                    "type": "content_block_start",
+                                    "index": block_index,
+                                    "content_block": {"type": "text", "text": ""},
+                                },
+                            )
+                            yield _sse(
+                                "content_block_delta",
+                                {
+                                    "type": "content_block_delta",
+                                    "index": block_index,
+                                    "delta": {"type": "text_delta", "text": merged_text},
+                                },
+                            )
+                            yield _sse(
+                                "content_block_stop",
+                                {"type": "content_block_stop", "index": block_index},
+                            )
+
                    if text_block_open:
                        yield _sse(
                            "content_block_stop",
@@ -1323,6 +1737,135 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
            else:
                saw_pending_tool_use = True

+        if not saw_tool_event and em_anthropic_tools:
+            parsed_calls, remaining = parse_action_blocks(text, em_anthropic_tools)
+            if parsed_calls:
+                content_blocks = []
+                if remaining:
+                    content_blocks.append({"type": "text", "text": remaining})
+                for call in parsed_calls:
+                    content_blocks.append(
+                        {
+                            "type": "tool_use",
+                            "id": call.id,
+                            "name": call.name,
+                            "input": call.arguments,
+                        }
+                    )
+                saw_tool_event = True
+                saw_pending_tool_use = True
+                text = remaining
+
+        if not saw_tool_event and em_anthropic_tools:
+            inferred_call = infer_declared_tool_call_from_text(text, em_anthropic_tools)
+            if inferred_call is None:
+                inferred_calls = infer_tool_calls_from_text(text, em_anthropic_tools)
+                inferred_call = inferred_calls[0] if inferred_calls else None
+            if inferred_call is not None:
+                content_blocks = [
+                    {
+                        "type": "tool_use",
+                        "id": inferred_call.id,
+                        "name": inferred_call.name,
+                        "input": inferred_call.arguments,
+                    }
+                ]
+                saw_tool_event = True
+                saw_pending_tool_use = True
+                text = ""
+
+        if not saw_tool_event and em_anthropic_tools:
+            retry_prompt = f"{prompt}\n\n{force_tooling_prompt(em_anthropic_choice)}"
+            retry_result = await inst.client.chat_complete(
+                retry_prompt,
+                model,
+                ask_mode,
+                session_id=None,
+                is_reply=False,
+                tool_config=tool_config,
+            )
+            retry_text = retry_result.get("text") or ""
+            parsed_calls, remaining = parse_action_blocks(retry_text, em_anthropic_tools)
+            if parsed_calls:
+                content_blocks = []
+                if remaining:
+                    content_blocks.append({"type": "text", "text": remaining})
+                for call in parsed_calls:
+                    content_blocks.append(
+                        {
+                            "type": "tool_use",
+                            "id": call.id,
+                            "name": call.name,
+                            "input": call.arguments,
+                        }
+                    )
+                saw_tool_event = True
+                saw_pending_tool_use = True
+                text = remaining
+            else:
+                inferred_call = infer_declared_tool_call_from_text(retry_text, em_anthropic_tools)
+                if inferred_call is None:
+                    inferred_calls = infer_tool_calls_from_text(retry_text, em_anthropic_tools)
+                    inferred_call = inferred_calls[0] if inferred_calls else None
+                if inferred_call is not None:
+                    content_blocks = [
+                        {
+                            "type": "tool_use",
+                            "id": inferred_call.id,
+                            "name": inferred_call.name,
+                            "input": inferred_call.arguments,
+                        }
+                    ]
+                    saw_tool_event = True
+                    saw_pending_tool_use = True
+                    text = ""
+
+        if not saw_tool_event and em_anthropic_tools and text.strip():
+            retry_prompt = f"{prompt}\n\n{force_tooling_prompt(em_anthropic_choice)}"
+            retry_result = await inst.client.chat_complete(
+                retry_prompt,
+                model,
+                ask_mode,
+                session_id=None,
+                is_reply=False,
+                tool_config=tool_config,
+            )
+            retry_text = retry_result.get("text") or ""
+            parsed_calls, remaining = parse_action_blocks(retry_text, em_anthropic_tools)
+            if parsed_calls:
+                content_blocks = []
+                if remaining:
+                    content_blocks.append({"type": "text", "text": remaining})
+                for call in parsed_calls:
+                    content_blocks.append(
+                        {
+                            "type": "tool_use",
+                            "id": call.id,
+                            "name": call.name,
+                            "input": call.arguments,
+                        }
+                    )
+                saw_tool_event = True
+                saw_pending_tool_use = True
+                text = remaining
+            else:
+                inferred_call = infer_declared_tool_call_from_text(retry_text, em_anthropic_tools)
+                if inferred_call is None:
+                    inferred_calls = infer_tool_calls_from_text(retry_text, em_anthropic_tools)
+                    inferred_call = inferred_calls[0] if inferred_calls else None
+                if inferred_call is not None:
+                    content_blocks = [
+                        {
+                            "type": "tool_use",
+                            "id": inferred_call.id,
+                            "name": inferred_call.name,
+                            "input": inferred_call.arguments,
+                        }
+                    ]
+                    saw_tool_event = True
+                    saw_pending_tool_use = True
+                    text = ""
+
        if not saw_tool_event and forced_tool_name:
            inferred = _extract_function_call_event_from_text(
                text,
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,3 +3,4 @@ uvicorn[standard]==0.30.6
 websockets==13.1
 pydantic==2.9.2
 playwright==1.52.0
+mcp==1.12.4
--- a/scripts/smoke_tool_calls.sh
+++ b/scripts/smoke_tool_calls.sh
@@ -0,0 +1,128 @@
+#!/usr/bin/env bash
+# End-to-end smoke test for the gateway's emulated tool-calling bridge.
+#
+# Reads PORT and API_KEYS from the repository's .env, then sends a forced
+# fetch_weather tool call through the OpenAI and Anthropic endpoints, both
+# non-stream and stream. Responses are printed for manual inspection; any
+# HTTP error aborts the run (curl -f under `set -euo pipefail`).
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
+ENV_FILE="$ROOT_DIR/.env"
+
+if [[ ! -f "$ENV_FILE" ]]; then
+  printf 'missing .env: %s\n' "$ENV_FILE" >&2
+  exit 1
+fi
+
+# The .env path is handed to Python as argv[1] so the script reads the same
+# file it just checked, wherever the repository is checked out.
+PORT="$(python3 - "$ENV_FILE" <<'PY'
+import sys
+from pathlib import Path
+env = Path(sys.argv[1])
+vals = {}
+for line in env.read_text().splitlines():
+    line = line.strip()
+    if not line or line.startswith('#') or '=' not in line:
+        continue
+    k, v = line.split('=', 1)
+    vals[k.strip()] = v.strip()
+print(vals.get('PORT', '13013'))
+PY
+)"
+
+# Only the first entry of the comma-separated API_KEYS list is used.
+API_KEY="$(python3 - "$ENV_FILE" <<'PY'
+import sys
+from pathlib import Path
+env = Path(sys.argv[1])
+vals = {}
+for line in env.read_text().splitlines():
+    line = line.strip()
+    if not line or line.startswith('#') or '=' not in line:
+        continue
+    k, v = line.split('=', 1)
+    vals[k.strip()] = v.strip()
+keys = vals.get('API_KEYS', '')
+print(keys.split(',')[0].strip())
+PY
+)"
+
+BASE_URL="http://127.0.0.1:${PORT}"
+
+printf '\n[1/5] /v1/models\n'
+curl -fsS "$BASE_URL/v1/models" \
+  -H "Authorization: Bearer ${API_KEY}" | python3 -m json.tool
+
+printf '\n[2/5] OpenAI non-stream tool call\n'
+curl -fsS "$BASE_URL/v1/chat/completions" \
+  -H "Authorization: Bearer ${API_KEY}" \
+  -H 'Content-Type: application/json' \
+  -d '{
+    "model": "org_auto",
+    "stream": false,
+    "messages": [
+      {"role": "system", "content": "Use tools when available."},
+      {"role": "user", "content": "Use fetch_weather for Hangzhou and return the tool call."}
+    ],
+    "tools": [
+      {"type": "function", "function": {"name": "fetch_weather", "description": "Get weather for a city", "parameters": {"type": "object", "properties": {"city": {"type": "string"}}, "required": ["city"]}}}
+    ],
+    "tool_choice": {"type": "function", "function": {"name": "fetch_weather"}}
+  }' | python3 -m json.tool
+
+printf '\n[3/5] Anthropic non-stream tool use\n'
+curl -fsS "$BASE_URL/v1/messages" \
+  -H "x-api-key: ${API_KEY}" \
+  -H 'anthropic-version: 2023-06-01' \
+  -H 'Content-Type: application/json' \
+  -d '{
+    "model": "claude-3-5-sonnet-20241022",
+    "max_tokens": 256,
+    "stream": false,
+    "messages": [
+      {"role": "user", "content": "Use fetch_weather for Hangzhou and return the tool call."}
+    ],
+    "tools": [
+      {"name": "fetch_weather", "description": "Get weather for a city", "input_schema": {"type": "object", "properties": {"city": {"type": "string"}}, "required": ["city"]}}
+    ],
+    "tool_choice": {"type": "tool", "name": "fetch_weather"}
+  }' | python3 -m json.tool
+
+printf '\n[4/5] OpenAI stream tool call\n'
+curl -fsS -N "$BASE_URL/v1/chat/completions" \
+  -H "Authorization: Bearer ${API_KEY}" \
+  -H 'Content-Type: application/json' \
+  -d '{
+    "model": "org_auto",
+    "stream": true,
+    "messages": [
+      {"role": "system", "content": "Use tools when available."},
+      {"role": "user", "content": "Use fetch_weather for Hangzhou and return the tool call."}
+    ],
+    "tools": [
+      {"type": "function", "function": {"name": "fetch_weather", "description": "Get weather for a city", "parameters": {"type": "object", "properties": {"city": {"type": "string"}}, "required": ["city"]}}}
+    ],
+    "tool_choice": {"type": "function", "function": {"name": "fetch_weather"}}
+  }'
+
+printf '\n[5/5] Anthropic stream tool use\n'
+curl -fsS -N "$BASE_URL/v1/messages" \
+  -H "x-api-key: ${API_KEY}" \
+  -H 'anthropic-version: 2023-06-01' \
+  -H 'Content-Type: application/json' \
+  -d '{
+    "model": "claude-3-5-sonnet-20241022",
+    "max_tokens": 256,
+    "stream": true,
+    "messages": [
+      {"role": "user", "content": "Use fetch_weather for Hangzhou and return the tool call."}
+    ],
+    "tools": [
+      {"name": "fetch_weather", "description": "Get weather for a city", "input_schema": {"type": "object", "properties": {"city": {"type": "string"}}, "required": ["city"]}}
+    ],
+    "tool_choice": {"type": "tool", "name": "fetch_weather"}
+  }'
+
+printf '\nsmoke tool-call checks completed\n'
--- a/tests/TEST_PLAN.md
+++ b/tests/TEST_PLAN.md
@@ -42,6 +42,7 @@
 1. 定点执行新增测试文件。
 2. 全量执行 `tests/` 下 `test_*.py`。
 3. 汇总通过率与失败项（若失败，给出定位与修复建议）。
+4. Docker 运行态执行 `bash scripts/smoke_tool_calls.sh`，验证 OpenAI / Anthropic 的 stream / non-stream 工具调用。

 ## 6. 执行命令
 ```bash
@@ -50,4 +51,5 @@ python3 -m unittest tests/test_session_cache_tooling.py
 python3 -m unittest tests/test_schema_normalization.py
 python3 -m unittest tests/test_tool_call_bridge.py
 python3 -m unittest discover -s tests -p "test_*.py"
+bash scripts/smoke_tool_calls.sh
 ```
--- a/tests/test_pool_stats_config.py
+++ b/tests/test_pool_stats_config.py
@@ -3,10 +3,12 @@ from __future__ import annotations
 import json
 import os
 import sys
+import tempfile
 import types
 import unittest
 from types import SimpleNamespace
 from unittest.mock import patch
+import zipfile

 # app.lingma_pool imports auto_login; tests here don't execute Playwright paths.
 # Stub module import so test environments without playwright can import pool code.
@@ -28,6 +30,7 @@ sys.modules.setdefault("playwright", _playwright)
 sys.modules.setdefault("playwright.async_api", _playwright_async)

 from app.config import _parse_accounts, load_settings
+from app.bootstrap_lingma import bootstrap_from_vsix
 from app.lingma_pool import LingmaPool
 from app.stats import StatsCollector, estimate_tokens

@@ -212,5 +215,57 @@ class ConfigParsingTests(unittest.TestCase):
        self.assertEqual(settings.tool_allowlist, [])


+class BootstrapLingmaTests(unittest.TestCase):  # Runs bootstrap_from_vsix against a VSIX fixture built on local disk.
+    def _make_test_vsix(self, root: str) -> str:  # Build test.vsix under root wrapping a nested release zip; return its path.
+        nested_zip_path = os.path.join(root, "nested.zip")
+        with zipfile.ZipFile(nested_zip_path, "w") as nested:  # inner archive: release tree rooted at 2.5.20/
+            nested.writestr("2.5.20/x86_64_linux/Lingma", b"new-binary")
+            nested.writestr("2.5.20/extension/main.js", b"console.log('ok')")
+
+        vsix_path = os.path.join(root, "test.vsix")
+        with zipfile.ZipFile(vsix_path, "w") as vsix:  # outer archive: the VSIX itself
+            with open(nested_zip_path, "rb") as nested_file:
+                vsix.writestr(
+                    "extension/dist/bin/lingma-2.5.20.zip",  # entry name under which the inner zip bytes are stored
+                    nested_file.read(),
+                )
+        return vsix_path
+
+    def test_bootstrap_refreshes_when_extension_assets_missing(self) -> None:  # Marker says 2.5.20 is installed, but extension/main.js is absent.
+        with tempfile.TemporaryDirectory() as tmpdir:
+            bin_dir = os.path.join(tmpdir, "data", "bin")
+            release_dir = os.path.join(bin_dir, "2.5.20")
+            os.makedirs(release_dir, exist_ok=True)  # release dir exists but is left empty
+
+            lingma_bin = os.path.join(bin_dir, "Lingma")
+            with open(lingma_bin, "wb") as f:
+                f.write(b"old-binary")  # stale payload; the assertion below expects it to be replaced
+
+            marker = {  # presumably the state bootstrap consults to decide it is current — confirm in app.bootstrap_lingma
+                "version": "2.5.20",
+                "release_root": "2.5.20",
+            }
+            with open(os.path.join(bin_dir, ".lingma-bootstrap.json"), "w", encoding="utf-8") as f:
+                json.dump(marker, f)
+
+            vsix_path = self._make_test_vsix(tmpdir)
+
+            env = {  # both ALWAYS/FORCE_REFRESH values are "false" (semantics assumed from the names — verify)
+                "LINGMA_BIN": lingma_bin,
+                "LINGMA_SOURCE_TYPE": "vsix",
+                "LINGMA_VSIX_URL": f"file://{vsix_path}",
+                "LINGMA_BOOTSTRAP_ALWAYS": "false",
+                "LINGMA_FORCE_REFRESH": "false",
+            }
+            with patch.dict(os.environ, env, clear=False):  # overlay on the real environment; restored on exit
+                bootstrap_from_vsix()
+
+            with open(lingma_bin, "rb") as f:
+                self.assertEqual(f.read(), b"new-binary")  # file at LINGMA_BIN now holds the fixture's binary payload
+            self.assertTrue(
+                os.path.exists(os.path.join(release_dir, "extension", "main.js"))  # extension asset present after bootstrap
+            )
+
+
 if __name__ == "__main__":
    unittest.main()
--- a/tests/test_tool_call_bridge.py
+++ b/tests/test_tool_call_bridge.py
@@ -388,6 +388,169 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
            {"query": "gateway"},
        )

+    async def test_openai_non_stream_synthesizes_tool_call_from_hash_tool_call_block(
+        self,
+    ) -> None:  # With tool_choice pinned, a "#Tool Call" fenced block in plain text must surface as an OpenAI tool_call.
+        fake_client = _FakeClient(
+            stream_events=[],
+            complete_result={  # canned non-stream result: the call appears only as text, toolEvents is empty
+                "text": '#Tool Call\n```fetch_weather\n{"city": "Hangzhou"}\n```\n',
+                "toolEvents": [],
+                "sessionId": "sess-fallback-hash-tool-call-openai",
+            },
+        )
+        req = ChatCompletionsRequest(
+            model="org_auto",
+            messages=[{"role": "user", "content": "hi"}],
+            stream=False,
+            tools=[
+                {
+                    "type": "function",
+                    "function": {
+                        "name": "fetch_weather",
+                        "parameters": {
+                            "type": "object",
+                            "properties": {"city": {"type": "string"}},
+                            "required": ["city"],
+                        },
+                    },
+                }
+            ],
+            tool_choice={"type": "function", "function": {"name": "fetch_weather"}},  # caller names fetch_weather explicitly
+        )
+
+        with (  # replace pool, guard, login check and stats recording with fakes/mocks for the handler call
+            patch.object(main, "pool", _FakePool(_FakeInstance(fake_client))),
+            patch.object(main, "chat_guard", _FakeGuard()),
+            patch.object(
+                main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})
+            ),
+            patch.object(
+                main.stats_collector, "record_chat", AsyncMock(return_value=None)
+            ),
+        ):
+            response = await main.v1_chat_completions(
+                req, _make_request("/v1/chat/completions")
+            )
+
+        payload = json.loads(response.body)
+        message = payload["choices"][0]["message"]
+        self.assertEqual(payload["choices"][0]["finish_reason"], "tool_calls")
+        self.assertEqual(message["content"], "")  # the raw block text must not remain in content
+        self.assertEqual(message["tool_calls"][0]["function"]["name"], "fetch_weather")
+        self.assertEqual(
+            json.loads(message["tool_calls"][0]["function"]["arguments"]),  # arguments is decoded from a JSON string
+            {"city": "Hangzhou"},
+        )
+
+    async def test_openai_non_stream_synthesizes_tool_call_from_hash_tool_call_block_without_tool_choice(
+        self,
+    ) -> None:  # Request omits tool_choice; a "#Tool Call" fenced block in plain text must still surface as a tool_call.
+        fake_client = _FakeClient(
+            stream_events=[],
+            complete_result={  # canned non-stream result: the call appears only as text, toolEvents is empty
+                "text": '#Tool Call\n```fetch_weather\n{"city": "Hangzhou"}\n```\n',
+                "toolEvents": [],
+                "sessionId": "sess-fallback-hash-tool-call-openai-no-choice",
+            },
+        )
+        req = ChatCompletionsRequest(  # note: no tool_choice argument is passed
+            model="org_auto",
+            messages=[{"role": "user", "content": "hi"}],
+            stream=False,
+            tools=[
+                {
+                    "type": "function",
+                    "function": {
+                        "name": "fetch_weather",
+                        "parameters": {
+                            "type": "object",
+                            "properties": {"city": {"type": "string"}},
+                            "required": ["city"],
+                        },
+                    },
+                }
+            ],
+        )
+
+        with (  # replace pool, guard, login check and stats recording with fakes/mocks for the handler call
+            patch.object(main, "pool", _FakePool(_FakeInstance(fake_client))),
+            patch.object(main, "chat_guard", _FakeGuard()),
+            patch.object(
+                main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})
+            ),
+            patch.object(
+                main.stats_collector, "record_chat", AsyncMock(return_value=None)
+            ),
+        ):
+            response = await main.v1_chat_completions(
+                req, _make_request("/v1/chat/completions")
+            )
+
+        payload = json.loads(response.body)
+        message = payload["choices"][0]["message"]
+        self.assertEqual(payload["choices"][0]["finish_reason"], "tool_calls")
+        self.assertEqual(message["content"], "")  # the raw block text must not remain in content
+        self.assertEqual(message["tool_calls"][0]["function"]["name"], "fetch_weather")
+        self.assertEqual(
+            json.loads(message["tool_calls"][0]["function"]["arguments"]),  # arguments is decoded from a JSON string
+            {"city": "Hangzhou"},
+        )
+
+    async def test_openai_non_stream_synthesizes_tool_call_from_json_action_block(
+        self,
+    ) -> None:  # A fenced "json action" block with tool/parameters keys must surface as an OpenAI tool_call.
+        fake_client = _FakeClient(
+            stream_events=[],
+            complete_result={  # canned non-stream result: the action appears only as text, toolEvents is empty
+                "text": '```json action\n{"tool":"fetch_weather","parameters":{"city":"Hangzhou"}}\n```',
+                "toolEvents": [],
+                "sessionId": "sess-action-block-openai",
+            },
+        )
+        req = ChatCompletionsRequest(  # note: no tool_choice argument is passed
+            model="org_auto",
+            messages=[{"role": "user", "content": "hi"}],
+            stream=False,
+            tools=[
+                {
+                    "type": "function",
+                    "function": {
+                        "name": "fetch_weather",
+                        "parameters": {
+                            "type": "object",
+                            "properties": {"city": {"type": "string"}},
+                            "required": ["city"],
+                        },
+                    },
+                }
+            ],
+        )
+
+        with (  # replace pool, guard, login check and stats recording with fakes/mocks for the handler call
+            patch.object(main, "pool", _FakePool(_FakeInstance(fake_client))),
+            patch.object(main, "chat_guard", _FakeGuard()),
+            patch.object(
+                main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})
+            ),
+            patch.object(
+                main.stats_collector, "record_chat", AsyncMock(return_value=None)
+            ),
+        ):
+            response = await main.v1_chat_completions(
+                req, _make_request("/v1/chat/completions")
+            )
+
+        payload = json.loads(response.body)
+        message = payload["choices"][0]["message"]
+        self.assertEqual(payload["choices"][0]["finish_reason"], "tool_calls")
+        self.assertEqual(message["content"], "")  # the raw block text must not remain in content
+        self.assertEqual(message["tool_calls"][0]["function"]["name"], "fetch_weather")  # taken from the block's "tool" key
+        self.assertEqual(
+            json.loads(message["tool_calls"][0]["function"]["arguments"]),  # matches the block's "parameters" object
+            {"city": "Hangzhou"},
+        )
+
    async def test_openai_stream_synthesizes_tool_call_from_tool_code(
        self,
    ) -> None:
@@ -439,6 +602,55 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
        self.assertIn('"finish_reason": "tool_calls"', body)
        self.assertIn("data: [DONE]", body)

+    async def test_openai_stream_synthesizes_tool_call_from_hash_tool_call_block_without_tool_choice(
+        self,
+    ) -> None:  # Streaming variant: a "#Tool Call" block arriving in pieces must yield tool_calls without tool_choice.
+        fake_client = _FakeClient(
+            stream_events=[  # the block is split across three text events: header+fence, JSON body, closing fence
+                {"type": "text", "text": "#Tool Call\n```fetch_weather\n"},
+                {"type": "text", "text": '{"city": "Hangzhou"}\n'},
+                {"type": "text", "text": "```\n"},
+            ],
+            complete_result={},
+        )
+        req = ChatCompletionsRequest(  # note: no tool_choice argument is passed
+            model="org_auto",
+            messages=[{"role": "user", "content": "hi"}],
+            stream=True,
+            tools=[
+                {
+                    "type": "function",
+                    "function": {
+                        "name": "fetch_weather",
+                        "parameters": {
+                            "type": "object",
+                            "properties": {"city": {"type": "string"}},
+                            "required": ["city"],
+                        },
+                    },
+                }
+            ],
+        )
+
+        with (  # replace pool, guard, login check and stats recording with fakes/mocks for the handler call
+            patch.object(main, "pool", _FakePool(_FakeInstance(fake_client))),
+            patch.object(main, "chat_guard", _FakeGuard()),
+            patch.object(
+                main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})
+            ),
+            patch.object(
+                main.stats_collector, "record_chat", AsyncMock(return_value=None)
+            ),
+        ):
+            response = await main.v1_chat_completions(
+                req, _make_request("/v1/chat/completions")
+            )
+            body = await _collect_stream(response)  # collected while the patches are still active; helper defined elsewhere in this file
+
+        self.assertIn('"tool_calls"', body)  # substring checks against the collected stream text
+        self.assertIn('"fetch_weather"', body)
+        self.assertIn('"finish_reason": "tool_calls"', body)
+
    async def test_openai_non_stream_synthesizes_tool_call_from_json_array(self) -> None:
        fake_client = _FakeClient(
            stream_events=[],
@@ -1918,6 +2130,117 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
        self.assertEqual(messages_dump[3]["role"], "user")
        self.assertEqual(messages_dump[3]["content"], "follow up")

+    async def test_openai_tool_result_is_emulated_into_followup_prompt(self) -> None:  # A role="tool" message must be rendered into the text prompt.
+        spy_client = _SpyClient(  # spy exposes last_complete_args, read after the handler call below
+            stream_events=[],
+            complete_result={
+                "text": "done",
+                "toolEvents": [],
+                "sessionId": "sess-emulated-tool-result",
+            },
+        )
+        req = ChatCompletionsRequest(
+            model="org_auto",
+            messages=[  # history: assistant tool call -> tool result for call_1 -> new user turn
+                {"role": "assistant", "content": None, "tool_calls": [{
+                    "id": "call_1",
+                    "type": "function",
+                    "function": {"name": "fetch_weather", "arguments": '{"city":"Hangzhou"}'},
+                }]},
+                {"role": "tool", "tool_call_id": "call_1", "content": '{"temperature":"22C"}'},
+                {"role": "user", "content": "continue"},
+            ],
+            stream=False,
+            tools=[
+                {
+                    "type": "function",
+                    "function": {
+                        "name": "fetch_weather",
+                        "parameters": {
+                            "type": "object",
+                            "properties": {"city": {"type": "string"}},
+                            "required": ["city"],
+                        },
+                    },
+                }
+            ],
+        )
+
+        with (  # replace pool, guard, login check and stats recording with fakes/mocks for the handler call
+            patch.object(main, "pool", _FakePool(_FakeInstance(spy_client))),
+            patch.object(main, "chat_guard", _FakeGuard()),
+            patch.object(
+                main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})
+            ),
+            patch.object(
+                main.stats_collector, "record_chat", AsyncMock(return_value=None)
+            ),
+        ):
+            await main.v1_chat_completions(req, _make_request("/v1/chat/completions"))  # response is ignored; only the spy is inspected
+
+        prompt = spy_client.last_complete_args[0]  # presumably the first positional arg of the spy's complete call — verify in _SpyClient
+        self.assertIn("Tool result for call_1:", prompt)  # result is labelled with its tool_call_id
+        self.assertIn('{"temperature":"22C"}', prompt)  # tool output is carried verbatim
+        self.assertIn("Assistant:", prompt)
+
+    async def test_anthropic_non_stream_synthesizes_tool_use_from_json_action_block(
+        self,
+    ) -> None:  # A fenced "json action" block must surface as an Anthropic tool_use content block.
+        fake_client = _FakeClient(
+            stream_events=[],
+            complete_result={  # canned non-stream result: the action appears only as text, toolEvents is empty
+                "text": '```json action\n{"tool":"fetch_weather","parameters":{"city":"Hangzhou"}}\n```',
+                "toolEvents": [],
+                "sessionId": "sess-anthropic-action-block",
+            },
+        )
+        req = AnthropicMessagesRequest(
+            model="claude-3-5-sonnet-20241022",
+            max_tokens=64,
+            messages=[{"role": "user", "content": "weather"}],
+            stream=False,
+            tools=[
+                {
+                    "name": "fetch_weather",
+                    "description": "Get weather for a city",
+                    "input_schema": {
+                        "type": "object",
+                        "properties": {"city": {"type": "string"}},
+                        "required": ["city"],
+                    },
+                }
+            ],
+            tool_choice={"type": "tool", "name": "fetch_weather"},  # caller names fetch_weather explicitly
+        )
+
+        with (  # replace pool, guard, login check and stats recording with fakes/mocks for the handler call
+            patch.object(main, "pool", _FakePool(_FakeInstance(fake_client))),
+            patch.object(main, "chat_guard", _FakeGuard()),
+            patch.object(
+                main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})
+            ),
+            patch.object(
+                main.stats_collector, "record_chat", AsyncMock(return_value=None)
+            ),
+            patch.object(main.settings, "api_keys", ["test-key"]),  # configured key equals the x-api-key header sent below
+        ):
+            response = await main.v1_messages(
+                req,
+                _make_request(
+                    "/v1/messages",
+                    headers={
+                        "x-api-key": "test-key",
+                        "anthropic-version": "2023-06-01",
+                    },
+                ),
+            )
+
+        payload = json.loads(response.body)
+        tool_blocks = [item for item in payload["content"] if item["type"] == "tool_use"]  # keep only tool_use content blocks
+        self.assertEqual(payload["stop_reason"], "tool_use")
+        self.assertEqual(tool_blocks[0]["name"], "fetch_weather")  # raises IndexError rather than an assertion failure if no block was produced
+        self.assertEqual(tool_blocks[0]["input"], {"city": "Hangzhou"})  # input is compared as a dict, not a JSON string
+
    async def test_responses_stream_bridges_text_tool_and_completed_events(
        self,
    ) -> None: