Add experimental Lingma remote backend

2026-04-30 12:09:51 +08:00
parent 1c188fcf17
commit 2bcb0a6715
15 changed files with 1543 additions and 37 deletions
--- a/README.md
+++ b/README.md
@@ -6,9 +6,14 @@ Lingma IPC Proxy exposes Tongyi Lingma's local IDE plugin capability as standard
 The project is designed for tools such as Claude Code, Cline, Continue, OpenCode, custom agents, and any client that can talk to OpenAI or Anthropic style APIs.
 The proxy now supports two backend modes:
 - **IPC plugin mode (default)**: connects to the local Lingma IDE plugin over WebSocket / Named Pipe. This is the safest daily mode and keeps behavior closest to the IDE plugin.
 - **Remote API mode (experimental)**: imports the local Lingma login cache or an explicit credential file and calls Lingma remote APIs directly. This can feel more like an official API and does not depend on an IDE IPC session, but it relies on non-public login and signing details that may change.
 ## Current Version
-The current desktop line is `v1.3.2`.
+The current desktop line is `v1.4.0`.
 Release builds are produced by GitHub Actions for:
@@ -63,9 +68,11 @@ Narrow window layout:
 | API | Endpoint | Support |
 | --- | --- | --- |
-| Health | `GET /` and `GET /health` | supported |
+| Health | `GET /`, `HEAD /`, `GET /health`, `HEAD /health` | supported |
 | Models | `GET /v1/models` | supported |
 | Capability Discovery | `GET /capabilities`, `GET /v1/capabilities` | supported |
 | Debug Requests | `GET /debug/requests`, `GET /debug/logs` | recent HTTP request history |
 | Debug Aliases | `GET /api/requests`, `GET /api/logs` | aliases for request/log inspection |
 | LM Studio / Ollama Discovery | `GET /api/v1/models`, `GET /api/tags`, `GET /props` | supported |
 | OpenAI Chat Completions | `POST /v1/chat/completions` | streaming and non-streaming |
 | OpenAI Chat Alias | `POST /api/v1/chat/completions` | supported |
@@ -78,6 +85,7 @@ Compared with the original protocol proof of concept, this repository focuses on
 - **Function Calling / Tools** for both OpenAI and Anthropic clients.
 - **Tool result continuation** for multi-step agent loops.
 - **Tool stability hardening** with proxy-side routing hints, core tool examples, missed-tool retry, and common alias mapping such as `Bash` to `terminal` and `Read` to `read_file`.
 - **Anthropic streaming tool-call hardening** so streaming clients such as Claude Code receive final `tool_use` events instead of premature refusal text when tools are present.
 - **Image input** for OpenAI `image_url` and Anthropic image blocks.
 - **Local and remote image normalization** for data URLs, HTTP URLs, `file://` URLs, and absolute local paths, with automatic JPEG downscaling for large images.
 - **Request log image redaction** so large base64 payloads are visible as image markers instead of breaking the desktop log view.
@@ -120,11 +128,14 @@ flowchart LR
  Service --> Session["Session Manager"]
  Service --> Tools["Tool Emulation"]
  Service --> Models["Model Discovery"]
-  Service --> Transport["Lingma Transport"]
+  Service --> Backend{"Backend Mode"}
  Backend --> Transport["IPC Plugin Transport"]
  Backend --> Remote["Remote API Client"]
  Transport --> Pipe["Windows Named Pipe"]
  Transport --> WS["macOS / Windows WebSocket"]
  Pipe --> Lingma["Tongyi Lingma IDE Plugin"]
  WS --> Lingma
  Remote --> Cloud["Lingma Remote API"]
 ```
 ### Module Layout
@@ -135,6 +146,7 @@ flowchart LR
 | `internal/httpapi` | OpenAI/Anthropic HTTP routes, streaming SSE responses, request recording |
 | `internal/service` | request orchestration, sessions, model discovery, proxy lifecycle |
 | `internal/lingmaipc` | Lingma JSON-RPC transport over Named Pipe and WebSocket |
 | `internal/remote` | remote Lingma login-cache import, signing, model list, and SSE parsing |
 | `internal/toolemulation` | tool definition injection, action block parsing, tool result projection |
 | `desktop` | Wails desktop shell, native window commands, proxy control bridge |
 | `desktop/frontend` | Vue UI for dashboard, requests, models, settings, and logs |
@@ -155,6 +167,66 @@ lingma-ipc-proxy --transport websocket --ws-url ws://127.0.0.1:36510 --port 8095
 lingma-ipc-proxy --transport pipe --pipe '\\.\pipe\lingma-ipc'
 ```
 ## Backend Modes
 ### IPC Plugin Mode (Default)
 IPC mode talks to the local Lingma IDE plugin:
 ```bash
 lingma-ipc-proxy --backend ipc --transport auto --port 8095
 ```
 Use this when VS Code / the Lingma plugin is already running, when you want plugin session behavior, or when you want the model list exposed by the local plugin.
 ### Remote API Mode (Experimental)
 Remote mode calls Lingma's remote API directly:
 ```bash
 lingma-ipc-proxy --backend remote --port 8095
 ```
 By default it reads the local Lingma login cache in read-only mode:
 ```text
 ~/.lingma/cache/user
 ~/.lingma/cache/id
 ~/.lingma/logs/lingma.log
 ```
 You can also pass an explicit credential file:
 ```bash
 lingma-ipc-proxy \
  --backend remote \
  --remote-base-url https://lingma.alibabacloud.com \
  --remote-auth-file ~/.config/lingma-ipc-proxy/credentials.json
 ```
 Credential file format:
 ```json
 {
  "source": "manual",
  "token_expire_time": "1777520000000",
  "auth": {
    "cosy_key": "xxx",
    "encrypt_user_info": "xxx",
    "user_id": "123",
    "machine_id": "xxxxxxxxxxxxxxxx"
  }
 }
 ```
 Notes:
 - Remote mode does not write or migrate login state. It only reads the local Lingma cache or the credential file you provide.
 - If your Lingma plugin uses a dedicated domain, set `--remote-base-url`, `LINGMA_REMOTE_BASE_URL`, or the JSON config field explicitly.
 - `/v1/models` in remote mode returns remote API model keys, which may not match the IPC plugin display IDs such as `MiniMax-M2.7` or `Kimi-K2.6`.
 - Local validation passed for `/health`, `/v1/models`, OpenAI streaming and non-streaming chat, and Claude Code (Anthropic API) with Bash tool use. Full Claude Code tool runs are much slower than simple OpenAI requests because the client sends a large context and performs a second tool-result turn.
 - This mode is inspired by the remote API and credential-signing research in [ZipperCode/lingma2api](https://github.com/ZipperCode/lingma2api), integrated here as a switchable backend under the existing OpenAI / Anthropic / desktop app architecture.
 ## Quick Start
 ### Desktop App
@@ -262,7 +334,11 @@ Example:
 {
  "host": "127.0.0.1",
  "port": 8095,
  "backend": "ipc",
  "transport": "auto",
  "remote_base_url": "",
  "remote_auth_file": "",
  "remote_version": "",
  "mode": "agent",
  "shell_type": "zsh",
  "session_mode": "auto",
@@ -311,9 +387,36 @@ Current proxy hardening includes:
 - dedicated examples for `read_file`, `search_files`, `terminal`, and `web_search`
 - automatic retry when the model says it cannot access files, terminal, or web despite tools being present
 - common tool alias normalization such as `Bash` -> `terminal`, `Read` -> `read_file`, `Grep` -> `search_files`, and `Edit` -> `patch`
 - Anthropic `stream=true` requests with tools are resolved internally before streaming the final `tool_use` blocks, which avoids sending premature "please run this command yourself" text to clients such as Claude Code.
 In local smoke tests after this hardening, `MiniMax-M2.7`, `Kimi-K2.6`, `Qwen3.6-Plus`, and `Qwen3-Coder` all completed read/search/terminal/web/patch/vision checks, with `MiniMax-M2.7` having the lowest average latency in the tested set.
 ## Request And Log Inspection
 The desktop app keeps a visual request stream, and the HTTP server also exposes a small read-only debug history for CLI troubleshooting.
 Useful endpoints:
 ```bash
 curl http://127.0.0.1:8095/health
 curl -I http://127.0.0.1:8095/
 curl 'http://127.0.0.1:8095/debug/requests?limit=20'
 curl 'http://127.0.0.1:8095/debug/logs?limit=20'
 ```
 `/debug/requests` and `/debug/logs` return the newest records first. Each record includes:
 - request time
 - HTTP method and path
 - status code
 - duration in milliseconds
 - sanitized request body
 - sanitized response body
 The server keeps the most recent 200 HTTP records in memory. Image payloads and large base64 strings are redacted before recording, and very large bodies are truncated to keep the desktop UI responsive.
 These debug endpoints are intended for local development and client-adapter troubleshooting. Keep them bound to localhost, or expose them only on a network you trust.
 ## Local Desktop Build
 Install Wails:
@@ -344,7 +447,7 @@ The desktop bundle name is always `Lingma IPC Proxy`.
 The release workflow is triggered by:
- pushing a tag such as `v1.3.2`
+- pushing a tag such as `v1.4.0`
 - manually running the `Release` workflow with a tag input
 Planned improvements:
--- a/README.zh-CN.md
+++ b/README.zh-CN.md
@@ -9,9 +9,14 @@
 - **CLI 代理服务**：适合后台常驻、脚本化和服务器式运行。
 - **跨平台桌面 App**：适合日常可视化管理，支持 macOS 和 Windows。
 代理后端支持两种模式：
 - **IPC 插件模式（默认）**：连接本机 Lingma IDE 插件的 WebSocket / Named Pipe。优点是更接近 IDE 插件上下文，适合日常稳定使用。
 - **远端 API 模式（实验）**：读取 Lingma 本地登录缓存或显式凭据，直接调用 Lingma 远端接口。优点是不依赖 IDE 插件窗口和 IPC 会话，体验更像官方 API；缺点是依赖本地登录态字段和非公开接口，未来可能失效。
 ## 当前版本
-当前桌面端版本线：`v1.3.2`
+当前桌面端版本线：`v1.4.0`
 GitHub Actions 会在 Release 中产出：
@@ -47,6 +52,7 @@ GitHub Actions 会在 Release 中产出：
 | 多轮 Agent 工具循环 | 支持 |
 | 图片输入 | 支持 base64、data URL、HTTP URL |
 | 请求 / 响应完整日志 | 桌面端支持完整查看和复制 |
 | 后端模式切换 | 支持 IPC 插件模式 / 远端 API 模式 |
 | macOS WebSocket 自动探测 | 支持 |
 | Windows Named Pipe / WebSocket 探测 | 支持 |
 | 日间 / 夜间 / 跟随系统主题 | 桌面端支持 |
@@ -85,10 +91,12 @@ GitHub Actions 会在 Release 中产出：
 | 端点 | 方法 | 说明 |
 | --- | --- | --- |
-| `/` | GET | 健康检查 |
+| `/` | GET / HEAD | 健康检查；`HEAD /` 用于兼容 Claude Code 等客户端的基础探测 |
-| `/health` | GET | 健康检查 |
+| `/health` | GET / HEAD | 健康检查 |
 | `/v1/models` | GET | 获取 Lingma 可用模型列表 |
 | `/capabilities` / `/v1/capabilities` | GET | 能力探测，给第三方 Agent 识别协议、工具、图片能力 |
 | `/debug/requests` / `/debug/logs` | GET | 查询最近 HTTP 请求记录，用于本地调试 |
 | `/api/requests` / `/api/logs` | GET | 请求 / 日志调试接口别名 |
 | `/api/v1/models` / `/api/tags` / `/props` | GET | LM Studio / Ollama / llama.cpp / vLLM 风格探测兼容 |
 | `/v1/chat/completions` | POST | OpenAI Chat Completions 兼容接口 |
 | `/api/v1/chat/completions` | POST | OpenAI Chat Completions 别名 |
@@ -102,6 +110,7 @@ GitHub Actions 会在 Release 中产出：
 - **工具结果接力**：支持多轮 Agent 工具调用，把工具结果继续回灌给 Lingma 生成最终回答。
 - **工具稳定性增强**：代理层自动生成工具路由表，给 `read_file` / `search_files` / `terminal` / `web_search` 注入专门示例；当模型说“无法访问 / 请手动运行 / 请粘贴文件”时自动重试工具调用。
 - **工具别名映射**：兼容常见模型输出的 `Bash` -> `terminal`、`Read` -> `read_file`、`Grep` -> `search_files`、`Edit` -> `patch`。
 - **Anthropic 流式工具调用增强**：当 Claude Code 这类客户端使用 `stream=true` 并携带 tools 时，代理会先在内部完成工具 action block 解析和拒绝重试，再输出标准 `tool_use` 流，避免提前把“请你自己运行命令”这类文本发给客户端。
 - **图片输入**：兼容 OpenAI `image_url` 和 Anthropic base64 image block。
 - **本地图片路径兼容**：OpenAI `image_url.url` 支持 data URL、HTTP URL、`file://`、绝对路径和 `~/` 路径。
 - **图片自动压缩**：大图会自动缩放并转 JPEG，避免 Lingma 被超大 base64 卡死。
@@ -167,11 +176,14 @@ flowchart LR
  Service --> Tooling["工具调用模拟"]
  Service --> Model["模型探测"]
  Service --> Recorder["请求 / 日志记录"]
-  Service --> Transport["Lingma 传输层"]
+  Service --> Backend{"后端模式"}
  Backend --> Transport["IPC 插件传输层"]
  Backend --> Remote["远端 API 客户端"]
  Transport --> Pipe["Windows Named Pipe"]
  Transport --> WS["WebSocket"]
  Pipe --> Lingma["通义灵码 IDE 插件"]
  WS --> Lingma
  Remote --> Cloud["Lingma 远端接口"]
 ```
 ### 目录结构
@@ -182,6 +194,7 @@ flowchart LR
 | `internal/httpapi` | OpenAI / Anthropic 路由、请求解析、SSE 流式响应、请求记录 |
 | `internal/service` | 业务编排、会话生命周期、模型探测、代理运行状态 |
 | `internal/lingmaipc` | Lingma JSON-RPC 通信，Named Pipe / WebSocket 传输 |
 | `internal/remote` | Lingma 远端 API 登录态读取、签名、模型列表和流式响应解析 |
 | `internal/toolemulation` | 工具定义注入、动作块解析、工具结果回灌 |
 | `desktop` | Wails 桌面壳、窗口命令、代理生命周期桥接 |
 | `desktop/frontend` | Vue 前端页面，包含仪表盘、请求流、模型、设置、日志 |
@@ -194,8 +207,8 @@ flowchart LR
 2. HTTP 层识别 OpenAI / Anthropic 请求格式。
 3. Service 层归一化消息、图片、工具定义和参数。
 4. Session 管理层决定复用会话、创建新会话或使用自动策略。
-5. Transport 层连接 Lingma 插件的 Named Pipe 或 WebSocket。
+5. Service 根据 `backend` 选择 IPC 插件传输或 Lingma 远端 API。
-6. Lingma 返回增量事件或最终响应。
+6. Lingma 插件或远端接口返回增量事件 / 最终响应。
 7. HTTP 层转换成 OpenAI SSE、Anthropic SSE 或普通 JSON。
 8. 桌面端同步记录请求、响应、耗时、状态码和日志。
@@ -220,6 +233,66 @@ lingma-ipc-proxy --transport websocket --ws-url ws://127.0.0.1:36510 --port 8095
 lingma-ipc-proxy --transport pipe --pipe '\\.\pipe\lingma-ipc'
 ```
 ## 后端模式
 ### IPC 插件模式（默认）
 IPC 模式通过本机 Lingma IDE 插件通信：
 ```bash
 lingma-ipc-proxy --backend ipc --transport auto --port 8095
 ```
 适合已经打开 VS Code / Lingma 插件、希望使用插件当前会话环境、并优先使用插件探测模型列表的场景。
 ### 远端 API 模式（实验）
 远端模式直接调用 Lingma 远端接口：
 ```bash
 lingma-ipc-proxy --backend remote --port 8095
 ```
 默认会只读导入：
 ```text
 ~/.lingma/cache/user
 ~/.lingma/cache/id
 ~/.lingma/logs/lingma.log
 ```
 也可以指定显式凭据文件：
 ```bash
 lingma-ipc-proxy \
  --backend remote \
  --remote-base-url https://lingma.alibabacloud.com \
  --remote-auth-file ~/.config/lingma-ipc-proxy/credentials.json
 ```
 `credentials.json` 格式：
 ```json
 {
  "source": "manual",
  "token_expire_time": "1777520000000",
  "auth": {
    "cosy_key": "xxx",
    "encrypt_user_info": "xxx",
    "user_id": "123",
    "machine_id": "xxxxxxxxxxxxxxxx"
  }
 }
 ```
 说明：
 - 远端模式不会写入或迁移你的登录态，只会读取本机 Lingma 缓存或你指定的凭据文件。
 - 如果 Lingma 插件配置过专属域名，可以通过 `--remote-base-url`、`LINGMA_REMOTE_BASE_URL` 或配置文件显式指定。
 - 远端模式的 `/v1/models` 返回的是远端接口模型 key，不一定等同于 IPC 插件模式里看到的 `MiniMax-M2.7`、`Kimi-K2.6` 等展示名。
 - 当前本机实测：`/health`、`/v1/models`、OpenAI 流式 / 非流式、Claude Code Anthropic + Bash 工具调用均可用；Claude Code 完整工具链耗时明显高于简单 OpenAI 请求。
 - 该模式参考了 [ZipperCode/lingma2api](https://github.com/ZipperCode/lingma2api) 对 Lingma 远端接口、签名和登录态结构的探索，本仓库将其作为可切换后端集成到现有 OpenAI / Anthropic / 桌面 App 架构中。
 ## 快速开始
 ### 前置条件
@@ -343,7 +416,11 @@ export ANTHROPIC_API_KEY="any"
 {
  "host": "127.0.0.1",
  "port": 8095,
  "backend": "ipc",
  "transport": "auto",
  "remote_base_url": "",
  "remote_auth_file": "",
  "remote_version": "",
  "mode": "agent",
  "shell_type": "zsh",
  "session_mode": "auto",
@@ -399,6 +476,7 @@ Lingma 插件本身没有公开标准 OpenAI / Anthropic Tools 协议，所以
 - 对 `read_file`、`search_files`、`terminal`、`web_search` 注入专门示例。
 - 当模型回答“无法访问文件 / 无法联网 / 请手动运行 / 请粘贴内容”时，代理会自动追加强制工具调用提示并重试一次。
 - 自动归一化常见工具名别名：`Bash`、`Shell`、`Read`、`Grep`、`Edit`、`Fetch` 等。
 - Anthropic `stream=true` 且请求包含 tools 时，会先内部完成生成和重试，再流式输出最终 `tool_use` 事件，避免 Claude Code 这类客户端先收到普通拒绝文本。
 本地压测结果：`MiniMax-M2.7`、`Kimi-K2.6`、`Qwen3.6-Plus`、`Qwen3-Coder` 均通过 read/search/terminal/web/patch/vision 烟测；其中 `MiniMax-M2.7` 平均延迟最低，所以作为默认推荐。
@@ -418,6 +496,23 @@ Lingma 插件本身没有公开标准 OpenAI / Anthropic Tools 协议，所以
 请求体和响应体不会再用无意义的展开 / 收起按钮截断展示；内容过长时会在详情区域内部滚动，并隐藏滚动条，便于小窗口下查看完整内容。
 除了桌面端页面，HTTP 服务本身也提供只读调试接口，方便后续排查 Claude Code、Hermes、Cline 等客户端到底传了什么请求：
 ```bash
 curl http://127.0.0.1:8095/health
 curl -I http://127.0.0.1:8095/
 curl 'http://127.0.0.1:8095/debug/requests?limit=20'
 curl 'http://127.0.0.1:8095/debug/logs?limit=20'
 ```
 说明：
 - `/debug/requests` 和 `/debug/logs` 返回最新记录在前。
 - 每条记录包含时间、HTTP 方法、路径、状态码、耗时、脱敏后的请求体和响应体。
 - 服务端最多保留最近 200 条 HTTP 记录，只保存在内存中，重启后清空。
 - 图片 payload 和大段 base64 会被标记脱敏，超长请求 / 响应会截断，避免日志页面被撑爆。
 - 这些接口用于本机调试，不建议暴露到不可信网络。
 ## 本地构建桌面端
 安装 Wails：
@@ -455,8 +550,8 @@ Lingma IPC Proxy
 发布方式：
 ```bash
-git tag v1.3.2
+git tag v1.4.0
-git push origin v1.3.2
+git push origin v1.4.0
 ```
 也可以在 GitHub Actions 页面手动运行 `Release` workflow，并输入 tag。
--- a/cmd/lingma-ipc-proxy/main.go
+++ b/cmd/lingma-ipc-proxy/main.go
@@ -24,9 +24,13 @@ import (
 type fileConfig struct {
 	Host            string `json:"host"`
 	Port            int    `json:"port"`
 	Backend         string `json:"backend"`
 	Transport       string `json:"transport"`
 	Pipe            string `json:"pipe"`
 	WebSocketURL    string `json:"websocket_url"`
 	RemoteBaseURL   string `json:"remote_base_url"`
 	RemoteAuthFile  string `json:"remote_auth_file"`
 	RemoteVersion   string `json:"remote_version"`
 	Cwd             string `json:"cwd"`
 	CurrentFilePath string `json:"current_file_path"`
 	Mode            string `json:"mode"`
@@ -87,6 +91,7 @@ func loadConfig() (service.Config, string) {
 	cfg := service.Config{
 		Host:        "127.0.0.1",
 		Port:        8095,
 		Backend:     service.BackendIPC,
 		Transport:   lingmaipc.TransportAuto,
 		Cwd:         currentDir(),
 		Mode:        "agent",
@@ -110,8 +115,12 @@ func loadConfig() (service.Config, string) {
 	host := flag.String("host", cfg.Host, "Listen host")
 	port := flag.Int("port", cfg.Port, "Listen port")
 	transport := flag.String("transport", string(cfg.Transport), "Lingma transport: auto, pipe, websocket")
 	backend := flag.String("backend", string(cfg.Backend), "Backend mode: ipc or remote")
 	pipe := flag.String("pipe", cfg.Pipe, "Explicit Lingma named pipe path")
 	wsURL := flag.String("ws-url", cfg.WebSocketURL, "Explicit Lingma local websocket URL")
 	remoteBaseURL := flag.String("remote-base-url", cfg.RemoteBaseURL, "Remote Lingma API base URL")
 	remoteAuthFile := flag.String("remote-auth-file", cfg.RemoteAuthFile, "Remote Lingma credentials.json path; empty reads ~/.lingma cache")
 	remoteVersion := flag.String("remote-version", cfg.RemoteVersion, "Remote Lingma cosy version")
 	cwd := flag.String("cwd", cfg.Cwd, "Working directory used when creating Lingma sessions")
 	currentFilePath := flag.String("current-file-path", cfg.CurrentFilePath, "Current file path sent through ACP meta")
 	mode := flag.String("mode", cfg.Mode, "Lingma ACP mode value")
@@ -128,9 +137,13 @@ func loadConfig() (service.Config, string) {
 	cfg.Host = strings.TrimSpace(*host)
 	cfg.Port = *port
 	cfg.Backend = parseBackend(*backend)
 	cfg.Transport = parsedTransport
 	cfg.Pipe = strings.TrimSpace(*pipe)
 	cfg.WebSocketURL = strings.TrimSpace(*wsURL)
 	cfg.RemoteBaseURL = strings.TrimSpace(*remoteBaseURL)
 	cfg.RemoteAuthFile = strings.TrimSpace(*remoteAuthFile)
 	cfg.RemoteVersion = strings.TrimSpace(*remoteVersion)
 	cfg.Cwd = strings.TrimSpace(*cwd)
 	cfg.CurrentFilePath = strings.TrimSpace(*currentFilePath)
 	cfg.Mode = strings.TrimSpace(*mode)
@@ -184,12 +197,24 @@ func overlayFileConfig(dst *service.Config, src fileConfig) {
 	if strings.TrimSpace(src.Transport) != "" {
 		dst.Transport = parseTransport(src.Transport)
 	}
 	if strings.TrimSpace(src.Backend) != "" {
 		dst.Backend = parseBackend(src.Backend)
 	}
 	if strings.TrimSpace(src.Pipe) != "" {
 		dst.Pipe = strings.TrimSpace(src.Pipe)
 	}
 	if strings.TrimSpace(src.WebSocketURL) != "" {
 		dst.WebSocketURL = strings.TrimSpace(src.WebSocketURL)
 	}
 	if strings.TrimSpace(src.RemoteBaseURL) != "" {
 		dst.RemoteBaseURL = strings.TrimSpace(src.RemoteBaseURL)
 	}
 	if strings.TrimSpace(src.RemoteAuthFile) != "" {
 		dst.RemoteAuthFile = strings.TrimSpace(src.RemoteAuthFile)
 	}
 	if strings.TrimSpace(src.RemoteVersion) != "" {
 		dst.RemoteVersion = strings.TrimSpace(src.RemoteVersion)
 	}
 	if strings.TrimSpace(src.Cwd) != "" {
 		dst.Cwd = strings.TrimSpace(src.Cwd)
 	}
@@ -223,12 +248,24 @@ func overlayEnvConfig(dst *service.Config) {
 	if value := strings.TrimSpace(os.Getenv("LINGMA_PROXY_TRANSPORT")); value != "" {
 		dst.Transport = parseTransport(value)
 	}
 	if value := strings.TrimSpace(os.Getenv("LINGMA_PROXY_BACKEND")); value != "" {
 		dst.Backend = parseBackend(value)
 	}
 	if value := strings.TrimSpace(os.Getenv("LINGMA_IPC_PIPE")); value != "" {
 		dst.Pipe = value
 	}
 	if value := strings.TrimSpace(os.Getenv("LINGMA_PROXY_WS_URL")); value != "" {
 		dst.WebSocketURL = value
 	}
 	if value := strings.TrimSpace(os.Getenv("LINGMA_REMOTE_BASE_URL")); value != "" {
 		dst.RemoteBaseURL = value
 	}
 	if value := strings.TrimSpace(os.Getenv("LINGMA_REMOTE_AUTH_FILE")); value != "" {
 		dst.RemoteAuthFile = value
 	}
 	if value := strings.TrimSpace(os.Getenv("LINGMA_REMOTE_VERSION")); value != "" {
 		dst.RemoteVersion = value
 	}
 	if value := strings.TrimSpace(os.Getenv("LINGMA_PROXY_CWD")); value != "" {
 		dst.Cwd = value
 	}
@@ -263,6 +300,19 @@ func parseSessionMode(value string) service.SessionMode {
 	}
 }
 // parseBackend converts a user-supplied backend name into a service.BackendMode.
 // The value is trimmed and lower-cased before matching. An empty value selects
 // the IPC backend; any value other than the IPC or remote backend names
 // terminates the process through log.Fatalf.
 func parseBackend(value string) service.BackendMode {
 	normalized := strings.ToLower(strings.TrimSpace(value))
 	if normalized == "" {
 		return service.BackendIPC
 	}
 	switch candidate := service.BackendMode(normalized); candidate {
 	case service.BackendIPC, service.BackendRemote:
 		return candidate
 	}
 	log.Fatalf("invalid backend %q; expected ipc or remote", value)
 	// Not reached after log.Fatalf, but the compiler requires a final return.
 	return service.BackendIPC
 }
 func parseTransport(value string) lingmaipc.Transport {
 	transport, err := lingmaipc.ParseTransport(value)
 	if err != nil {
--- a/desktop/app.go
+++ b/desktop/app.go
@@ -260,9 +260,13 @@ func (a *App) saveConfig(cfg service.Config) error {
 	fileCfg := map[string]any{
 		"host":              cfg.Host,
 		"port":              cfg.Port,
 		"backend":           string(cfg.Backend),
 		"transport":         string(cfg.Transport),
 		"pipe":              cfg.Pipe,
 		"websocket_url":     cfg.WebSocketURL,
 		"remote_base_url":   cfg.RemoteBaseURL,
 		"remote_auth_file":  cfg.RemoteAuthFile,
 		"remote_version":    cfg.RemoteVersion,
 		"cwd":               cfg.Cwd,
 		"current_file_path": cfg.CurrentFilePath,
 		"mode":              cfg.Mode,
@@ -510,6 +514,7 @@ func defaultConfig() service.Config {
 	cfg := service.Config{
 		Host:        "127.0.0.1",
 		Port:        8095,
 		Backend:     service.BackendIPC,
 		Transport:   lingmaipc.TransportAuto,
 		Cwd:         defaultCwd(),
 		Mode:        "agent",
@@ -527,9 +532,13 @@ func defaultConfig() service.Config {
 				var fileCfg struct {
 					Host            string `json:"host"`
 					Port            int    `json:"port"`
 					Backend         string `json:"backend"`
 					Transport       string `json:"transport"`
 					Pipe            string `json:"pipe"`
 					WebSocketURL    string `json:"websocket_url"`
 					RemoteBaseURL   string `json:"remote_base_url"`
 					RemoteAuthFile  string `json:"remote_auth_file"`
 					RemoteVersion   string `json:"remote_version"`
 					Cwd             string `json:"cwd"`
 					CurrentFilePath string `json:"current_file_path"`
 					Mode            string `json:"mode"`
@@ -545,6 +554,9 @@ func defaultConfig() service.Config {
 					if fileCfg.Port > 0 {
 						cfg.Port = fileCfg.Port
 					}
 					if fileCfg.Backend != "" {
 						cfg.Backend = service.BackendMode(fileCfg.Backend)
 					}
 					if fileCfg.Transport != "" {
 						if t, err := lingmaipc.ParseTransport(fileCfg.Transport); err == nil {
 							cfg.Transport = t
@@ -556,6 +568,15 @@ func defaultConfig() service.Config {
 					if fileCfg.WebSocketURL != "" {
 						cfg.WebSocketURL = fileCfg.WebSocketURL
 					}
 					if fileCfg.RemoteBaseURL != "" {
 						cfg.RemoteBaseURL = fileCfg.RemoteBaseURL
 					}
 					if fileCfg.RemoteAuthFile != "" {
 						cfg.RemoteAuthFile = fileCfg.RemoteAuthFile
 					}
 					if fileCfg.RemoteVersion != "" {
 						cfg.RemoteVersion = fileCfg.RemoteVersion
 					}
 					if fileCfg.Cwd != "" {
 						cfg.Cwd = fileCfg.Cwd
 					}
--- a/desktop/frontend/src/App.vue
+++ b/desktop/frontend/src/App.vue
@@ -222,7 +222,7 @@ onUnmounted(() => {
        <span class="status-dot" :class="{ running: status.running }"></span>
        <div>
          <strong>{{ status.running ? 'Proxy Running' : 'Proxy Stopped' }}</strong>
-          <small>v1.3.2</small>
+          <small>v1.4.0</small>
        </div>
      </div>
    </aside>
--- a/desktop/frontend/src/views/Settings.vue
+++ b/desktop/frontend/src/views/Settings.vue
@@ -9,6 +9,10 @@ const saving = ref(false)
 const openSelect = ref('')
 const selectOptions = {
  Backend: [
    { value: 'ipc', label: 'IPC 插件' },
    { value: 'remote', label: '远端 API' },
  ],
  Transport: [
    { value: 'auto', label: '自动' },
    { value: 'pipe', label: '命名管道' },
@@ -88,6 +92,26 @@ async function save() {
          </div>
        </div>
        <div class="form-grid">
          <div class="field">
            <label>连接模式</label>
            <div class="custom-select" :class="{ open: openSelect === 'Backend' }">
              <button type="button" @click="toggleSelect('Backend')">
                <span>{{ selectLabel('Backend') }}</span>
                <i class="bi bi-chevron-down" aria-hidden="true"></i>
              </button>
              <div v-if="openSelect === 'Backend'" class="select-menu">
                <button
                  v-for="option in selectOptions.Backend"
                  :key="option.value"
                  :class="{ selected: option.value === config.Backend }"
                  type="button"
                  @click="chooseOption('Backend', option.value)"
                >
                  {{ option.label }}
                </button>
              </div>
            </div>
          </div>
          <div class="field">
            <label>主机</label>
            <input v-model="config.Host" type="text" placeholder="127.0.0.1" />
@@ -128,10 +152,22 @@ async function save() {
            <label>命名管道</label>
            <input v-model="config.Pipe" type="text" placeholder="留空自动探测 Windows Named Pipe" />
          </div>
          <div class="field span-2">
            <label>远端 API 域名</label>
            <input v-model="config.RemoteBaseURL" type="text" placeholder="留空自动探测，默认 https://lingma.alibabacloud.com" />
          </div>
          <div class="field span-2">
            <label>远端认证文件</label>
            <input v-model="config.RemoteAuthFile" type="text" placeholder="可选 credentials.json；留空只读 ~/.lingma/cache/user" />
          </div>
          <div class="field span-2">
            <label>远端 Cosy 版本</label>
            <input v-model="config.RemoteVersion" type="text" placeholder="默认 2.11.2" />
          </div>
        </div>
        <div class="hint-box">
          <strong>自动探测失败时</strong>
-          <span>先确认 VS Code / Lingma 插件已启动并登录。macOS 通常填写 WebSocket，例如 <code>ws://127.0.0.1:36510/</code>；Windows 可填写命名管道，例如 <code>\\.\pipe\lingma-xxxx</code>，也可填写 WebSocket，例如 <code>ws://127.0.0.1:36510/</code>。</span>
+          <span>IPC 模式先确认 VS Code / Lingma 插件已启动并登录。远端 API 模式会优先读取认证文件；留空时只读 <code>~/.lingma/cache/user</code>，不会写入或上传登录态。</span>
        </div>
      </div>
--- a/desktop/frontend/wailsjs/go/models.ts
+++ b/desktop/frontend/wailsjs/go/models.ts
@@ -66,9 +66,13 @@ export namespace service {
 	export class Config {
 	    Host: string;
 	    Port: number;
 	    Backend: string;
 	    Transport: string;
 	    Pipe: string;
 	    WebSocketURL: string;
 	    RemoteBaseURL: string;
 	    RemoteAuthFile: string;
 	    RemoteVersion: string;
 	    Cwd: string;
 	    CurrentFilePath: string;
 	    Mode: string;
@@ -85,9 +89,13 @@ export namespace service {
 	        if ('string' === typeof source) source = JSON.parse(source);
 	        this.Host = source["Host"];
 	        this.Port = source["Port"];
 	        this.Backend = source["Backend"];
 	        this.Transport = source["Transport"];
 	        this.Pipe = source["Pipe"];
 	        this.WebSocketURL = source["WebSocketURL"];
 	        this.RemoteBaseURL = source["RemoteBaseURL"];
 	        this.RemoteAuthFile = source["RemoteAuthFile"];
 	        this.RemoteVersion = source["RemoteVersion"];
 	        this.Cwd = source["Cwd"];
 	        this.CurrentFilePath = source["CurrentFilePath"];
 	        this.Mode = source["Mode"];
--- a/desktop/wails.json
+++ b/desktop/wails.json
@@ -11,6 +11,6 @@
    "email": "lutc5@asiainfo.com"
  },
  "info": {
-    "productVersion": "1.3.2"
+    "productVersion": "1.4.0"
  }
 }
--- a/internal/httpapi/server.go
+++ b/internal/httpapi/server.go
@@ -16,6 +16,7 @@ import (
 	"os"
 	"strconv"
 	"strings"
 	"sync"
 	"time"
 	"lingma-ipc-proxy/internal/service"
@@ -26,6 +27,8 @@ type Server struct {
 	svc     *service.Service
 	http    *http.Server
 	sem     chan struct{}
 	recMu   sync.RWMutex
 	records []debugRequestRecord
 	// OnRequest is called after each request completes with summary info.
 	// method, path, statusCode, duration, requestBody, responseBody
 	OnRequest func(method, path string, statusCode int, duration time.Duration, reqBody, respBody string)
@@ -84,6 +87,16 @@ type modelResponse struct {
 	Name    string `json:"name,omitempty"`
 }
 // debugRequestRecord is one entry of the HTTP request history returned by the
 // debug endpoints (/debug/requests, /debug/logs and their /api aliases).
 type debugRequestRecord struct {
 	// Time is the request time as a string (format not visible here; TODO confirm).
 	Time       string `json:"time"`
 	// Method is the HTTP method of the recorded request.
 	Method     string `json:"method"`
 	// Path is the URL path of the recorded request.
 	Path       string `json:"path"`
 	// StatusCode is the HTTP status code of the response.
 	StatusCode int    `json:"statusCode"`
 	// DurationMS is the request duration in milliseconds.
 	DurationMS int64  `json:"durationMs"`
 	// Request is the recorded request body; omitted from JSON when empty.
 	Request    string `json:"request,omitempty"`
 	// Response is the recorded response body; omitted from JSON when empty.
 	Response   string `json:"response,omitempty"`
 }
 func NewServer(addr string, svc *service.Service) *Server {
 	s := &Server{
 		svc: svc,
@@ -92,6 +105,10 @@ func NewServer(addr string, svc *service.Service) *Server {
 	mux := http.NewServeMux()
 	mux.HandleFunc("/", s.handleRoot)
 	mux.HandleFunc("/health", s.handleRoot)
 	mux.HandleFunc("/debug/requests", s.handleDebugRequests)
 	mux.HandleFunc("/debug/logs", s.handleDebugRequests)
 	mux.HandleFunc("/api/requests", s.handleDebugRequests)
 	mux.HandleFunc("/api/logs", s.handleDebugRequests)
 	mux.HandleFunc("/capabilities", s.handleCapabilities)
 	mux.HandleFunc("/v1/capabilities", s.handleCapabilities)
 	mux.HandleFunc("/v1/models", s.handleModels)
@@ -151,6 +168,10 @@ func (s *Server) handleRoot(w http.ResponseWriter, r *http.Request) {
 		w.WriteHeader(http.StatusNoContent)
 		return
 	}
 	if r.Method == http.MethodHead {
 		w.WriteHeader(http.StatusOK)
 		return
 	}
 	if r.Method != http.MethodGet {
 		writeOpenAIError(w, http.StatusMethodNotAllowed, "invalid_request_error", "method not allowed")
 		return
@@ -162,6 +183,44 @@ func (s *Server) handleRoot(w http.ResponseWriter, r *http.Request) {
 	})
 }
 // handleDebugRequests serves the recorded HTTP request history as JSON.
 //
 // OPTIONS is answered with 204 and HEAD with 200, both without a body; any
 // method other than GET is rejected with 405. The optional "limit" query
 // parameter selects how many records to return: it defaults to 50, is clamped
 // to the range 1..200, and is ignored when it is not a valid integer.
 func (s *Server) handleDebugRequests(w http.ResponseWriter, r *http.Request) {
 	switch r.Method {
 	case http.MethodOptions:
 		w.WriteHeader(http.StatusNoContent)
 		return
 	case http.MethodHead:
 		w.WriteHeader(http.StatusOK)
 		return
 	case http.MethodGet:
 		// Continue below with the history response.
 	default:
 		writeOpenAIError(w, http.StatusMethodNotAllowed, "invalid_request_error", "method not allowed")
 		return
 	}
 	const (
 		defaultLimit = 50
 		minLimit     = 1
 		maxLimit     = 200
 	)
 	limit := defaultLimit
 	rawLimit := strings.TrimSpace(r.URL.Query().Get("limit"))
 	if rawLimit != "" {
 		// An unparsable limit keeps the default instead of failing the request.
 		if requested, err := strconv.Atoi(rawLimit); err == nil {
 			if requested < minLimit {
 				limit = minLimit
 			} else if requested > maxLimit {
 				limit = maxLimit
 			} else {
 				limit = requested
 			}
 		}
 	}
 	history := s.debugRecords(limit)
 	payload := map[string]any{
 		"ok":       true,
 		"service":  "lingma-ipc-proxy",
 		"count":    len(history),
 		"requests": history,
 		"state":    s.svc.State(),
 	}
 	writeJSON(w, http.StatusOK, payload)
 }
 func (s *Server) handleModels(w http.ResponseWriter, r *http.Request) {
 	if r.Method == http.MethodOptions {
 		w.WriteHeader(http.StatusNoContent)
@@ -552,6 +611,101 @@ func (s *Server) handleAnthropicStream(w http.ResponseWriter, r *http.Request, r
 	}
 	msgID := fmt.Sprintf("msg_%d", time.Now().UnixNano())
 	if len(req.Tools) > 0 {
 		result, err := s.svc.Generate(r.Context(), req)
 		if err != nil {
 			writeAnthropicError(w, http.StatusInternalServerError, "api_error", err.Error())
 			return
 		}
 		streamingHeaders(w)
 		if err := writeSSEEvent(w, flusher, "message_start", map[string]any{
 			"type": "message_start",
 			"message": map[string]any{
 				"id":            msgID,
 				"type":          "message",
 				"role":          "assistant",
 				"content":       []any{},
 				"model":         model,
 				"stop_reason":   nil,
 				"stop_sequence": nil,
 				"usage": map[string]any{
 					"input_tokens":  result.InputTokens,
 					"output_tokens": 0,
 				},
 			},
 		}); err != nil {
 			return
 		}
 		index := 0
 		if strings.TrimSpace(result.Text) != "" {
 			if err := writeSSEEvent(w, flusher, "content_block_start", map[string]any{
 				"type":          "content_block_start",
 				"index":         index,
 				"content_block": map[string]any{"type": "text", "text": ""},
 			}); err != nil {
 				return
 			}
 			if err := writeSSEEvent(w, flusher, "content_block_delta", map[string]any{
 				"type":  "content_block_delta",
 				"index": index,
 				"delta": map[string]any{"type": "text_delta", "text": result.Text},
 			}); err != nil {
 				return
 			}
 			if err := writeSSEEvent(w, flusher, "content_block_stop", map[string]any{
 				"type":  "content_block_stop",
 				"index": index,
 			}); err != nil {
 				return
 			}
 			index++
 		}
 		for _, tc := range result.ToolCalls {
 			if err := writeSSEEvent(w, flusher, "content_block_start", map[string]any{
 				"type":          "content_block_start",
 				"index":         index,
 				"content_block": map[string]any{"type": "tool_use", "id": tc.ID, "name": tc.Name, "input": map[string]any{}},
 			}); err != nil {
 				return
 			}
 			argsJSON, _ := json.Marshal(tc.Arguments)
 			if err := writeSSEEvent(w, flusher, "content_block_delta", map[string]any{
 				"type":  "content_block_delta",
 				"index": index,
 				"delta": map[string]any{"type": "input_json_delta", "partial_json": string(argsJSON)},
 			}); err != nil {
 				return
 			}
 			if err := writeSSEEvent(w, flusher, "content_block_stop", map[string]any{
 				"type":  "content_block_stop",
 				"index": index,
 			}); err != nil {
 				return
 			}
 			index++
 		}
 		stopReason := "end_turn"
 		if len(result.ToolCalls) > 0 {
 			stopReason = "tool_use"
 		}
 		_ = writeSSEEvent(w, flusher, "message_delta", map[string]any{
 			"type": "message_delta",
 			"delta": map[string]any{
 				"stop_reason":   stopReason,
 				"stop_sequence": nil,
 			},
 			"usage": map[string]any{
 				"output_tokens": result.OutputTokens,
 			},
 		})
 		_ = writeSSEEvent(w, flusher, "message_stop", map[string]any{"type": "message_stop"})
 		return
 	}
 	events, done, err := s.svc.GenerateStream(r.Context(), req)
 	if err != nil {
 		writeAnthropicError(w, http.StatusInternalServerError, "api_error", err.Error())
@@ -1141,10 +1295,11 @@ func (rw *recordingResponseWriter) Flush() {
 func (s *Server) withRecorder(next http.Handler) http.Handler {
 	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		if s.OnRequest == nil {
+		if isDebugInspectionPath(r.URL.Path) {
 			next.ServeHTTP(w, r)
 			return
 		}
 		start := time.Now()
 		// Read request body for recording, then restore for downstream handler
@@ -1161,10 +1316,54 @@ func (s *Server) withRecorder(next http.Handler) http.Handler {
 		respBody := sanitizeRecordedBody(rw.body)
 		s.recordRequest(r.Method, r.URL.Path, rw.statusCode, duration, reqBody, respBody)
 		if s.OnRequest != nil {
 			go s.OnRequest(r.Method, r.URL.Path, rw.statusCode, duration, reqBody, respBody)
 		}
 	})
 }
 // isDebugInspectionPath reports whether path is exactly one of the
 // request/log inspection endpoints.
 func isDebugInspectionPath(path string) bool {
 	return path == "/debug/requests" ||
 		path == "/debug/logs" ||
 		path == "/api/requests" ||
 		path == "/api/logs"
 }
 // recordRequest appends one exchange to the in-memory history under recMu and
 // keeps only the 200 most recent entries.
 func (s *Server) recordRequest(method, path string, statusCode int, duration time.Duration, reqBody, respBody string) {
 	const maxRecords = 200
 	s.recMu.Lock()
 	defer s.recMu.Unlock()
 	entry := debugRequestRecord{
 		Time:       time.Now().Format(time.RFC3339),
 		Method:     method,
 		Path:       path,
 		StatusCode: statusCode,
 		DurationMS: duration.Milliseconds(),
 		Request:    reqBody,
 		Response:   respBody,
 	}
 	s.records = append(s.records, entry)
 	if overflow := len(s.records) - maxRecords; overflow > 0 {
 		s.records = s.records[overflow:]
 	}
 }
 // debugRecords returns a copy of up to limit recorded exchanges, newest first.
 func (s *Server) debugRecords(limit int) []debugRequestRecord {
 	s.recMu.RLock()
 	defer s.recMu.RUnlock()
 	total := len(s.records)
 	if total < limit {
 		limit = total
 	}
 	newestFirst := make([]debugRequestRecord, 0, limit)
 	for offset := 1; offset <= total && len(newestFirst) < limit; offset++ {
 		newestFirst = append(newestFirst, s.records[total-offset])
 	}
 	return newestFirst
 }
 func sanitizeRecordedBody(body []byte) string {
 	if len(body) == 0 {
 		return ""
@@ -1254,7 +1453,7 @@ func truncateRecordedString(value string) string {
 func withCORS(next http.Handler) http.Handler {
 	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		w.Header().Set("Access-Control-Allow-Origin", "*")
-		w.Header().Set("Access-Control-Allow-Methods", "GET, POST, OPTIONS")
+		w.Header().Set("Access-Control-Allow-Methods", "GET, HEAD, POST, OPTIONS")
 		w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization, x-api-key, anthropic-version")
 		if r.Method == http.MethodOptions {
 			w.WriteHeader(http.StatusNoContent)
--- a/internal/remote/client.go
+++ b/internal/remote/client.go
@@ -0,0 +1,464 @@
 package remote
 import (
 	"bufio"
 	"context"
 	"crypto/md5"
 	"encoding/base64"
 	"encoding/json"
 	"fmt"
 	"io"
 	"net/http"
 	"os"
 	"path/filepath"
 	"sort"
 	"strconv"
 	"strings"
 	"time"
 )
 // Endpoint constants for the remote backend.
 const (
 	// DefaultBaseURL is what ResolveBaseURL returns when no explicit value,
 	// environment variable, or config-file hint is found.
 	DefaultBaseURL = "https://lingma.alibabacloud.com"
 	// chatPath is the chat endpoint path; it is both requested and signed.
 	chatPath       = "/algo/api/v2/service/pro/sse/agent_chat_generation"
 	// chatQuery is appended to the chat URL; it is not passed to the signer.
 	chatQuery      = "?FetchKeys=llm_model_result&AgentId=agent_common"
 	// modelListPath is the model listing endpoint path.
 	modelListPath  = "/algo/api/v2/model/list"
 )
 // Config holds the settings for a remote Client. New fills in defaults for
 // any zero-valued field.
 type Config struct {
 	// BaseURL is the remote origin; when empty New resolves it via ResolveBaseURL.
 	BaseURL     string
 	// AuthFile is an optional credential file path handed to LoadCredential.
 	AuthFile    string
 	// CosyVersion is sent in headers and payloads; New defaults it to "2.11.2".
 	CosyVersion string
 	// Timeout is the HTTP client timeout; New defaults it to 120 seconds.
 	Timeout     time.Duration
 }
 // Client talks to the Lingma remote API using the normalized Config and a
 // single shared http.Client. Construct it with New.
 type Client struct {
 	cfg    Config
 	client *http.Client
 }
 // Model is one entry of the remote model list as decoded by ListModels.
 type Model struct {
 	Key         string `json:"key"`
 	DisplayName string `json:"display_name"`
 	Model       string `json:"model"`
 	Enable      bool   `json:"enable"`
 }
 // ChatRequest describes a single-prompt chat call for Client.Chat.
 type ChatRequest struct {
 	// Model is the model key; "auto" (any case) is sent as an empty key.
 	Model       string
 	// Prompt becomes the content of the single user message.
 	Prompt      string
 	// Stream is carried for callers; buildBody always requests a streamed reply.
 	Stream      bool
 	// Temperature overrides the default of 0.1 when non-nil.
 	Temperature *float64
 }
 // ChatResult is the aggregated outcome of Client.Chat.
 type ChatResult struct {
 	// Text is the concatenation of all streamed content deltas.
 	Text          string
 	// InputTokens and OutputTokens are local estimates from estimateTokens,
 	// not counts reported by the server.
 	InputTokens   int
 	OutputTokens  int
 	// RequestID is the locally generated ID sent with the request.
 	RequestID     string
 	// CredentialSrc is the Source of the credential used for the call.
 	CredentialSrc string
 }
 // StreamEvent carries one text delta.
 // NOTE(review): no use of this type is visible in this part of the file — confirm it is still needed.
 type StreamEvent struct {
 	Delta string
 }
 // New builds a Client from cfg after applying defaults: the base URL is
 // resolved via ResolveBaseURL when empty and stripped of trailing slashes,
 // CosyVersion falls back to "2.11.2", and a non-positive Timeout becomes
 // 120 seconds.
 func New(cfg Config) *Client {
 	baseURL := cfg.BaseURL
 	if baseURL == "" {
 		baseURL = ResolveBaseURL("")
 	}
 	cfg.BaseURL = strings.TrimRight(baseURL, "/")
 	if cfg.CosyVersion == "" {
 		cfg.CosyVersion = "2.11.2"
 	}
 	if cfg.Timeout <= 0 {
 		cfg.Timeout = 120 * time.Second
 	}
 	httpClient := &http.Client{Timeout: cfg.Timeout}
 	return &Client{cfg: cfg, client: httpClient}
 }
 // ResolveBaseURL picks the remote base URL, without trailing slashes, from
 // the first source that yields a value: the explicit argument, the
 // LINGMA_REMOTE_BASE_URL environment variable, a hint found in one of the
 // candidate config files, and finally DefaultBaseURL.
 func ResolveBaseURL(explicit string) string {
 	stripSlash := func(raw string) string { return strings.TrimRight(raw, "/") }
 	if fromArg := strings.TrimSpace(explicit); fromArg != "" {
 		return stripSlash(fromArg)
 	}
 	if fromEnv := strings.TrimSpace(os.Getenv("LINGMA_REMOTE_BASE_URL")); fromEnv != "" {
 		return stripSlash(fromEnv)
 	}
 	for _, candidate := range candidateConfigFiles() {
 		hint := readBaseURLHint(candidate)
 		if hint == "" {
 			continue
 		}
 		return stripSlash(hint)
 	}
 	return DefaultBaseURL
 }
 // Warmup checks that a credential can be loaded and that the model list
 // endpoint answers within 15 seconds.
 func (c *Client) Warmup(ctx context.Context) error {
 	if _, err := LoadCredential(c.cfg.AuthFile); err != nil {
 		return err
 	}
 	probeCtx, cancel := context.WithTimeout(ctx, 15*time.Second)
 	defer cancel()
 	_, err := c.ListModels(probeCtx)
 	return err
 }
 // ListModels fetches the remote model list with a signed GET request and
 // returns the "chat" entries followed by the "inline" entries.
 //
 // It returns an error when the credential cannot be loaded or validated, the
 // request fails, the server answers with status >= 400, the body cannot be
 // read completely, or the body is not valid JSON.
 func (c *Client) ListModels(ctx context.Context) ([]Model, error) {
 	cred, err := LoadCredential(c.cfg.AuthFile)
 	if err != nil {
 		return nil, err
 	}
 	headers, err := c.headers(cred, modelListPath, "")
 	if err != nil {
 		return nil, err
 	}
 	req, err := http.NewRequestWithContext(ctx, http.MethodGet, c.cfg.BaseURL+modelListPath, nil)
 	if err != nil {
 		return nil, err
 	}
 	for k, v := range headers {
 		req.Header.Set(k, v)
 	}
 	resp, err := c.client.Do(req)
 	if err != nil {
 		return nil, err
 	}
 	defer resp.Body.Close()
 	body, readErr := io.ReadAll(resp.Body)
 	if resp.StatusCode >= 400 {
 		// The body is only diagnostic here, so a partial read is still useful.
 		return nil, fmt.Errorf("remote model list status %d: %s", resp.StatusCode, truncate(string(body), 500))
 	}
 	if readErr != nil {
 		// Surface the transport failure instead of a misleading JSON error.
 		return nil, fmt.Errorf("read remote model list: %w", readErr)
 	}
 	var payload struct {
 		Chat   []Model `json:"chat"`
 		Inline []Model `json:"inline"`
 	}
 	if err := json.Unmarshal(body, &payload); err != nil {
 		return nil, fmt.Errorf("parse remote model list: %w", err)
 	}
 	return append(payload.Chat, payload.Inline...), nil
 }
 // Chat sends one prompt to the remote chat endpoint and collects the streamed
 // answer. Each non-empty content delta is appended to the result text and, if
 // onDelta is non-nil, forwarded to it. Token counts in the result are local
 // estimates. A status >= 400 is returned as an error carrying up to 1000
 // bytes of the response body.
 func (c *Client) Chat(ctx context.Context, request ChatRequest, onDelta func(string)) (*ChatResult, error) {
 	cred, err := LoadCredential(c.cfg.AuthFile)
 	if err != nil {
 		return nil, err
 	}
 	requestID := newHexID()
 	payload, err := c.buildBody(requestID, request)
 	if err != nil {
 		return nil, err
 	}
 	signed, err := c.headers(cred, chatPath, payload)
 	if err != nil {
 		return nil, err
 	}
 	endpoint := c.cfg.BaseURL + chatPath + chatQuery
 	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, strings.NewReader(payload))
 	if err != nil {
 		return nil, err
 	}
 	for name, value := range signed {
 		httpReq.Header.Set(name, value)
 	}
 	resp, err := c.client.Do(httpReq)
 	if err != nil {
 		return nil, err
 	}
 	defer resp.Body.Close()
 	if resp.StatusCode >= 400 {
 		failure, _ := io.ReadAll(resp.Body)
 		return nil, fmt.Errorf("remote chat status %d: %s", resp.StatusCode, truncate(string(failure), 1000))
 	}
 	var answer strings.Builder
 	collect := func(event sseEvent) error {
 		if event.Done || event.Content == "" {
 			return nil
 		}
 		answer.WriteString(event.Content)
 		if onDelta != nil {
 			onDelta(event.Content)
 		}
 		return nil
 	}
 	if err := scanSSE(resp.Body, collect); err != nil {
 		return nil, err
 	}
 	text := answer.String()
 	return &ChatResult{
 		Text:          text,
 		InputTokens:   estimateTokens(request.Prompt),
 		OutputTokens:  estimateTokens(text),
 		RequestID:     requestID,
 		CredentialSrc: cred.Source,
 	}, nil
 }
 // buildBody serializes the chat request payload as a JSON string.
 //
 // The temperature defaults to 0.1 when request.Temperature is nil, and a model
 // of "auto" (case-insensitive) is sent as an empty key. requestID is reused
 // for request_id, chat_record_id and the business name suffix. The payload
 // always sets "stream" to true; request.Stream is not consulted here.
 func (c *Client) buildBody(requestID string, request ChatRequest) (string, error) {
 	temperature := 0.1
 	if request.Temperature != nil {
 		temperature = *request.Temperature
 	}
 	model := strings.TrimSpace(request.Model)
 	if strings.EqualFold(model, "auto") {
 		model = ""
 	}
 	// NOTE(review): the field set below presumably mirrors what the IDE plugin
 	// sends; the remote schema is not visible here — confirm before changing it.
 	payload := map[string]any{
 		"request_id":       requestID,
 		"request_set_id":   "",
 		"chat_record_id":   requestID,
 		"stream":           true,
 		"image_urls":       nil,
 		"is_reply":         false,
 		"is_retry":         false,
 		"session_id":       "",
 		"code_language":    "",
 		"source":           0,
 		"version":          "3",
 		"chat_prompt":      "",
 		"parameters":       map[string]float64{"temperature": temperature},
 		"aliyun_user_type": "personal_standard",
 		"agent_id":         "agent_common",
 		"task_id":          "question_refine",
 		"model_config": map[string]any{
 			"key":          model,
 			"display_name": "",
 			"model":        model,
 			"format":       "",
 			"is_vl":        false,
 			"is_reasoning": false,
 			"api_key":      "",
 			"url":          "",
 			"source":       "",
 			"enable":       false,
 		},
 		// The whole prompt travels as a single user message.
 		"messages": []map[string]any{{
 			"role":    "user",
 			"content": request.Prompt,
 			"response_meta": map[string]any{
 				"id": "",
 				"usage": map[string]int{
 					"prompt_tokens":     0,
 					"completion_tokens": 0,
 					"total_tokens":      0,
 				},
 			},
 			"reasoning_content_signature": "",
 		}},
 		"business": map[string]any{
 			"product":  "jb_plugin",
 			"version":  c.cfg.CosyVersion,
 			"type":     "memory",
 			"id":       newUUID(),
 			"begin_at": time.Now().UnixMilli(),
 			"stage":    "start",
 			"name":     "memory_intent_recognition_" + requestID,
 		},
 	}
 	body, err := json.Marshal(payload)
 	return string(body), err
 }
 // headers builds the signed header set for one request.
 //
 // It validates cred, encodes a small JSON auth payload as base64, and signs
 // it with an MD5 digest over the newline-joined fields: base64 payload,
 // CosyKey, Unix-seconds date, request body, and the normalized path. The
 // Authorization value has the form "Bearer COSY.<payload>.<hex digest>".
 // body must be the exact string that is sent, since it is part of the digest.
 func (c *Client) headers(cred Credential, path string, body string) (map[string]string, error) {
 	if err := validateCredential(cred); err != nil {
 		return nil, err
 	}
 	// The same timestamp is signed and sent as Cosy-Date.
 	date := strconv.FormatInt(time.Now().Unix(), 10)
 	authPayload := map[string]string{
 		"cosyVersion": c.cfg.CosyVersion,
 		"ideVersion":  "",
 		"info":        cred.EncryptUserInfo,
 		"requestId":   newUUID(),
 		"version":     "v1",
 	}
 	authPayloadBytes, err := json.Marshal(authPayload)
 	if err != nil {
 		return nil, err
 	}
 	payloadBase64 := base64.StdEncoding.EncodeToString(authPayloadBytes)
 	// Field order in the preimage is significant for the digest.
 	preimage := strings.Join([]string{
 		payloadBase64,
 		cred.CosyKey,
 		date,
 		body,
 		normalizePath(path),
 	}, "\n")
 	signature := md5.Sum([]byte(preimage))
 	// NOTE(review): client IP, client type and machine OS are fixed literals
 	// here regardless of the actual host — confirm the server does not check them.
 	return map[string]string{
 		"Authorization":     fmt.Sprintf("Bearer COSY.%s.%x", payloadBase64, signature),
 		"Content-Type":      "application/json",
 		"Appcode":           "cosy",
 		"Cosy-Date":         date,
 		"Cosy-Key":          cred.CosyKey,
 		"Cosy-Machineid":    cred.MachineID,
 		"Cosy-User":         cred.UserID,
 		"Cosy-Clientip":     "198.18.0.1",
 		"Cosy-Clienttype":   "2",
 		"Cosy-Machineos":    "x86_64_windows",
 		"Cosy-Machinetoken": "",
 		"Cosy-Machinetype":  "",
 		"Cosy-Version":      c.cfg.CosyVersion,
 		"Login-Version":     "v2",
 		"User-Agent":        "lingma-ipc-proxy/remote",
 		"Accept":            "text/event-stream",
 		"Cache-Control":     "no-cache",
 	}, nil
 }
 // normalizePath drops a leading "/algo" from path; other paths are returned
 // unchanged. The result is the path form used in the request signature.
 func normalizePath(path string) string {
 	const gatewayPrefix = "/algo"
 	if strings.HasPrefix(path, gatewayPrefix) {
 		return path[len(gatewayPrefix):]
 	}
 	return path
 }
 // outerSSE is the JSON envelope carried by each "data:" line: a string body
 // plus a status code for that frame.
 type outerSSE struct {
 	Body       string `json:"body"`
 	StatusCode int    `json:"statusCodeValue"`
 }
 // innerSSE is the JSON document stored inside outerSSE.Body; only the content
 // deltas of its choices are read.
 type innerSSE struct {
 	Choices []struct {
 		Delta struct {
 			Content string `json:"content"`
 		} `json:"delta"`
 	} `json:"choices"`
 }
 // sseEvent is the decoded form of one frame: either a content delta or the
 // end-of-stream marker.
 type sseEvent struct {
 	Content string
 	Done    bool
 }
 // scanSSE reads reader line by line and calls onEvent for every decodable
 // "data:" frame. Lines without the "data:" prefix and frames with an empty
 // payload or empty body are skipped. Scanning stops after the first done
 // marker, whether it arrives as a bare "[DONE]" payload or as an enveloped
 // body of "[DONE]"; the marker itself is still delivered to onEvent.
 //
 // It returns the first error from decoding, from onEvent, or from the
 // underlying scanner (lines longer than 1 MiB fail the scan).
 func scanSSE(reader io.Reader, onEvent func(sseEvent) error) error {
 	scanner := bufio.NewScanner(reader)
 	scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
 	for scanner.Scan() {
 		line := strings.TrimSpace(scanner.Text())
 		if !strings.HasPrefix(line, "data:") {
 			continue
 		}
 		payload := strings.TrimSpace(strings.TrimPrefix(line, "data:"))
 		if payload == "" {
 			// An empty data field is legal SSE; it must not abort the stream.
 			continue
 		}
 		if payload == "[DONE]" {
 			return onEvent(sseEvent{Done: true})
 		}
 		event, ok, err := parseSSEPayload(payload)
 		if err != nil {
 			return err
 		}
 		if !ok {
 			continue
 		}
 		if err := onEvent(event); err != nil {
 			return err
 		}
 		if event.Done {
 			// Treat the enveloped marker like the bare one: nothing after it
 			// belongs to this answer.
 			return nil
 		}
 	}
 	return scanner.Err()
 }
 // parseSSEPayload decodes one enveloped frame. The boolean result is false
 // when the frame carries nothing to deliver (empty body). A frame status of
 // 400 or above, or malformed JSON at either level, is returned as an error.
 func parseSSEPayload(payload string) (sseEvent, bool, error) {
 	var outer outerSSE
 	if err := json.Unmarshal([]byte(payload), &outer); err != nil {
 		return sseEvent{}, false, err
 	}
 	if outer.StatusCode >= 400 {
 		return sseEvent{}, false, fmt.Errorf("remote sse status %d", outer.StatusCode)
 	}
 	switch outer.Body {
 	case "":
 		return sseEvent{}, false, nil
 	case "[DONE]":
 		return sseEvent{Done: true}, true, nil
 	}
 	var inner innerSSE
 	if err := json.Unmarshal([]byte(outer.Body), &inner); err != nil {
 		return sseEvent{}, false, err
 	}
 	var builder strings.Builder
 	for _, choice := range inner.Choices {
 		builder.WriteString(choice.Delta.Content)
 	}
 	return sseEvent{Content: builder.String()}, true, nil
 }
 // candidateConfigFiles lists, in lookup order, the config files under the
 // user's home directory that may contain a base URL hint. It returns nil when
 // the home directory cannot be determined.
 func candidateConfigFiles() []string {
 	home, err := os.UserHomeDir()
 	if err != nil {
 		return nil
 	}
 	lingmaRoot := filepath.Join(home, ".lingma")
 	return []string{
 		filepath.Join(lingmaRoot, "extension", "server", "config.json"),
 		filepath.Join(lingmaRoot, "extension", "local", "config.json"),
 		filepath.Join(lingmaRoot, "bin", "config.json"),
 		filepath.Join(home, ".config", "lingma-ipc-proxy", "config.json"),
 	}
 }
 func readBaseURLHint(path string) string {
 	body, err := os.ReadFile(path)
 	if err != nil {
 		return ""
 	}
 	var value any
 	if err := json.Unmarshal(body, &value); err != nil {
 		text := string(body)
 		if strings.Contains(text, "lingma.alibabacloud.com") {
 			return DefaultBaseURL
 		}
 		return ""
 	}
 	return findBaseURL(value)
 }
 // findBaseURL searches a decoded JSON value depth-first for a base URL hint.
 //
 // A hint is a string value that starts with "http" (after trimming), contains
 // "lingma", and sits under an object key whose lowercase form contains
 // "base", "domain" or "url". Object keys are visited in sorted order so the
 // same document always yields the same hint, even when several keys match.
 // It returns the trimmed hint, or "" when none is found.
 func findBaseURL(value any) string {
 	switch typed := value.(type) {
 	case map[string]any:
 		// Go randomizes map iteration order; sort keys for a stable result.
 		keys := make([]string, 0, len(typed))
 		for key := range typed {
 			keys = append(keys, key)
 		}
 		sort.Strings(keys)
 		for _, key := range keys {
 			item := typed[key]
 			lower := strings.ToLower(key)
 			if strings.Contains(lower, "base") || strings.Contains(lower, "domain") || strings.Contains(lower, "url") {
 				if text, ok := item.(string); ok && strings.HasPrefix(strings.TrimSpace(text), "http") && strings.Contains(text, "lingma") {
 					return strings.TrimSpace(text)
 				}
 			}
 			if nested := findBaseURL(item); nested != "" {
 				return nested
 			}
 		}
 	case []any:
 		for _, item := range typed {
 			if nested := findBaseURL(item); nested != "" {
 				return nested
 			}
 		}
 	}
 	return ""
 }
 // estimateTokens approximates a token count as one token per four runes of
 // the whitespace-trimmed text, rounded down.
 func estimateTokens(text string) int {
 	runes := 0
 	for range strings.TrimSpace(text) {
 		runes++
 	}
 	return runes / 4
 }
 // truncate trims surrounding whitespace from value and, if the result is
 // longer than max bytes, cuts it and appends "... [truncated]".
 //
 // The cut never lands inside a multi-byte UTF-8 sequence: it is moved back
 // (by at most three bytes) to the start of the rune that straddles max, so
 // the kept prefix may be slightly shorter than max. A negative max is treated
 // as zero.
 func truncate(value string, max int) string {
 	value = strings.TrimSpace(value)
 	if max < 0 {
 		max = 0
 	}
 	if len(value) <= max {
 		return value
 	}
 	cut := max
 	// Bytes of the form 10xxxxxx are UTF-8 continuation bytes; step back over
 	// them so the prefix ends on a rune boundary.
 	for back := 0; back < 3 && cut > 0 && value[cut]&0xC0 == 0x80; back++ {
 		cut--
 	}
 	return value[:cut] + "... [truncated]"
 }
 // expandHome replaces a leading "~" with the current user's home directory.
 //
 // It expands a bare "~", the "~/" form, and "~" followed by the OS path
 // separator (so `~\file` works on Windows). Anything else, including
 // "~user/..." forms, is returned unchanged, as is the input when the home
 // directory cannot be determined.
 func expandHome(path string) string {
 	if path != "~" &&
 		!strings.HasPrefix(path, "~/") &&
 		!strings.HasPrefix(path, "~"+string(os.PathSeparator)) {
 		return path
 	}
 	home, err := os.UserHomeDir()
 	if err != nil {
 		return path
 	}
 	// Join cleans the leading separator left over after dropping "~".
 	return filepath.Join(home, path[1:])
 }
 // valueOr returns value unchanged unless it is empty or whitespace-only, in
 // which case it returns fallback.
 func valueOr(value string, fallback string) string {
 	if strings.TrimSpace(value) == "" {
 		return fallback
 	}
 	return value
 }
 // hexCounter is a sequence number that newHexID increments atomically when it
 // has to build a fallback ID because the random source failed.
 var hexCounter uint64
--- a/internal/remote/credentials.go
+++ b/internal/remote/credentials.go
@@ -0,0 +1,205 @@
 package remote
 import (
 	"crypto/aes"
 	"crypto/cipher"
 	"encoding/base64"
 	"encoding/json"
 	"errors"
 	"fmt"
 	"os"
 	"path/filepath"
 	"strconv"
 	"strings"
 	"time"
 )
 // Credential is the login material needed to sign remote requests. The four
 // identity fields must all be non-blank to pass validateCredential.
 type Credential struct {
 	CosyKey         string
 	EncryptUserInfo string
 	UserID          string
 	MachineID       string
 	// Source describes where the credential was loaded from.
 	Source          string
 	// TokenExpireTime is compared against Unix milliseconds by IsExpired;
 	// zero or negative means no known expiry.
 	TokenExpireTime int64
 }
 // storedCredentialFile is the JSON layout of an explicit credential file read
 // by loadCredentialFile.
 type storedCredentialFile struct {
 	Source          string `json:"source"`
 	// TokenExpireTime is stored as a decimal string and parsed with parseExpire.
 	TokenExpireTime string `json:"token_expire_time"`
 	Auth            struct {
 		CosyKey         string `json:"cosy_key"`
 		EncryptUserInfo string `json:"encrypt_user_info"`
 		UserID          string `json:"user_id"`
 		MachineID       string `json:"machine_id"`
 	} `json:"auth"`
 }
 // LoadCredential loads a credential from authFile (with "~/" expanded) when
 // one is given, and otherwise imports it from the local Lingma login cache.
 func LoadCredential(authFile string) (Credential, error) {
 	explicit := strings.TrimSpace(authFile)
 	if explicit == "" {
 		return importLingmaCacheCredential()
 	}
 	return loadCredentialFile(expandHome(explicit))
 }
 // loadCredentialFile reads and validates a JSON credential file. The
 // credential's Source is the file's "source" field, or path when that field
 // is blank. Read and parse failures are wrapped; a validation failure is
 // returned together with the populated credential.
 func loadCredentialFile(path string) (Credential, error) {
 	raw, err := os.ReadFile(path)
 	if err != nil {
 		return Credential{}, fmt.Errorf("read remote auth file: %w", err)
 	}
 	var file storedCredentialFile
 	if err = json.Unmarshal(raw, &file); err != nil {
 		return Credential{}, fmt.Errorf("parse remote auth file: %w", err)
 	}
 	auth := file.Auth
 	cred := Credential{
 		CosyKey:         auth.CosyKey,
 		EncryptUserInfo: auth.EncryptUserInfo,
 		UserID:          auth.UserID,
 		MachineID:       auth.MachineID,
 		Source:          valueOr(file.Source, path),
 		TokenExpireTime: parseExpire(file.TokenExpireTime),
 	}
 	return cred, validateCredential(cred)
 }
 // importLingmaCacheCredential builds a credential from the Lingma login cache
 // under ~/.lingma: it resolves the machine ID, base64-decodes and decrypts
 // cache/user with it, and maps the decrypted JSON onto a Credential. The
 // result is validated before it is returned.
 func importLingmaCacheCredential() (Credential, error) {
 	var none Credential
 	home, err := os.UserHomeDir()
 	if err != nil {
 		return none, err
 	}
 	root := filepath.Join(home, ".lingma")
 	machineID, err := loadMachineID(root)
 	if err != nil {
 		return none, err
 	}
 	encoded, err := os.ReadFile(filepath.Join(root, "cache", "user"))
 	if err != nil {
 		return none, fmt.Errorf("read ~/.lingma/cache/user: %w", err)
 	}
 	sealed, err := base64.StdEncoding.DecodeString(strings.TrimSpace(string(encoded)))
 	if err != nil {
 		return none, fmt.Errorf("decode ~/.lingma/cache/user: %w", err)
 	}
 	opened, err := decryptCacheUser(machineID, sealed)
 	if err != nil {
 		return none, err
 	}
 	// expire_time may be a string or a number, so it is decoded loosely.
 	var cached struct {
 		Key             string `json:"key"`
 		EncryptUserInfo string `json:"encrypt_user_info"`
 		UserID          string `json:"uid"`
 		ExpireTime      any    `json:"expire_time"`
 	}
 	if err := json.Unmarshal(opened, &cached); err != nil {
 		return none, fmt.Errorf("parse ~/.lingma/cache/user: %w", err)
 	}
 	cred := Credential{
 		CosyKey:         cached.Key,
 		EncryptUserInfo: cached.EncryptUserInfo,
 		UserID:          cached.UserID,
 		MachineID:       machineID,
 		Source:          "~/.lingma/cache/user",
 		TokenExpireTime: parseExpireAny(cached.ExpireTime),
 	}
 	return cred, validateCredential(cred)
 }
 // loadMachineID returns the machine ID stored under lingmaDir.
 //
 // It prefers the trimmed contents of cache/id. If that file is missing or
 // blank it scans logs/lingma.log for the last occurrence of
 // "using machine id from file:" and then "machine id:" (ASCII
 // case-insensitive) and returns the trimmed remainder of that line. An error
 // is returned when the log cannot be read or no marker yields a value.
 func loadMachineID(lingmaDir string) (string, error) {
 	if body, err := os.ReadFile(filepath.Join(lingmaDir, "cache", "id")); err == nil {
 		if value := strings.TrimSpace(string(body)); value != "" {
 			return value, nil
 		}
 	}
 	logBody, err := os.ReadFile(filepath.Join(lingmaDir, "logs", "lingma.log"))
 	if err != nil {
 		return "", fmt.Errorf("remote credential requires ~/.lingma/cache/id or lingma.log machine id: %w", err)
 	}
 	text := string(logBody)
 	// Lowercase only ASCII letters, byte for byte. Unicode-aware lowering can
 	// change the byte length of the text, which would make match offsets
 	// found in the lowered copy invalid for slicing the original.
 	folded := make([]byte, len(logBody))
 	for i, b := range logBody {
 		if 'A' <= b && b <= 'Z' {
 			b += 'a' - 'A'
 		}
 		folded[i] = b
 	}
 	lower := string(folded)
 	markers := []string{"using machine id from file:", "machine id:"}
 	for _, marker := range markers {
 		index := strings.LastIndex(lower, marker)
 		if index < 0 {
 			continue
 		}
 		// Slice the original text so the ID keeps its casing.
 		line := text[index+len(marker):]
 		if newline := strings.IndexByte(line, '\n'); newline >= 0 {
 			line = line[:newline]
 		}
 		if value := strings.TrimSpace(line); value != "" {
 			return value, nil
 		}
 	}
 	return "", errors.New("machine id not found in ~/.lingma cache")
 }
 // decryptCacheUser decrypts the cached login blob with AES in CBC mode. The
 // first aes.BlockSize bytes of machineID serve as both key and IV, and PKCS#7
 // padding is removed from the result. It rejects a machine ID shorter than
 // one block and ciphertext that is empty or not a whole number of blocks.
 func decryptCacheUser(machineID string, ciphertext []byte) ([]byte, error) {
 	switch {
 	case len(machineID) < aes.BlockSize:
 		return nil, errors.New("machine id too short for cache decryption")
 	case len(ciphertext) == 0, len(ciphertext)%aes.BlockSize != 0:
 		return nil, errors.New("invalid cache/user ciphertext size")
 	}
 	keyAndIV := []byte(machineID[:aes.BlockSize])
 	block, err := aes.NewCipher(keyAndIV)
 	if err != nil {
 		return nil, err
 	}
 	decrypter := cipher.NewCBCDecrypter(block, keyAndIV)
 	padded := make([]byte, len(ciphertext))
 	decrypter.CryptBlocks(padded, ciphertext)
 	return unpadPKCS7(padded)
 }
 // unpadPKCS7 strips PKCS#7 padding from data and returns the unpadded prefix
 // (sharing data's backing array). It fails on empty input, on a pad length
 // outside 1..aes.BlockSize or larger than the input, and on padding bytes
 // that do not all equal the pad length.
 func unpadPKCS7(data []byte) ([]byte, error) {
 	size := len(data)
 	if size == 0 {
 		return nil, errors.New("empty plaintext")
 	}
 	pad := data[size-1]
 	padLen := int(pad)
 	if padLen < 1 || padLen > aes.BlockSize || padLen > size {
 		return nil, errors.New("invalid cache/user padding")
 	}
 	bodyEnd := size - padLen
 	for i := bodyEnd; i < size; i++ {
 		if data[i] != pad {
 			return nil, errors.New("invalid cache/user padding bytes")
 		}
 	}
 	return data[:bodyEnd], nil
 }
 // validateCredential checks that the cosy key, encrypted user info, user ID
 // and machine ID are all non-blank, reporting the first one that is missing.
 func validateCredential(cred Credential) error {
 	required := []struct {
 		value   string
 		message string
 	}{
 		{cred.CosyKey, "remote credential missing cosy_key"},
 		{cred.EncryptUserInfo, "remote credential missing encrypt_user_info"},
 		{cred.UserID, "remote credential missing user_id"},
 		{cred.MachineID, "remote credential missing machine_id"},
 	}
 	for _, field := range required {
 		if strings.TrimSpace(field.value) == "" {
 			return errors.New(field.message)
 		}
 	}
 	return nil
 }
 // parseExpire parses a base-10 integer from the trimmed value. The parse
 // error is deliberately discarded: non-numeric input yields 0, and
 // out-of-range input yields the clamped value strconv.ParseInt returns.
 func parseExpire(value string) int64 {
 	trimmed := strings.TrimSpace(value)
 	expiry, _ := strconv.ParseInt(trimmed, 10, 64)
 	return expiry
 }
 // parseExpireAny converts a loosely typed expiry (string, float64, int64 or
 // int) to int64. Strings go through parseExpire, floats are truncated, and
 // any other type, including nil, yields 0.
 func parseExpireAny(value any) int64 {
 	switch v := value.(type) {
 	case int64:
 		return v
 	case int:
 		return int64(v)
 	case float64:
 		return int64(v)
 	case string:
 		return parseExpire(v)
 	}
 	return 0
 }
 // IsExpired reports whether cred's expiry, read as Unix milliseconds, is
 // earlier than now plus margin. A credential with no positive expiry never
 // counts as expired.
 func IsExpired(cred Credential, margin time.Duration) bool {
 	if cred.TokenExpireTime <= 0 {
 		return false
 	}
 	deadline := time.Now().Add(margin).UnixMilli()
 	return deadline > cred.TokenExpireTime
 }
--- a/internal/remote/id.go
+++ b/internal/remote/id.go
@@ -0,0 +1,28 @@
 package remote
 import (
 	"crypto/rand"
 	"encoding/hex"
 	"fmt"
 	"sync/atomic"
 	"time"
 )
 // newUUID returns a random version-4 UUID in the canonical lowercase
 // 8-4-4-4-12 form. If the random source fails it returns
 // "fallback-<unix nanoseconds>" instead.
 func newUUID() string {
 	var raw [16]byte
 	if _, err := rand.Read(raw[:]); err != nil {
 		return fmt.Sprintf("fallback-%d", time.Now().UnixNano())
 	}
 	raw[6] = raw[6]&0x0f | 0x40 // version 4
 	raw[8] = raw[8]&0x3f | 0x80 // RFC 4122 variant
 	digits := hex.EncodeToString(raw[:])
 	return digits[0:8] + "-" + digits[8:12] + "-" + digits[12:16] + "-" + digits[16:20] + "-" + digits[20:32]
 }
 // newHexID returns 16 random bytes as a 32-character lowercase hex string.
 // If the random source fails it returns "fallback" followed by the hex Unix
 // nanosecond time and an atomically incremented sequence number.
 func newHexID() string {
 	var raw [16]byte
 	if _, err := rand.Read(raw[:]); err == nil {
 		return hex.EncodeToString(raw[:])
 	}
 	seq := atomic.AddUint64(&hexCounter, 1)
 	return fmt.Sprintf("fallback%x%x", time.Now().UnixNano(), seq)
 }
--- a/internal/service/service.go
+++ b/internal/service/service.go
@@ -15,9 +15,17 @@ import (
 	"time"
 	"lingma-ipc-proxy/internal/lingmaipc"
 	"lingma-ipc-proxy/internal/remote"
 	"lingma-ipc-proxy/internal/toolemulation"
 )
 // BackendMode selects which backend the service uses to answer requests.
 type BackendMode string
 const (
 	// BackendIPC is the mode used when no backend is configured.
 	BackendIPC    BackendMode = "ipc"
 	// BackendRemote routes requests through the remote API client.
 	BackendRemote BackendMode = "remote"
 )
 type SessionMode string
 const (
@@ -29,9 +37,13 @@ const (
 type Config struct {
 	Host            string
 	Port            int
 	Backend         BackendMode
 	Transport       lingmaipc.Transport
 	Pipe            string
 	WebSocketURL    string
 	RemoteBaseURL   string
 	RemoteAuthFile  string
 	RemoteVersion   string
 	Cwd             string
 	CurrentFilePath string
 	Mode            string
@@ -129,6 +141,7 @@ type Service struct {
 	stickySessionID string
 	stickyModelID   string
 	modelMap        map[string]string // official name -> internal id
 	remoteClient    *remote.Client
 }
 type promptRunResult struct {
@@ -158,6 +171,9 @@ func New(cfg Config) *Service {
 	if cfg.Transport == "" {
 		cfg.Transport = lingmaipc.TransportAuto
 	}
 	if cfg.Backend == "" {
 		cfg.Backend = BackendIPC
 	}
 	if cfg.SessionMode == "" {
 		cfg.SessionMode = SessionModeAuto
 	}
@@ -177,6 +193,9 @@ func (s *Service) DefaultModel() string {
 }
 // Warmup verifies that the configured backend is usable: the remote backend
 // runs the remote client's warmup, any other backend establishes the IPC
 // connection.
 func (s *Service) Warmup(ctx context.Context) error {
 	if s.backend() != BackendRemote {
 		_, err := s.ensureConnected(ctx)
 		return err
 	}
 	return s.remoteClientLocked().Warmup(ctx)
 }
@@ -190,6 +209,14 @@ func (s *Service) Close() error {
 func (s *Service) State() State {
 	s.mu.Lock()
 	defer s.mu.Unlock()
 	if s.cfg.Backend == BackendRemote {
 		return State{
 			Endpoint:    remote.ResolveBaseURL(s.cfg.RemoteBaseURL),
 			Transport:   "remote",
 			Connected:   s.remoteClient != nil,
 			SessionMode: s.cfg.SessionMode,
 		}
 	}
 	return State{
 		PipePath:        s.pipePath,
 		Endpoint:        s.endpoint,
@@ -201,6 +228,29 @@ func (s *Service) State() State {
 }
 func (s *Service) ListModels(ctx context.Context) ([]Model, error) {
 	if s.backend() == BackendRemote {
 		models, err := s.remoteClientLocked().ListModels(ctx)
 		if err != nil {
 			return nil, err
 		}
 		out := make([]Model, 0, len(models)+1)
 		seen := map[string]bool{"Auto": true}
 		out = append(out, Model{ID: "Auto", Name: "Auto"})
 		for _, model := range models {
 			id := strings.TrimSpace(model.Key)
 			if id == "" || seen[id] {
 				continue
 			}
 			seen[id] = true
 			name := strings.TrimSpace(model.DisplayName)
 			if name == "" {
 				name = id
 			}
 			out = append(out, Model{ID: id, Name: name})
 		}
 		return out, nil
 	}
 	ipcClient, err := s.ensureConnected(ctx)
 	if err != nil {
 		return nil, err
@@ -229,6 +279,9 @@ func (s *Service) ListModels(ctx context.Context) ([]Model, error) {
 }
 // Generate produces a complete, non-streamed answer for req using the
 // configured backend.
 func (s *Service) Generate(ctx context.Context, req ChatRequest) (*ChatResult, error) {
 	switch s.backend() {
 	case BackendRemote:
 		return s.generateRemote(ctx, req, nil)
 	default:
 		return s.generateWithReconnect(ctx, req, nil)
 	}
 }
@@ -237,7 +290,11 @@ func (s *Service) GenerateStream(ctx context.Context, req ChatRequest) (<-chan S
 	done := make(chan StreamResult, 1)
 	go func() {
-		result, err := s.generateWithReconnect(ctx, req, func(delta string) {
+		generate := s.generateWithReconnect
 		if s.backend() == BackendRemote {
 			generate = s.generateRemote
 		}
 		result, err := generate(ctx, req, func(delta string) {
 			if delta == "" {
 				return
 			}
@@ -269,6 +326,67 @@ func (s *Service) generateWithReconnect(
 	return s.generateLocked(ctx, req, onDelta)
 }
 // generateRemote answers req through the remote API client.
 //
 // The whole call is bounded by the configured timeout. A blank model falls
 // back to the default model, the prompt is always built in fresh-session
 // mode, and an empty prompt is rejected. Deltas are forwarded to onDelta when
 // it is non-nil. After the first answer, tool emulation is applied; its retry
 // sends the hint prompt through the same client and settings.
 func (s *Service) generateRemote(
 	ctx context.Context,
 	req ChatRequest,
 	onDelta func(string),
 ) (*ChatResult, error) {
 	requestCtx, cancel := context.WithTimeout(ctx, s.cfg.Timeout)
 	defer cancel()
 	if strings.TrimSpace(req.Model) == "" {
 		req.Model = s.DefaultModel()
 	}
 	prompt, err := buildLingmaPrompt(req, SessionModeFresh)
 	if err != nil {
 		return nil, err
 	}
 	if strings.TrimSpace(prompt) == "" {
 		return nil, errors.New("empty user message")
 	}
 	client := s.remoteClientLocked()
 	// send issues one remote chat call with the request's shared settings.
 	send := func(text string) (*remote.ChatResult, error) {
 		return client.Chat(requestCtx, remote.ChatRequest{
 			Model:       req.Model,
 			Prompt:      text,
 			Stream:      onDelta != nil,
 			Temperature: req.Temperature,
 		}, onDelta)
 	}
 	first, err := send(prompt)
 	if err != nil {
 		return nil, err
 	}
 	result := &ChatResult{
 		Text:             first.Text,
 		Model:            valueOr(strings.TrimSpace(req.Model), "lingma"),
 		InputTokens:      first.InputTokens,
 		OutputTokens:     first.OutputTokens,
 		SessionID:        "",
 		RequestID:        first.RequestID,
 		FinishReason:     "stop",
 		StopReason:       "stop",
 		Endpoint:         remote.ResolveBaseURL(s.cfg.RemoteBaseURL),
 		Transport:        "remote",
 		EffectiveSession: SessionModeFresh,
 	}
 	retry := func(hintPrompt string) (string, int, error) {
 		again, retryErr := send(hintPrompt)
 		switch {
 		case retryErr != nil:
 			return "", 0, retryErr
 		case again == nil:
 			return "", 0, nil
 		}
 		return again.Text, again.OutputTokens, nil
 	}
 	s.applyToolEmulation(requestCtx, req, prompt, result, onDelta, retry)
 	return result, nil
 }
 func (s *Service) generateLocked(
 	ctx context.Context,
 	req ChatRequest,
@@ -361,14 +479,8 @@ func (s *Service) generateLocked(
 	result = s.buildChatResult(req, sessionID, requestID, prompt, runResult, effectiveMode)
-	if len(req.Tools) > 0 {
+	s.applyToolEmulation(requestCtx, req, prompt, result, onDelta, func(hintPrompt string) (string, int, error) {
-		calls, remaining, parseErr := toolemulation.ParseActionBlocks(result.Text, req.Tools, toolemulation.Config{})
+		retryRequestID := lingmaipc.CreateRequestID("serve-tool")
 		if parseErr == nil && len(calls) > 0 {
 			result.Text = remaining
 			result.ToolCalls = calls
 		} else if shouldRetryTooling(req.ToolChoice, result.Text) {
 			hintPrompt := prompt + "\n\n" + toolemulation.ForceToolingPrompt(req.ToolChoice)
 			retryRequestID := lingmaipc.CreateRequestID("retry")
 		retryMeta := lingmaipc.CreateMeta(lingmaipc.MetaOptions{
 			RequestID:       retryRequestID,
 			Mode:            s.cfg.Mode,
@@ -377,19 +489,83 @@ func (s *Service) generateLocked(
 			CurrentFilePath: s.cfg.CurrentFilePath,
 			EnabledMCP:      []any{},
 		})
-			retryResult, retryErr := s.runPromptLocked(requestCtx, ipcClient, sessionID, hintPrompt, nil, retryRequestID, retryMeta, onDelta)
+		retryRunResult, retryErr := s.runPromptLocked(requestCtx, ipcClient, sessionID, hintPrompt, images, retryRequestID, retryMeta, onDelta)
-			if retryErr == nil && retryResult != nil {
+		if retryErr != nil {
-				retryCalls, retryRemaining, retryParseErr := toolemulation.ParseActionBlocks(retryResult.AssistantText, req.Tools, toolemulation.Config{})
+			return "", 0, retryErr
 		}
 		return retryRunResult.AssistantText, estimateTokens(retryRunResult.AssistantText), nil
 	})
 	return result, nil
 }
 // backend reports the backend mode stored in the service configuration.
 // The value is read while holding s.mu; an empty setting is reported as
 // BackendIPC.
 func (s *Service) backend() BackendMode {
 	s.mu.Lock()
 	mode := s.cfg.Backend
 	s.mu.Unlock()
 	if mode != "" {
 		return mode
 	}
 	return BackendIPC
 }
 // remoteClientLocked returns the service's cached remote client, building it
 // from the current configuration (base URL, auth file, Cosy version, timeout)
 // the first time it is requested.
 //
 // The method acquires s.mu itself for the whole call.
 // NOTE(review): despite the "Locked" suffix, callers presumably must not
 // already hold s.mu when calling this — confirm against the mutex type.
 func (s *Service) remoteClientLocked() *remote.Client {
 	s.mu.Lock()
 	defer s.mu.Unlock()
 	if existing := s.remoteClient; existing != nil {
 		return existing
 	}
 	settings := remote.Config{
 		BaseURL:     s.cfg.RemoteBaseURL,
 		AuthFile:    s.cfg.RemoteAuthFile,
 		CosyVersion: s.cfg.RemoteVersion,
 		Timeout:     s.cfg.Timeout,
 	}
 	s.remoteClient = remote.New(settings)
 	return s.remoteClient
 }
 // applyToolEmulation post-processes result in place when the request declares
 // tools; with no tools it leaves result untouched.
 //
 // Steps, in order:
 //  1. Parse action blocks from result.Text. If any calls are found they become
 //     result.ToolCalls and the remaining text replaces result.Text.
 //  2. Otherwise, if shouldRetryTooling approves, build a hint prompt (prompt
 //     plus toolemulation.ForceToolingPrompt) and call retry once, then try to
 //     parse, or failing that infer, tool calls from the retry text.
 //  3. If there are still no tool calls, try to infer them from the original
 //     result.Text.
 //
 // retry may be nil, in which case step 2 produces no retry text. An error
 // returned by retry is not propagated; it is treated the same as an empty
 // retry response. ctx and onDelta are not referenced in the body shown here.
 func (s *Service) applyToolEmulation(
 	ctx context.Context,
 	req ChatRequest,
 	prompt string,
 	result *ChatResult,
 	onDelta func(string),
 	retry func(string) (string, int, error),
 ) {
 	if len(req.Tools) > 0 {
 		calls, remaining, parseErr := toolemulation.ParseActionBlocks(result.Text, req.Tools, toolemulation.Config{})
 		if parseErr == nil && len(calls) > 0 {
 			// The first response already contained usable action blocks.
 			result.Text = remaining
 			result.ToolCalls = calls
 		} else if shouldRetryTooling(req.ToolChoice, result.Text) {
 			hintPrompt := prompt + "\n\n" + toolemulation.ForceToolingPrompt(req.ToolChoice)
 			retryText := ""
 			if retry != nil {
 				text, outputTokens, retryErr := retry(hintPrompt)
 				// A failed retry is deliberately ignored: retryText stays empty.
 				if retryErr == nil {
 					retryText = text
 					// Only a positive token count from the retry overrides the current value.
 					if outputTokens > 0 {
 						result.OutputTokens = outputTokens
 					}
 				}
 			}
 			if retryText != "" {
 				retryCalls, retryRemaining, retryParseErr := toolemulation.ParseActionBlocks(retryText, req.Tools, toolemulation.Config{})
 				if retryParseErr == nil && len(retryCalls) > 0 {
 					result.Text = retryRemaining
 					result.ToolCalls = retryCalls
 					// NOTE(review): the next two lines carry diff markers ('-' then '+');
 					// presumably only the '+' form (estimating from retryText) is current — confirm.
-					result.OutputTokens = estimateTokens(retryResult.AssistantText)
+					result.OutputTokens = estimateTokens(retryText)
 				} else if inferred := toolemulation.InferToolCallsFromText(retryText, req.Tools); len(inferred) > 0 {
 					// No parsable action block in the retry: fall back to inferring a call from its text.
 					result.Text = ""
 					result.ToolCalls = inferred
 					result.OutputTokens = estimateTokens(retryText)
 				}
 			}
 			if len(result.ToolCalls) == 0 {
 				// Last resort: infer a call from the original (pre-retry) response text.
 				if inferred := toolemulation.InferToolCallsFromText(result.Text, req.Tools); len(inferred) > 0 {
 					result.Text = ""
 					result.ToolCalls = inferred
 				}
 			}
 		}
 	}
 	// NOTE(review): the signature above declares no results, so this return looks
 	// like a leftover diff-context line rather than part of this function — confirm.
 	return result, nil
 }
 func shouldRetryTooling(choice toolemulation.ToolChoice, text string) bool {
--- a/internal/toolemulation/toolemulation.go
+++ b/internal/toolemulation/toolemulation.go
@@ -402,6 +402,7 @@ func ForceToolingPrompt(choice ToolChoice) string {
 		"You must respond with at least one valid action block now. " +
 		"Select the single most appropriate available tool for the user request. " +
 		"The proxy tools from the previous system message are available even if native Lingma tools are not. " +
 		"If the user asked to inspect the local computer, run a shell command, read files, search files, or check current data, call the matching tool immediately. " +
 		"Do not explain. Do not say tools are unavailable. Output the action block directly."
 	if choice.Mode == "tool" && strings.TrimSpace(choice.Name) != "" {
 		prompt += " You must call \"" + strings.TrimSpace(choice.Name) + "\"."
@@ -420,12 +421,28 @@ func LooksLikeRefusal(text string) bool {
 		"tools are unavailable",
 		"cannot call tools",
 		"can't call tools",
 		"cannot execute",
 		"can't execute",
 		"cannot run commands",
 		"can't run commands",
 		"cannot access your computer",
 		"can't access your computer",
 		"cannot access your local machine",
 		"can't access your local machine",
 		"没有可用的工具",
 		"无法调用",
 		"工具不可用",
 		"不能调用工具",
 		"我不具备",
 		"受限于当前环境",
 		"当前环境限制",
 		"无法直接执行",
 		"不能直接执行",
 		"无法执行系统命令",
 		"不能执行系统命令",
 		"无法访问你的电脑",
 		"无法访问本机",
 		"没有权限访问",
 	}
 	for _, needle := range needles {
 		if strings.Contains(t, needle) {
@@ -455,9 +472,16 @@ func LooksLikeMissedToolUse(text string) bool {
 		"i will search",
 		"please run",
 		"manually run",
 		"run the following command",
 		"you can run",
 		"you could run",
 		"paste the file",
 		"无法直接访问",
 		"无法直接查询",
 		"无法直接查看",
 		"无法直接执行",
 		"不能直接执行",
 		"无法执行系统命令",
 		"没有可用",
 		"no tools available",
 		"native lingma tools",
@@ -470,6 +494,10 @@ func LooksLikeMissedToolUse(text string) bool {
 		"查看文件",
 		"查询天气",
 		"手动运行",
 		"你可以在终端中运行",
 		"你可以运行",
 		"请你运行",
 		"请手动运行",
 		"粘贴给我",
 		"切换到计划模式",
 	}
@@ -481,6 +509,60 @@ func LooksLikeMissedToolUse(text string) bool {
 	return false
 }
 // InferToolCallsFromText tries to turn a refusal-style or "run it yourself"
 // style reply into a single synthetic tool call.
 //
 // It returns nil unless all of the following hold: text is recognised by
 // LooksLikeRefusal or LooksLikeMissedToolUse, tools contains a tool selected
 // by selectCommandTool, and inferLocalCommand derives a non-empty command
 // from text. On success the result holds exactly one call to the selected
 // tool whose arguments are {"command": <inferred command>} filtered through
 // the tool's input schema.
 func InferToolCallsFromText(text string, tools []ToolDef) []ToolCall {
 	if !(LooksLikeRefusal(text) || LooksLikeMissedToolUse(text)) {
 		return nil
 	}
 	target, found := selectCommandTool(tools)
 	if !found {
 		return nil
 	}
 	inferred := inferLocalCommand(text)
 	if inferred == "" {
 		return nil
 	}
 	callID := newCallID()
 	args := filterArgsBySchema(map[string]any{"command": inferred}, target.InputSchema)
 	return []ToolCall{{
 		ID:        callID,
 		Name:      target.Name,
 		Arguments: args,
 	}}
 }
 // selectCommandTool picks the tool that should receive an inferred shell
 // command. Only tools whose schema passes toolHasCommandArg are eligible.
 // Among those, the first one whose lower-cased, trimmed name contains
 // "bash", "terminal" or "shell" wins; if none has such a name, the first
 // eligible tool is returned. The boolean is false when no tool is eligible.
 func selectCommandTool(tools []ToolDef) (ToolDef, bool) {
 	shellHints := [...]string{"bash", "terminal", "shell"}
 	firstEligible := -1
 	for idx := range tools {
 		if !toolHasCommandArg(tools[idx].InputSchema) {
 			continue
 		}
 		lowered := strings.ToLower(strings.TrimSpace(tools[idx].Name))
 		for _, hint := range shellHints {
 			if strings.Contains(lowered, hint) {
 				return tools[idx], true
 			}
 		}
 		// Remember the earliest eligible tool as the fallback choice.
 		if firstEligible < 0 {
 			firstEligible = idx
 		}
 	}
 	if firstEligible >= 0 {
 		return tools[firstEligible], true
 	}
 	return ToolDef{}, false
 }
 // toolHasCommandArg reports whether schema has a "properties" entry that is a
 // map[string]any containing the key "command". A nil schema, a missing
 // "properties" entry, or one of another type all yield false.
 func toolHasCommandArg(schema map[string]any) bool {
 	properties, isMap := schema["properties"].(map[string]any)
 	if !isMap {
 		return false
 	}
 	_, present := properties["command"]
 	return present
 }
 // inferLocalCommand maps a free-form reply to a shell command to run locally.
 // The text is trimmed and lower-cased; if it mentions any memory-related
 // marker ("内存", "memory", "physmem", "vm_stat") a fixed memory-inspection
 // command line is returned. Any other text yields the empty string.
 func inferLocalCommand(text string) string {
 	lowered := strings.ToLower(strings.TrimSpace(text))
 	for _, marker := range []string{"内存", "memory", "physmem", "vm_stat"} {
 		if strings.Contains(lowered, marker) {
 			return `vm_stat && echo "---" && memory_pressure && echo "---" && top -l 1 -s 0 | head -n 15`
 		}
 	}
 	return ""
 }
 func ParseActionBlocks(text string, tools []ToolDef, cfg Config) ([]ToolCall, string, error) {
 	if strings.TrimSpace(text) == "" {
 		return nil, "", nil
--- a/internal/toolemulation/toolemulation_test.go
+++ b/internal/toolemulation/toolemulation_test.go
@@ -9,8 +9,11 @@ func TestLooksLikeMissedToolUseDetectsLocalToolAvoidance(t *testing.T) {
 	cases := []string{
 		"我需要使用终端工具来查看内存。",
 		"由于当前环境限制，请手动运行 top。",
 		"当前环境限制，我无法直接执行系统命令查看你的内存占用。",
 		"你可以在终端中运行 top -l 1 | grep PhysMem。",
 		"I need to read the file first.",
 		"Let me use the web search tool.",
 		"You can run the following command in your terminal.",
 		"现在我需要切换到计划模式。",
 	}
 	for _, tc := range cases {
@@ -20,6 +23,42 @@ func TestLooksLikeMissedToolUseDetectsLocalToolAvoidance(t *testing.T) {
 	}
 }
 // TestLooksLikeRefusalDetectsLocalAccessRefusals checks that LooksLikeRefusal
 // flags Chinese and English replies that decline to execute commands or to
 // access the local machine. It stops at the first phrase that is not flagged.
 func TestLooksLikeRefusalDetectsLocalAccessRefusals(t *testing.T) {
 	for _, phrase := range []string{
 		"当前环境限制，我无法直接执行系统命令查看你的内存占用。",
 		"我无法访问你的电脑或本机文件。",
 		"I cannot execute commands in your local machine.",
 		"I can't access your computer directly.",
 	} {
 		if LooksLikeRefusal(phrase) {
 			continue
 		}
 		t.Fatalf("LooksLikeRefusal(%q) = false", phrase)
 	}
 }
 func TestInferToolCallsFromTextConvertsMemoryRefusalToBash(t *testing.T) {
 	calls := InferToolCallsFromText("当前无法执行系统命令。你可以运行 vm_stat 查看内存占用。", []ToolDef{{
 		Name: "Bash",
 		InputSchema: map[string]any{
 			"properties": map[string]any{
 				"command": map[string]any{"type": "string"},
 			},
 			"required": []any{"command"},
 		},
 	}})
 	if len(calls) != 1 {
 		t.Fatalf("call count = %d", len(calls))
 	}
 	if calls[0].Name != "Bash" {
 		t.Fatalf("tool name = %q", calls[0].Name)
 	}
 	command, _ := calls[0].Arguments["command"].(string)
 	if !strings.Contains(command, "vm_stat") || !strings.Contains(command, "memory_pressure") {
 		t.Fatalf("unexpected command = %q", command)
 	}
 }
 func TestLooksLikeMissedToolUseIgnoresFinalAnswers(t *testing.T) {
 	text := "这个文件负责 HTTP API 路由和 OpenAI 兼容响应。"
 	if LooksLikeMissedToolUse(text) {