Release v1.4.3

2026-04-30 18:20:04 +08:00
parent a2f777a1a8
commit a02fd51c19
24 changed files with 1909 additions and 1176 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,22 @@
 # Changelog
 ## Unreleased
 - Nothing yet.
 ## v1.4.3 - 2026-04-30
 - Added remote API timeout fallback with a configurable model order. The default order is Kimi-K2.6, MiniMax-M2.7, Qwen3-Coder, Qwen3.6-Plus, Qwen3-Max, and Qwen3-Thinking.
 - Fallback only runs before any streaming bytes are sent and only uses models returned by the active `/v1/models` response.
 - Changed the default request timeout from 120 seconds to 300 seconds.
 - Added a desktop Settings switch and fallback model list editor.
 - Added persistent desktop app state for request history, app logs, and cumulative token usage.
 - Added a Dashboard token usage card and model-list specification chips for context window and capability summaries.
 - Added model display to the desktop request stream table and model-aware request search.
 - Fixed Dashboard "recent model" tracking so health/model-list requests no longer override the last real chat model.
 - Updated architecture documentation to cover the IPC and Remote API dual-backend design.
 - Disabled desktop Inspector and default context menu in production builds; local development can opt in with `LINGMA_DESKTOP_DEBUG=1`.
 ## v1.4.2 - 2026-04-30
 - Default backend changed to remote API mode for new CLI and desktop configurations.
--- a/README.md
+++ b/README.md
@@ -13,7 +13,7 @@ The proxy now supports two backend modes:
 ## Current Version
-The current desktop line is `v1.4.2`.
+The current desktop line is `v1.4.3`.
 See [CHANGELOG.md](./CHANGELOG.md) for release history.
@@ -326,6 +326,10 @@ The proxy only reports models actually exposed by your Lingma plugin. The table
 Default model when the client omits `model`: `kmodel` (`Kimi-K2.6` in the remote model list).
 Remote mode enables timeout fallback by default. On timeout, upstream 5xx/429, or network interruption, the proxy only switches models if no streaming bytes have been sent to the client yet. Fallback candidates are filtered against the actual `/v1/models` response, so unavailable models are skipped. Default order:
 `Kimi-K2.6 -> MiniMax-M2.7 -> Qwen3-Coder -> Qwen3.6-Plus -> Qwen3-Max -> Qwen3-Thinking`
 ## Configuration
 Default config file:
@@ -348,7 +352,16 @@ Example:
  "mode": "agent",
  "shell_type": "zsh",
  "session_mode": "auto",
-  "timeout": 120,
+  "timeout": 300,
  "remote_fallback_enabled": true,
  "remote_fallback_models": [
    "kmodel",
    "mmodel",
    "dashscope_qwen3_coder",
    "dashscope_qmodel",
    "dashscope_qwen_max_latest",
    "dashscope_qwen_plus_20250428_thinking"
  ],
  "cwd": "/Users/you/project",
  "current_file_path": ""
 }
--- a/README.zh-CN.md
+++ b/README.zh-CN.md
@@ -16,7 +16,7 @@
 ## 当前版本
-当前桌面端版本线：`v1.4.2`
+当前桌面端版本线：`v1.4.3`
 版本更新记录见 [CHANGELOG.md](./CHANGELOG.md)。
@@ -408,6 +408,10 @@ export ANTHROPIC_API_KEY="any"
 当客户端请求没有携带 `model` 字段时，代理默认使用：`kmodel`（远端模型列表里的 Kimi-K2.6）。
 远端模式默认开启超时兜底。遇到请求超时、上游 5xx/429 或网络中断时，代理只会在尚未向客户端输出任何流式内容的情况下切换模型。兜底候选会先和实际 `/v1/models` 返回结果求交集，不存在或当前账号不可用的模型会自动跳过。默认顺序：
 `Kimi-K2.6 -> MiniMax-M2.7 -> Qwen3-Coder -> Qwen3.6-Plus -> Qwen3-Max -> Qwen3-Thinking`
 ## 配置文件
 默认读取：
@@ -430,7 +434,16 @@ export ANTHROPIC_API_KEY="any"
  "mode": "agent",
  "shell_type": "zsh",
  "session_mode": "auto",
-  "timeout": 120,
+  "timeout": 300,
  "remote_fallback_enabled": true,
  "remote_fallback_models": [
    "kmodel",
    "mmodel",
    "dashscope_qwen3_coder",
    "dashscope_qmodel",
    "dashscope_qwen_max_latest",
    "dashscope_qwen_plus_20250428_thinking"
  ],
  "cwd": "/Users/you/project",
  "current_file_path": ""
 }
--- a/cmd/lingma-ipc-proxy/main.go
+++ b/cmd/lingma-ipc-proxy/main.go
@@ -38,6 +38,8 @@ type fileConfig struct {
 	ShellType             string   `json:"shell_type"`
 	SessionMode           string   `json:"session_mode"`
 	TimeoutSeconds        int      `json:"timeout"`
 	RemoteFallbackEnabled *bool    `json:"remote_fallback_enabled"`
 	RemoteFallbackModels  []string `json:"remote_fallback_models"`
 }
 func main() {
@@ -98,7 +100,9 @@ func loadConfig() (service.Config, string) {
 		Model:                 "kmodel",
 		ShellType:             defaultShellType(),
 		SessionMode:           service.SessionModeAuto,
-		Timeout:     120 * time.Second,
+		Timeout:               300 * time.Second,
 		RemoteFallbackEnabled: true,
 		RemoteFallbackModels:  service.DefaultRemoteFallbackModels(),
 	}
 	configPath, configLoaded := resolveConfigPath()
@@ -127,6 +131,8 @@ func loadConfig() (service.Config, string) {
 	model := flag.String("model", cfg.Model, "Default Lingma model when API request omits model")
 	shellType := flag.String("shell-type", cfg.ShellType, "Shell type sent through ACP meta")
 	timeoutSeconds := flag.Int("timeout", int(cfg.Timeout/time.Second), "Per-request timeout in seconds")
 	remoteFallbackEnabled := flag.Bool("remote-fallback", cfg.RemoteFallbackEnabled, "Enable remote timeout/5xx fallback to the next available model")
 	remoteFallbackModels := flag.String("remote-fallback-models", strings.Join(cfg.RemoteFallbackModels, ","), "Comma-separated remote fallback model IDs")
 	sessionMode := flag.String("session-mode", string(cfg.SessionMode), "Session mode: auto, fresh, reuse")
 	config := flag.String("config", valueOr(configPath, filepath.Join(currentDir(), "lingma-ipc-proxy.json")), "Path to JSON config file")
 	flag.Parse()
@@ -151,6 +157,8 @@ func loadConfig() (service.Config, string) {
 	cfg.ShellType = strings.TrimSpace(*shellType)
 	cfg.SessionMode = parsedSessionMode
 	cfg.Timeout = time.Duration(*timeoutSeconds) * time.Second
 	cfg.RemoteFallbackEnabled = *remoteFallbackEnabled
 	cfg.RemoteFallbackModels = splitCSV(*remoteFallbackModels)
 	if configLoaded {
 		configPath = finalConfigPath
@@ -236,6 +244,12 @@ func overlayFileConfig(dst *service.Config, src fileConfig) {
 	if src.TimeoutSeconds > 0 {
 		dst.Timeout = time.Duration(src.TimeoutSeconds) * time.Second
 	}
 	if src.RemoteFallbackEnabled != nil {
 		dst.RemoteFallbackEnabled = *src.RemoteFallbackEnabled
 	}
 	if len(src.RemoteFallbackModels) > 0 {
 		dst.RemoteFallbackModels = cleanStringSlice(src.RemoteFallbackModels)
 	}
 }
 func overlayEnvConfig(dst *service.Config) {
@@ -287,6 +301,12 @@ func overlayEnvConfig(dst *service.Config) {
 	if value := envInt("LINGMA_PROXY_TIMEOUT_SECONDS", 0); value > 0 {
 		dst.Timeout = time.Duration(value) * time.Second
 	}
 	if value, ok := envBool("LINGMA_REMOTE_FALLBACK_ENABLED"); ok {
 		dst.RemoteFallbackEnabled = value
 	}
 	if value := strings.TrimSpace(os.Getenv("LINGMA_REMOTE_FALLBACK_MODELS")); value != "" {
 		dst.RemoteFallbackModels = splitCSV(value)
 	}
 }
 func parseSessionMode(value string) service.SessionMode {
@@ -349,6 +369,36 @@ func envInt(key string, fallback int) int {
 	return fallback
 }
 // envBool reads the environment variable named key and interprets it as a
 // boolean. The value is trimmed and lower-cased before matching.
 //
 // The first result is the parsed value. The second result reports whether the
 // variable held one of the recognized spellings; when it is false (unset,
 // empty, or unrecognized text) the first result is always false.
 func envBool(key string) (bool, bool) {
 	spellings := map[string]bool{
 		"1":     true,
 		"true":  true,
 		"yes":   true,
 		"on":    true,
 		"0":     false,
 		"false": false,
 		"no":    false,
 		"off":   false,
 	}
 	normalized := strings.ToLower(strings.TrimSpace(os.Getenv(key)))
 	// A missing key yields (false, false), which is exactly the
 	// "not recognized" result.
 	parsed, recognized := spellings[normalized]
 	return parsed, recognized
 }
 // splitCSV breaks value on commas and passes the pieces through
 // cleanStringSlice.
 func splitCSV(value string) []string {
 	pieces := strings.Split(value, ",")
 	return cleanStringSlice(pieces)
 }
 // cleanStringSlice returns the entries of values with surrounding whitespace
 // trimmed, skipping blank entries and any entry whose trimmed form was already
 // emitted. First-occurrence order is preserved and the result is never nil.
 func cleanStringSlice(values []string) []string {
 	kept := make([]string, 0, len(values))
 	known := map[string]bool{}
 	for i := range values {
 		trimmed := strings.TrimSpace(values[i])
 		if trimmed != "" && !known[trimmed] {
 			known[trimmed] = true
 			kept = append(kept, trimmed)
 		}
 	}
 	return kept
 }
 func currentDir() string {
 	if wd, err := os.Getwd(); err == nil {
 		return wd
--- a/config.example.json
+++ b/config.example.json
@@ -6,7 +6,16 @@
  "mode": "chat",
  "model": "kmodel",
  "session_mode": "auto",
-  "timeout": 120,
+  "timeout": 300,
  "remote_fallback_enabled": true,
  "remote_fallback_models": [
    "kmodel",
    "mmodel",
    "dashscope_qwen3_coder",
    "dashscope_qmodel",
    "dashscope_qwen_max_latest",
    "dashscope_qwen_plus_20250428_thinking"
  ],
  "cwd": "C:/Workspace/Personal/lingma-ipc-proxy",
  "shell_type": "powershell",
  "current_file_path": "",
--- a/desktop/app.go
+++ b/desktop/app.go
@@ -32,10 +32,30 @@ type RequestRecord struct {
 	StatusCode   int    `json:"statusCode"`
 	Duration     string `json:"duration"`
 	Size         string `json:"size,omitempty"`
 	InputTokens  int    `json:"inputTokens,omitempty"`
 	OutputTokens int    `json:"outputTokens,omitempty"`
 	TotalTokens  int    `json:"totalTokens,omitempty"`
 	ReqBody      string `json:"reqBody,omitempty"`
 	RespBody     string `json:"respBody,omitempty"`
 }
 // AppLog is a single application log entry. Entries are kept in App.logs,
 // written to the app state file, and emitted to the frontend on the "log"
 // event.
 type AppLog struct {
 	// Time is the time the entry was created, formatted as "15:04:05" by emitLog.
 	Time    string `json:"time"`
 	// Level is the level string passed to emitLog (for example "info").
 	Level   string `json:"level"`
 	// Message is the log text.
 	Message string `json:"message"`
 }
 // TokenStats holds cumulative request and token counters. It is updated by
 // accumulateTokenStatsLocked for every recorded request and persisted in the
 // app state file.
 type TokenStats struct {
 	// TotalRequests counts every recorded request.
 	TotalRequests   int            `json:"totalRequests"`
 	// SuccessRequests counts recorded requests with a 2xx status code.
 	SuccessRequests int            `json:"successRequests"`
 	// InputTokens is the running sum of per-request input tokens.
 	InputTokens     int            `json:"inputTokens"`
 	// OutputTokens is the running sum of per-request output tokens.
 	OutputTokens    int            `json:"outputTokens"`
 	// TotalTokens is the running sum of per-request total tokens.
 	TotalTokens     int            `json:"totalTokens"`
 	// ByModel maps a trimmed model name to its accumulated total tokens;
 	// requests without a model name are counted under "-".
 	ByModel         map[string]int `json:"byModel,omitempty"`
 	// LastModel is the model of the most recent token-bearing request on a
 	// chat/completion path (see isUsageBearingRequest).
 	LastModel       string         `json:"lastModel,omitempty"`
 	// LastUpdated is the RFC 3339 timestamp of the last accumulation.
 	LastUpdated     string         `json:"lastUpdated,omitempty"`
 }
 type App struct {
 	ctx context.Context
@@ -49,6 +69,8 @@ type App struct {
 	quitHint  time.Time
 	models    []ModelInfo
 	requests  []RequestRecord
 	logs      []AppLog
 	stats     TokenStats
 }
 // ModelInfo represents a model returned by /v1/models
@@ -96,6 +118,9 @@ func NewApp() *App {
 func (a *App) startup(ctx context.Context) {
 	a.ctx = ctx
 	a.cfg = defaultConfig()
 	if err := a.loadAppState(); err != nil {
 		runtime.LogWarningf(a.ctx, "failed to load app state: %v", err)
 	}
 	// Auto-save default config on first run so users can find/edit it later
 	if err := a.saveConfig(a.cfg); err != nil {
@@ -208,10 +233,19 @@ func (a *App) forceQuit() {
 }
 func (a *App) emitLog(level string, message string) {
-	runtime.EventsEmit(a.ctx, "log", map[string]string{
+	entry := AppLog{
-		"level":   level,
+		Time:    time.Now().Format("15:04:05"),
-		"message": message,
+		Level:   level,
-	})
+		Message: message,
 	}
 	a.mu.Lock()
 	a.logs = append(a.logs, entry)
 	if len(a.logs) > 2000 {
 		a.logs = a.logs[len(a.logs)-2000:]
 	}
 	a.saveAppStateLocked()
 	a.mu.Unlock()
 	runtime.EventsEmit(a.ctx, "log", entry)
 }
 // GetStatus returns the current proxy status
@@ -347,6 +381,8 @@ func (a *App) saveConfig(cfg service.Config) error {
 		"shell_type":              cfg.ShellType,
 		"session_mode":            string(cfg.SessionMode),
 		"timeout":                 timeoutSec,
 		"remote_fallback_enabled": cfg.RemoteFallbackEnabled,
 		"remote_fallback_models":  cfg.RemoteFallbackModels,
 	}
 	data, err := json.MarshalIndent(fileCfg, "", "  ")
@@ -361,14 +397,16 @@ func (a *App) saveConfig(cfg service.Config) error {
 // StartProxy starts the lingma-ipc-proxy HTTP server
 func (a *App) StartProxy() error {
 	a.mu.Lock()
 	defer a.mu.Unlock()
 	if a.running {
 		a.mu.Unlock()
 		return fmt.Errorf("proxy already running")
 	}
 	addr := fmt.Sprintf("%s:%d", a.cfg.Host, a.cfg.Port)
-	svc := service.New(a.cfg)
+	cfg := a.cfg
 	a.mu.Unlock()
 	svc := service.New(cfg)
 	warmupCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
 	if err := svc.Warmup(warmupCtx); err != nil {
@@ -382,23 +420,32 @@ func (a *App) StartProxy() error {
 	server := httpapi.NewServer(addr, svc)
 	server.OnRequest = func(method, path string, statusCode int, duration time.Duration, reqBody, respBody string) {
-		a.mu.Lock()
+		inputTokens, outputTokens := extractTokenUsage(respBody)
-		a.requests = append(a.requests, RequestRecord{
+		model := extractRequestModel(reqBody)
 		record := RequestRecord{
 			Time:         time.Now().Format("15:04:05"),
 			Method:       method,
 			Path:         path,
-			Model:      extractRequestModel(reqBody),
+			Model:        model,
 			StatusCode:   statusCode,
 			Duration:     duration.Round(time.Millisecond).String(),
 			Size:         formatPayloadSize(len(reqBody) + len(respBody)),
 			InputTokens:  inputTokens,
 			OutputTokens: outputTokens,
 			TotalTokens:  inputTokens + outputTokens,
 			ReqBody:      reqBody,
 			RespBody:     respBody,
 		})
 		if len(a.requests) > 100 {
 			a.requests = a.requests[len(a.requests)-100:]
 		}
 		a.mu.Lock()
 		a.requests = append(a.requests, record)
 		if len(a.requests) > 2000 {
 			a.requests = a.requests[len(a.requests)-2000:]
 		}
 		a.accumulateTokenStatsLocked(record)
 		a.saveAppStateLocked()
 		a.mu.Unlock()
 		runtime.EventsEmit(a.ctx, "requests:updated", a.GetRequests())
 		runtime.EventsEmit(a.ctx, "usage:updated", a.GetTokenStats())
 	}
 	// Check if the port is available before claiming we're running
@@ -420,10 +467,16 @@ func (a *App) StartProxy() error {
 		}
 	}()
 	a.mu.Lock()
 	if a.running {
 		a.mu.Unlock()
 		return fmt.Errorf("proxy already running")
 	}
 	a.server = server
 	a.addr = addr
 	a.running = true
 	a.startedAt = time.Now()
 	a.mu.Unlock()
 	msg := fmt.Sprintf("Proxy started on http://%s", addr)
 	runtime.LogInfof(a.ctx, msg)
@@ -435,8 +488,24 @@ func (a *App) StartProxy() error {
 	return nil
 }
-// ClearLogs is a no-op backend helper (logs are kept in frontend memory)
+func (a *App) GetLogs() []AppLog {
-func (a *App) ClearLogs() {}
+	a.mu.RLock()
 	defer a.mu.RUnlock()
 	out := make([]AppLog, len(a.logs))
 	copy(out, a.logs)
 	for i, j := 0, len(out)-1; i < j; i, j = i+1, j-1 {
 		out[i], out[j] = out[j], out[i]
 	}
 	return out
 }
 // ClearLogs drops all in-memory log entries, persists the now-empty log list
 // to the app state file, and notifies the frontend via "logs:updated".
 func (a *App) ClearLogs() {
 	a.mu.Lock()
 	a.logs = nil
 	a.saveAppStateLocked()
 	a.mu.Unlock()
 	// The event is emitted only after the write lock is released, because
 	// GetLogs acquires the read lock itself.
 	runtime.EventsEmit(a.ctx, "logs:updated", a.GetLogs())
 }
 // StopProxy stops the proxy server
 func (a *App) StopProxy() error {
@@ -493,10 +562,21 @@ func (a *App) GetRequests() []RequestRecord {
 // ClearRequests drops the in-memory request history, persists the change to
 // the app state file, and records an informational log entry.
 func (a *App) ClearRequests() {
 	a.mu.Lock()
 	a.requests = nil
 	a.saveAppStateLocked()
 	a.mu.Unlock()
 	// emitLog takes a.mu itself, so it must run after the lock is released.
 	a.emitLog("info", "Request history cleared")
 }
 // GetTokenStats returns a snapshot of the cumulative token statistics. The
 // ByModel map in the result is a copy, so callers may modify it without
 // touching the App's internal state.
 func (a *App) GetTokenStats() TokenStats {
 	a.mu.RLock()
 	snapshot := a.stats
 	if snapshot.ByModel != nil {
 		// The struct copy above still shares the map; detach it.
 		snapshot.ByModel = cloneIntMap(snapshot.ByModel)
 	}
 	a.mu.RUnlock()
 	return snapshot
 }
 // RefreshModels probes the running proxy for the latest model list.
 func (a *App) RefreshModels() ([]ModelInfo, error) {
 	a.mu.RLock()
@@ -614,6 +694,207 @@ func formatPayloadSize(bytes int) string {
 	return fmt.Sprintf("%d B", bytes)
 }
 // appStateFile is the JSON document stored at appStatePath. It is decoded by
 // loadAppState and encoded by saveAppStateLocked.
 type appStateFile struct {
 	// Requests is the persisted request history.
 	Requests []RequestRecord `json:"requests"`
 	// Logs is the persisted application log.
 	Logs     []AppLog        `json:"logs"`
 	// Stats is the persisted cumulative token usage.
 	Stats    TokenStats      `json:"stats"`
 }
 // loadAppState restores request history, logs, and token statistics from the
 // app state file. A missing file is not an error and leaves the App untouched.
 // Path resolution, read, and JSON decode errors are returned as-is.
 func (a *App) loadAppState() error {
 	statePath, err := appStatePath()
 	if err != nil {
 		return err
 	}
 	raw, err := os.ReadFile(statePath)
 	switch {
 	case err == nil:
 		// File read; continue to decoding below.
 	case os.IsNotExist(err):
 		return nil
 	default:
 		return err
 	}
 	var persisted appStateFile
 	if err := json.Unmarshal(raw, &persisted); err != nil {
 		return err
 	}

 	a.mu.Lock()
 	defer a.mu.Unlock()
 	a.requests, a.logs, a.stats = persisted.Requests, persisted.Logs, persisted.Stats
 	if a.stats.ByModel == nil {
 		// Guarantee a writable map for later accumulation.
 		a.stats.ByModel = map[string]int{}
 	}
 	a.reconcileTokenStatsLocked()
 	return nil
 }
 // saveAppStateLocked writes the current request history, logs, and token
 // statistics to the app state file. Callers in this file invoke it while
 // holding a.mu, which also serializes use of the temporary file below.
 //
 // The state contains full request and response bodies, so the file is created
 // with owner-only permissions (0600). The data is first written to a sibling
 // temporary file and then renamed over the target, so an interrupted write
 // cannot leave a truncated state file that would fail to load on next start.
 //
 // Persistence is best-effort: failures are logged as warnings and never
 // returned.
 func (a *App) saveAppStateLocked() {
 	path, err := appStatePath()
 	if err != nil {
 		runtime.LogWarningf(a.ctx, "resolve app state path failed: %v", err)
 		return
 	}
 	if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
 		runtime.LogWarningf(a.ctx, "create app state dir failed: %v", err)
 		return
 	}
 	state := appStateFile{
 		Requests: a.requests,
 		Logs:     a.logs,
 		Stats:    a.stats,
 	}
 	data, err := json.MarshalIndent(state, "", "  ")
 	if err != nil {
 		runtime.LogWarningf(a.ctx, "marshal app state failed: %v", err)
 		return
 	}
 	tmpPath := path + ".tmp"
 	if err := os.WriteFile(tmpPath, data, 0600); err != nil {
 		runtime.LogWarningf(a.ctx, "write app state failed: %v", err)
 		return
 	}
 	// Rename replaces the previous state file in one step; the new file keeps
 	// the 0600 mode even if an older version created the target as 0644.
 	if err := os.Rename(tmpPath, path); err != nil {
 		// Best-effort cleanup; the rename failure is what gets reported.
 		_ = os.Remove(tmpPath)
 		runtime.LogWarningf(a.ctx, "write app state failed: %v", err)
 	}
 }
 // appStatePath returns the location of the persisted desktop state file:
 // <home>/.config/lingma-ipc-proxy/app-state.json, where <home> comes from
 // os.UserHomeDir. The error from os.UserHomeDir is returned unchanged.
 func appStatePath() (string, error) {
 	homeDir, err := os.UserHomeDir()
 	if err != nil {
 		return "", err
 	}
 	segments := []string{homeDir, ".config", "lingma-ipc-proxy", "app-state.json"}
 	return filepath.Join(segments...), nil
 }
 // accumulateTokenStatsLocked folds one request record into the cumulative
 // statistics. Callers in this file invoke it while holding a.mu.
 //
 // Every record bumps TotalRequests (and SuccessRequests for 2xx statuses) and
 // the token sums. Records with a positive TotalTokens are also attributed to
 // their trimmed model name in ByModel ("-" when the name is empty). LastModel
 // is updated only for a named model on a usage-bearing path.
 func (a *App) accumulateTokenStatsLocked(record RequestRecord) {
 	stats := &a.stats

 	stats.TotalRequests++
 	if succeeded := record.StatusCode >= 200 && record.StatusCode < 300; succeeded {
 		stats.SuccessRequests++
 	}

 	stats.InputTokens += record.InputTokens
 	stats.OutputTokens += record.OutputTokens
 	stats.TotalTokens += record.TotalTokens

 	if stats.ByModel == nil {
 		stats.ByModel = map[string]int{}
 	}
 	if tokens := record.TotalTokens; tokens > 0 {
 		label := strings.TrimSpace(record.Model)
 		if label == "" {
 			label = "-"
 		}
 		stats.ByModel[label] += tokens
 		// "-" is the placeholder for "no model"; never report it as the
 		// most recent model.
 		if label != "-" && isUsageBearingRequest(record.Path) {
 			stats.LastModel = label
 		}
 	}

 	stats.LastUpdated = time.Now().Format(time.RFC3339)
 }
 // reconcileTokenStatsLocked recomputes LastModel from the request history and
 // makes sure ByModel is non-nil. loadAppState calls it while holding a.mu.
 //
 // LastModel becomes the trimmed model of the newest request that has a model
 // name, a positive TotalTokens, and a usage-bearing path; it is left empty
 // when no such request exists.
 func (a *App) reconcileTokenStatsLocked() {
 	if a.stats.ByModel == nil {
 		a.stats.ByModel = map[string]int{}
 	}
 	a.stats.LastModel = ""
 	// Scan newest to oldest and stop at the first qualifying request.
 	for idx := len(a.requests); idx > 0; idx-- {
 		entry := a.requests[idx-1]
 		name := strings.TrimSpace(entry.Model)
 		if name != "" && entry.TotalTokens > 0 && isUsageBearingRequest(entry.Path) {
 			a.stats.LastModel = name
 			return
 		}
 	}
 }
 // isUsageBearingRequest reports whether path, after trimming surrounding
 // whitespace, is exactly one of the chat/completion endpoints:
 // /v1/messages, /v1/chat/completions, or /v1/completions.
 func isUsageBearingRequest(path string) bool {
 	endpoint := strings.TrimSpace(path)
 	return endpoint == "/v1/messages" ||
 		endpoint == "/v1/chat/completions" ||
 		endpoint == "/v1/completions"
 }
 // cloneIntMap returns a new map holding the same key/value pairs as src.
 // The result is never nil, even when src is nil.
 func cloneIntMap(src map[string]int) map[string]int {
 	dup := make(map[string]int, len(src))
 	for key := range src {
 		dup[key] = src[key]
 	}
 	return dup
 }
 // extractTokenUsage returns the (input, output) token counts found in a
 // response body.
 //
 // The body is first parsed as one JSON document. If that yields no usage, it
 // is scanned line by line for "data:" payloads (server-sent events), each
 // parsed as JSON. The last positive input count and the last positive output
 // count seen across the events are returned. A blank body yields (0, 0).
 func extractTokenUsage(respBody string) (int, int) {
 	if strings.TrimSpace(respBody) == "" {
 		return 0, 0
 	}
 	if in, out := extractUsageFromJSON(respBody); in != 0 || out != 0 {
 		return in, out
 	}

 	const eventPrefix = "data:"
 	var input, output int
 	for _, rawLine := range strings.Split(respBody, "\n") {
 		event := strings.TrimSpace(rawLine)
 		if !strings.HasPrefix(event, eventPrefix) {
 			continue
 		}
 		switch body := strings.TrimSpace(event[len(eventPrefix):]); body {
 		case "", "[DONE]":
 			// Empty payloads and the stream terminator carry no usage.
 		default:
 			in, out := extractUsageFromJSON(body)
 			if in > 0 {
 				input = in
 			}
 			if out > 0 {
 				output = out
 			}
 		}
 	}
 	return input, output
 }
 // extractUsageFromJSON parses raw as JSON, locates a "usage" object with
 // findUsageMap, and returns its (input, output) token counts.
 //
 // Input is input_tokens + prompt_tokens and output is output_tokens +
 // completion_tokens; keys that are absent contribute zero. Invalid JSON or a
 // document without a usage object yields (0, 0).
 func extractUsageFromJSON(raw string) (int, int) {
 	var decoded any
 	if json.Unmarshal([]byte(raw), &decoded) != nil {
 		return 0, 0
 	}
 	usage, found := findUsageMap(decoded)
 	if !found {
 		return 0, 0
 	}
 	count := func(first, second string) int {
 		return intFromAny(usage[first]) + intFromAny(usage[second])
 	}
 	return count("input_tokens", "prompt_tokens"), count("output_tokens", "completion_tokens")
 }
 // findUsageMap searches a decoded JSON value for an object stored under the
 // key "usage" and returns it.
 //
 // For a JSON object, a direct "usage" member that is itself an object wins;
 // otherwise the members are searched recursively. For a JSON array, the
 // elements are searched in order. Any other value yields (nil, false).
 func findUsageMap(value any) (map[string]any, bool) {
 	if object, isObject := value.(map[string]any); isObject {
 		if direct, isMap := object["usage"].(map[string]any); isMap {
 			return direct, true
 		}
 		for _, member := range object {
 			if nested, ok := findUsageMap(member); ok {
 				return nested, true
 			}
 		}
 		return nil, false
 	}
 	if list, isList := value.([]any); isList {
 		for _, element := range list {
 			if nested, ok := findUsageMap(element); ok {
 				return nested, true
 			}
 		}
 	}
 	return nil, false
 }
 // intFromAny converts a decoded JSON number to int. float64 values are
 // truncated, int values are returned unchanged, and json.Number values are
 // parsed with Int64 (a parse failure yields 0). Any other type yields 0.
 func intFromAny(value any) int {
 	if f, ok := value.(float64); ok {
 		return int(f)
 	}
 	if i, ok := value.(int); ok {
 		return i
 	}
 	if num, ok := value.(json.Number); ok {
 		// The error is ignored on purpose: Int64 returns 0 when it fails.
 		parsed, _ := num.Int64()
 		return int(parsed)
 	}
 	return 0
 }
 func defaultConfig() service.Config {
 	cfg := service.Config{
 		Host:                  "127.0.0.1",
@@ -625,7 +906,9 @@ func defaultConfig() service.Config {
 		Model:                 "kmodel",
 		ShellType:             defaultShellType(),
 		SessionMode:           service.SessionModeAuto,
-		Timeout:     120 * time.Second,
+		Timeout:               300 * time.Second,
 		RemoteFallbackEnabled: true,
 		RemoteFallbackModels:  service.DefaultRemoteFallbackModels(),
 	}
 	// Try to load config file from multiple locations
@@ -650,6 +933,8 @@ func defaultConfig() service.Config {
 					ShellType             string   `json:"shell_type"`
 					SessionMode           string   `json:"session_mode"`
 					TimeoutSeconds        int      `json:"timeout"`
 					RemoteFallbackEnabled *bool    `json:"remote_fallback_enabled"`
 					RemoteFallbackModels  []string `json:"remote_fallback_models"`
 				}
 				if err := json.Unmarshal(data, &fileCfg); err == nil {
 					if fileCfg.Host != "" {
@@ -702,6 +987,12 @@ func defaultConfig() service.Config {
 					if fileCfg.TimeoutSeconds > 0 {
 						cfg.Timeout = time.Duration(fileCfg.TimeoutSeconds) * time.Second
 					}
 					if fileCfg.RemoteFallbackEnabled != nil {
 						cfg.RemoteFallbackEnabled = *fileCfg.RemoteFallbackEnabled
 					}
 					if len(fileCfg.RemoteFallbackModels) > 0 {
 						cfg.RemoteFallbackModels = cleanConfigStrings(fileCfg.RemoteFallbackModels)
 					}
 				}
 				break // loaded successfully
 			}
@@ -732,6 +1023,20 @@ func maskIdentifier(value string) string {
 	return string(runes[:4]) + "..." + string(runes[len(runes)-4:])
 }
 // cleanConfigStrings normalizes a list of configuration strings: each entry is
 // trimmed, blank entries are dropped, and repeats of an already kept entry are
 // skipped. First-occurrence order is preserved and the result is never nil.
 func cleanConfigStrings(values []string) []string {
 	result := make([]string, 0, len(values))
 	visited := map[string]bool{}
 	for _, raw := range values {
 		switch entry := strings.TrimSpace(raw); {
 		case entry == "", visited[entry]:
 			// Blank or duplicate: nothing to keep.
 		default:
 			visited[entry] = true
 			result = append(result, entry)
 		}
 	}
 	return result
 }
 func configSearchPaths() []string {
 	var paths []string
 	// 1. Executable directory (for dev / portable mode)
--- a/desktop/frontend/src/App.vue
+++ b/desktop/frontend/src/App.vue
@@ -6,7 +6,7 @@ import Models from './views/Models.vue'
 import Requests from './views/Requests.vue'
 import Settings from './views/Settings.vue'
 import { EventsOff, EventsOn } from '../wailsjs/runtime'
-import { GetStatus, HideWindow, MinimizeWindow } from '../wailsjs/go/main/App.js'
+import { ClearLogs, GetLogs, GetStatus, HideWindow, MinimizeWindow } from '../wailsjs/go/main/App.js'
 import lingmaIcon from './assets/images/lingma-icon.png'
 const currentTab = ref('dashboard')
@@ -42,8 +42,13 @@ function showToast(message) {
  }, 2200)
 }
-function clearLocalLogs() {
+async function clearLocalLogs() {
  try {
    await ClearLogs()
    logs.value = []
  } catch (e) {
    logs.value = []
  }
 }
 function setStatus(nextStatus) {
@@ -158,14 +163,25 @@ onMounted(() => {
  systemThemeQuery?.addEventListener?.('change', applyTheme)
  applyTheme()
  refreshStatus()
  GetLogs().then((items) => {
    logs.value = Array.isArray(items) ? items : []
  }).catch(() => {})
  safeEventsOn('models:updated', (data) => {
    status.value.models = Array.isArray(data) ? data.length : status.value.models
    addLog('info', `模型列表已更新：${status.value.models} 个模型`)
  })
  safeEventsOn('log', (data) => {
    if (data.time && data.message !== undefined) {
      logs.value.unshift(data)
      if (logs.value.length > 500) logs.value = logs.value.slice(0, 500)
    } else {
      addLog(data.level || 'info', data.message || '')
    }
    refreshStatus()
  })
  safeEventsOn('logs:updated', (data) => {
    logs.value = Array.isArray(data) ? data : []
  })
  safeEventsOn('quit:confirm', (message) => {
    showToast(message || '再按一次退出快捷键将停止代理并退出应用')
  })
@@ -183,6 +199,7 @@ onUnmounted(() => {
  systemThemeQuery?.removeEventListener?.('change', applyTheme)
  safeEventsOff('models:updated')
  safeEventsOff('log')
  safeEventsOff('logs:updated')
  safeEventsOff('quit:confirm')
  safeEventsOff('status:updated')
  safeEventsOff('requests:updated')
@@ -222,7 +239,7 @@ onUnmounted(() => {
        <span class="status-dot" :class="{ running: status.running }"></span>
        <div>
          <strong>{{ status.running ? 'Proxy Running' : 'Proxy Stopped' }}</strong>
-          <small>v1.4.2</small>
+          <small>v1.4.3</small>
        </div>
      </div>
    </aside>
--- a/desktop/frontend/src/components/HelloWorld.vue
+++ b/desktop/frontend/src/components/HelloWorld.vue
@@ -1,71 +0,0 @@
 <script lang="ts" setup>
 import {reactive} from 'vue'
 import {Greet} from '../../wailsjs/go/main/App'
 const data = reactive({
  name: "",
  resultText: "Please enter your name below 👇",
 })
 function greet() {
  Greet(data.name).then(result => {
    data.resultText = result
  })
 }
 </script>
 <template>
  <main>
    <div id="result" class="result">{{ data.resultText }}</div>
    <div id="input" class="input-box">
      <input id="name" v-model="data.name" autocomplete="off" class="input" type="text"/>
      <button class="btn" @click="greet">Greet</button>
    </div>
  </main>
 </template>
 <style scoped>
 .result {
  height: 20px;
  line-height: 20px;
  margin: 1.5rem auto;
 }
 .input-box .btn {
  width: 60px;
  height: 30px;
  line-height: 30px;
  border-radius: 3px;
  border: none;
  margin: 0 0 0 20px;
  padding: 0 8px;
  cursor: pointer;
 }
 .input-box .btn:hover {
  background-image: linear-gradient(to top, #cfd9df 0%, #e2ebf0 100%);
  color: #333333;
 }
 .input-box .input {
  border: none;
  border-radius: 3px;
  outline: none;
  height: 30px;
  line-height: 30px;
  padding: 0 10px;
  background-color: rgba(240, 240, 240, 1);
  -webkit-font-smoothing: antialiased;
 }
 .input-box .input:hover {
  border: none;
  background-color: rgba(255, 255, 255, 1);
 }
 .input-box .input:focus {
  border: none;
  background-color: rgba(255, 255, 255, 1);
 }
 </style>
--- a/desktop/frontend/src/style.css
+++ b/desktop/frontend/src/style.css
@@ -1,5 +1,12 @@
 :root {
-  font-family: Inter, ui-sans-serif, -apple-system, BlinkMacSystemFont, "SF Pro Text", "Segoe UI", sans-serif;
+  font-family:
    Inter,
    ui-sans-serif,
    -apple-system,
    BlinkMacSystemFont,
    'SF Pro Text',
    'Segoe UI',
    sans-serif;
  color: #172033;
  background: #eef2f6;
  font-synthesis: none;
@@ -26,7 +33,7 @@
  --radius: 8px;
 }
-:root[data-theme="dark"] {
+:root[data-theme='dark'] {
  color: #edf3ff;
  background: #111827;
  --bg: #111827;
@@ -68,7 +75,7 @@ body {
  background: var(--bg);
 }
-:root[data-theme="dark"] body {
+:root[data-theme='dark'] body {
  background: var(--bg);
 }
@@ -106,7 +113,7 @@ button {
  box-shadow: none;
 }
-:root[data-theme="dark"] .app-shell {
+:root[data-theme='dark'] .app-shell {
  border-color: rgba(148, 163, 184, 0.22);
  background: rgba(16, 24, 36, 0.78);
 }
@@ -123,7 +130,7 @@ button {
  box-shadow: inset -1px 0 0 rgba(125, 139, 158, 0.16);
 }
-:root[data-theme="dark"] .sidebar {
+:root[data-theme='dark'] .sidebar {
  border-right-color: rgba(148, 163, 184, 0.14);
  background: linear-gradient(180deg, rgba(28, 39, 56, 0.7), rgba(18, 27, 40, 0.66));
  box-shadow: inset -1px 0 0 rgba(148, 163, 184, 0.12);
@@ -145,13 +152,13 @@ button {
  background: rgba(255, 255, 255, 0.58);
 }
-:root[data-theme="dark"] .brand:hover,
+:root[data-theme='dark'] .brand:hover,
-:root[data-theme="dark"] .nav-item:hover,
+:root[data-theme='dark'] .nav-item:hover,
-:root[data-theme="dark"] .sidebar-status {
+:root[data-theme='dark'] .sidebar-status {
  background: rgba(255, 255, 255, 0.08);
 }
-:root[data-theme="dark"] .nav-item {
+:root[data-theme='dark'] .nav-item {
  color: #aebbd0;
 }
@@ -224,7 +231,7 @@ button {
  box-shadow: inset 0 0 0 1px rgba(37, 99, 235, 0.12);
 }
-:root[data-theme="dark"] .nav-item.active {
+:root[data-theme='dark'] .nav-item.active {
  color: #d8e6ff;
  background: rgba(67, 111, 190, 0.24);
  box-shadow: inset 0 0 0 1px rgba(105, 161, 255, 0.18);
@@ -285,13 +292,13 @@ button {
  min-height: 46px;
  padding: 0 16px;
  border-bottom: 1px solid rgba(112, 128, 148, 0.18);
-  background: rgba(255, 255, 255, 0.58);
+  background: #f6f9fd;
-  backdrop-filter: blur(20px) saturate(1.08);
+  backdrop-filter: none;
 }
-:root[data-theme="dark"] .topbar {
+:root[data-theme='dark'] .topbar {
  border-bottom-color: rgba(148, 163, 184, 0.14);
-  background: rgba(20, 30, 45, 0.66);
+  background: #162131;
 }
 .topbar-spacer {
@@ -372,10 +379,10 @@ button {
  backdrop-filter: blur(18px) saturate(1.12);
 }
-:root[data-theme="dark"] .glass-panel,
+:root[data-theme='dark'] .glass-panel,
-:root[data-theme="dark"] .metric,
+:root[data-theme='dark'] .metric,
-:root[data-theme="dark"] .table-panel,
+:root[data-theme='dark'] .table-panel,
-:root[data-theme="dark"] .config-panel {
+:root[data-theme='dark'] .config-panel {
  border-color: rgba(148, 163, 184, 0.14);
  background: var(--surface);
 }
@@ -583,6 +590,10 @@ button {
  gap: 18px;
 }
 .settings-grid {
  align-items: start;
 }
 .grid-3 {
  display: grid;
  grid-template-columns: repeat(3, minmax(0, 1fr));
@@ -591,10 +602,11 @@ button {
 .dashboard-grid {
  display: grid;
  align-items: stretch;
  grid-template-columns: minmax(0, 1fr) minmax(0, 0.95fr) minmax(300px, 0.95fr);
  grid-template-areas:
-    "health models config"
+    'health models config'
-    "requests requests config";
+    'requests requests usage';
  gap: 12px;
 }
@@ -604,16 +616,109 @@ button {
 .area-models {
  grid-area: models;
  min-height: 0;
 }
 .area-config {
  grid-area: config;
 }
 .compact-header {
  margin-bottom: 10px;
 }
 .compact-header p {
  margin-top: 4px;
 }
 .area-usage {
  grid-area: usage;
 }
 .area-requests {
  grid-area: requests;
 }
 .usage-grid {
  display: grid;
  grid-template-columns: repeat(2, minmax(0, 1fr));
  gap: 10px;
 }
 .usage-grid div {
  min-width: 0;
  padding: 12px;
  border: 1px solid var(--line);
  border-radius: 8px;
  background: var(--surface-soft);
 }
 .usage-grid label {
  display: block;
  margin-bottom: 6px;
  color: var(--muted);
  font-size: 11px;
  font-weight: 680;
 }
 /* Value text inside a usage card. Long values are cut with an ellipsis;
    white-space: nowrap keeps the text on one line so values with break
    opportunities are truncated instead of wrapping. */
 .usage-grid strong {
  display: block;
  overflow: hidden;
  color: var(--text);
  font-size: 20px;
  line-height: 1.15;
  text-overflow: ellipsis;
  white-space: nowrap;
 }
 .usage-foot {
  display: flex;
  flex-wrap: wrap;
  gap: 8px 14px;
  margin-top: 12px;
  color: var(--muted);
  font-size: 12px;
 }
 .config-summary {
  display: grid;
  grid-template-columns: repeat(2, minmax(0, 1fr));
  gap: 10px;
 }
 .config-summary-item {
  min-width: 0;
  padding: 12px;
  border: 1px solid var(--line);
  border-radius: 8px;
  background: var(--surface-soft);
 }
 .config-summary-item label {
  display: block;
  margin-bottom: 6px;
  color: var(--muted);
  font-size: 11px;
  font-weight: 680;
 }
 .config-summary-item strong {
  display: block;
  overflow: hidden;
  color: var(--text);
  font-size: 13px;
  line-height: 1.3;
  text-overflow: ellipsis;
  white-space: nowrap;
 }
 .config-summary-item.span-2 {
  grid-column: 1 / -1;
 }
 .compact-link {
  margin-top: 10px;
 }
 .activity-chart {
  display: grid;
  grid-template-columns: repeat(36, minmax(3px, 1fr));
@@ -637,13 +742,13 @@ button {
  white-space: nowrap;
 }
-:root[data-theme="dark"] .activity-chart,
+:root[data-theme='dark'] .activity-chart,
-:root[data-theme="dark"] .data-table th,
+:root[data-theme='dark'] .data-table th,
-:root[data-theme="dark"] .field input,
+:root[data-theme='dark'] .field input,
-:root[data-theme="dark"] .field textarea,
+:root[data-theme='dark'] .field textarea,
-:root[data-theme="dark"] .search-input,
+:root[data-theme='dark'] .search-input,
-:root[data-theme="dark"] .detail-panel pre,
+:root[data-theme='dark'] .detail-panel pre,
-:root[data-theme="dark"] .code-block {
+:root[data-theme='dark'] .code-block {
  color: var(--text);
  border-color: var(--line);
  background: rgba(15, 23, 42, 0.74);
@@ -719,8 +824,8 @@ button {
  box-shadow: inset 0 0 0 1px rgba(37, 99, 235, 0.12);
 }
-:root[data-theme="dark"] .model-choice:hover,
+:root[data-theme='dark'] .model-choice:hover,
-:root[data-theme="dark"] .model-choice:focus-visible {
+:root[data-theme='dark'] .model-choice:focus-visible {
  color: #f3f7ff;
  border-color: rgba(105, 161, 255, 0.38);
  background: rgba(72, 118, 214, 0.34);
@@ -728,7 +833,48 @@ button {
 .models-list .model-row,
 .model-list-row {
-  grid-template-columns: 22px minmax(220px, 1fr) auto;
+  grid-template-columns: 22px minmax(220px, 1fr) minmax(260px, auto);
 }
 /* Container for a model's spec chips: a wrapping flex row packed toward the end (right in LTR). */
 .model-specs {
  display: flex;
  flex-wrap: wrap;
  justify-content: flex-end;
  gap: 6px;
 }
 /* Base chip: a small bordered label with muted text that never wraps. */
 .spec-chip {
  display: inline-flex;
  min-height: 22px;
  align-items: center;
  padding: 0 8px;
  border: 1px solid var(--line);
  border-radius: 7px;
  color: var(--muted);
  background: var(--surface-soft);
  font-size: 11px;
  font-weight: 680;
  white-space: nowrap;
 }
 /* Green-tinted variant. */
 .spec-chip.strong {
  color: #0d6a41;
  border-color: rgba(24, 160, 88, 0.18);
  background: var(--green-soft);
 }
 /* Amber-tinted variant. */
 .spec-chip.muted-chip {
  color: #8a5a08;
  border-color: rgba(217, 119, 6, 0.16);
  background: var(--warn-soft);
 }
 /* Dark theme: only the text color of the two tinted variants is overridden here. */
 :root[data-theme='dark'] .spec-chip.strong {
  color: #7ee0aa;
 }
 :root[data-theme='dark'] .spec-chip.muted-chip {
  color: #ffd27a;
 }
 .model-brand-icon {
@@ -744,6 +890,8 @@ button {
  display: flex;
  min-height: 0;
  flex-direction: column;
  overflow: hidden;
  height: 295px;
 }
 .model-card-list,
@@ -754,7 +902,14 @@ button {
 }
 .model-card-list {
-  max-height: 248px;
+  flex: 1 1 auto;
  max-height: none;
  scrollbar-width: none;
 }
 .model-card-list::-webkit-scrollbar {
  width: 0;
  height: 0;
 }
 .model-page-list {
@@ -870,6 +1025,66 @@ button {
  border-bottom: 1px solid var(--line);
 }
 /* Toolbar heading row: no outer margin, children aligned on their text baseline. */
 .toolbar-header {
  margin: 0;
  display: flex;
  align-items: baseline;
  gap: 8px;
 }
 /* Small regular-weight counter text that stays on one line. */
 .toolbar-count {
  font-size: 12px;
  font-weight: normal;
  white-space: nowrap;
 }
 /* Wrapper that keeps the search input and its counter on one row and takes the remaining toolbar width. */
 .toolbar-search-wrap {
  display: flex;
  align-items: center;
  gap: 12px;
  flex: 1;
 }
 /* The search input fills the wrapper but is capped at 300px. */
 .toolbar-search-input {
  max-width: 300px;
  width: 100%;
 }
 /* Small outlined button used for compact panel and toolbar actions. */
 .btn-sm-outline {
  padding: 4px 10px;
  font-size: 12px;
  background: transparent;
  border: 1px solid var(--line);
  border-radius: 6px;
  cursor: pointer;
  color: var(--text);
  display: inline-flex;
  align-items: center;
  justify-content: center;
  transition: all 0.16s ease;
 }
 /* Hover feedback is limited to enabled buttons so a disabled button does not look interactive. */
 .btn-sm-outline:hover:not(:disabled) {
  background: rgba(0, 0, 0, 0.05);
 }
 /* Disabled state: dimmed, with a not-allowed cursor. */
 .btn-sm-outline:disabled {
  opacity: 0.6;
  cursor: not-allowed;
 }
 /* Small gap before an icon placed inside the button. */
 .btn-sm-outline i {
  margin-left: 2px;
 }
 /* Dark theme: lighter text and a light hover tint instead of the dark one. */
 :root[data-theme='dark'] .btn-sm-outline {
  color: #dce8fb;
 }
 :root[data-theme='dark'] .btn-sm-outline:hover:not(:disabled) {
  background: rgba(255, 255, 255, 0.1);
 }
 .table-scroll {
  flex: 0 0 auto;
  max-height: none;
@@ -893,7 +1108,7 @@ button {
 .area-requests .table-scroll {
  min-height: 0;
-  max-height: 260px;
+  max-height: 211px;
  overflow: auto;
 }
@@ -932,10 +1147,12 @@ button {
 }
 .data-table tbody tr {
-  height: var(--request-row-height, 64px);
+  height: var(--request-row-height, 42px);
  cursor: pointer;
  background: rgba(255, 255, 255, 0.34);
-  transition: background-color 140ms ease, box-shadow 140ms ease;
+  transition:
    background-color 140ms ease,
    box-shadow 140ms ease;
 }
 .data-table tbody tr:hover {
@@ -947,23 +1164,23 @@ button {
  box-shadow: inset 3px 0 0 var(--blue);
 }
-:root[data-theme="dark"] .data-table {
+:root[data-theme='dark'] .data-table {
  background: rgba(15, 23, 42, 0.8);
 }
-:root[data-theme="dark"] .data-table th {
+:root[data-theme='dark'] .data-table th {
  background: rgba(15, 23, 42, 0.96);
 }
-:root[data-theme="dark"] .data-table tbody tr {
+:root[data-theme='dark'] .data-table tbody tr {
  background: rgba(20, 31, 48, 0.7);
 }
-:root[data-theme="dark"] .data-table tbody tr:hover {
+:root[data-theme='dark'] .data-table tbody tr:hover {
  background: rgba(45, 65, 96, 0.9);
 }
-:root[data-theme="dark"] .data-table tbody tr.selected {
+:root[data-theme='dark'] .data-table tbody tr.selected {
  background: rgba(38, 65, 112, 0.96);
  box-shadow: inset 3px 0 0 #67a1ff;
 }
@@ -1001,8 +1218,7 @@ button {
  background: var(--red-soft);
 }
-.link-row,
+.link-row {
 .table-footer button {
  display: flex;
  align-items: center;
  justify-content: space-between;
@@ -1014,24 +1230,10 @@ button {
  cursor: pointer;
 }
-:root[data-theme="dark"] .link-row,
+:root[data-theme='dark'] .link-row {
 :root[data-theme="dark"] .table-footer button {
  color: #dce8fb;
 }
 .table-footer {
  display: flex;
  align-items: center;
  justify-content: space-between;
  padding: 10px 14px;
  color: var(--muted);
  font-size: 12px;
 }
 .table-footer button {
  width: auto;
  gap: 8px;
 }
 .method-chip {
  color: #334155;
@@ -1047,7 +1249,10 @@ button {
  min-height: 32px;
  border-radius: 8px;
  cursor: pointer;
-  transition: transform 0.16s ease, background 0.16s ease, box-shadow 0.16s ease;
+  transition:
    transform 0.16s ease,
    background 0.16s ease,
    box-shadow 0.16s ease;
 }
 .primary-button {
@@ -1132,12 +1337,92 @@ button:disabled {
  gap: 6px;
 }
 /* Removes the default fieldset border, padding and margin so it acts as a plain grouping element. */
 .settings-fieldset {
  min-width: 0;
  padding: 0;
  margin: 0;
  border: 0;
 }
 /* Dims the whole group while the fieldset is disabled. */
 .settings-fieldset:disabled {
  opacity: 0.56;
 }
 /* Space below a hint box. */
 .compact-hint {
  margin-bottom: 14px;
 }
 /* Row spacing for the compact form grid variant. */
 .compact-form-grid {
  row-gap: 14px;
 }
 .field label {
  color: var(--muted);
  font-size: 12px;
  font-weight: 680;
 }
 /* Field row whose children are pushed to opposite ends and vertically centered. */
 .switch-field {
  display: flex;
  align-items: center;
  justify-content: space-between;
  gap: 14px;
 }
 /* Muted helper paragraph inside the field row. */
 .switch-field p {
  margin: 4px 0 0;
  color: var(--muted);
  font-size: 12px;
  line-height: 1.45;
 }
 /* Toggle switch: a fixed 44x26 box holding an invisible checkbox and a styled <span> track. */
 .switch {
  position: relative;
  flex: 0 0 auto;
  display: inline-flex;
  width: 44px;
  height: 26px;
 }
 /* The native checkbox is fully transparent; the sibling span is what the user sees. */
 .switch input {
  position: absolute;
  inset: 0;
  opacity: 0;
 }
 /* Track: pill-shaped background filling the switch box. */
 .switch span {
  position: absolute;
  inset: 0;
  border: 1px solid var(--line-strong);
  border-radius: 999px;
  background: rgba(148, 163, 184, 0.28);
  transition:
    background 0.16s ease,
    border-color 0.16s ease;
 }
 /* Knob: an 18px white circle drawn as the track's ::after pseudo-element. */
 .switch span::after {
  content: '';
  position: absolute;
  top: 3px;
  left: 3px;
  width: 18px;
  height: 18px;
  border-radius: 999px;
  background: white;
  box-shadow: 0 2px 8px rgba(15, 23, 42, 0.2);
  transition: transform 0.16s ease;
 }
 /* Checked state: blue track. */
 .switch input:checked + span {
  border-color: rgba(37, 99, 235, 0.72);
  background: #2563eb;
 }
 /* Checked state: slide the knob 18px to the right. */
 .switch input:checked + span::after {
  transform: translateX(18px);
 }
 /* The checkbox itself is invisible, so show keyboard focus on the track instead. */
 .switch input:focus-visible + span {
  outline: 2px solid rgba(37, 99, 235, 0.72);
  outline-offset: 2px;
 }
 .field input,
 .field textarea,
 .search-input {
@@ -1151,6 +1436,14 @@ button:disabled {
  outline: none;
 }
 /*
  * Resets box styling for the checkbox inside a .switch that sits in a .field.
  * NOTE(review): this presumably undoes the shared `.field input` rule, whose
  * declarations are only partly visible in this hunk; confirm against the full stylesheet.
  */
 .field .switch input {
  width: auto;
  min-height: 0;
  padding: 0;
  border: 0;
  background: transparent;
 }
 .field textarea {
  min-height: 78px;
  padding-top: 9px;
@@ -1230,14 +1523,14 @@ button:disabled {
  background: var(--blue-soft);
 }
-:root[data-theme="dark"] .custom-select > button {
+:root[data-theme='dark'] .custom-select > button {
  color: var(--text);
  border-color: var(--line);
  background: rgba(15, 23, 42, 0.74);
 }
-:root[data-theme="dark"] .select-menu button:hover,
+:root[data-theme='dark'] .select-menu button:hover,
-:root[data-theme="dark"] .select-menu button.selected {
+:root[data-theme='dark'] .select-menu button.selected {
  color: #dce9ff;
  background: rgba(72, 118, 214, 0.32);
 }
@@ -1262,7 +1555,7 @@ button:disabled {
 .hint-box code {
  color: var(--text);
-  font-family: "SF Mono", ui-monospace, Menlo, Consolas, monospace;
+  font-family: 'SF Mono', ui-monospace, Menlo, Consolas, monospace;
  font-size: 12px;
 }
@@ -1276,7 +1569,7 @@ button:disabled {
  background: rgba(255, 255, 255, 0.54);
 }
-:root[data-theme="dark"] .detect-card {
+:root[data-theme='dark'] .detect-card {
  background: rgba(15, 23, 42, 0.52);
 }
@@ -1326,7 +1619,7 @@ button:disabled {
  margin: 0;
  color: var(--text);
  overflow-wrap: anywhere;
-  font-family: "SF Mono", ui-monospace, Menlo, Consolas, monospace;
+  font-family: 'SF Mono', ui-monospace, Menlo, Consolas, monospace;
  font-size: 12px;
  line-height: 1.45;
 }
@@ -1377,7 +1670,7 @@ button:disabled {
  user-select: text;
 }
-:root[data-theme="dark"] .detail-panel {
+:root[data-theme='dark'] .detail-panel {
  background: rgba(12, 18, 30, 0.96);
 }
@@ -1423,7 +1716,7 @@ button:disabled {
  -webkit-user-select: text;
  user-select: text;
  background: rgba(255, 255, 255, 0.82);
-  font-family: "SF Mono", ui-monospace, Menlo, Consolas, monospace;
+  font-family: 'SF Mono', ui-monospace, Menlo, Consolas, monospace;
  font-size: 12px;
  line-height: 1.55;
  overflow-wrap: anywhere;
@@ -1525,28 +1818,28 @@ button:disabled {
  border-color: rgba(44, 111, 231, 0.38);
 }
-:root[data-theme="dark"] .json-key {
+:root[data-theme='dark'] .json-key {
  color: #c4b5fd;
 }
-:root[data-theme="dark"] .json-string {
+:root[data-theme='dark'] .json-string {
  color: #86efac;
 }
-:root[data-theme="dark"] .json-number {
+:root[data-theme='dark'] .json-number {
  color: #93c5fd;
 }
-:root[data-theme="dark"] .json-boolean {
+:root[data-theme='dark'] .json-boolean {
  color: #fca5a5;
 }
-:root[data-theme="dark"] .json-null,
+:root[data-theme='dark'] .json-null,
-:root[data-theme="dark"] .json-punctuation {
+:root[data-theme='dark'] .json-punctuation {
  color: #9aa8bd;
 }
-:root[data-theme="dark"] .json-summary {
+:root[data-theme='dark'] .json-summary {
  color: #b7c3d6;
  border-color: rgba(148, 163, 184, 0.24);
  background: rgba(30, 41, 59, 0.78);
@@ -1563,19 +1856,17 @@ button:disabled {
  height: 0;
 }
-:root[data-theme="dark"] .detail-panel pre,
+:root[data-theme='dark'] .detail-panel pre,
-:root[data-theme="dark"] .code-block,
+:root[data-theme='dark'] .code-block,
-:root[data-theme="dark"] .json-viewer {
+:root[data-theme='dark'] .json-viewer {
  color: var(--text);
  border-color: var(--line);
  background: rgba(17, 24, 39, 0.94);
 }
 .log-row {
  grid-template-columns: 82px 58px minmax(0, 1fr);
-  font-family: "SF Mono", ui-monospace, Menlo, Consolas, monospace;
+  font-family: 'SF Mono', ui-monospace, Menlo, Consolas, monospace;
  font-size: 12px;
  -webkit-user-select: text;
  user-select: text;
@@ -1636,9 +1927,10 @@ button:disabled {
  .dashboard-grid {
    grid-template-areas:
-      "health models"
+      'health models'
-      "config config"
+      'config config'
-      "requests requests";
+      'usage usage'
      'requests requests';
  }
  .status-strip {
@@ -1651,37 +1943,41 @@ button:disabled {
    border-left: 0;
  }
-.strip-actions {
+  .strip-actions {
    grid-column: span 2;
  }
  .config-summary {
    grid-template-columns: 1fr 1fr;
  }
 }
-:root[data-theme="dark"] .strip-actions,
+:root[data-theme='dark'] .strip-actions,
-:root[data-theme="dark"] .secondary-button,
+:root[data-theme='dark'] .secondary-button,
-:root[data-theme="dark"] .ghost-button,
+:root[data-theme='dark'] .ghost-button,
-:root[data-theme="dark"] .icon-button,
+:root[data-theme='dark'] .icon-button,
-:root[data-theme="dark"] .segmented,
+:root[data-theme='dark'] .segmented,
-:root[data-theme="dark"] .segmented button {
+:root[data-theme='dark'] .segmented button {
  color: var(--text);
  border-color: var(--line);
  background: rgba(30, 41, 59, 0.66);
 }
-:root[data-theme="dark"] .strip-actions {
+:root[data-theme='dark'] .strip-actions {
  background: rgba(15, 23, 42, 0.78);
 }
-:root[data-theme="dark"] .strip-actions button {
+:root[data-theme='dark'] .strip-actions button {
  color: #e6eefc;
 }
-:root[data-theme="dark"] .strip-actions button:disabled {
+:root[data-theme='dark'] .strip-actions button:disabled {
  color: #a9b7cc;
  background: rgba(15, 23, 42, 0.52);
  opacity: 0.86;
 }
-:root[data-theme="dark"] .segmented button.active {
+:root[data-theme='dark'] .segmented button.active {
  color: #f8fbff;
  background: rgba(72, 118, 214, 0.42);
 }
@@ -1759,10 +2055,11 @@ button:disabled {
  .dashboard-grid {
    grid-template-areas:
-      "health"
+      'health'
-      "models"
+      'models'
-      "config"
+      'config'
-      "requests";
+      'usage'
      'requests';
  }
  .status-strip {
@@ -1775,6 +2072,14 @@ button:disabled {
    width: 100%;
  }
  .config-summary {
    grid-template-columns: 1fr;
  }
  .config-summary-item.span-2 {
    grid-column: auto;
  }
  .span-2 {
    grid-column: auto;
  }
--- a/desktop/frontend/src/views/Dashboard.vue
+++ b/desktop/frontend/src/views/Dashboard.vue
@@ -1,23 +1,14 @@
 <script setup>
 import { computed, onMounted, onUnmounted, ref } from 'vue'
-import {
+import { GetModels, GetConfig, GetRequests, GetStatus, GetTokenStats, QuitApp, RefreshModels, StartProxy, StopProxy } from '../../wailsjs/go/main/App.js'
  GetModels,
  GetConfig,
  GetRequests,
  GetStatus,
  QuitApp,
  RefreshModels,
  StartProxy,
  StopProxy,
 } from '../../wailsjs/go/main/App.js'
 import { ClipboardSetText } from '../../wailsjs/runtime'
 import { modelIcon } from '../modelIcons'
 const props = defineProps({
  shellStatus: {
    type: Object,
-    default: () => ({ running: false, addr: '', models: 0 }),
+    default: () => ({ running: false, addr: '', models: 0 })
-  },
+  }
 })
 const emit = defineEmits(['log', 'status', 'notice', 'open-settings', 'open-requests', 'open-models'])
@@ -25,9 +16,11 @@ const emit = defineEmits(['log', 'status', 'notice', 'open-settings', 'open-requ
 const status = ref(props.shellStatus)
 const models = ref([])
 const requests = ref([])
 const tokenStats = ref({ totalRequests: 0, successRequests: 0, inputTokens: 0, outputTokens: 0, totalTokens: 0 })
 const health = ref(null)
 const config = ref({})
-const loading = ref(false)
+const proxyLoading = ref(false)
 const modelsLoading = ref(false)
 const testing = ref(false)
 const now = ref(Date.now())
 let interval = null
@@ -63,7 +56,7 @@ const healthStats = computed(() => {
    avg,
    p50: percentile(sorted, 0.5),
    p95: percentile(sorted, 0.95),
-    max: sorted[sorted.length - 1],
+    max: Math.round(sorted[sorted.length - 1])
  }
 })
 const chartBars = computed(() => {
@@ -78,17 +71,22 @@ const displayRequests = computed(() => {
 })
 const displayModels = computed(() => {
  if (models.value.length > 0) {
-    return models.value.slice(0, 5).map((model) => ({ ...model, online: true }))
+    return models.value.map((model) => ({ ...model, online: true }))
  }
  return []
 })
 // Successful requests as a whole-number percentage string; '0%' when no requests are recorded.
 const successRate = computed(() => {
  const requestCount = Number(tokenStats.value.totalRequests || 0)
  if (!requestCount) {
    return '0%'
  }
  const succeeded = Number(tokenStats.value.successRequests || 0)
  const percent = Math.round((succeeded / requestCount) * 100)
  return `${percent}%`
 })
 function parseDurationMs(duration) {
  const text = String(duration || '').trim()
  if (!text) return 0
-  if (text.endsWith('ms')) return Number.parseFloat(text)
+  if (text.endsWith('ms')) return Math.round(Number.parseFloat(text))
-  if (text.endsWith('s')) return Number.parseFloat(text) * 1000
+  if (text.endsWith('s')) return Math.round(Number.parseFloat(text) * 1000)
-  return Number.parseFloat(text) || 0
+  return Math.round(Number.parseFloat(text) || 0)
 }
 function percentile(sorted, p) {
@@ -97,12 +95,20 @@ function percentile(sorted, p) {
  return Math.round(sorted[index])
 }
 /**
  * Formats a count for compact display.
  *
  * - values that would round to 1000K or more: millions with one decimal, e.g. "1.5M"
  * - 10,000 and above: rounded thousands, e.g. "12K"
  * - anything smaller: the number formatted with the zh-CN locale
  *
  * @param {number|string|null|undefined} value Raw count; falsy or non-numeric input is treated as 0.
  * @returns {string} Display string.
  */
 function formatNumber(value) {
  const n = Number(value || 0)
  // Non-numeric input would otherwise be rendered as "NaN".
  if (!Number.isFinite(n)) return '0'
  // Start the M suffix at 999,500: below 1,000,000 such values would round to "1000K".
  if (n >= 999500) return `${(n / 1000000).toFixed(1)}M`
  if (n >= 10000) return `${Math.round(n / 1000)}K`
  return n.toLocaleString('zh-CN')
 }
 async function refresh() {
  try {
    const nextStatus = await GetStatus()
    status.value = nextStatus
    emit('status', nextStatus)
    requests.value = await GetRequests()
    tokenStats.value = await GetTokenStats()
    config.value = await GetConfig()
    if (nextStatus.running) {
      models.value = await GetModels()
@@ -113,7 +119,7 @@ async function refresh() {
 }
 async function refreshModels() {
-  loading.value = true
+  modelsLoading.value = true
  try {
    models.value = await RefreshModels()
    emit('log', 'info', `模型探测完成：${models.value.length} 个`)
@@ -121,7 +127,7 @@ async function refreshModels() {
  } catch (e) {
    emit('log', 'error', '模型探测失败：' + (e.message || String(e)) + '。请确认 Lingma 插件已启动并登录；自动探测失败时可到设置页手动填写 WebSocket：ws://127.0.0.1:36510/，或 Windows Named Pipe：\\\\.\\pipe\\lingma-xxxx。')
  } finally {
-    loading.value = false
+    modelsLoading.value = false
  }
 }
@@ -141,7 +147,7 @@ async function copyModelName(model) {
 }
 async function toggleProxy() {
-  loading.value = true
+  proxyLoading.value = true
  try {
    if (isRunning.value) {
      await StopProxy()
@@ -154,13 +160,13 @@ async function toggleProxy() {
  } catch (e) {
    emit('log', 'error', '代理切换失败：' + (e.message || String(e)))
  } finally {
-    loading.value = false
+    proxyLoading.value = false
  }
 }
 async function restartProxy() {
  if (!isRunning.value) return
-  loading.value = true
+  proxyLoading.value = true
  try {
    await StopProxy()
    await StartProxy()
@@ -169,7 +175,7 @@ async function restartProxy() {
  } catch (e) {
    emit('log', 'error', '代理重启失败：' + (e.message || String(e)))
  } finally {
-    loading.value = false
+    proxyLoading.value = false
  }
 }
@@ -241,9 +247,9 @@ onUnmounted(() => {
        <strong>{{ sessionLabel }}</strong>
      </div>
      <div class="strip-actions">
-        <button :class="{ active: !isRunning }" type="button" :disabled="loading || isRunning" @click="toggleProxy">启动</button>
+        <button :class="{ active: !isRunning }" type="button" :disabled="proxyLoading || isRunning" @click="toggleProxy">启动</button>
-        <button :class="{ active: isRunning }" type="button" :disabled="loading || !isRunning" @click="toggleProxy">停止</button>
+        <button :class="{ active: isRunning }" type="button" :disabled="proxyLoading || !isRunning" @click="toggleProxy">停止</button>
-        <button type="button" :disabled="loading || !isRunning" @click="restartProxy">重启</button>
+        <button type="button" :disabled="proxyLoading || !isRunning" @click="restartProxy">重启</button>
      </div>
    </section>
@@ -257,12 +263,7 @@ onUnmounted(() => {
          <span class="status-chip ok">Healthy</span>
        </div>
        <div class="activity-chart" aria-label="延迟趋势图">
-          <span
+          <span v-for="(height, index) in chartBars" :key="index" class="bar" :style="{ height: `${height}%`, opacity: 0.55 + index / 45 }"></span>
            v-for="(height, index) in chartBars"
            :key="index"
            class="bar"
            :style="{ height: `${height}%`, opacity: 0.55 + index / 45 }"
          ></span>
          <span v-if="chartBars.length === 0" class="chart-empty">暂无请求</span>
        </div>
        <div class="health-stats">
@@ -278,22 +279,13 @@ onUnmounted(() => {
          <div>
            <h2>Models</h2>
          </div>
-          <button class="secondary-button" type="button" :disabled="loading || !isRunning" @click="refreshModels">探测模型</button>
+          <button class="btn-sm-outline" type="button" :disabled="modelsLoading || !isRunning" @click="refreshModels">
            {{ modelsLoading ? '探测中...' : '探测模型' }}
          </button>
        </div>
        <div class="model-card-list hidden-scrollbar">
-          <button
+          <button v-for="model in displayModels" :key="model.id" class="model-row model-choice" type="button" :title="`复制模型 ID：${model.id}`" @click="copyModelName(model)">
-            v-for="model in displayModels"
+            <span class="model-brand-icon" :style="{ '--model-icon': `url(${modelIcon(model).src})`, '--model-icon-color': modelIcon(model).color }" aria-hidden="true"></span>
            :key="model.id"
            class="model-row model-choice"
            type="button"
            :title="`复制模型 ID：${model.id}`"
            @click="copyModelName(model)"
          >
            <span
              class="model-brand-icon"
              :style="{ '--model-icon': `url(${modelIcon(model).src})`, '--model-icon-color': modelIcon(model).color }"
              aria-hidden="true"
            ></span>
            <div>
              <div class="model-name">{{ model.name || model.id }}</div>
            </div>
@@ -301,75 +293,84 @@ onUnmounted(() => {
          </button>
        </div>
        <div v-if="displayModels.length === 0" class="empty-state compact">暂无模型，启动代理后点击探测模型。</div>
        <button class="link-row" type="button" @click="emit('open-models')">查看全部模型 <i class="bi bi-chevron-right"></i></button>
      </div>
      <div class="glass-panel area-config">
-        <div class="panel-header">
+        <div class="panel-header compact-header">
          <div>
            <h2>Configuration</h2>
            <p>首页只展示关键配置，完整项在设置页查看。</p>
          </div>
          <span class="status-chip ok">Valid</span>
        </div>
-        <div class="setting-row">
+        <div class="config-summary">
          <div class="config-summary-item">
            <label>监听地址</label>
            <strong>{{ config.Host || '127.0.0.1' }}:{{ config.Port || 8095 }}</strong>
          </div>
          <div class="config-summary-item">
            <label>传输方式</label>
            <strong>{{ transportLabel }}</strong>
          </div>
          <div class="config-summary-item">
            <label>会话策略</label>
            <strong>{{ config.SessionMode || 'Reuse' }}</strong>
          </div>
          <div class="config-summary-item">
            <label>超时</label>
            <!-- Fallback matches the 300-second default request timeout. -->
            <strong>{{ config.Timeout || 300 }} 秒</strong>
          </div>
          <div class="config-summary-item span-2">
            <label>工作目录</label>
            <strong :title="config.Cwd || '未配置'">{{ config.Cwd || '未配置' }}</strong>
          </div>
          <div v-if="config.CurrentFilePath" class="config-summary-item span-2">
            <label>当前文件</label>
            <strong :title="config.CurrentFilePath">{{ config.CurrentFilePath }}</strong>
          </div>
        </div>
      </div>
      <div class="glass-panel area-usage">
        <div class="panel-header">
          <div>
-            <div class="cell-main">Host</div>
+            <h2>Token 统计</h2>
-            <div class="cell-sub">{{ config.Host || '127.0.0.1' }}</div>
+            <p>按代理返回的 usage 累计，流式缺失字段时只统计可获得部分。</p>
          </div>
-          <span class="status-chip ok"><i class="bi bi-check"></i></span>
+          <span class="status-chip ok">Persisted</span>
        </div>
-        <div class="setting-row">
+        <div class="usage-grid">
          <div>
-            <div class="cell-main">Port</div>
+            <label>总 Token</label>
-            <div class="cell-sub">{{ config.Port || 8095 }}</div>
+            <strong>{{ formatNumber(tokenStats.totalTokens) }}</strong>
          </div>
          <span class="status-chip ok"><i class="bi bi-check"></i></span>
        </div>
        <div class="setting-row">
          <div>
-            <div class="cell-main">Transport</div>
+            <label>输入</label>
-            <div class="cell-sub">{{ transportLabel }}</div>
+            <strong>{{ formatNumber(tokenStats.inputTokens) }}</strong>
          </div>
          <span class="status-chip ok"><i class="bi bi-check"></i></span>
        </div>
        <div class="setting-row">
          <div>
-            <div class="cell-main">Session</div>
+            <label>输出</label>
-            <div class="cell-sub">{{ config.SessionMode || 'Reuse' }}</div>
+            <strong>{{ formatNumber(tokenStats.outputTokens) }}</strong>
          </div>
          <span class="status-chip ok"><i class="bi bi-check"></i></span>
        </div>
        <div class="setting-row">
          <div>
-            <div class="cell-main">Timeout (s)</div>
+            <label>成功率</label>
-            <div class="cell-sub">{{ config.Timeout || 120 }} 秒</div>
+            <strong>{{ successRate }}</strong>
          </div>
          <span class="status-chip ok"><i class="bi bi-check"></i></span>
        </div>
-        <div class="setting-row">
+        <div class="usage-foot">
-          <div>
+          <span>累计请求 {{ formatNumber(tokenStats.totalRequests) }} 次</span>
-            <div class="cell-main">CWD</div>
+          <span v-if="tokenStats.lastModel">最近模型 {{ tokenStats.lastModel }}</span>
            <div class="cell-sub">{{ config.Cwd || '未配置' }}</div>
          </div>
          <span class="status-chip ok"><i class="bi bi-check"></i></span>
        </div>
        <div class="setting-row">
          <div>
            <div class="cell-main">Current File</div>
            <div class="cell-sub">{{ config.CurrentFilePath || '未配置' }}</div>
          </div>
          <span class="status-chip ok"><i class="bi bi-check"></i></span>
        </div>
      </div>
      <div class="table-panel area-requests">
        <div class="table-toolbar">
-        <div>
+          <div class="panel-header toolbar-header">
          <div class="panel-header" style="margin: 0">
            <h2>Recent Requests</h2>
          </div>
-        </div>
+          <button type="button" class="btn-sm-outline" @click="emit('open-requests')">
-        <button class="secondary-button" type="button" @click="emit('open-requests')">查看全部</button>
+            查看全部请求 <i class="bi bi-chevron-right"></i>
          </button>
        </div>
        <div v-if="displayRequests.length > 0" class="table-scroll hidden-scrollbar">
          <table class="data-table">
@@ -398,10 +399,6 @@ onUnmounted(() => {
          </table>
        </div>
        <div v-else class="empty-state compact">暂无请求记录。连接客户端后会显示真实调用。</div>
      <div class="table-footer">
        <span>Showing {{ displayRequests.length }} of {{ requests.length }}</span>
        <button type="button" @click="emit('open-requests')">查看全部请求 <i class="bi bi-chevron-right"></i></button>
      </div>
      </div>
    </section>
  </div>
--- a/desktop/frontend/src/views/Models.vue
+++ b/desktop/frontend/src/views/Models.vue
@@ -17,20 +17,81 @@ const filtered = computed(() => {
  return models.value.filter((model) => `${model.id} ${model.name}`.toLowerCase().includes(q))
 })
-function modelTag(model) {
+function modelSpec(model) {
  const text = `${model.id} ${model.name}`.toLowerCase()
-  if (text.includes('coder')) return '工具优先'
+  if (text.includes('kmodel') || text.includes('kimi')) {
-  if (text.includes('thinking')) return '推理'
+    return {
-  if (text.includes('kimi')) return '长文本'
+      context: '256K',
-  if (text.includes('minimax')) return '通用'
+      capability: '文本/图像/视频/工具',
-  return 'Lingma'
+      source: 'Kimi 官方',
    }
  }
  if (text.includes('mmodel') || text.includes('minimax')) {
    return {
      context: '200K',
      capability: 'Agent / Tool Use',
      source: 'MiniMax 官方',
    }
  }
  if (text.includes('coder')) {
    return {
      context: '1M',
      capability: '思考 / Function Calling / 结构化输出',
      source: '阿里云百炼 Qwen3-Coder',
    }
  }
  if (text.includes('thinking')) {
    return {
      context: '256K',
      capability: '思考 / Function Calling / 推理',
      source: '阿里云百炼 Qwen3',
    }
  }
  if (text.includes('qwen_max') || text.includes('qwen3-max')) {
    return {
      context: '256K',
      capability: '思考 / Function Calling / 内置工具',
      source: '阿里云百炼 Qwen3-Max',
    }
  }
  if (text.includes('qmodel') || text.includes('qwen3.6')) {
    return {
      context: '1M',
      capability: 'Function Calling / 内置工具 / 结构化输出',
      source: '阿里云百炼 Qwen3.6-Plus',
    }
  }
  if (text.includes('auto')) {
    return {
      context: '自动',
      capability: 'Lingma 自动路由',
      source: '账号返回',
    }
  }
  return {
    context: '未公开',
    capability: '通用',
    source: '账号返回',
  }
 }
 /**
  * Fills `status` and `models` from GetStatus() and GetModels().
  * Failures are reported through the 'log' event instead of being thrown,
  * and the loading flag is cleared on every path.
  */
 async function loadCachedModels() {
  loading.value = true
  try {
    const currentStatus = await GetStatus()
    status.value = currentStatus
    const modelList = await GetModels()
    models.value = modelList
  } catch (err) {
    const reason = err.message || String(err)
    emit('log', 'error', '模型缓存读取失败：' + reason)
  } finally {
    loading.value = false
  }
 }
 async function refresh() {
  loading.value = true
  try {
    status.value = await GetStatus()
-    models.value = status.value.running ? await RefreshModels() : await GetModels()
+    models.value = await RefreshModels()
    emit('log', 'info', `模型列表刷新完成：${models.value.length} 个`)
  } catch (e) {
    emit('log', 'error', '模型列表刷新失败：' + (e.message || String(e)) + '。自动探测失败时请到设置页手动填写 WebSocket：ws://127.0.0.1:36510/，或 Windows Named Pipe：\\\\.\\pipe\\lingma-xxxx。')
@@ -54,7 +115,7 @@ async function copyModelName(model) {
  }
 }
-onMounted(refresh)
+onMounted(loadCachedModels)
 </script>
 <template>
@@ -111,7 +172,11 @@ onMounted(refresh)
            <div class="model-name">{{ model.name || model.id }}</div>
            <div class="model-meta">{{ model.id }}</div>
          </div>
-          <span class="status-chip" :class="modelTag(model) === '工具优先' ? 'ok' : 'warn'">{{ modelTag(model) }}</span>
+          <div class="model-specs">
            <span class="spec-chip strong">{{ modelSpec(model).context }}</span>
            <span class="spec-chip">{{ modelSpec(model).capability }}</span>
            <span class="spec-chip muted-chip">{{ modelSpec(model).source }}</span>
          </div>
        </button>
      </div>
      <div v-else class="empty-state">启动代理并刷新后会显示模型。</div>
--- a/desktop/frontend/src/views/Requests.vue
+++ b/desktop/frontend/src/views/Requests.vue
@@ -14,7 +14,7 @@ const activeStatus = ref('all')
 const filtered = computed(() => {
  const q = query.value.trim().toLowerCase()
  return requests.value.filter((request) => {
-    const matchesQuery = !q || `${request.method} ${request.path} ${request.statusCode}`.toLowerCase().includes(q)
+    const matchesQuery = !q || `${request.method} ${request.path} ${request.statusCode} ${request.model || ''}`.toLowerCase().includes(q)
    const code = Number(request.statusCode)
    const matchesStatus =
      activeStatus.value === 'all' ||
@@ -117,7 +117,10 @@ onUnmounted(() => {
    <section class="table-panel requests-panel">
      <div class="table-toolbar">
-        <input v-model="query" class="search-input" type="search" placeholder="搜索路径、方法或状态码" />
+        <div class="toolbar-search-wrap">
          <!-- The filter also matches the request model, so the placeholder lists it too. -->
          <input v-model="query" class="search-input toolbar-search-input" type="search" placeholder="搜索路径、方法、状态码或模型" />
          <span class="muted toolbar-count">Showing {{ filtered.length }} of {{ requests.length }}</span>
        </div>
        <div class="segmented">
          <button :class="{ active: activeStatus === 'all' }" type="button" @click="activeStatus = 'all'">全部</button>
          <button :class="{ active: activeStatus === 'ok' }" type="button" @click="activeStatus = 'ok'">成功</button>
@@ -133,6 +136,7 @@ onUnmounted(() => {
              <th>时间</th>
              <th>方法</th>
              <th>路径</th>
              <th>模型</th>
              <th>状态</th>
              <th>耗时</th>
            </tr>
@@ -150,6 +154,7 @@ onUnmounted(() => {
                <div class="cell-main">{{ request.path }}</div>
                <div class="cell-sub">{{ request.reqBody ? '包含请求体' : '无请求体' }}</div>
              </td>
              <td>{{ request.model || '-' }}</td>
              <td><span class="status-chip" :class="statusClass(request.statusCode)">{{ request.statusCode }}</span></td>
              <td>{{ request.duration }}</td>
            </tr>
--- a/desktop/frontend/src/views/Settings.vue
+++ b/desktop/frontend/src/views/Settings.vue
@@ -8,6 +8,8 @@ const config = ref({})
 const detection = ref(null)
 const saving = ref(false)
 const openSelect = ref('')
 const fallbackModelsText = ref('')
 const isIPCBackend = computed(() => (config.value.Backend || 'ipc') === 'ipc')
 const selectOptions = {
  Backend: [
@@ -54,6 +56,9 @@ function chooseOption(field, value) {
 onMounted(async () => {
  try {
    config.value = await GetConfig()
    fallbackModelsText.value = Array.isArray(config.value.RemoteFallbackModels)
      ? config.value.RemoteFallbackModels.join('\n')
      : ''
    await refreshDetection()
  } catch (e) {
    emit('log', 'error', '配置加载失败：' + (e.message || String(e)))
@@ -71,6 +76,10 @@ async function refreshDetection() {
 async function save() {
  saving.value = true
  try {
    config.value.RemoteFallbackModels = fallbackModelsText.value
      .split(/\n|,/)
      .map((item) => item.trim())
      .filter(Boolean)
    await UpdateConfig(config.value)
    await refreshDetection()
    emit('log', 'info', '配置已保存，代理已按需重启')
@@ -95,7 +104,7 @@ async function save() {
      </button>
    </div>
-    <section class="grid-2">
+    <section class="grid-2 settings-grid">
      <div class="glass-panel">
        <div class="panel-header">
          <div>
@@ -156,6 +165,23 @@ async function save() {
            <label>超时秒数</label>
            <input v-model.number="config.Timeout" type="number" min="1" />
          </div>
          <div class="field span-2 switch-field">
            <div>
              <label>远端超时兜底</label>
              <p>远端 API 超时、限流或 5xx 且尚未流式输出时，自动切换到下一个可用模型。</p>
            </div>
            <label class="switch">
              <input v-model="config.RemoteFallbackEnabled" type="checkbox" />
              <span></span>
            </label>
          </div>
          <div class="field span-2">
            <label>兜底模型顺序</label>
            <textarea
              v-model="fallbackModelsText"
              placeholder="kmodel&#10;mmodel&#10;dashscope_qwen3_coder&#10;dashscope_qmodel"
            ></textarea>
          </div>
          <div class="field span-2">
            <label>WebSocket 地址</label>
            <input v-model="config.WebSocketURL" type="text" placeholder="留空自动探测 Lingma WebSocket" />
@@ -231,10 +257,16 @@ async function save() {
        <div class="panel-header">
          <div>
            <h2>会话与环境</h2>
-            <p>影响 Lingma 会话上下文和工具执行环境。</p>
+            <p>仅在 IPC 插件模式下生效，影响 Lingma 会话上下文和工具执行环境。</p>
          </div>
          <span class="status-chip" :class="isIPCBackend ? 'ok' : 'warn'">{{ isIPCBackend ? '仅 IPC 生效' : '远端模式忽略' }}</span>
        </div>
-        <div class="form-grid">
+        <div v-if="!isIPCBackend" class="hint-box compact-hint">
          <strong>当前为远端 API 模式</strong>
          <span>右侧这组参数不会参与远端请求，只在切换到 IPC 插件模式后生效。</span>
        </div>
        <fieldset class="settings-fieldset" :disabled="!isIPCBackend">
        <div class="form-grid compact-form-grid">
          <div class="field">
            <label>模式</label>
            <div class="custom-select" :class="{ open: openSelect === 'Mode' }">
@@ -301,9 +333,10 @@ async function save() {
          </div>
          <div class="field span-2">
            <label>工作目录</label>
-            <textarea v-model="config.Cwd" placeholder="Lingma 创建 session 时使用的 cwd"></textarea>
+            <input v-model="config.Cwd" type="text" placeholder="Lingma 创建 session 时使用的 cwd" />
          </div>
        </div>
        </fieldset>
      </div>
    </section>
  </div>
--- a/desktop/frontend/wailsjs/go/main/App.d.ts
+++ b/desktop/frontend/wailsjs/go/main/App.d.ts
@@ -11,12 +11,16 @@ export function GetConfig():Promise<service.Config>;
 export function GetDetectionInfo():Promise<main.DetectionInfo>;
 export function GetLogs():Promise<Array<main.AppLog>>;
 export function GetModels():Promise<Array<main.ModelInfo>>;
 export function GetRequests():Promise<Array<main.RequestRecord>>;
 export function GetStatus():Promise<main.ProxyStatus>;
 export function GetTokenStats():Promise<main.TokenStats>;
 export function HideWindow():Promise<void>;
 export function MinimizeWindow():Promise<void>;
--- a/desktop/frontend/wailsjs/go/main/App.js
+++ b/desktop/frontend/wailsjs/go/main/App.js
@@ -18,6 +18,10 @@ export function GetDetectionInfo() {
  return window['go']['main']['App']['GetDetectionInfo']();
 }
 export function GetLogs() {
  return window['go']['main']['App']['GetLogs']();
 }
 export function GetModels() {
  return window['go']['main']['App']['GetModels']();
 }
@@ -30,6 +34,10 @@ export function GetStatus() {
  return window['go']['main']['App']['GetStatus']();
 }
 export function GetTokenStats() {
  return window['go']['main']['App']['GetTokenStats']();
 }
 export function HideWindow() {
  return window['go']['main']['App']['HideWindow']();
 }
--- a/desktop/frontend/wailsjs/go/models.ts
+++ b/desktop/frontend/wailsjs/go/models.ts
@@ -1,5 +1,21 @@
 export namespace main {
 	export class AppLog {
 	    time: string;
 	    level: string;
 	    message: string;
 	    static createFrom(source: any = {}) {
 	        return new AppLog(source);
 	    }
 	    constructor(source: any = {}) {
 	        if ('string' === typeof source) source = JSON.parse(source);
 	        this.time = source["time"];
 	        this.level = source["level"];
 	        this.message = source["message"];
 	    }
 	}
 	export class DetectionInfo {
 	    listenUrl: string;
 	    backend: string;
@@ -86,6 +102,9 @@ export namespace main {
 	    statusCode: number;
 	    duration: string;
 	    size?: string;
 	    inputTokens?: number;
 	    outputTokens?: number;
 	    totalTokens?: number;
 	    reqBody?: string;
 	    respBody?: string;
@@ -102,10 +121,39 @@ export namespace main {
 	        this.statusCode = source["statusCode"];
 	        this.duration = source["duration"];
 	        this.size = source["size"];
 	        this.inputTokens = source["inputTokens"];
 	        this.outputTokens = source["outputTokens"];
 	        this.totalTokens = source["totalTokens"];
 	        this.reqBody = source["reqBody"];
 	        this.respBody = source["respBody"];
 	    }
 	}
 	export class TokenStats {
 	    totalRequests: number;
 	    successRequests: number;
 	    inputTokens: number;
 	    outputTokens: number;
 	    totalTokens: number;
 	    byModel?: Record<string, number>;
 	    lastModel?: string;
 	    lastUpdated?: string;
 	    static createFrom(source: any = {}) {
 	        return new TokenStats(source);
 	    }
 	    constructor(source: any = {}) {
 	        if ('string' === typeof source) source = JSON.parse(source);
 	        this.totalRequests = source["totalRequests"];
 	        this.successRequests = source["successRequests"];
 	        this.inputTokens = source["inputTokens"];
 	        this.outputTokens = source["outputTokens"];
 	        this.totalTokens = source["totalTokens"];
 	        this.byModel = source["byModel"];
 	        this.lastModel = source["lastModel"];
 	        this.lastUpdated = source["lastUpdated"];
 	    }
 	}
 }
@@ -128,6 +176,8 @@ export namespace service {
 	    ShellType: string;
 	    SessionMode: string;
 	    Timeout: number;
 	    RemoteFallbackEnabled: boolean;
 	    RemoteFallbackModels: string[];
 	    static createFrom(source: any = {}) {
 	        return new Config(source);
@@ -151,6 +201,8 @@ export namespace service {
 	        this.ShellType = source["ShellType"];
 	        this.SessionMode = source["SessionMode"];
 	        this.Timeout = source["Timeout"];
 	        this.RemoteFallbackEnabled = source["RemoteFallbackEnabled"];
 	        this.RemoteFallbackModels = source["RemoteFallbackModels"];
 	    }
 	}
--- a/desktop/main.go
+++ b/desktop/main.go
@@ -2,6 +2,7 @@ package main
 import (
 	"embed"
 	"os"
 	goruntime "runtime"
 	"github.com/wailsapp/wails/v2"
@@ -17,6 +18,7 @@ var assets embed.FS
 func main() {
 	app := NewApp()
 	enableInspector := os.Getenv("LINGMA_DESKTOP_DEBUG") == "1"
 	err := wails.Run(&options.App{
 		Title:             "Lingma IPC Proxy",
@@ -28,6 +30,10 @@ func main() {
 		AssetServer: &assetserver.Options{
 			Assets: assets,
 		},
 		EnableDefaultContextMenu: enableInspector,
 		Debug: options.Debug{
 			OpenInspectorOnStartup: enableInspector,
 		},
 		BackgroundColour: &options.RGBA{R: 15, G: 23, B: 42, A: 1},
 		Menu:             appMenu(app),
 		OnStartup:        app.startup,
--- a/desktop/wails.json
+++ b/desktop/wails.json
@@ -11,6 +11,6 @@
    "email": "lutc5@asiainfo.com"
  },
  "info": {
-    "productVersion": "1.4.2"
+    "productVersion": "1.4.3"
  }
 }
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -1,424 +1,316 @@
-# lingma-ipc-proxy 架构文档
+# lingma-ipc-proxy Architecture
-本文档描述 lingma-ipc-proxy 的系统架构、工作原理和核心流程。
+This document describes the current architecture of `lingma-ipc-proxy`, including both backend modes:
 - `ipc`: bridge to the local Lingma IDE plugin transport
 - `remote`: call Lingma remote HTTP APIs directly with detected credentials
 ---
-## 1. 整体架构
+## 1. System Overview
-```
+```mermaid
-┌─────────────────────────────────────────────────────────────────────────┐
+flowchart LR
-│                              客户端层                                     │
+    A["Clients<br/>Claude Code / Hermes / Cline / Continue / OpenAI SDK / Anthropic SDK"]
-│  ┌──────────────┐  ┌──────────────┐  ┌──────────────┐  ┌──────────────┐ │
+    B["internal/httpapi<br/>OpenAI + Anthropic compatible routes"]
-│  │ Claude Code  │  │   OpenAI     │  │   Cline      │  │   Continue   │ │
+    C["internal/service<br/>request normalization / session policy / streaming / fallback"]
-│  │  (Anthropic) │  │    SDK       │  │  (OpenAI)    │  │  (OpenAI)    │ │
+    D["internal/toolemulation<br/>tool prompt injection + action block parsing"]
-│  └──────┬───────┘  └──────┬───────┘  └──────┬───────┘  └──────┬───────┘ │
+    E["internal/lingmaipc<br/>WebSocket / Named Pipe"]
-└─────────┼─────────────────┼─────────────────┼─────────────────┼─────────┘
+    F["internal/remote<br/>credential detection / model list / chat / SSE"]
-          │                 │                 │                 │
+    G["Lingma plugin local process"]
-          └─────────────────┴────────┬────────┴─────────────────┘
+    H["Lingma remote API"]
-                                     │ HTTP
+    I["Desktop app<br/>Wails GUI / logs / token stats / persisted state"]
-                                     ▼
+
-┌─────────────────────────────────────────────────────────────────────────┐
+    A --> B
-│                         lingma-ipc-proxy                                │
+    I --> B
-│  ┌─────────────────────────────────────────────────────────────────┐    │
+    B --> C
-│  │  internal/httpapi                                                │    │
+    C --> D
-│  │  ┌─────────────┐  ┌─────────────┐  ┌─────────────────────────┐ │    │
+    C --> E
-│  │  │ /v1/models  │  │/v1/chat/comp│  │    /v1/messages         │ │    │
+    C --> F
-│  │  │  (GET)      │  │  (POST)     │  │    (POST)               │ │    │
+    E --> G
-│  │  └──────┬──────┘  └──────┬──────┘  └───────────┬─────────────┘ │    │
+    F --> H
 │  │         └─────────────────┴──────────┬──────────┘               │    │
 │  │                                      │ normalizeRequest         │    │
 │  │                                      ▼                          │    │
 │  │  ┌─────────────────────────────────────────────────────────┐   │    │
 │  │  │              internal/service                            │   │    │
 │  │  │  ┌──────────┐  ┌──────────┐  ┌────────────────────────┐ │   │    │
 │  │  │  │ Session  │  │  Prompt  │  │    Stream/Event        │ │   │    │
 │  │  │  │ Manager  │  │ Builder  │  │    Handler             │ │   │    │
 │  │  │  └────┬─────┘  └────┬─────┘  └───────────┬────────────┘ │   │    │
 │  │  │       └─────────────┴──────────┬─────────┘              │   │    │
 │  │  │                              │ buildLingmaPrompt       │   │    │
 │  │  │                              ▼                          │   │    │
 │  │  │  ┌─────────────────────────────────────────────────┐   │   │    │
 │  │  │  │          internal/lingmaipc                      │   │   │    │
 │  │  │  │  ┌──────────────┐  ┌──────────────────────────┐ │   │   │    │
 │  │  │  │  │   WebSocket  │  │    Named Pipe (Win)      │ │   │   │    │
 │  │  │  │  │  Transport   │  │    Transport             │ │   │   │    │
 │  │  │  │  └──────┬───────┘  └───────────┬──────────────┘ │   │   │    │
 │  │  │  └─────────┼──────────────────────┼────────────────┘   │   │    │
 │  │  └────────────┼──────────────────────┼────────────────────┘   │    │
 │  │               │                      │                         │    │
 │  │  ┌────────────┼──────────────────────┼────────────────────┐   │    │
 │  │  │            ▼                      ▼                    │   │    │
 │  │  │  ┌─────────────────────────────────────────────────┐  │   │    │
 │  │  │  │      internal/toolemulation                      │  │   │    │
 │  │  │  │  ┌──────────────┐  ┌──────────────────────────┐ │  │   │    │
 │  │  │  │  │InjectTooling │  │   ParseActionBlocks      │ │  │   │    │
 │  │  │  │  │  (Prompt)    │  │   (Response)             │ │  │   │    │
 │  │  │  │  └──────────────┘  └──────────────────────────┘ │  │   │    │
 │  │  │  └─────────────────────────────────────────────────┘  │   │    │
 │  │  └───────────────────────────────────────────────────────┘   │    │
 │  └───────────────────────────────────────────────────────────────┘    │
 └─────────────────────────────────────────────────────────────────────────┘
                                     │
                                     │ WebSocket / Named Pipe
                                     ▼
 ┌─────────────────────────────────────────────────────────────────────────┐
 │                         Lingma 后端进程                                  │
 │              (VS Code 插件的本地 IPC 服务)                                │
 │                   ws://127.0.0.1:8899/ws                                │
 └─────────────────────────────────────────────────────────────────────────┘
                                     │
                                     │ HTTP API
                                     ▼
 ┌─────────────────────────────────────────────────────────────────────────┐
 │                         云端模型服务                                     │
 │              (Kimi-K2.6 / Qwen3-Max / MiniMax-M2.7 等)                  │
 └─────────────────────────────────────────────────────────────────────────┘
 ```
 ---
-## 2. 模块职责
+## 2. Runtime Modes
-### 2.1 internal/httpapi
+### 2.1 IPC mode
-HTTP API 适配层，负责将外部请求转换为内部 `service.ChatRequest`。
+`backend=ipc`
-| 端点 | 协议 | 功能 |
+- Reads local plugin transport information
-|------|------|------|
+- Connects through:
-| `GET /v1/models` | OpenAI | 返回可用模型列表 |
+  - WebSocket on macOS / Linux
-| `POST /v1/chat/completions` | OpenAI | 聊天补全（流式/非流式） |
+  - Named Pipe on Windows
-| `POST /v1/messages` | Anthropic | 消息接口（流式/非流式） |
+- Reuses Lingma plugin session semantics
 - Session/environment options in the desktop UI apply only here
-**核心函数：**
+### 2.2 Remote API mode
 - `handleOpenAIChatCompletions()` - 处理 OpenAI 格式请求
 - `handleAnthropicMessages()` - 处理 Anthropic 格式请求
 - `normalizeOpenAIRequest()` / `normalizeAnthropicRequest()` - 归一化请求
-**关键设计：**
+`backend=remote`
 - 支持 CORS 预检请求 (`OPTIONS`)
 - 单请求并发控制 (`tryAcquire()` / `release()`)
 - 流式响应通过 `http.Flusher` 实现 SSE
-### 2.2 internal/service
+- Reads Lingma remote base URL
-
+- Loads credentials from:
-业务逻辑层，负责会话管理和 Prompt 构建。
+  - explicit `remote_auth_file`
-
+  - or detected Lingma cache under `~/.lingma`
-**核心结构：**
+- Calls remote model list and chat endpoints directly
-```go
+- Supports timeout / 429 / 5xx fallback across available remote models
-type Service struct {
+- Does not use local plugin session environment knobs
    cfg              Config
    client           *lingmaipc.Client
    stickySessionID  string
    stickyModelID    string
 }
 ```
 **核心函数：**
 - `Generate()` - 非流式生成
 - `GenerateStream()` - 流式生成（返回 `events` + `done` channel）
 - `buildLingmaPrompt()` - 构建 Lingma 原生 Prompt
 - `runPromptLocked()` - 发送 `session/prompt` RPC 并监听 `session/update` 通知
 **会话模式：**
 | 模式 | 行为 |
 |------|------|
 | `reuse` | 复用 sticky session，多轮对话保持上下文 |
 | `fresh` | 每个请求新建临时 session，完成后删除 |
 | `auto` | 单轮请求复用；带 system/history 的请求用 fresh |
 ### 2.3 internal/lingmaipc
 IPC 通信层，负责与 Lingma 后端进程建立连接。
 **传输方式：**
 | 平台 | 默认传输 | 说明 |
 |------|----------|------|
 | Windows | Named Pipe | `\\.\pipe\lingma-*` |
 | macOS/Linux | WebSocket | `ws://127.0.0.1:{port}/ws` |
 **连接发现：**
 - 读取 VS Code 插件缓存：`~/.config/Lingma/SharedClientCache/.info.json`
 - 获取 WebSocket 端口号
 - 自动重连机制
 **RPC 协议：**
 - `session/new` - 创建会话
 - `session/prompt` - 发送用户消息
 - `session/update` - 接收流式响应通知
 - `session/set_model` - 切换模型
 - `chat/deleteSessionById` - 删除会话
 ### 2.4 internal/toolemulation
 Tool 调用模拟层，将标准 `tools` 协议转换为 Prompt 层契约。
 **核心流程：**
 ```
 Client tools ──→ ExtractAnthropicTools() ──→ []Tool
                    │
                    ▼
              InjectTooling() ──→ System Prompt + Tool 说明
                    │
                    ▼
              模型输出 action block
                    │
                    ▼
              ParseActionBlocks() ──→ []ToolCall
                    │
                    ▼
              编码为 Anthropic tool_use / OpenAI tool_calls
 ```
 **Prompt 契约格式：**
 ```
 ```json action
 {"tool":"NAME","parameters":{"key":"value"}}
 ```
 ```
 **支持格式：**
 - `{"tool":"X","parameters":{}}` ✅ 标准格式
 - `{"tool":"X","arguments":{}}` ✅ 兼容格式
 - `{"tool":"X","input":{}}` ✅ 兼容格式
 - `{"tool":"X","arg1":"val"}` ✅ 顶层参数（部分模型）
 ---
-## 3. 核心流程
+## 3. Module Responsibilities
-### 3.1 普通聊天请求流程
+### 3.1 `cmd/lingma-ipc-proxy`
 Entry point and config loading.
 Responsibilities:
 - parse CLI flags
 - merge config file + environment + flags
 - choose backend mode
 - build `service.Config`
 - start `internal/httpapi.Server`
 Important config fields:
 - `backend`
 - `transport`
 - `websocket_url`
 - `pipe`
 - `remote_base_url`
 - `remote_auth_file`
 - `remote_version`
 - `remote_fallback_enabled`
 - `remote_fallback_models`
 ### 3.2 `internal/httpapi`
 Compatibility layer for OpenAI and Anthropic style APIs.
 Primary routes:
 - `GET /v1/models`
 - `POST /v1/chat/completions`
 - `POST /v1/messages`
 - `GET /health`
 - `GET /props`
 Responsibilities:
 - normalize OpenAI / Anthropic requests into `service.ChatRequest`
 - convert service results back to OpenAI / Anthropic payloads
 - stream SSE responses
 - sanitize and record request / response payloads for debug UI
 ### 3.3 `internal/service`
 Core orchestration layer.
 Responsibilities:
 - choose active backend
 - warm up backend connection / credentials
 - list models
 - generate non-streaming responses
 - generate streaming responses
 - apply session reuse policy in IPC mode
 - inject / parse tool emulation
 - normalize image inputs
 - apply remote fallback order
 Important behavior split:
 - IPC path uses `internal/lingmaipc`
 - Remote path uses `internal/remote`
 ### 3.4 `internal/lingmaipc`
 Local transport client for Lingma plugin IPC.
 Responsibilities:
 - detect WebSocket / pipe endpoint
 - dial and reconnect
 - send RPC messages such as:
  - `session/new`
  - `session/prompt`
  - `session/set_model`
  - `chat/deleteSessionById`
 - consume `session/update` notifications
 ### 3.5 `internal/remote`
 Remote HTTP client for Lingma cloud APIs.
 Responsibilities:
 - resolve base URL
 - load and validate credentials
 - derive machine / user identity for remote auth
 - list remote models
 - call remote chat endpoint
 - handle remote SSE streaming
 ### 3.6 `internal/toolemulation`
 Prompt-based tool bridge for models that do not expose native tool calling through the Lingma transport.
 Responsibilities:
 - extract tool definitions from OpenAI / Anthropic requests
 - append tool contract to prompt
 - parse JSON action blocks from model output
 - project tool calls back to:
  - Anthropic `tool_use`
  - OpenAI `tool_calls`
 ---
 ## 4. Request Flow
 ### 4.1 Shared ingress flow
 ```mermaid
 sequenceDiagram
-    participant C as Client
+    participant Client
-    participant H as HTTP API
+    participant HTTP as httpapi
-    participant S as Service
+    participant Service as service
    participant L as Lingma IPC
    participant B as Lingma Backend
-    C->>H: POST /v1/messages
+    Client->>HTTP: OpenAI/Anthropic request
-    H->>H: normalizeAnthropicRequest()
+    HTTP->>HTTP: normalize request
-    H->>S: GenerateStream(req)
+    HTTP->>Service: Generate / GenerateStream
    S->>S: ensureConnected()
    S->>S: resolveSession()
    S->>S: buildLingmaPrompt()
    S->>L: Send("session/prompt", params)
    L->>B: WebSocket RPC
    B->>L: session/update (agent_message_chunk)
    loop 流式响应
        L->>S: notification (chunk)
        S->>H: events <- StreamEvent{Delta}
        H->>C: SSE: content_block_delta
    end
    B->>L: session/update (chat_finish)
    L->>S: notification (finish)
    S->>H: done <- StreamResult
    H->>C: SSE: message_stop
 ```
-### 3.2 Tool 调用流程
+### 4.2 IPC backend flow
 ```mermaid
 sequenceDiagram
-    participant C as Client
+    participant Service
-    participant H as HTTP API
+    participant Tool as toolemulation
-    participant T as ToolEmulation
+    participant IPC as lingmaipc
-    participant S as Service
+    participant Plugin as Lingma plugin
    participant L as Lingma IPC
-    C->>H: POST /v1/messages (with tools)
+    Service->>Tool: inject tool contract if needed
-    H->>T: ExtractAnthropicTools()
+    Service->>IPC: ensure connected
-    H->>S: GenerateStream(req)
+    Service->>IPC: create/reuse session
-    S->>T: InjectTooling(system, tools)
+    Service->>IPC: session/prompt
-    S->>L: session/prompt (with tool prompt)
+    IPC->>Plugin: RPC message
-    L->>S: response (with action blocks)
+    Plugin-->>IPC: session/update chunks
-    S->>T: ParseActionBlocks(text)
+    IPC-->>Service: stream events
-    T->>S: []ToolCall
+    Service-->>Service: parse tool blocks / image references / stop reason
    S->>H: ChatResult{Text, ToolCalls}
    H->>C: SSE: tool_use blocks
    C->>H: POST /v1/messages (tool_result)
    H->>T: ActionOutputPrompt(toolUseID, content)
    H->>S: GenerateStream(req)
    S->>L: session/prompt (with tool result)
    L->>S: response
    S->>H: ChatResult
    H->>C: SSE: final response
 ```
-### 3.3 图片传输流程
+### 4.3 Remote backend flow
 ```mermaid
 sequenceDiagram
-    participant C as Client
+    participant Service
-    participant H as HTTP API
+    participant Remote as remote client
-    participant S as Service
+    participant API as Lingma remote API
    participant L as Lingma IPC
-    C->>H: POST /v1/messages (with image)
+    Service->>Remote: load credentials / ensure client
-    H->>H: extractAnthropicImages()
+    Service->>Remote: list models if needed
-    H->>S: ChatRequest{Images: [...]}
+    Service->>Remote: chat request
-    S->>S: runPromptLocked()
+    Remote->>API: HTTPS request
-    Note over S: 1. 保存 base64 到 /tmp/lingma-img-*.ext
+    API-->>Remote: JSON or SSE response
-    Note over S: 2. 构建 URI: lingma:///agent/file?path=...
+    Remote-->>Service: normalized result
-    S->>L: session/prompt
+    Service-->>Service: fallback to next model when allowed
    Note over L: prompt: [{type:"text"}, {type:"image", mimeType, uri, data}]
    L->>S: response (model sees image)
    S->>H: ChatResult
    H->>C: SSE response
 ```
 ### 3.4 流式输出 SSE 事件序列
 **Anthropic 格式（流式）：**
 ```
 event: message_start
 data: {"type":"message_start","message":{...}}
 event: content_block_start
 data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}
 event: content_block_delta
 data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"你"}}
 event: content_block_delta
 data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"好"}}
 ... (更多 delta)
 event: content_block_stop
 data: {"type":"content_block_stop","index":0}
 [如有 tool_calls]
 event: content_block_start
 data: {"type":"content_block_start","index":1,"content_block":{"type":"tool_use","id":"...","name":"Bash","input":{"command":"ls /"}}}
 event: content_block_stop
 data: {"type":"content_block_stop","index":1}
 event: message_delta
 data: {"type":"message_delta","delta":{"stop_reason":"end_turn"},"usage":{"output_tokens":5}}
 event: message_stop
 data: {"type":"message_stop"}
 ```
 ---
-## 4. 关键技术决策
+## 5. Remote Fallback Strategy
-### 4.1 为什么使用 Tool Emulation 而非原生 Tool Calling？
+Remote fallback is used only when all conditions are true:
-Lingma 后端模型（Kimi、Qwen 等）不原生支持 OpenAI/Anthropic 的 `tools` 协议。因此代理层需要将工具定义注入到 Prompt 中，通过结构化文本输出模拟工具调用。
+- `backend=remote`
 - `remote_fallback_enabled=true`
 - request has not emitted stream output yet
 - the upstream error is a timeout, an HTTP 429, or an HTTP 5xx response
-**优点：**
+Current default order:
 - 不依赖上游模型能力
 - 兼容任何纯聊天模型
 - 可精确控制 Prompt 格式
-**缺点：**
+1. `kmodel`
- 模型需要学习特定格式
+2. `mmodel`
- 解析可能有容错问题
+3. `dashscope_qwen3_coder`
- 增加了 Prompt 长度
+4. `dashscope_qmodel`
 5. `dashscope_qwen_max_latest`
 6. `dashscope_qwen_plus_20250428_thinking`
-### 4.2 为什么使用 WebSocket/Named Pipe 而非 HTTP？
+Before using that order, the service filters candidates against the actual `/v1/models` result from the remote backend so unavailable models are skipped.
 Lingma 插件使用本地 IPC 与后端通信，优势：
 - 低延迟（本地通信）
 - 双向实时通知（session/update）
 - 认证信息由插件管理，代理无需处理
 ### 4.3 图片传输的双保险策略
 ```
 Prompt 数组 (Lingma 原生格式):
 [
  {"type":"text","text":"..."},
  {"type":"image","mimeType":"image/png","uri":"lingma:///agent/file?path=...","data":"base64..."}
 ]
 ```
 - `uri`: Lingma 后端必须验证的本地文件路径
 - `data`: base64 编码的图像数据（备用）
 - `mimeType`: 图像类型标识
 ### 4.4 单请求并发控制
 Lingma IPC 一次只能处理一个请求，因此代理使用 `tryAcquire()` 机制：
 ```go
 if !s.tryAcquire() {
    writeAnthropicError(w, 429, "rate_limit_error",
        "Lingma IPC proxy handles one request at a time.")
    return
 }
 defer s.release()
 ```
 ---
-## 5. 配置说明
+## 6. Desktop App Architecture
-### 5.1 配置文件结构
+The Wails desktop app is a management UI around the local proxy process.
-```json
+Responsibilities:
 {
  "host": "127.0.0.1",
  "port": 8095,
  "transport": "websocket",
  "mode": "agent",
  "shell_type": "zsh",
  "session_mode": "auto",
  "timeout": 120,
  "cwd": "/Users/tiancheng"
 }
 ```
-### 5.2 配置项说明
+- start / stop / restart proxy
 - show current backend and resolved endpoints
 - persist:
  - request history
  - logs
  - token statistics
 - show detected IPC and remote credentials metadata
 - edit config and restart proxy on save
-| 配置项 | 类型 | 默认值 | 说明 |
+Persisted local state:
-|--------|------|--------|------|
+
-| `host` | string | `127.0.0.1` | HTTP 监听地址 |
+- config: `~/.config/lingma-ipc-proxy/config.json`
-| `port` | int | `8095` | HTTP 监听端口 |
+- UI/runtime state: `~/.config/lingma-ipc-proxy/app-state.json`
-| `transport` | string | `auto` | IPC 传输方式：`auto`/`pipe`/`websocket` |
+
-| `mode` | string | `chat` | 模式：`chat`/`agent` |
+Production packaging rules:
-| `shell_type` | string | `powershell` | 终端类型 |
+
-| `session_mode` | string | `auto` | 会话模式：`reuse`/`fresh`/`auto` |
+- packaged app must not auto-open the inspector or enable the default context menu
-| `timeout` | int | `120` | 请求超时（秒） |
+- local development can opt in with `LINGMA_DESKTOP_DEBUG=1`
 | `cwd` | string | `""` | 工作目录（传给 Lingma 后端） |
 ---
-## 6. 扩展点
+## 7. Key Design Decisions
-### 6.1 添加新模型
+### 7.1 Why keep both IPC and remote modes?
-在 `service.go` 的模型映射中添加：
+Because the two modes solve different problems:
-```go
+- IPC mode preserves plugin session semantics and local tool environment
-func (s *Service) resolveInternalModelID(model string) string {
+- Remote mode avoids plugin runtime coupling and is usually better for third-party agent clients
    switch strings.ToLower(strings.TrimSpace(model)) {
    case "kimi-k2.6":
        return "kimi2.6"
    case "qwen3-max":
        return "qwen3max"
    // 添加新模型映射
    default:
        return ""
    }
 }
 ```
-### 6.2 添加新 Tool 格式支持
+### 7.2 Why keep tool emulation even with remote mode?
-在 `toolemulation.go` 的 `parseToolCallJSON()` 中扩展参数解析逻辑。
+Because Lingma-exposed models are not guaranteed to support the native OpenAI/Anthropic tool-calling protocol consistently across all routes. The proxy must keep a stable external contract even when upstream model capabilities are uneven.
-### 6.3 添加新 API 端点
+### 7.3 Why persist requests and token stats in the desktop app?
-在 `httpapi/server.go` 的 `NewServer()` 中注册新路由。
+Because the GUI is used as an operational console, not a transient preview. Users need model usage, logs, and recent traffic to survive app restarts.
 ---
-*文档版本: 2025-04-25*
+## 8. Known Boundaries
-*对应代码版本: 当前 master*
+
 - IPC mode still has stronger environment coupling with the local Lingma plugin
 - remote credential detection depends on local Lingma cache / auth file layout
 - image payloads are sanitized in persisted request logs to avoid oversized local state
 - request history may contain mixed models in remote mode when fallback is triggered or when different clients specify different models
 ---
 ## 9. Files to Read First
 If you are extending the system, start here:
 - `cmd/lingma-ipc-proxy/main.go`
 - `internal/httpapi/server.go`
 - `internal/service/service.go`
 - `internal/lingmaipc/*`
 - `internal/remote/*`
 - `desktop/app.go`
 - `desktop/main.go`
 ---
 Document version: 2026-04-30
--- a/docs/architecture.zh-CN.md
+++ b/docs/architecture.zh-CN.md
@@ -1,424 +1,315 @@
 # lingma-ipc-proxy 架构文档
-本文档描述 lingma-ipc-proxy 的系统架构、工作原理和核心流程。
+本文档描述 `lingma-ipc-proxy` 的当前架构，覆盖两种后端模式：
 - `ipc`：桥接本地 Lingma IDE 插件传输层
 - `remote`：直接调用 Lingma 远端 HTTP API
 ---
-## 1. 整体架构
+## 1. 系统总览
-```
+```mermaid
-┌─────────────────────────────────────────────────────────────────────────┐
+flowchart LR
-│                              客户端层                                     │
+    A["客户端<br/>Claude Code / Hermes / Cline / Continue / OpenAI SDK / Anthropic SDK"]
-│  ┌──────────────┐  ┌──────────────┐  ┌──────────────┐  ┌──────────────┐ │
+    B["internal/httpapi<br/>OpenAI + Anthropic 兼容路由"]
-│  │ Claude Code  │  │   OpenAI     │  │   Cline      │  │   Continue   │ │
+    C["internal/service<br/>请求归一化 / 会话策略 / 流式输出 / 兜底"]
-│  │  (Anthropic) │  │    SDK       │  │  (OpenAI)    │  │  (OpenAI)    │ │
+    D["internal/toolemulation<br/>工具提示词注入 + action block 解析"]
-│  └──────┬───────┘  └──────┬───────┘  └──────┬───────┘  └──────┬───────┘ │
+    E["internal/lingmaipc<br/>WebSocket / 命名管道"]
-└─────────┼─────────────────┼─────────────────┼─────────────────┼─────────┘
+    F["internal/remote<br/>登录态探测 / 模型列表 / Chat / SSE"]
-          │                 │                 │                 │
+    G["Lingma 插件本地进程"]
-          └─────────────────┴────────┬────────┴─────────────────┘
+    H["Lingma 远端 API"]
-                                     │ HTTP
+    I["桌面端 GUI<br/>Wails / 日志 / Token 统计 / 持久化状态"]
-                                     ▼
+
-┌─────────────────────────────────────────────────────────────────────────┐
+    A --> B
-│                         lingma-ipc-proxy                                │
+    I --> B
-│  ┌─────────────────────────────────────────────────────────────────┐    │
+    B --> C
-│  │  internal/httpapi                                                │    │
+    C --> D
-│  │  ┌─────────────┐  ┌─────────────┐  ┌─────────────────────────┐ │    │
+    C --> E
-│  │  │ /v1/models  │  │/v1/chat/comp│  │    /v1/messages         │ │    │
+    C --> F
-│  │  │  (GET)      │  │  (POST)     │  │    (POST)               │ │    │
+    E --> G
-│  │  └──────┬──────┘  └──────┬──────┘  └───────────┬─────────────┘ │    │
+    F --> H
 │  │         └─────────────────┴──────────┬──────────┘               │    │
 │  │                                      │ normalizeRequest         │    │
 │  │                                      ▼                          │    │
 │  │  ┌─────────────────────────────────────────────────────────┐   │    │
 │  │  │              internal/service                            │   │    │
 │  │  │  ┌──────────┐  ┌──────────┐  ┌────────────────────────┐ │   │    │
 │  │  │  │ Session  │  │  Prompt  │  │    Stream/Event        │ │   │    │
 │  │  │  │ Manager  │  │ Builder  │  │    Handler             │ │   │    │
 │  │  │  └────┬─────┘  └────┬─────┘  └───────────┬────────────┘ │   │    │
 │  │  │       └─────────────┴──────────┬─────────┘              │   │    │
 │  │  │                              │ buildLingmaPrompt       │   │    │
 │  │  │                              ▼                          │   │    │
 │  │  │  ┌─────────────────────────────────────────────────┐   │   │    │
 │  │  │  │          internal/lingmaipc                      │   │   │    │
 │  │  │  │  ┌──────────────┐  ┌──────────────────────────┐ │   │   │    │
 │  │  │  │  │   WebSocket  │  │    Named Pipe (Win)      │ │   │   │    │
 │  │  │  │  │  Transport   │  │    Transport             │ │   │   │    │
 │  │  │  │  └──────┬───────┘  └───────────┬──────────────┘ │   │   │    │
 │  │  │  └─────────┼──────────────────────┼────────────────┘   │   │    │
 │  │  └────────────┼──────────────────────┼────────────────────┘   │    │
 │  │               │                      │                         │    │
 │  │  ┌────────────┼──────────────────────┼────────────────────┐   │    │
 │  │  │            ▼                      ▼                    │   │    │
 │  │  │  ┌─────────────────────────────────────────────────┐  │   │    │
 │  │  │  │      internal/toolemulation                      │  │   │    │
 │  │  │  │  ┌──────────────┐  ┌──────────────────────────┐ │  │   │    │
 │  │  │  │  │InjectTooling │  │   ParseActionBlocks      │ │  │   │    │
 │  │  │  │  │  (Prompt)    │  │   (Response)             │ │  │   │    │
 │  │  │  │  └──────────────┘  └──────────────────────────┘ │  │   │    │
 │  │  │  └─────────────────────────────────────────────────┘  │   │    │
 │  │  └───────────────────────────────────────────────────────┘   │    │
 │  └───────────────────────────────────────────────────────────────┘    │
 └─────────────────────────────────────────────────────────────────────────┘
                                     │
                                     │ WebSocket / Named Pipe
                                     ▼
 ┌─────────────────────────────────────────────────────────────────────────┐
 │                         Lingma 后端进程                                  │
 │              (VS Code 插件的本地 IPC 服务)                                │
 │                   ws://127.0.0.1:8899/ws                                │
 └─────────────────────────────────────────────────────────────────────────┘
                                     │
                                     │ HTTP API
                                     ▼
 ┌─────────────────────────────────────────────────────────────────────────┐
 │                         云端模型服务                                     │
 │              (Kimi-K2.6 / Qwen3-Max / MiniMax-M2.7 等)                  │
 └─────────────────────────────────────────────────────────────────────────┘
 ```
 ---
-## 2. 模块职责
+## 2. 运行模式
-### 2.1 internal/httpapi
+### 2.1 IPC 模式
-HTTP API 适配层，负责将外部请求转换为内部 `service.ChatRequest`。
+`backend=ipc`
-| 端点 | 协议 | 功能 |
+- 读取本地 Lingma 插件传输信息
-|------|------|------|
+- 通过以下方式连接：
-| `GET /v1/models` | OpenAI | 返回可用模型列表 |
+  - macOS / Linux：WebSocket
-| `POST /v1/chat/completions` | OpenAI | 聊天补全（流式/非流式） |
+  - Windows：Named Pipe
-| `POST /v1/messages` | Anthropic | 消息接口（流式/非流式） |
+- 复用 Lingma 插件自身的 session 语义
 - 桌面端里“会话与环境”相关配置只在这里生效
-**核心函数：**
+### 2.2 Remote API 模式
 - `handleOpenAIChatCompletions()` - 处理 OpenAI 格式请求
 - `handleAnthropicMessages()` - 处理 Anthropic 格式请求
 - `normalizeOpenAIRequest()` / `normalizeAnthropicRequest()` - 归一化请求
-**关键设计：**
+`backend=remote`
 - 支持 CORS 预检请求 (`OPTIONS`)
 - 单请求并发控制 (`tryAcquire()` / `release()`)
 - 流式响应通过 `http.Flusher` 实现 SSE
-### 2.2 internal/service
+- 解析远端域名
-
+- 加载认证信息：
-业务逻辑层，负责会话管理和 Prompt 构建。
+  - 显式指定的 `remote_auth_file`
-
+  - 或自动探测 `~/.lingma` 下的缓存
-**核心结构：**
+- 直接请求远端模型列表和聊天接口
-```go
+- 支持远端超时 / 429 / 5xx 的模型兜底切换
-type Service struct {
+- 不依赖本地插件会话环境参数
    cfg              Config
    client           *lingmaipc.Client
    stickySessionID  string
    stickyModelID    string
 }
 ```
 **核心函数：**
 - `Generate()` - 非流式生成
 - `GenerateStream()` - 流式生成（返回 `events` + `done` channel）
 - `buildLingmaPrompt()` - 构建 Lingma 原生 Prompt
 - `runPromptLocked()` - 发送 `session/prompt` RPC 并监听 `session/update` 通知
 **会话模式：**
 | 模式 | 行为 |
 |------|------|
 | `reuse` | 复用 sticky session，多轮对话保持上下文 |
 | `fresh` | 每个请求新建临时 session，完成后删除 |
 | `auto` | 单轮请求复用；带 system/history 的请求用 fresh |
 ### 2.3 internal/lingmaipc
 IPC 通信层，负责与 Lingma 后端进程建立连接。
 **传输方式：**
 | 平台 | 默认传输 | 说明 |
 |------|----------|------|
 | Windows | Named Pipe | `\\.\pipe\lingma-*` |
 | macOS/Linux | WebSocket | `ws://127.0.0.1:{port}/ws` |
 **连接发现：**
 - 读取 VS Code 插件缓存：`~/.config/Lingma/SharedClientCache/.info.json`
 - 获取 WebSocket 端口号
 - 自动重连机制
 **RPC 协议：**
 - `session/new` - 创建会话
 - `session/prompt` - 发送用户消息
 - `session/update` - 接收流式响应通知
 - `session/set_model` - 切换模型
 - `chat/deleteSessionById` - 删除会话
 ### 2.4 internal/toolemulation
 Tool 调用模拟层，将标准 `tools` 协议转换为 Prompt 层契约。
 **核心流程：**
 ```
 Client tools ──→ ExtractAnthropicTools() ──→ []Tool
                    │
                    ▼
              InjectTooling() ──→ System Prompt + Tool 说明
                    │
                    ▼
              模型输出 action block
                    │
                    ▼
              ParseActionBlocks() ──→ []ToolCall
                    │
                    ▼
              编码为 Anthropic tool_use / OpenAI tool_calls
 ```
 **Prompt 契约格式：**
 ```
 ```json action
 {"tool":"NAME","parameters":{"key":"value"}}
 ```
 ```
 **支持格式：**
 - `{"tool":"X","parameters":{}}` ✅ 标准格式
 - `{"tool":"X","arguments":{}}` ✅ 兼容格式
 - `{"tool":"X","input":{}}` ✅ 兼容格式
 - `{"tool":"X","arg1":"val"}` ✅ 顶层参数（部分模型）
 ---
-## 3. 核心流程
+## 3. 模块职责
-### 3.1 普通聊天请求流程
+### 3.1 `cmd/lingma-ipc-proxy`
 入口与配置装配层。
 职责：
 - 解析命令行参数
 - 合并配置文件 / 环境变量 / CLI flags
 - 选择后端模式
 - 构建 `service.Config`
 - 启动 `internal/httpapi.Server`
 关键配置字段：
 - `backend`
 - `transport`
 - `websocket_url`
 - `pipe`
 - `remote_base_url`
 - `remote_auth_file`
 - `remote_version`
 - `remote_fallback_enabled`
 - `remote_fallback_models`
 ### 3.2 `internal/httpapi`
 OpenAI / Anthropic 兼容层。
 主要路由：
 - `GET /v1/models`
 - `POST /v1/chat/completions`
 - `POST /v1/messages`
 - `GET /health`
 - `GET /props`
 职责：
 - 把 OpenAI / Anthropic 请求归一化为 `service.ChatRequest`
 - 把 service 结果重新编码成 OpenAI / Anthropic 响应
 - 输出 SSE 流
 - 记录调试用请求 / 响应摘要
 ### 3.3 `internal/service`
 核心编排层。
 职责：
 - 选择当前 backend
 - backend 预热
 - 拉取模型列表
 - 非流式生成
 - 流式生成
 - IPC 模式下的 session 复用策略
 - 工具模拟注入与解析
 - 图片输入归一化
 - 远端 fallback 顺序控制
 分支逻辑：
 - IPC 路径走 `internal/lingmaipc`
 - Remote 路径走 `internal/remote`
 ### 3.4 `internal/lingmaipc`
 本地 Lingma 插件 IPC 客户端。
 职责：
 - 自动探测 WebSocket / pipe 端点
 - 建立连接与重连
 - 发送 RPC：
  - `session/new`
  - `session/prompt`
  - `session/set_model`
  - `chat/deleteSessionById`
 - 消费 `session/update` 通知
 ### 3.5 `internal/remote`
 Lingma 远端 HTTP 客户端。
 职责：
 - 解析远端 base URL
 - 加载并校验登录态
 - 生成远端请求所需身份信息
 - 获取远端模型列表
 - 调用远端聊天接口
 - 处理远端 SSE 流式响应
 ### 3.6 `internal/toolemulation`
 工具调用模拟层。
 职责：
 - 从 OpenAI / Anthropic 请求中提取工具定义
 - 将工具契约注入 prompt
 - 从模型文本里解析 JSON action block
 - 回投为：
  - Anthropic `tool_use`
  - OpenAI `tool_calls`
 ---
 ## 4. 请求主流程
 ### 4.1 通用入口
 ```mermaid
 sequenceDiagram
-    participant C as Client
+    participant Client as Client
-    participant H as HTTP API
+    participant HTTP as httpapi
-    participant S as Service
+    participant Service as service
    participant L as Lingma IPC
    participant B as Lingma Backend
-    C->>H: POST /v1/messages
+    Client->>HTTP: OpenAI / Anthropic 请求
-    H->>H: normalizeAnthropicRequest()
+    HTTP->>HTTP: 归一化请求
-    H->>S: GenerateStream(req)
+    HTTP->>Service: Generate / GenerateStream
    S->>S: ensureConnected()
    S->>S: resolveSession()
    S->>S: buildLingmaPrompt()
    S->>L: Send("session/prompt", params)
    L->>B: WebSocket RPC
    B->>L: session/update (agent_message_chunk)
    loop 流式响应
        L->>S: notification (chunk)
        S->>H: events <- StreamEvent{Delta}
        H->>C: SSE: content_block_delta
    end
    B->>L: session/update (chat_finish)
    L->>S: notification (finish)
    S->>H: done <- StreamResult
    H->>C: SSE: message_stop
 ```
-### 3.2 Tool 调用流程
+### 4.2 IPC 后端流程
 ```mermaid
 sequenceDiagram
-    participant C as Client
+    participant Service as service
-    participant H as HTTP API
+    participant Tool as toolemulation
-    participant T as ToolEmulation
+    participant IPC as lingmaipc
-    participant S as Service
+    participant Plugin as Lingma 插件
    participant L as Lingma IPC
-    C->>H: POST /v1/messages (with tools)
+    Service->>Tool: 按需注入工具契约
-    H->>T: ExtractAnthropicTools()
+    Service->>IPC: ensure connected
-    H->>S: GenerateStream(req)
+    Service->>IPC: 创建/复用 session
-    S->>T: InjectTooling(system, tools)
+    Service->>IPC: session/prompt
-    S->>L: session/prompt (with tool prompt)
+    IPC->>Plugin: RPC
-    L->>S: response (with action blocks)
+    Plugin-->>IPC: session/update chunk
-    S->>T: ParseActionBlocks(text)
+    IPC-->>Service: 流式事件
-    T->>S: []ToolCall
+    Service-->>Service: 解析工具 block / 图片 / stop reason
    S->>H: ChatResult{Text, ToolCalls}
    H->>C: SSE: tool_use blocks
    C->>H: POST /v1/messages (tool_result)
    H->>T: ActionOutputPrompt(toolUseID, content)
    H->>S: GenerateStream(req)
    S->>L: session/prompt (with tool result)
    L->>S: response
    S->>H: ChatResult
    H->>C: SSE: final response
 ```
-### 3.3 图片传输流程
+### 4.3 Remote 后端流程
 ```mermaid
 sequenceDiagram
-    participant C as Client
+    participant Service as service
-    participant H as HTTP API
+    participant Remote as remote client
-    participant S as Service
+    participant API as Lingma 远端 API
    participant L as Lingma IPC
-    C->>H: POST /v1/messages (with image)
+    Service->>Remote: 加载登录态 / 初始化 client
-    H->>H: extractAnthropicImages()
+    Service->>Remote: 需要时拉取模型列表
-    H->>S: ChatRequest{Images: [...]}
+    Service->>Remote: 发送 chat 请求
-    S->>S: runPromptLocked()
+    Remote->>API: HTTPS
-    Note over S: 1. 保存 base64 到 /tmp/lingma-img-*.ext
+    API-->>Remote: JSON 或 SSE
-    Note over S: 2. 构建 URI: lingma:///agent/file?path=...
+    Remote-->>Service: 归一化结果
-    S->>L: session/prompt
+    Service-->>Service: 按条件执行 fallback
    Note over L: prompt: [{type:"text"}, {type:"image", mimeType, uri, data}]
    L->>S: response (model sees image)
    S->>H: ChatResult
    H->>C: SSE response
 ```
 ### 3.4 流式输出 SSE 事件序列
 **Anthropic 格式（流式）：**
 ```
 event: message_start
 data: {"type":"message_start","message":{...}}
 event: content_block_start
 data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}
 event: content_block_delta
 data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"你"}}
 event: content_block_delta
 data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"好"}}
 ... (更多 delta)
 event: content_block_stop
 data: {"type":"content_block_stop","index":0}
 [如有 tool_calls]
 event: content_block_start
 data: {"type":"content_block_start","index":1,"content_block":{"type":"tool_use","id":"...","name":"Bash","input":{"command":"ls /"}}}
 event: content_block_stop
 data: {"type":"content_block_stop","index":1}
 event: message_delta
 data: {"type":"message_delta","delta":{"stop_reason":"end_turn"},"usage":{"output_tokens":5}}
 event: message_stop
 data: {"type":"message_stop"}
 ```
 ---
-## 4. 关键技术决策
+## 5. 远端兜底策略
-### 4.1 为什么使用 Tool Emulation 而非原生 Tool Calling？
+仅在以下条件同时满足时启用：
-Lingma 后端模型（Kimi、Qwen 等）不原生支持 OpenAI/Anthropic 的 `tools` 协议。因此代理层需要将工具定义注入到 Prompt 中，通过结构化文本输出模拟工具调用。
+- `backend=remote`
 - `remote_fallback_enabled=true`
 - 还没有向客户端输出任何流式 token
 - 上游错误属于 timeout / 429 / 5xx
-**优点：**
+当前默认顺序：
 - 不依赖上游模型能力
 - 兼容任何纯聊天模型
 - 可精确控制 Prompt 格式
-**缺点：**
+1. `kmodel`
- 模型需要学习特定格式
+2. `mmodel`
- 解析可能有容错问题
+3. `dashscope_qwen3_coder`
- 增加了 Prompt 长度
+4. `dashscope_qmodel`
 5. `dashscope_qwen_max_latest`
 6. `dashscope_qwen_plus_20250428_thinking`
-### 4.2 为什么使用 WebSocket/Named Pipe 而非 HTTP？
+实际执行前，service 会先拿远端 `/v1/models` 的真实结果过滤一遍，只保留当前账号真的可用的模型。
 Lingma 插件使用本地 IPC 与后端通信，优势：
 - 低延迟（本地通信）
 - 双向实时通知（session/update）
 - 认证信息由插件管理，代理无需处理
 ### 4.3 图片传输的双保险策略
 ```
 Prompt 数组 (Lingma 原生格式):
 [
  {"type":"text","text":"..."},
  {"type":"image","mimeType":"image/png","uri":"lingma:///agent/file?path=...","data":"base64..."}
 ]
 ```
 - `uri`: Lingma 后端必须验证的本地文件路径
 - `data`: base64 编码的图像数据（备用）
 - `mimeType`: 图像类型标识
 ### 4.4 单请求并发控制
 Lingma IPC 一次只能处理一个请求，因此代理使用 `tryAcquire()` 机制：
 ```go
 if !s.tryAcquire() {
    writeAnthropicError(w, 429, "rate_limit_error",
        "Lingma IPC proxy handles one request at a time.")
    return
 }
 defer s.release()
 ```
 ---
-## 5. 配置说明
+## 6. 桌面端架构
-### 5.1 配置文件结构
+Wails 桌面端不是简单预览壳，而是本地代理的运维控制台。
-```json
+职责：
 {
  "host": "127.0.0.1",
  "port": 8095,
  "transport": "websocket",
  "mode": "agent",
  "shell_type": "zsh",
  "session_mode": "auto",
  "timeout": 120,
  "cwd": "/Users/tiancheng"
 }
 ```
-### 5.2 配置项说明
+- 启动 / 停止 / 重启代理
 - 展示当前 backend、监听地址、探测结果
 - 持久化：
  - 请求历史
  - 日志
  - Token 统计
 - 编辑配置并保存后按需重启
-| 配置项 | 类型 | 默认值 | 说明 |
+本地持久化路径：
-|--------|------|--------|------|
+
-| `host` | string | `127.0.0.1` | HTTP 监听地址 |
+- 配置：`~/.config/lingma-ipc-proxy/config.json`
-| `port` | int | `8095` | HTTP 监听端口 |
+- GUI 运行状态：`~/.config/lingma-ipc-proxy/app-state.json`
-| `transport` | string | `auto` | IPC 传输方式：`auto`/`pipe`/`websocket` |
+
-| `mode` | string | `chat` | 模式：`chat`/`agent` |
+打包要求：
-| `shell_type` | string | `powershell` | 终端类型 |
+
-| `session_mode` | string | `auto` | 会话模式：`reuse`/`fresh`/`auto` |
+- 生产包不自动打开 Inspector / 调试入口
-| `timeout` | int | `120` | 请求超时（秒） |
+- 本地开发可通过 `LINGMA_DESKTOP_DEBUG=1` 显式开启
 | `cwd` | string | `""` | 工作目录（传给 Lingma 后端） |
 ---
-## 6. 扩展点
+## 7. 关键设计决策
-### 6.1 添加新模型
+### 7.1 为什么同时保留 IPC 和 Remote？
-在 `service.go` 的模型映射中添加：
+因为两种模式解决的问题不同：
-```go
+- IPC 模式更贴近插件本地上下文和 session 语义
-func (s *Service) resolveInternalModelID(model string) string {
+- Remote 模式更适合第三方 agent 客户端，减少对插件运行态的依赖
    switch strings.ToLower(strings.TrimSpace(model)) {
    case "kimi-k2.6":
        return "kimi2.6"
    case "qwen3-max":
        return "qwen3max"
    // 添加新模型映射
    default:
        return ""
    }
 }
 ```
-### 6.2 添加新 Tool 格式支持
+### 7.2 为什么 Remote 也保留 Tool Emulation？
-在 `toolemulation.go` 的 `parseToolCallJSON()` 中扩展参数解析逻辑。
+因为 Lingma 暴露出来的模型能力并不保证始终稳定兼容 OpenAI / Anthropic 原生 tools 协议。代理层必须对外提供稳定契约，不能把上游模型差异直接泄露给客户端。
-### 6.3 添加新 API 端点
+### 7.3 为什么桌面端要持久化请求和 Token？
-在 `httpapi/server.go` 的 `NewServer()` 中注册新路由。
+因为这个 GUI 已经是运维面板，不是一次性调试页。重启后仍然需要保留最近请求、日志和 usage 统计，便于排障和观察模型表现。
 ---
-*文档版本: 2025-04-25*
+## 8. 当前边界
-*对应代码版本: 当前 master*
+
 - IPC 模式仍然受本地 Lingma 插件运行态影响
 - Remote 登录态探测依赖本地 Lingma 缓存结构
 - 图片类请求在本地持久化时会做裁剪/脱敏，避免状态文件过大
 - Remote 模式下如果启用了 fallback，最近一次“聊天模型”可能与客户端最初指定模型不同
 ---
 ## 9. 代码入口建议
 如果要继续扩展，优先看这些文件：
 - `cmd/lingma-ipc-proxy/main.go`
 - `internal/httpapi/server.go`
 - `internal/service/service.go`
 - `internal/lingmaipc/*`
 - `internal/remote/*`
 - `desktop/app.go`
 - `desktop/main.go`
 ---
 文档版本：2026-04-30
--- a/internal/httpapi/server.go
+++ b/internal/httpapi/server.go
@@ -1400,9 +1400,6 @@ func redactRecordedValue(value any) any {
 		if looksLikeImagePayload(typed) {
 			return imageRedaction(typed)
 		}
 		if len(typed) > 12000 {
 			return typed[:12000] + "... [truncated]"
 		}
 		return typed
 	default:
 		return typed
@@ -1443,11 +1440,7 @@ func mustMarshalJSON(value any) []byte {
 }
 func truncateRecordedString(value string) string {
 	const maxRecordedBody = 120000
 	if len(value) <= maxRecordedBody {
 	return value
 	}
 	return value[:maxRecordedBody] + "... [truncated]"
 }
 func withCORS(next http.Handler) http.Handler {
--- a/internal/remote/client.go
+++ b/internal/remote/client.go
@@ -76,7 +76,7 @@ func New(cfg Config) *Client {
 		cfg.CosyVersion = "2.11.2"
 	}
 	if cfg.Timeout <= 0 {
-		cfg.Timeout = 120 * time.Second
+		cfg.Timeout = 300 * time.Second
 	}
 	cfg.BaseURL = strings.TrimRight(cfg.BaseURL, "/")
 	return &Client{cfg: cfg, client: &http.Client{Timeout: cfg.Timeout}}
--- a/internal/service/service.go
+++ b/internal/service/service.go
@@ -51,6 +51,8 @@ type Config struct {
 	ShellType             string
 	SessionMode           SessionMode
 	Timeout               time.Duration
 	RemoteFallbackEnabled bool
 	RemoteFallbackModels  []string
 }
 type Image struct {
@@ -166,7 +168,7 @@ func New(cfg Config) *Service {
 		cfg.ShellType = lingmaipc.DefaultShellType()
 	}
 	if cfg.Timeout <= 0 {
-		cfg.Timeout = 120 * time.Second
+		cfg.Timeout = 300 * time.Second
 	}
 	if cfg.Transport == "" {
 		cfg.Transport = lingmaipc.TransportAuto
@@ -174,6 +176,11 @@ func New(cfg Config) *Service {
 	if cfg.Backend == "" {
 		cfg.Backend = BackendRemote
 	}
 	if cfg.Backend == BackendRemote {
 		if len(cfg.RemoteFallbackModels) == 0 {
 			cfg.RemoteFallbackModels = DefaultRemoteFallbackModels()
 		}
 	}
 	cfg.Model = normalizeModelForBackend(cfg.Backend, cfg.Model)
 	if cfg.SessionMode == "" {
 		cfg.SessionMode = SessionModeAuto
@@ -181,6 +188,17 @@ func New(cfg Config) *Service {
 	return &Service{cfg: cfg}
 }
 // DefaultRemoteFallbackModels returns the built-in remote fallback order.
 // Every call hands back a freshly allocated slice, so callers may modify
 // the result without affecting later calls.
 func DefaultRemoteFallbackModels() []string {
 	order := [...]string{
 		"kmodel",
 		"mmodel",
 		"dashscope_qwen3_coder",
 		"dashscope_qmodel",
 		"dashscope_qwen_max_latest",
 		"dashscope_qwen_plus_20250428_thinking",
 	}
 	out := make([]string, len(order))
 	copy(out, order[:])
 	return out
 }
 func (s *Service) SetDefaultModel(model string) {
 	s.mu.Lock()
 	defer s.mu.Unlock()
@@ -331,9 +349,6 @@ func (s *Service) generateRemote(
 	req ChatRequest,
 	onDelta func(string),
 ) (*ChatResult, error) {
 	requestCtx, cancel := context.WithTimeout(ctx, s.cfg.Timeout)
 	defer cancel()
 	if strings.TrimSpace(req.Model) == "" {
 		req.Model = s.DefaultModel()
 	}
@@ -346,20 +361,54 @@ func (s *Service) generateRemote(
 		return nil, errors.New("empty user message")
 	}
 	models := s.remoteAttemptModels(ctx, req.Model)
 	client := s.remoteClientLocked()
-	remoteResult, err := client.Chat(requestCtx, remote.ChatRequest{
+	var lastErr error
-		Model:       req.Model,
+	for i, model := range models {
 		attemptCtx, cancel := context.WithTimeout(ctx, s.cfg.Timeout)
 		result, emitted, err := s.generateRemoteWithModel(attemptCtx, client, req, prompt, model, onDelta)
 		cancel()
 		if err == nil {
 			return result, nil
 		}
 		lastErr = err
 		if i == len(models)-1 || emitted || !isRemoteFallbackError(err) {
 			return nil, err
 		}
 	}
 	return nil, lastErr
 }
 func (s *Service) generateRemoteWithModel(
 	ctx context.Context,
 	client *remote.Client,
 	req ChatRequest,
 	prompt string,
 	model string,
 	onDelta func(string),
 ) (*ChatResult, bool, error) {
 	emitted := false
 	delta := func(text string) {
 		if text != "" {
 			emitted = true
 		}
 		if onDelta != nil {
 			onDelta(text)
 		}
 	}
 	remoteResult, err := client.Chat(ctx, remote.ChatRequest{
 		Model:       model,
 		Prompt:      prompt,
 		Stream:      onDelta != nil,
 		Temperature: req.Temperature,
-	}, onDelta)
+	}, delta)
 	if err != nil {
-		return nil, err
+		return nil, emitted, err
 	}
 	result := &ChatResult{
 		Text:             remoteResult.Text,
-		Model:            valueOr(strings.TrimSpace(req.Model), "lingma"),
+		Model:            valueOr(strings.TrimSpace(model), "lingma"),
 		InputTokens:      remoteResult.InputTokens,
 		OutputTokens:     remoteResult.OutputTokens,
 		SessionID:        "",
@@ -370,9 +419,9 @@ func (s *Service) generateRemote(
 		Transport:        "remote",
 		EffectiveSession: SessionModeFresh,
 	}
-	s.applyToolEmulation(requestCtx, req, prompt, result, onDelta, func(hintPrompt string) (string, int, error) {
+	s.applyToolEmulation(ctx, req, prompt, result, onDelta, func(hintPrompt string) (string, int, error) {
-		retryResult, retryErr := client.Chat(requestCtx, remote.ChatRequest{
+		retryResult, retryErr := client.Chat(ctx, remote.ChatRequest{
-			Model:       req.Model,
+			Model:       model,
 			Prompt:      hintPrompt,
 			Stream:      onDelta != nil,
 			Temperature: req.Temperature,
@@ -385,7 +434,78 @@ func (s *Service) generateRemote(
 		}
 		return retryResult.Text, retryResult.OutputTokens, nil
 	})
-	return result, nil
+	return result, emitted, nil
 }
 // remoteAttemptModels builds the ordered list of models to try for a single
 // remote request. The normalized primary model is always first. Fallback
 // candidates are appended only when RemoteFallbackEnabled is set and the
 // remote model list can be fetched; a failed lookup yields just the primary.
 //
 // Candidates come from cfg.RemoteFallbackModels (or the built-in defaults
 // when that list is empty). If the primary appears in that list, only the
 // entries after its first occurrence are considered; otherwise the whole
 // list is. Candidates that are duplicates or missing from the fetched model
 // list are skipped.
 func (s *Service) remoteAttemptModels(ctx context.Context, primary string) []string {
 	primary = normalizeModelForBackend(BackendRemote, primary)
 	attempts := []string{primary}
 	if !s.cfg.RemoteFallbackEnabled {
 		return attempts
 	}
 	// The model-list lookup gets its own 15s budget, derived from ctx.
 	listCtx, cancelList := context.WithTimeout(ctx, 15*time.Second)
 	listed, listErr := s.remoteClientLocked().ListModels(listCtx)
 	cancelList()
 	if listErr != nil {
 		return attempts
 	}
 	usable := make(map[string]bool, len(listed))
 	for _, entry := range listed {
 		if key := normalizeModelForBackend(BackendRemote, entry.Key); key != "" {
 			usable[key] = true
 		}
 	}
 	configured := s.cfg.RemoteFallbackModels
 	if len(configured) == 0 {
 		configured = DefaultRemoteFallbackModels()
 	}
 	// Normalize the configured order and remember where to resume: right
 	// after the first occurrence of the primary, or at the start if absent.
 	candidates := make([]string, 0, len(configured))
 	resumeAt := 0
 	primaryFound := false
 	for _, raw := range configured {
 		name := normalizeModelForBackend(BackendRemote, raw)
 		if name == "" {
 			continue
 		}
 		candidates = append(candidates, name)
 		if !primaryFound && name == primary {
 			primaryFound = true
 			resumeAt = len(candidates)
 		}
 	}
 	tried := map[string]bool{primary: true}
 	for _, name := range candidates[resumeAt:] {
 		if !tried[name] && usable[name] {
 			tried[name] = true
 			attempts = append(attempts, name)
 		}
 	}
 	return attempts
 }
 // isRemoteFallbackError reports whether err should trigger a switch to the
 // next fallback model. A nil error never qualifies. An error wrapping
 // context.DeadlineExceeded always does. Any other error qualifies when its
 // lowercased message contains one of the known markers below.
 func isRemoteFallbackError(err error) bool {
 	switch {
 	case err == nil:
 		return false
 	case errors.Is(err, context.DeadlineExceeded):
 		return true
 	}
 	lowered := strings.ToLower(err.Error())
 	// Markers are compared against the lowercased text. The "remote chat
 	// status" prefixes presumably mirror the remote client's error format
 	// for 5xx and 429 responses; verify against internal/remote.
 	markers := []string{
 		"context deadline exceeded",
 		"client.timeout",
 		"timeout awaiting response",
 		"remote chat status 5",
 		"remote chat status 429",
 		"connection reset",
 		"unexpected eof",
 	}
 	for _, marker := range markers {
 		if strings.Contains(lowered, marker) {
 			return true
 		}
 	}
 	return false
 }
 func (s *Service) generateLocked(
--- a/lingma-ipc-proxy.macos.json
+++ b/lingma-ipc-proxy.macos.json
@@ -7,6 +7,15 @@
  "model": "kmodel",
  "shell_type": "zsh",
  "session_mode": "auto",
-  "timeout": 120,
+  "timeout": 300,
  "remote_fallback_enabled": true,
  "remote_fallback_models": [
    "kmodel",
    "mmodel",
    "dashscope_qwen3_coder",
    "dashscope_qmodel",
    "dashscope_qwen_max_latest",
    "dashscope_qwen_plus_20250428_thinking"
  ],
  "cwd": "/Users/tiancheng"
 }