Release v1.4.3

2026-04-30 18:20:04 +08:00
parent a2f777a1a8
commit a02fd51c19
24 changed files with 1909 additions and 1176 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,22 @@
 # Changelog

+## Unreleased
+
+- Nothing yet.
+
+## v1.4.3 - 2026-04-30
+
+- Added remote API fallback with a configurable model order, triggered on request timeout, upstream 5xx/429, or network interruption. The default order is Kimi-K2.6, MiniMax-M2.7, Qwen3-Coder, Qwen3.6-Plus, Qwen3-Max, and Qwen3-Thinking.
+- Fallback only runs before any streaming bytes are sent and only uses models returned by the active `/v1/models` response.
+- Changed the default request timeout from 120 seconds to 300 seconds.
+- Added a desktop Settings switch and fallback model list editor.
+- Added persistent desktop app state for request history, app logs, and cumulative token usage.
+- Added a Dashboard token usage card and model-list specification chips for context window and capability summaries.
+- Added model display to the desktop request stream table and model-aware request search.
+- Fixed Dashboard "recent model" tracking so health/model-list requests no longer override the last real chat model.
+- Updated architecture documentation to cover the IPC and Remote API dual-backend design.
+- Disabled desktop Inspector and default context menu in production builds; local development can opt in with `LINGMA_DESKTOP_DEBUG=1`.
+
 ## v1.4.2 - 2026-04-30

 - Default backend changed to remote API mode for new CLI and desktop configurations.
--- a/README.md
+++ b/README.md
@@ -13,7 +13,7 @@ The proxy now supports two backend modes:

 ## Current Version

-The current desktop line is `v1.4.2`.
+The current desktop line is `v1.4.3`.

 See [CHANGELOG.md](./CHANGELOG.md) for release history.

@@ -326,6 +326,10 @@ The proxy only reports models actually exposed by your Lingma plugin. The table

 Default model when the client omits `model`: `kmodel` (`Kimi-K2.6` in the remote model list).

+Remote mode enables timeout fallback by default. On timeout, upstream 5xx/429, or network interruption, the proxy only switches models if no streaming bytes have been sent to the client yet. Fallback candidates are filtered against the actual `/v1/models` response, so unavailable models are skipped. Default order:
+
+`Kimi-K2.6 -> MiniMax-M2.7 -> Qwen3-Coder -> Qwen3.6-Plus -> Qwen3-Max -> Qwen3-Thinking`
+
 ## Configuration

 Default config file:
@@ -348,7 +352,16 @@ Example:
  "mode": "agent",
  "shell_type": "zsh",
  "session_mode": "auto",
-  "timeout": 120,
+  "timeout": 300,
+  "remote_fallback_enabled": true,
+  "remote_fallback_models": [
+    "kmodel",
+    "mmodel",
+    "dashscope_qwen3_coder",
+    "dashscope_qmodel",
+    "dashscope_qwen_max_latest",
+    "dashscope_qwen_plus_20250428_thinking"
+  ],
  "cwd": "/Users/you/project",
  "current_file_path": ""
 }
--- a/README.zh-CN.md
+++ b/README.zh-CN.md
@@ -16,7 +16,7 @@

 ## 当前版本

-当前桌面端版本线：`v1.4.2`
+当前桌面端版本线：`v1.4.3`

 版本更新记录见 [CHANGELOG.md](./CHANGELOG.md)。

@@ -408,6 +408,10 @@ export ANTHROPIC_API_KEY="any"

 当客户端请求没有携带 `model` 字段时，代理默认使用：`kmodel`（远端模型列表里的 Kimi-K2.6）。

+远端模式默认开启超时兜底。遇到请求超时、上游 5xx/429 或网络中断时，代理只会在尚未向客户端输出任何流式内容的情况下切换模型。兜底候选会先和实际 `/v1/models` 返回结果求交集，不存在或当前账号不可用的模型会自动跳过。默认顺序：
+
+`Kimi-K2.6 -> MiniMax-M2.7 -> Qwen3-Coder -> Qwen3.6-Plus -> Qwen3-Max -> Qwen3-Thinking`
+
 ## 配置文件

 默认读取：
@@ -430,7 +434,16 @@ export ANTHROPIC_API_KEY="any"
  "mode": "agent",
  "shell_type": "zsh",
  "session_mode": "auto",
-  "timeout": 120,
+  "timeout": 300,
+  "remote_fallback_enabled": true,
+  "remote_fallback_models": [
+    "kmodel",
+    "mmodel",
+    "dashscope_qwen3_coder",
+    "dashscope_qmodel",
+    "dashscope_qwen_max_latest",
+    "dashscope_qwen_plus_20250428_thinking"
+  ],
  "cwd": "/Users/tiancheng/project",
  "current_file_path": ""
 }
--- a/cmd/lingma-ipc-proxy/main.go
+++ b/cmd/lingma-ipc-proxy/main.go
@@ -38,6 +38,8 @@ type fileConfig struct {
 	ShellType             string   `json:"shell_type"`
 	SessionMode           string   `json:"session_mode"`
 	TimeoutSeconds        int      `json:"timeout"`
+	RemoteFallbackEnabled *bool    `json:"remote_fallback_enabled"`
+	RemoteFallbackModels  []string `json:"remote_fallback_models"`
 }

 func main() {
@@ -98,7 +100,9 @@ func loadConfig() (service.Config, string) {
 		Model:                 "kmodel",
 		ShellType:             defaultShellType(),
 		SessionMode:           service.SessionModeAuto,
-		Timeout:     120 * time.Second,
+		Timeout:               300 * time.Second,
+		RemoteFallbackEnabled: true,
+		RemoteFallbackModels:  service.DefaultRemoteFallbackModels(),
 	}

 	configPath, configLoaded := resolveConfigPath()
@@ -127,6 +131,8 @@ func loadConfig() (service.Config, string) {
 	model := flag.String("model", cfg.Model, "Default Lingma model when API request omits model")
 	shellType := flag.String("shell-type", cfg.ShellType, "Shell type sent through ACP meta")
 	timeoutSeconds := flag.Int("timeout", int(cfg.Timeout/time.Second), "Per-request timeout in seconds")
+	remoteFallbackEnabled := flag.Bool("remote-fallback", cfg.RemoteFallbackEnabled, "Enable remote timeout/5xx fallback to the next available model")
+	remoteFallbackModels := flag.String("remote-fallback-models", strings.Join(cfg.RemoteFallbackModels, ","), "Comma-separated remote fallback model IDs")
 	sessionMode := flag.String("session-mode", string(cfg.SessionMode), "Session mode: auto, fresh, reuse")
 	config := flag.String("config", valueOr(configPath, filepath.Join(currentDir(), "lingma-ipc-proxy.json")), "Path to JSON config file")
 	flag.Parse()
@@ -151,6 +157,8 @@ func loadConfig() (service.Config, string) {
 	cfg.ShellType = strings.TrimSpace(*shellType)
 	cfg.SessionMode = parsedSessionMode
 	cfg.Timeout = time.Duration(*timeoutSeconds) * time.Second
+	cfg.RemoteFallbackEnabled = *remoteFallbackEnabled
+	cfg.RemoteFallbackModels = splitCSV(*remoteFallbackModels)

 	if configLoaded {
 		configPath = finalConfigPath
@@ -236,6 +244,12 @@ func overlayFileConfig(dst *service.Config, src fileConfig) {
 	if src.TimeoutSeconds > 0 {
 		dst.Timeout = time.Duration(src.TimeoutSeconds) * time.Second
 	}
+	if src.RemoteFallbackEnabled != nil {
+		dst.RemoteFallbackEnabled = *src.RemoteFallbackEnabled
+	}
+	if len(src.RemoteFallbackModels) > 0 {
+		dst.RemoteFallbackModels = cleanStringSlice(src.RemoteFallbackModels)
+	}
 }

 func overlayEnvConfig(dst *service.Config) {
@@ -287,6 +301,12 @@ func overlayEnvConfig(dst *service.Config) {
 	if value := envInt("LINGMA_PROXY_TIMEOUT_SECONDS", 0); value > 0 {
 		dst.Timeout = time.Duration(value) * time.Second
 	}
+	if value, ok := envBool("LINGMA_REMOTE_FALLBACK_ENABLED"); ok {
+		dst.RemoteFallbackEnabled = value
+	}
+	if value := strings.TrimSpace(os.Getenv("LINGMA_REMOTE_FALLBACK_MODELS")); value != "" {
+		dst.RemoteFallbackModels = splitCSV(value)
+	}
 }

 func parseSessionMode(value string) service.SessionMode {
@@ -349,6 +369,36 @@ func envInt(key string, fallback int) int {
 	return fallback
 }

+func envBool(key string) (bool, bool) {
+	value := strings.ToLower(strings.TrimSpace(os.Getenv(key)))
+	switch value {
+	case "1", "true", "yes", "on":
+		return true, true
+	case "0", "false", "no", "off":
+		return false, true
+	default:
+		return false, false
+	}
+}
+
+func splitCSV(value string) []string {
+	return cleanStringSlice(strings.Split(value, ","))
+}
+
+func cleanStringSlice(values []string) []string {
+	out := make([]string, 0, len(values))
+	seen := map[string]bool{}
+	for _, value := range values {
+		item := strings.TrimSpace(value)
+		if item == "" || seen[item] {
+			continue
+		}
+		seen[item] = true
+		out = append(out, item)
+	}
+	return out
+}
+
 func currentDir() string {
 	if wd, err := os.Getwd(); err == nil {
 		return wd
--- a/config.example.json
+++ b/config.example.json
@@ -6,7 +6,16 @@
  "mode": "chat",
  "model": "kmodel",
  "session_mode": "auto",
-  "timeout": 120,
+  "timeout": 300,
+  "remote_fallback_enabled": true,
+  "remote_fallback_models": [
+    "kmodel",
+    "mmodel",
+    "dashscope_qwen3_coder",
+    "dashscope_qmodel",
+    "dashscope_qwen_max_latest",
+    "dashscope_qwen_plus_20250428_thinking"
+  ],
  "cwd": "C:/Workspace/Personal/lingma-ipc-proxy",
  "shell_type": "powershell",
  "current_file_path": "",
--- a/desktop/app.go
+++ b/desktop/app.go
@@ -32,10 +32,30 @@ type RequestRecord struct {
 	StatusCode   int    `json:"statusCode"`
 	Duration     string `json:"duration"`
 	Size         string `json:"size,omitempty"`
+	InputTokens  int    `json:"inputTokens,omitempty"`
+	OutputTokens int    `json:"outputTokens,omitempty"`
+	TotalTokens  int    `json:"totalTokens,omitempty"`
 	ReqBody      string `json:"reqBody,omitempty"`
 	RespBody     string `json:"respBody,omitempty"`
 }

+type AppLog struct {
+	Time    string `json:"time"`
+	Level   string `json:"level"`
+	Message string `json:"message"`
+}
+
+type TokenStats struct {
+	TotalRequests   int            `json:"totalRequests"`
+	SuccessRequests int            `json:"successRequests"`
+	InputTokens     int            `json:"inputTokens"`
+	OutputTokens    int            `json:"outputTokens"`
+	TotalTokens     int            `json:"totalTokens"`
+	ByModel         map[string]int `json:"byModel,omitempty"`
+	LastModel       string         `json:"lastModel,omitempty"`
+	LastUpdated     string         `json:"lastUpdated,omitempty"`
+}
+
 type App struct {
 	ctx context.Context

@@ -49,6 +69,8 @@ type App struct {
 	quitHint  time.Time
 	models    []ModelInfo
 	requests  []RequestRecord
+	logs      []AppLog
+	stats     TokenStats
 }

 // ModelInfo represents a model returned by /v1/models
@@ -96,6 +118,9 @@ func NewApp() *App {
 func (a *App) startup(ctx context.Context) {
 	a.ctx = ctx
 	a.cfg = defaultConfig()
+	if err := a.loadAppState(); err != nil {
+		runtime.LogWarningf(a.ctx, "failed to load app state: %v", err)
+	}

 	// Auto-save default config on first run so users can find/edit it later
 	if err := a.saveConfig(a.cfg); err != nil {
@@ -208,10 +233,19 @@ func (a *App) forceQuit() {
 }

 func (a *App) emitLog(level string, message string) {
-	runtime.EventsEmit(a.ctx, "log", map[string]string{
-		"level":   level,
-		"message": message,
-	})
+	entry := AppLog{
+		Time:    time.Now().Format("15:04:05"),
+		Level:   level,
+		Message: message,
+	}
+	a.mu.Lock()
+	a.logs = append(a.logs, entry)
+	if len(a.logs) > 2000 {
+		a.logs = a.logs[len(a.logs)-2000:]
+	}
+	a.saveAppStateLocked()
+	a.mu.Unlock()
+	runtime.EventsEmit(a.ctx, "log", entry)
 }

 // GetStatus returns the current proxy status
@@ -347,6 +381,8 @@ func (a *App) saveConfig(cfg service.Config) error {
 		"shell_type":              cfg.ShellType,
 		"session_mode":            string(cfg.SessionMode),
 		"timeout":                 timeoutSec,
+		"remote_fallback_enabled": cfg.RemoteFallbackEnabled,
+		"remote_fallback_models":  cfg.RemoteFallbackModels,
 	}

 	data, err := json.MarshalIndent(fileCfg, "", "  ")
@@ -361,14 +397,16 @@ func (a *App) saveConfig(cfg service.Config) error {
 // StartProxy starts the lingma-ipc-proxy HTTP server
 func (a *App) StartProxy() error {
 	a.mu.Lock()
-	defer a.mu.Unlock()
-
 	if a.running {
+		a.mu.Unlock()
 		return fmt.Errorf("proxy already running")
 	}

 	addr := fmt.Sprintf("%s:%d", a.cfg.Host, a.cfg.Port)
-	svc := service.New(a.cfg)
+	cfg := a.cfg
+	a.mu.Unlock()
+
+	svc := service.New(cfg)

 	warmupCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
 	if err := svc.Warmup(warmupCtx); err != nil {
@@ -382,23 +420,32 @@ func (a *App) StartProxy() error {

 	server := httpapi.NewServer(addr, svc)
 	server.OnRequest = func(method, path string, statusCode int, duration time.Duration, reqBody, respBody string) {
-		a.mu.Lock()
-		a.requests = append(a.requests, RequestRecord{
+		inputTokens, outputTokens := extractTokenUsage(respBody)
+		model := extractRequestModel(reqBody)
+		record := RequestRecord{
 			Time:         time.Now().Format("15:04:05"),
 			Method:       method,
 			Path:         path,
-			Model:      extractRequestModel(reqBody),
+			Model:        model,
 			StatusCode:   statusCode,
 			Duration:     duration.Round(time.Millisecond).String(),
 			Size:         formatPayloadSize(len(reqBody) + len(respBody)),
+			InputTokens:  inputTokens,
+			OutputTokens: outputTokens,
+			TotalTokens:  inputTokens + outputTokens,
 			ReqBody:      reqBody,
 			RespBody:     respBody,
-		})
-		if len(a.requests) > 100 {
-			a.requests = a.requests[len(a.requests)-100:]
 		}
+		a.mu.Lock()
+		a.requests = append(a.requests, record)
+		if len(a.requests) > 2000 {
+			a.requests = a.requests[len(a.requests)-2000:]
+		}
+		a.accumulateTokenStatsLocked(record)
+		a.saveAppStateLocked()
 		a.mu.Unlock()
 		runtime.EventsEmit(a.ctx, "requests:updated", a.GetRequests())
+		runtime.EventsEmit(a.ctx, "usage:updated", a.GetTokenStats())
 	}

 	// Check if the port is available before claiming we're running
@@ -420,10 +467,16 @@ func (a *App) StartProxy() error {
 		}
 	}()

+	a.mu.Lock()
+	if a.running {
+		a.mu.Unlock()
+		return fmt.Errorf("proxy already running")
+	}
 	a.server = server
 	a.addr = addr
 	a.running = true
 	a.startedAt = time.Now()
+	a.mu.Unlock()

 	msg := fmt.Sprintf("Proxy started on http://%s", addr)
 	runtime.LogInfof(a.ctx, msg)
@@ -435,8 +488,24 @@ func (a *App) StartProxy() error {
 	return nil
 }

-// ClearLogs is a no-op backend helper (logs are kept in frontend memory)
-func (a *App) ClearLogs() {}
+func (a *App) GetLogs() []AppLog {
+	a.mu.RLock()
+	defer a.mu.RUnlock()
+	out := make([]AppLog, len(a.logs))
+	copy(out, a.logs)
+	for i, j := 0, len(out)-1; i < j; i, j = i+1, j-1 {
+		out[i], out[j] = out[j], out[i]
+	}
+	return out
+}
+
+func (a *App) ClearLogs() {
+	a.mu.Lock()
+	a.logs = nil
+	a.saveAppStateLocked()
+	a.mu.Unlock()
+	runtime.EventsEmit(a.ctx, "logs:updated", a.GetLogs())
+}

 // StopProxy stops the proxy server
 func (a *App) StopProxy() error {
@@ -493,10 +562,21 @@ func (a *App) GetRequests() []RequestRecord {
 func (a *App) ClearRequests() {
 	a.mu.Lock()
 	a.requests = nil
+	a.saveAppStateLocked()
 	a.mu.Unlock()
 	a.emitLog("info", "Request history cleared")
 }

+func (a *App) GetTokenStats() TokenStats {
+	a.mu.RLock()
+	defer a.mu.RUnlock()
+	stats := a.stats
+	if stats.ByModel != nil {
+		stats.ByModel = cloneIntMap(stats.ByModel)
+	}
+	return stats
+}
+
 // RefreshModels probes the running proxy for the latest model list.
 func (a *App) RefreshModels() ([]ModelInfo, error) {
 	a.mu.RLock()
@@ -614,6 +694,207 @@ func formatPayloadSize(bytes int) string {
 	return fmt.Sprintf("%d B", bytes)
 }

+type appStateFile struct {
+	Requests []RequestRecord `json:"requests"`
+	Logs     []AppLog        `json:"logs"`
+	Stats    TokenStats      `json:"stats"`
+}
+
+func (a *App) loadAppState() error {
+	path, err := appStatePath()
+	if err != nil {
+		return err
+	}
+	data, err := os.ReadFile(path)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return nil
+		}
+		return err
+	}
+	var state appStateFile
+	if err := json.Unmarshal(data, &state); err != nil {
+		return err
+	}
+	a.mu.Lock()
+	defer a.mu.Unlock()
+	a.requests = state.Requests
+	a.logs = state.Logs
+	a.stats = state.Stats
+	if a.stats.ByModel == nil {
+		a.stats.ByModel = map[string]int{}
+	}
+	a.reconcileTokenStatsLocked()
+	return nil
+}
+
+// saveAppStateLocked persists the in-memory request history, logs, and token
+// stats to the app state file as indented JSON.
+//
+// It reads a.requests, a.logs, and a.stats without taking a.mu; every caller in
+// this file invokes it while already holding the write lock. Persistence is
+// best effort: failures are reported through the runtime warning log and are
+// otherwise ignored.
+func (a *App) saveAppStateLocked() {
+	path, err := appStatePath()
+	if err != nil {
+		runtime.LogWarningf(a.ctx, "resolve app state path failed: %v", err)
+		return
+	}
+	if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
+		runtime.LogWarningf(a.ctx, "create app state dir failed: %v", err)
+		return
+	}
+	state := appStateFile{
+		Requests: a.requests,
+		Logs:     a.logs,
+		Stats:    a.stats,
+	}
+	data, err := json.MarshalIndent(state, "", "  ")
+	if err != nil {
+		runtime.LogWarningf(a.ctx, "marshal app state failed: %v", err)
+		return
+	}
+	// The state contains full request and response bodies, so the file is
+	// created readable by the current user only. Writing to a sibling temp file
+	// and renaming it over the target keeps the previous state intact if the
+	// process dies mid-write (a truncated file would fail to load next start).
+	tmp := path + ".tmp"
+	if err := os.WriteFile(tmp, data, 0600); err != nil {
+		runtime.LogWarningf(a.ctx, "write app state failed: %v", err)
+		return
+	}
+	if err := os.Rename(tmp, path); err != nil {
+		runtime.LogWarningf(a.ctx, "replace app state failed: %v", err)
+		// Best-effort cleanup; the rename error above is the one worth reporting.
+		_ = os.Remove(tmp)
+	}
+}
+
+func appStatePath() (string, error) {
+	home, err := os.UserHomeDir()
+	if err != nil {
+		return "", err
+	}
+	return filepath.Join(home, ".config", "lingma-ipc-proxy", "app-state.json"), nil
+}
+
+func (a *App) accumulateTokenStatsLocked(record RequestRecord) {
+	a.stats.TotalRequests++
+	if record.StatusCode >= 200 && record.StatusCode < 300 {
+		a.stats.SuccessRequests++
+	}
+	a.stats.InputTokens += record.InputTokens
+	a.stats.OutputTokens += record.OutputTokens
+	a.stats.TotalTokens += record.TotalTokens
+	if a.stats.ByModel == nil {
+		a.stats.ByModel = map[string]int{}
+	}
+	model := strings.TrimSpace(record.Model)
+	if model == "" {
+		model = "-"
+	}
+	if record.TotalTokens > 0 {
+		a.stats.ByModel[model] += record.TotalTokens
+		if isUsageBearingRequest(record.Path) && model != "-" {
+			a.stats.LastModel = model
+		}
+	}
+	a.stats.LastUpdated = time.Now().Format(time.RFC3339)
+}
+
+func (a *App) reconcileTokenStatsLocked() {
+	if a.stats.ByModel == nil {
+		a.stats.ByModel = map[string]int{}
+	}
+	a.stats.LastModel = ""
+	for i := len(a.requests) - 1; i >= 0; i-- {
+		record := a.requests[i]
+		model := strings.TrimSpace(record.Model)
+		if model == "" || record.TotalTokens <= 0 || !isUsageBearingRequest(record.Path) {
+			continue
+		}
+		a.stats.LastModel = model
+		break
+	}
+}
+
+// isUsageBearingRequest reports whether path is one of the chat/completion
+// endpoints ("/v1/messages", "/v1/chat/completions", "/v1/completions").
+//
+// Surrounding whitespace, a query string, and a single trailing slash are
+// ignored, so "/v1/messages?beta=true" is treated like "/v1/messages".
+// NOTE(review): whether the request hook passes the raw request URI or only
+// the URL path is not visible here; stripping the query is harmless either way.
+func isUsageBearingRequest(path string) bool {
+	path, _, _ = strings.Cut(strings.TrimSpace(path), "?")
+	if len(path) > 1 {
+		path = strings.TrimSuffix(path, "/")
+	}
+	switch path {
+	case "/v1/messages", "/v1/chat/completions", "/v1/completions":
+		return true
+	default:
+		return false
+	}
+}
+
+func cloneIntMap(src map[string]int) map[string]int {
+	out := make(map[string]int, len(src))
+	for k, v := range src {
+		out[k] = v
+	}
+	return out
+}
+
+func extractTokenUsage(respBody string) (int, int) {
+	if strings.TrimSpace(respBody) == "" {
+		return 0, 0
+	}
+	input, output := extractUsageFromJSON(respBody)
+	if input != 0 || output != 0 {
+		return input, output
+	}
+	for _, line := range strings.Split(respBody, "\n") {
+		line = strings.TrimSpace(line)
+		if !strings.HasPrefix(line, "data:") {
+			continue
+		}
+		payload := strings.TrimSpace(strings.TrimPrefix(line, "data:"))
+		if payload == "" || payload == "[DONE]" {
+			continue
+		}
+		in, out := extractUsageFromJSON(payload)
+		if in > 0 {
+			input = in
+		}
+		if out > 0 {
+			output = out
+		}
+	}
+	return input, output
+}
+
+func extractUsageFromJSON(raw string) (int, int) {
+	var payload any
+	if err := json.Unmarshal([]byte(raw), &payload); err != nil {
+		return 0, 0
+	}
+	usage, ok := findUsageMap(payload)
+	if !ok {
+		return 0, 0
+	}
+	input := intFromAny(usage["input_tokens"]) + intFromAny(usage["prompt_tokens"])
+	output := intFromAny(usage["output_tokens"]) + intFromAny(usage["completion_tokens"])
+	return input, output
+}
+
+func findUsageMap(value any) (map[string]any, bool) {
+	switch typed := value.(type) {
+	case map[string]any:
+		if usage, ok := typed["usage"].(map[string]any); ok {
+			return usage, true
+		}
+		for _, child := range typed {
+			if usage, ok := findUsageMap(child); ok {
+				return usage, true
+			}
+		}
+	case []any:
+		for _, child := range typed {
+			if usage, ok := findUsageMap(child); ok {
+				return usage, true
+			}
+		}
+	}
+	return nil, false
+}
+
+func intFromAny(value any) int {
+	switch typed := value.(type) {
+	case float64:
+		return int(typed)
+	case int:
+		return typed
+	case json.Number:
+		n, _ := typed.Int64()
+		return int(n)
+	default:
+		return 0
+	}
+}
+
 func defaultConfig() service.Config {
 	cfg := service.Config{
 		Host:                  "127.0.0.1",
@@ -625,7 +906,9 @@ func defaultConfig() service.Config {
 		Model:                 "kmodel",
 		ShellType:             defaultShellType(),
 		SessionMode:           service.SessionModeAuto,
-		Timeout:     120 * time.Second,
+		Timeout:               300 * time.Second,
+		RemoteFallbackEnabled: true,
+		RemoteFallbackModels:  service.DefaultRemoteFallbackModels(),
 	}

 	// Try to load config file from multiple locations
@@ -650,6 +933,8 @@ func defaultConfig() service.Config {
 					ShellType             string   `json:"shell_type"`
 					SessionMode           string   `json:"session_mode"`
 					TimeoutSeconds        int      `json:"timeout"`
+					RemoteFallbackEnabled *bool    `json:"remote_fallback_enabled"`
+					RemoteFallbackModels  []string `json:"remote_fallback_models"`
 				}
 				if err := json.Unmarshal(data, &fileCfg); err == nil {
 					if fileCfg.Host != "" {
@@ -702,6 +987,12 @@ func defaultConfig() service.Config {
 					if fileCfg.TimeoutSeconds > 0 {
 						cfg.Timeout = time.Duration(fileCfg.TimeoutSeconds) * time.Second
 					}
+					if fileCfg.RemoteFallbackEnabled != nil {
+						cfg.RemoteFallbackEnabled = *fileCfg.RemoteFallbackEnabled
+					}
+					if len(fileCfg.RemoteFallbackModels) > 0 {
+						cfg.RemoteFallbackModels = cleanConfigStrings(fileCfg.RemoteFallbackModels)
+					}
 				}
 				break // loaded successfully
 			}
@@ -732,6 +1023,20 @@ func maskIdentifier(value string) string {
 	return string(runes[:4]) + "..." + string(runes[len(runes)-4:])
 }

+func cleanConfigStrings(values []string) []string {
+	out := make([]string, 0, len(values))
+	seen := map[string]bool{}
+	for _, value := range values {
+		item := strings.TrimSpace(value)
+		if item == "" || seen[item] {
+			continue
+		}
+		seen[item] = true
+		out = append(out, item)
+	}
+	return out
+}
+
 func configSearchPaths() []string {
 	var paths []string
 	// 1. Executable directory (for dev / portable mode)
--- a/desktop/frontend/src/App.vue
+++ b/desktop/frontend/src/App.vue
@@ -6,7 +6,7 @@ import Models from './views/Models.vue'
 import Requests from './views/Requests.vue'
 import Settings from './views/Settings.vue'
 import { EventsOff, EventsOn } from '../wailsjs/runtime'
-import { GetStatus, HideWindow, MinimizeWindow } from '../wailsjs/go/main/App.js'
+import { ClearLogs, GetLogs, GetStatus, HideWindow, MinimizeWindow } from '../wailsjs/go/main/App.js'
 import lingmaIcon from './assets/images/lingma-icon.png'

 const currentTab = ref('dashboard')
@@ -42,8 +42,13 @@ function showToast(message) {
  }, 2200)
 }

-function clearLocalLogs() {
+async function clearLocalLogs() {
+  try {
+    await ClearLogs()
    logs.value = []
+  } catch (e) {
+    logs.value = []
+  }
 }

 function setStatus(nextStatus) {
@@ -158,14 +163,25 @@ onMounted(() => {
  systemThemeQuery?.addEventListener?.('change', applyTheme)
  applyTheme()
  refreshStatus()
+  GetLogs().then((items) => {
+    logs.value = Array.isArray(items) ? items : []
+  }).catch(() => {})
  safeEventsOn('models:updated', (data) => {
    status.value.models = Array.isArray(data) ? data.length : status.value.models
    addLog('info', `模型列表已更新：${status.value.models} 个模型`)
  })
  safeEventsOn('log', (data) => {
+    if (data.time && data.message !== undefined) {
+      logs.value.unshift(data)
+      if (logs.value.length > 500) logs.value = logs.value.slice(0, 500)
+    } else {
      addLog(data.level || 'info', data.message || '')
+    }
    refreshStatus()
  })
+  safeEventsOn('logs:updated', (data) => {
+    logs.value = Array.isArray(data) ? data : []
+  })
  safeEventsOn('quit:confirm', (message) => {
    showToast(message || '再按一次退出快捷键将停止代理并退出应用')
  })
@@ -183,6 +199,7 @@ onUnmounted(() => {
  systemThemeQuery?.removeEventListener?.('change', applyTheme)
  safeEventsOff('models:updated')
  safeEventsOff('log')
+  safeEventsOff('logs:updated')
  safeEventsOff('quit:confirm')
  safeEventsOff('status:updated')
  safeEventsOff('requests:updated')
@@ -222,7 +239,7 @@ onUnmounted(() => {
        <span class="status-dot" :class="{ running: status.running }"></span>
        <div>
          <strong>{{ status.running ? 'Proxy Running' : 'Proxy Stopped' }}</strong>
-          <small>v1.4.2</small>
+          <small>v1.4.3</small>
        </div>
      </div>
    </aside>
--- a/desktop/frontend/src/components/HelloWorld.vue
+++ b/desktop/frontend/src/components/HelloWorld.vue
@@ -1,71 +0,0 @@
-<script lang="ts" setup>
-import {reactive} from 'vue'
-import {Greet} from '../../wailsjs/go/main/App'
-
-const data = reactive({
-  name: "",
-  resultText: "Please enter your name below 👇",
-})
-
-function greet() {
-  Greet(data.name).then(result => {
-    data.resultText = result
-  })
-}
-
-</script>
-
-<template>
-  <main>
-    <div id="result" class="result">{{ data.resultText }}</div>
-    <div id="input" class="input-box">
-      <input id="name" v-model="data.name" autocomplete="off" class="input" type="text"/>
-      <button class="btn" @click="greet">Greet</button>
-    </div>
-  </main>
-</template>
-
-<style scoped>
-.result {
-  height: 20px;
-  line-height: 20px;
-  margin: 1.5rem auto;
-}
-
-.input-box .btn {
-  width: 60px;
-  height: 30px;
-  line-height: 30px;
-  border-radius: 3px;
-  border: none;
-  margin: 0 0 0 20px;
-  padding: 0 8px;
-  cursor: pointer;
-}
-
-.input-box .btn:hover {
-  background-image: linear-gradient(to top, #cfd9df 0%, #e2ebf0 100%);
-  color: #333333;
-}
-
-.input-box .input {
-  border: none;
-  border-radius: 3px;
-  outline: none;
-  height: 30px;
-  line-height: 30px;
-  padding: 0 10px;
-  background-color: rgba(240, 240, 240, 1);
-  -webkit-font-smoothing: antialiased;
-}
-
-.input-box .input:hover {
-  border: none;
-  background-color: rgba(255, 255, 255, 1);
-}
-
-.input-box .input:focus {
-  border: none;
-  background-color: rgba(255, 255, 255, 1);
-}
-</style>
--- a/desktop/frontend/src/style.css
+++ b/desktop/frontend/src/style.css
@@ -1,5 +1,12 @@
 :root {
-  font-family: Inter, ui-sans-serif, -apple-system, BlinkMacSystemFont, "SF Pro Text", "Segoe UI", sans-serif;
+  font-family:
+    Inter,
+    ui-sans-serif,
+    -apple-system,
+    BlinkMacSystemFont,
+    'SF Pro Text',
+    'Segoe UI',
+    sans-serif;
  color: #172033;
  background: #eef2f6;
  font-synthesis: none;
@@ -26,7 +33,7 @@
  --radius: 8px;
 }

-:root[data-theme="dark"] {
+:root[data-theme='dark'] {
  color: #edf3ff;
  background: #111827;
  --bg: #111827;
@@ -68,7 +75,7 @@ body {
  background: var(--bg);
 }

-:root[data-theme="dark"] body {
+:root[data-theme='dark'] body {
  background: var(--bg);
 }

@@ -106,7 +113,7 @@ button {
  box-shadow: none;
 }

-:root[data-theme="dark"] .app-shell {
+:root[data-theme='dark'] .app-shell {
  border-color: rgba(148, 163, 184, 0.22);
  background: rgba(16, 24, 36, 0.78);
 }
@@ -123,7 +130,7 @@ button {
  box-shadow: inset -1px 0 0 rgba(125, 139, 158, 0.16);
 }

-:root[data-theme="dark"] .sidebar {
+:root[data-theme='dark'] .sidebar {
  border-right-color: rgba(148, 163, 184, 0.14);
  background: linear-gradient(180deg, rgba(28, 39, 56, 0.7), rgba(18, 27, 40, 0.66));
  box-shadow: inset -1px 0 0 rgba(148, 163, 184, 0.12);
@@ -145,13 +152,13 @@ button {
  background: rgba(255, 255, 255, 0.58);
 }

-:root[data-theme="dark"] .brand:hover,
-:root[data-theme="dark"] .nav-item:hover,
-:root[data-theme="dark"] .sidebar-status {
+:root[data-theme='dark'] .brand:hover,
+:root[data-theme='dark'] .nav-item:hover,
+:root[data-theme='dark'] .sidebar-status {
  background: rgba(255, 255, 255, 0.08);
 }

-:root[data-theme="dark"] .nav-item {
+:root[data-theme='dark'] .nav-item {
  color: #aebbd0;
 }

@@ -224,7 +231,7 @@ button {
  box-shadow: inset 0 0 0 1px rgba(37, 99, 235, 0.12);
 }

-:root[data-theme="dark"] .nav-item.active {
+:root[data-theme='dark'] .nav-item.active {
  color: #d8e6ff;
  background: rgba(67, 111, 190, 0.24);
  box-shadow: inset 0 0 0 1px rgba(105, 161, 255, 0.18);
@@ -285,13 +292,13 @@ button {
  min-height: 46px;
  padding: 0 16px;
  border-bottom: 1px solid rgba(112, 128, 148, 0.18);
-  background: rgba(255, 255, 255, 0.58);
-  backdrop-filter: blur(20px) saturate(1.08);
+  background: #f6f9fd;
+  backdrop-filter: none;
 }

-:root[data-theme="dark"] .topbar {
+:root[data-theme='dark'] .topbar {
  border-bottom-color: rgba(148, 163, 184, 0.14);
-  background: rgba(20, 30, 45, 0.66);
+  background: #162131;
 }

 .topbar-spacer {
@@ -372,10 +379,10 @@ button {
  backdrop-filter: blur(18px) saturate(1.12);
 }

-:root[data-theme="dark"] .glass-panel,
-:root[data-theme="dark"] .metric,
-:root[data-theme="dark"] .table-panel,
-:root[data-theme="dark"] .config-panel {
+:root[data-theme='dark'] .glass-panel,
+:root[data-theme='dark'] .metric,
+:root[data-theme='dark'] .table-panel,
+:root[data-theme='dark'] .config-panel {
  border-color: rgba(148, 163, 184, 0.14);
  background: var(--surface);
 }
@@ -583,6 +590,10 @@ button {
  gap: 18px;
 }

+.settings-grid {
+  align-items: start;
+}
+
 .grid-3 {
  display: grid;
  grid-template-columns: repeat(3, minmax(0, 1fr));
@@ -591,10 +602,11 @@ button {

 .dashboard-grid {
  display: grid;
+  align-items: stretch;
  grid-template-columns: minmax(0, 1fr) minmax(0, 0.95fr) minmax(300px, 0.95fr);
  grid-template-areas:
-    "health models config"
-    "requests requests config";
+    'health models config'
+    'requests requests usage';
  gap: 12px;
 }

@@ -604,16 +616,109 @@ button {

 .area-models {
  grid-area: models;
+  min-height: 0;
 }

 .area-config {
  grid-area: config;
 }

+.compact-header {
+  margin-bottom: 10px;
+}
+
+.compact-header p {
+  margin-top: 4px;
+}
+
+.area-usage {
+  grid-area: usage;
+}
+
 .area-requests {
  grid-area: requests;
 }

+.usage-grid {
+  display: grid;
+  grid-template-columns: repeat(2, minmax(0, 1fr));
+  gap: 10px;
+}
+
+.usage-grid div {
+  min-width: 0;
+  padding: 12px;
+  border: 1px solid var(--line);
+  border-radius: 8px;
+  background: var(--surface-soft);
+}
+
+.usage-grid label {
+  display: block;
+  margin-bottom: 6px;
+  color: var(--muted);
+  font-size: 11px;
+  font-weight: 680;
+}
+
+.usage-grid strong {
+  display: block;
+  overflow: hidden;
+  color: var(--text);
+  font-size: 20px;
+  line-height: 1.15;
+  text-overflow: ellipsis;
+}
+
+.usage-foot {
+  display: flex;
+  flex-wrap: wrap;
+  gap: 8px 14px;
+  margin-top: 12px;
+  color: var(--muted);
+  font-size: 12px;
+}
+
+.config-summary {
+  display: grid;
+  grid-template-columns: repeat(2, minmax(0, 1fr));
+  gap: 10px;
+}
+
+.config-summary-item {
+  min-width: 0;
+  padding: 12px;
+  border: 1px solid var(--line);
+  border-radius: 8px;
+  background: var(--surface-soft);
+}
+
+.config-summary-item label {
+  display: block;
+  margin-bottom: 6px;
+  color: var(--muted);
+  font-size: 11px;
+  font-weight: 680;
+}
+
+.config-summary-item strong {
+  display: block;
+  overflow: hidden;
+  color: var(--text);
+  font-size: 13px;
+  line-height: 1.3;
+  text-overflow: ellipsis;
+  white-space: nowrap;
+}
+
+.config-summary-item.span-2 {
+  grid-column: 1 / -1;
+}
+
+.compact-link {
+  margin-top: 10px;
+}
+
 .activity-chart {
  display: grid;
  grid-template-columns: repeat(36, minmax(3px, 1fr));
@@ -637,13 +742,13 @@ button {
  white-space: nowrap;
 }

-:root[data-theme="dark"] .activity-chart,
-:root[data-theme="dark"] .data-table th,
-:root[data-theme="dark"] .field input,
-:root[data-theme="dark"] .field textarea,
-:root[data-theme="dark"] .search-input,
-:root[data-theme="dark"] .detail-panel pre,
-:root[data-theme="dark"] .code-block {
+:root[data-theme='dark'] .activity-chart,
+:root[data-theme='dark'] .data-table th,
+:root[data-theme='dark'] .field input,
+:root[data-theme='dark'] .field textarea,
+:root[data-theme='dark'] .search-input,
+:root[data-theme='dark'] .detail-panel pre,
+:root[data-theme='dark'] .code-block {
  color: var(--text);
  border-color: var(--line);
  background: rgba(15, 23, 42, 0.74);
@@ -719,8 +824,8 @@ button {
  box-shadow: inset 0 0 0 1px rgba(37, 99, 235, 0.12);
 }

-:root[data-theme="dark"] .model-choice:hover,
-:root[data-theme="dark"] .model-choice:focus-visible {
+:root[data-theme='dark'] .model-choice:hover,
+:root[data-theme='dark'] .model-choice:focus-visible {
  color: #f3f7ff;
  border-color: rgba(105, 161, 255, 0.38);
  background: rgba(72, 118, 214, 0.34);
@@ -728,7 +833,48 @@ button {

 .models-list .model-row,
 .model-list-row {
-  grid-template-columns: 22px minmax(220px, 1fr) auto;
+  grid-template-columns: 22px minmax(220px, 1fr) minmax(260px, auto);
+}
+
+.model-specs {
+  display: flex;
+  flex-wrap: wrap;
+  justify-content: flex-end;
+  gap: 6px;
+}
+
+.spec-chip {
+  display: inline-flex;
+  min-height: 22px;
+  align-items: center;
+  padding: 0 8px;
+  border: 1px solid var(--line);
+  border-radius: 7px;
+  color: var(--muted);
+  background: var(--surface-soft);
+  font-size: 11px;
+  font-weight: 680;
+  white-space: nowrap;
+}
+
+.spec-chip.strong {
+  color: #0d6a41;
+  border-color: rgba(24, 160, 88, 0.18);
+  background: var(--green-soft);
+}
+
+.spec-chip.muted-chip {
+  color: #8a5a08;
+  border-color: rgba(217, 119, 6, 0.16);
+  background: var(--warn-soft);
+}
+
+:root[data-theme='dark'] .spec-chip.strong {
+  color: #7ee0aa;
+}
+
+:root[data-theme='dark'] .spec-chip.muted-chip {
+  color: #ffd27a;
 }

 .model-brand-icon {
@@ -744,6 +890,8 @@ button {
  display: flex;
  min-height: 0;
  flex-direction: column;
+  overflow: hidden;
+  height: 295px;
 }

 .model-card-list,
@@ -754,7 +902,14 @@ button {
 }

 .model-card-list {
-  max-height: 248px;
+  flex: 1 1 auto;
+  max-height: none;
+  scrollbar-width: none;
+}
+
+.model-card-list::-webkit-scrollbar {
+  width: 0;
+  height: 0;
 }

 .model-page-list {
@@ -870,6 +1025,66 @@ button {
  border-bottom: 1px solid var(--line);
 }

+.toolbar-header {
+  margin: 0;
+  display: flex;
+  align-items: baseline;
+  gap: 8px;
+}
+
+.toolbar-count {
+  font-size: 12px;
+  font-weight: normal;
+  white-space: nowrap;
+}
+
+.toolbar-search-wrap {
+  display: flex;
+  align-items: center;
+  gap: 12px;
+  flex: 1;
+}
+
+.toolbar-search-input {
+  max-width: 300px;
+  width: 100%;
+}
+
+.btn-sm-outline {
+  padding: 4px 10px;
+  font-size: 12px;
+  background: transparent;
+  border: 1px solid var(--line);
+  border-radius: 6px;
+  cursor: pointer;
+  color: var(--text);
+  display: inline-flex;
+  align-items: center;
+  justify-content: center;
+  transition: all 0.16s ease;
+}
+
+.btn-sm-outline:hover:not(:disabled) {
+  background: rgba(0, 0, 0, 0.05); /* hover tint only while the button is enabled */
+}
+
+.btn-sm-outline:disabled {
+  opacity: 0.6;
+  cursor: not-allowed;
+}
+
+.btn-sm-outline i {
+  margin-left: 2px;
+}
+
+:root[data-theme='dark'] .btn-sm-outline {
+  color: #dce8fb;
+}
+
+:root[data-theme='dark'] .btn-sm-outline:hover:not(:disabled) {
+  background: rgba(255, 255, 255, 0.1); /* dark-theme hover tint, also skipped when disabled */
+}
+
 .table-scroll {
  flex: 0 0 auto;
  max-height: none;
@@ -893,7 +1108,7 @@ button {

 .area-requests .table-scroll {
  min-height: 0;
-  max-height: 260px;
+  max-height: 211px;
  overflow: auto;
 }

@@ -932,10 +1147,12 @@ button {
 }

 .data-table tbody tr {
-  height: var(--request-row-height, 64px);
+  height: var(--request-row-height, 42px);
  cursor: pointer;
  background: rgba(255, 255, 255, 0.34);
-  transition: background-color 140ms ease, box-shadow 140ms ease;
+  transition:
+    background-color 140ms ease,
+    box-shadow 140ms ease;
 }

 .data-table tbody tr:hover {
@@ -947,23 +1164,23 @@ button {
  box-shadow: inset 3px 0 0 var(--blue);
 }

-:root[data-theme="dark"] .data-table {
+:root[data-theme='dark'] .data-table {
  background: rgba(15, 23, 42, 0.8);
 }

-:root[data-theme="dark"] .data-table th {
+:root[data-theme='dark'] .data-table th {
  background: rgba(15, 23, 42, 0.96);
 }

-:root[data-theme="dark"] .data-table tbody tr {
+:root[data-theme='dark'] .data-table tbody tr {
  background: rgba(20, 31, 48, 0.7);
 }

-:root[data-theme="dark"] .data-table tbody tr:hover {
+:root[data-theme='dark'] .data-table tbody tr:hover {
  background: rgba(45, 65, 96, 0.9);
 }

-:root[data-theme="dark"] .data-table tbody tr.selected {
+:root[data-theme='dark'] .data-table tbody tr.selected {
  background: rgba(38, 65, 112, 0.96);
  box-shadow: inset 3px 0 0 #67a1ff;
 }
@@ -1001,8 +1218,7 @@ button {
  background: var(--red-soft);
 }

-.link-row,
-.table-footer button {
+.link-row {
  display: flex;
  align-items: center;
  justify-content: space-between;
@@ -1014,24 +1230,10 @@ button {
  cursor: pointer;
 }

-:root[data-theme="dark"] .link-row,
-:root[data-theme="dark"] .table-footer button {
+:root[data-theme='dark'] .link-row {
  color: #dce8fb;
 }

-.table-footer {
-  display: flex;
-  align-items: center;
-  justify-content: space-between;
-  padding: 10px 14px;
-  color: var(--muted);
-  font-size: 12px;
-}
-
-.table-footer button {
-  width: auto;
-  gap: 8px;
-}

 .method-chip {
  color: #334155;
@@ -1047,7 +1249,10 @@ button {
  min-height: 32px;
  border-radius: 8px;
  cursor: pointer;
-  transition: transform 0.16s ease, background 0.16s ease, box-shadow 0.16s ease;
+  transition:
+    transform 0.16s ease,
+    background 0.16s ease,
+    box-shadow 0.16s ease;
 }

 .primary-button {
@@ -1132,12 +1337,92 @@ button:disabled {
  gap: 6px;
 }

+.settings-fieldset {
+  min-width: 0;
+  padding: 0;
+  margin: 0;
+  border: 0;
+}
+
+.settings-fieldset:disabled {
+  opacity: 0.56;
+}
+
+.compact-hint {
+  margin-bottom: 14px;
+}
+
+.compact-form-grid {
+  row-gap: 14px;
+}
+
 .field label {
  color: var(--muted);
  font-size: 12px;
  font-weight: 680;
 }

+.switch-field {
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  gap: 14px;
+}
+
+.switch-field p {
+  margin: 4px 0 0;
+  color: var(--muted);
+  font-size: 12px;
+  line-height: 1.45;
+}
+
+.switch {
+  position: relative;
+  flex: 0 0 auto;
+  display: inline-flex;
+  width: 44px;
+  height: 26px;
+}
+
+.switch input {
+  position: absolute;
+  inset: 0;
+  opacity: 0;
+}
+
+.switch span {
+  position: absolute;
+  inset: 0;
+  border: 1px solid var(--line-strong);
+  border-radius: 999px;
+  background: rgba(148, 163, 184, 0.28);
+  transition:
+    background 0.16s ease,
+    border-color 0.16s ease;
+}
+
+.switch span::after {
+  content: '';
+  position: absolute;
+  top: 3px;
+  left: 3px;
+  width: 18px;
+  height: 18px;
+  border-radius: 999px;
+  background: white;
+  box-shadow: 0 2px 8px rgba(15, 23, 42, 0.2);
+  transition: transform 0.16s ease;
+}
+
+.switch input:checked + span {
+  border-color: rgba(37, 99, 235, 0.72);
+  background: #2563eb;
+}
+
+.switch input:checked + span::after {
+  transform: translateX(18px);
+}
+
 .field input,
 .field textarea,
 .search-input {
@@ -1151,6 +1436,14 @@ button:disabled {
  outline: none;
 }

+.field .switch input {
+  width: auto;
+  min-height: 0;
+  padding: 0;
+  border: 0;
+  background: transparent;
+}
+
 .field textarea {
  min-height: 78px;
  padding-top: 9px;
@@ -1230,14 +1523,14 @@ button:disabled {
  background: var(--blue-soft);
 }

-:root[data-theme="dark"] .custom-select > button {
+:root[data-theme='dark'] .custom-select > button {
  color: var(--text);
  border-color: var(--line);
  background: rgba(15, 23, 42, 0.74);
 }

-:root[data-theme="dark"] .select-menu button:hover,
-:root[data-theme="dark"] .select-menu button.selected {
+:root[data-theme='dark'] .select-menu button:hover,
+:root[data-theme='dark'] .select-menu button.selected {
  color: #dce9ff;
  background: rgba(72, 118, 214, 0.32);
 }
@@ -1262,7 +1555,7 @@ button:disabled {

 .hint-box code {
  color: var(--text);
-  font-family: "SF Mono", ui-monospace, Menlo, Consolas, monospace;
+  font-family: 'SF Mono', ui-monospace, Menlo, Consolas, monospace;
  font-size: 12px;
 }

@@ -1276,7 +1569,7 @@ button:disabled {
  background: rgba(255, 255, 255, 0.54);
 }

-:root[data-theme="dark"] .detect-card {
+:root[data-theme='dark'] .detect-card {
  background: rgba(15, 23, 42, 0.52);
 }

@@ -1326,7 +1619,7 @@ button:disabled {
  margin: 0;
  color: var(--text);
  overflow-wrap: anywhere;
-  font-family: "SF Mono", ui-monospace, Menlo, Consolas, monospace;
+  font-family: 'SF Mono', ui-monospace, Menlo, Consolas, monospace;
  font-size: 12px;
  line-height: 1.45;
 }
@@ -1377,7 +1670,7 @@ button:disabled {
  user-select: text;
 }

-:root[data-theme="dark"] .detail-panel {
+:root[data-theme='dark'] .detail-panel {
  background: rgba(12, 18, 30, 0.96);
 }

@@ -1423,7 +1716,7 @@ button:disabled {
  -webkit-user-select: text;
  user-select: text;
  background: rgba(255, 255, 255, 0.82);
-  font-family: "SF Mono", ui-monospace, Menlo, Consolas, monospace;
+  font-family: 'SF Mono', ui-monospace, Menlo, Consolas, monospace;
  font-size: 12px;
  line-height: 1.55;
  overflow-wrap: anywhere;
@@ -1525,28 +1818,28 @@ button:disabled {
  border-color: rgba(44, 111, 231, 0.38);
 }

-:root[data-theme="dark"] .json-key {
+:root[data-theme='dark'] .json-key {
  color: #c4b5fd;
 }

-:root[data-theme="dark"] .json-string {
+:root[data-theme='dark'] .json-string {
  color: #86efac;
 }

-:root[data-theme="dark"] .json-number {
+:root[data-theme='dark'] .json-number {
  color: #93c5fd;
 }

-:root[data-theme="dark"] .json-boolean {
+:root[data-theme='dark'] .json-boolean {
  color: #fca5a5;
 }

-:root[data-theme="dark"] .json-null,
-:root[data-theme="dark"] .json-punctuation {
+:root[data-theme='dark'] .json-null,
+:root[data-theme='dark'] .json-punctuation {
  color: #9aa8bd;
 }

-:root[data-theme="dark"] .json-summary {
+:root[data-theme='dark'] .json-summary {
  color: #b7c3d6;
  border-color: rgba(148, 163, 184, 0.24);
  background: rgba(30, 41, 59, 0.78);
@@ -1563,19 +1856,17 @@ button:disabled {
  height: 0;
 }

-:root[data-theme="dark"] .detail-panel pre,
-:root[data-theme="dark"] .code-block,
-:root[data-theme="dark"] .json-viewer {
+:root[data-theme='dark'] .detail-panel pre,
+:root[data-theme='dark'] .code-block,
+:root[data-theme='dark'] .json-viewer {
  color: var(--text);
  border-color: var(--line);
  background: rgba(17, 24, 39, 0.94);
 }

-
-
 .log-row {
  grid-template-columns: 82px 58px minmax(0, 1fr);
-  font-family: "SF Mono", ui-monospace, Menlo, Consolas, monospace;
+  font-family: 'SF Mono', ui-monospace, Menlo, Consolas, monospace;
  font-size: 12px;
  -webkit-user-select: text;
  user-select: text;
@@ -1636,9 +1927,10 @@ button:disabled {

  .dashboard-grid {
    grid-template-areas:
-      "health models"
-      "config config"
-      "requests requests";
+      'health models'
+      'config config'
+      'usage usage'
+      'requests requests';
  }

  .status-strip {
@@ -1654,34 +1946,38 @@ button:disabled {
  .strip-actions {
    grid-column: span 2;
  }
+
+  .config-summary {
+    grid-template-columns: 1fr 1fr;
+  }
 }

-:root[data-theme="dark"] .strip-actions,
-:root[data-theme="dark"] .secondary-button,
-:root[data-theme="dark"] .ghost-button,
-:root[data-theme="dark"] .icon-button,
-:root[data-theme="dark"] .segmented,
-:root[data-theme="dark"] .segmented button {
+:root[data-theme='dark'] .strip-actions,
+:root[data-theme='dark'] .secondary-button,
+:root[data-theme='dark'] .ghost-button,
+:root[data-theme='dark'] .icon-button,
+:root[data-theme='dark'] .segmented,
+:root[data-theme='dark'] .segmented button {
  color: var(--text);
  border-color: var(--line);
  background: rgba(30, 41, 59, 0.66);
 }

-:root[data-theme="dark"] .strip-actions {
+:root[data-theme='dark'] .strip-actions {
  background: rgba(15, 23, 42, 0.78);
 }

-:root[data-theme="dark"] .strip-actions button {
+:root[data-theme='dark'] .strip-actions button {
  color: #e6eefc;
 }

-:root[data-theme="dark"] .strip-actions button:disabled {
+:root[data-theme='dark'] .strip-actions button:disabled {
  color: #a9b7cc;
  background: rgba(15, 23, 42, 0.52);
  opacity: 0.86;
 }

-:root[data-theme="dark"] .segmented button.active {
+:root[data-theme='dark'] .segmented button.active {
  color: #f8fbff;
  background: rgba(72, 118, 214, 0.42);
 }
@@ -1759,10 +2055,11 @@ button:disabled {

  .dashboard-grid {
    grid-template-areas:
-      "health"
-      "models"
-      "config"
-      "requests";
+      'health'
+      'models'
+      'config'
+      'usage'
+      'requests';
  }

  .status-strip {
@@ -1775,6 +2072,14 @@ button:disabled {
    width: 100%;
  }

+  .config-summary {
+    grid-template-columns: 1fr;
+  }
+
+  .config-summary-item.span-2 {
+    grid-column: auto;
+  }
+
  .span-2 {
    grid-column: auto;
  }
--- a/desktop/frontend/src/views/Dashboard.vue
+++ b/desktop/frontend/src/views/Dashboard.vue
@@ -1,23 +1,14 @@
 <script setup>
 import { computed, onMounted, onUnmounted, ref } from 'vue'
-import {
-  GetModels,
-  GetConfig,
-  GetRequests,
-  GetStatus,
-  QuitApp,
-  RefreshModels,
-  StartProxy,
-  StopProxy,
-} from '../../wailsjs/go/main/App.js'
+import { GetModels, GetConfig, GetRequests, GetStatus, GetTokenStats, QuitApp, RefreshModels, StartProxy, StopProxy } from '../../wailsjs/go/main/App.js'
 import { ClipboardSetText } from '../../wailsjs/runtime'
 import { modelIcon } from '../modelIcons'

 const props = defineProps({
  shellStatus: {
    type: Object,
-    default: () => ({ running: false, addr: '', models: 0 }),
-  },
+    default: () => ({ running: false, addr: '', models: 0 })
+  }
 })

 const emit = defineEmits(['log', 'status', 'notice', 'open-settings', 'open-requests', 'open-models'])
@@ -25,9 +16,11 @@ const emit = defineEmits(['log', 'status', 'notice', 'open-settings', 'open-requ
 const status = ref(props.shellStatus)
 const models = ref([])
 const requests = ref([])
+const tokenStats = ref({ totalRequests: 0, successRequests: 0, inputTokens: 0, outputTokens: 0, totalTokens: 0 })
 const health = ref(null)
 const config = ref({})
-const loading = ref(false)
+const proxyLoading = ref(false)
+const modelsLoading = ref(false)
 const testing = ref(false)
 const now = ref(Date.now())
 let interval = null
@@ -63,7 +56,7 @@ const healthStats = computed(() => {
    avg,
    p50: percentile(sorted, 0.5),
    p95: percentile(sorted, 0.95),
-    max: sorted[sorted.length - 1],
+    max: Math.round(sorted[sorted.length - 1])
  }
 })
 const chartBars = computed(() => {
@@ -78,17 +71,22 @@ const displayRequests = computed(() => {
 })
 const displayModels = computed(() => {
  if (models.value.length > 0) {
-    return models.value.slice(0, 5).map((model) => ({ ...model, online: true }))
+    return models.value.map((model) => ({ ...model, online: true }))
  }
  return []
 })
+const successRate = computed(() => {
+  const total = Number(tokenStats.value.totalRequests || 0)
+  if (!total) return '0%'
+  return `${Math.round((Number(tokenStats.value.successRequests || 0) / total) * 100)}%`
+})

 function parseDurationMs(duration) {
  const text = String(duration || '').trim()
  if (!text) return 0
-  if (text.endsWith('ms')) return Number.parseFloat(text)
-  if (text.endsWith('s')) return Number.parseFloat(text) * 1000
-  return Number.parseFloat(text) || 0
+  if (text.endsWith('ms')) return Math.round(Number.parseFloat(text))
+  if (text.endsWith('s')) return Math.round(Number.parseFloat(text) * 1000)
+  return Math.round(Number.parseFloat(text) || 0)
 }

 function percentile(sorted, p) {
@@ -97,12 +95,20 @@ function percentile(sorted, p) {
  return Math.round(sorted[index])
 }

+// Compact counter label: >= 999.5K renders as "x.xM" (so 999,999 is "1.0M", not "1000K"), >= 10K as "xK", otherwise zh-CN grouped digits.
+function formatNumber(value) {
+  const n = Number(value || 0)
+  if (n >= 999500) return `${(n / 1000000).toFixed(1)}M`
+  return n >= 10000 ? `${Math.round(n / 1000)}K` : n.toLocaleString('zh-CN')
+}
+
 async function refresh() {
  try {
    const nextStatus = await GetStatus()
    status.value = nextStatus
    emit('status', nextStatus)
    requests.value = await GetRequests()
+    tokenStats.value = await GetTokenStats()
    config.value = await GetConfig()
    if (nextStatus.running) {
      models.value = await GetModels()
@@ -113,7 +119,7 @@ async function refresh() {
 }

 async function refreshModels() {
-  loading.value = true
+  modelsLoading.value = true
  try {
    models.value = await RefreshModels()
    emit('log', 'info', `模型探测完成：${models.value.length} 个`)
@@ -121,7 +127,7 @@ async function refreshModels() {
  } catch (e) {
    emit('log', 'error', '模型探测失败：' + (e.message || String(e)) + '。请确认 Lingma 插件已启动并登录；自动探测失败时可到设置页手动填写 WebSocket：ws://127.0.0.1:36510/，或 Windows Named Pipe：\\\\.\\pipe\\lingma-xxxx。')
  } finally {
-    loading.value = false
+    modelsLoading.value = false
  }
 }

@@ -141,7 +147,7 @@ async function copyModelName(model) {
 }

 async function toggleProxy() {
-  loading.value = true
+  proxyLoading.value = true
  try {
    if (isRunning.value) {
      await StopProxy()
@@ -154,13 +160,13 @@ async function toggleProxy() {
  } catch (e) {
    emit('log', 'error', '代理切换失败：' + (e.message || String(e)))
  } finally {
-    loading.value = false
+    proxyLoading.value = false
  }
 }

 async function restartProxy() {
  if (!isRunning.value) return
-  loading.value = true
+  proxyLoading.value = true
  try {
    await StopProxy()
    await StartProxy()
@@ -169,7 +175,7 @@ async function restartProxy() {
  } catch (e) {
    emit('log', 'error', '代理重启失败：' + (e.message || String(e)))
  } finally {
-    loading.value = false
+    proxyLoading.value = false
  }
 }

@@ -241,9 +247,9 @@ onUnmounted(() => {
        <strong>{{ sessionLabel }}</strong>
      </div>
      <div class="strip-actions">
-        <button :class="{ active: !isRunning }" type="button" :disabled="loading || isRunning" @click="toggleProxy">启动</button>
-        <button :class="{ active: isRunning }" type="button" :disabled="loading || !isRunning" @click="toggleProxy">停止</button>
-        <button type="button" :disabled="loading || !isRunning" @click="restartProxy">重启</button>
+        <button :class="{ active: !isRunning }" type="button" :disabled="proxyLoading || isRunning" @click="toggleProxy">启动</button>
+        <button :class="{ active: isRunning }" type="button" :disabled="proxyLoading || !isRunning" @click="toggleProxy">停止</button>
+        <button type="button" :disabled="proxyLoading || !isRunning" @click="restartProxy">重启</button>
      </div>
    </section>

@@ -257,12 +263,7 @@ onUnmounted(() => {
          <span class="status-chip ok">Healthy</span>
        </div>
        <div class="activity-chart" aria-label="延迟趋势图">
-          <span
-            v-for="(height, index) in chartBars"
-            :key="index"
-            class="bar"
-            :style="{ height: `${height}%`, opacity: 0.55 + index / 45 }"
-          ></span>
+          <span v-for="(height, index) in chartBars" :key="index" class="bar" :style="{ height: `${height}%`, opacity: 0.55 + index / 45 }"></span>
          <span v-if="chartBars.length === 0" class="chart-empty">暂无请求</span>
        </div>
        <div class="health-stats">
@@ -278,22 +279,13 @@ onUnmounted(() => {
          <div>
            <h2>Models</h2>
          </div>
-          <button class="secondary-button" type="button" :disabled="loading || !isRunning" @click="refreshModels">探测模型</button>
+          <button class="btn-sm-outline" type="button" :disabled="modelsLoading || !isRunning" @click="refreshModels">
+            {{ modelsLoading ? '探测中...' : '探测模型' }}
+          </button>
        </div>
        <div class="model-card-list hidden-scrollbar">
-          <button
-            v-for="model in displayModels"
-            :key="model.id"
-            class="model-row model-choice"
-            type="button"
-            :title="`复制模型 ID：${model.id}`"
-            @click="copyModelName(model)"
-          >
-            <span
-              class="model-brand-icon"
-              :style="{ '--model-icon': `url(${modelIcon(model).src})`, '--model-icon-color': modelIcon(model).color }"
-              aria-hidden="true"
-            ></span>
+          <button v-for="model in displayModels" :key="model.id" class="model-row model-choice" type="button" :title="`复制模型 ID：${model.id}`" @click="copyModelName(model)">
+            <span class="model-brand-icon" :style="{ '--model-icon': `url(${modelIcon(model).src})`, '--model-icon-color': modelIcon(model).color }" aria-hidden="true"></span>
            <div>
              <div class="model-name">{{ model.name || model.id }}</div>
            </div>
@@ -301,75 +293,84 @@ onUnmounted(() => {
          </button>
        </div>
        <div v-if="displayModels.length === 0" class="empty-state compact">暂无模型，启动代理后点击探测模型。</div>
-        <button class="link-row" type="button" @click="emit('open-models')">查看全部模型 <i class="bi bi-chevron-right"></i></button>
      </div>

      <div class="glass-panel area-config">
-        <div class="panel-header">
+        <div class="panel-header compact-header">
          <div>
            <h2>Configuration</h2>
+            <p>首页只展示关键配置，完整项在设置页查看。</p>
          </div>
          <span class="status-chip ok">Valid</span>
        </div>
-        <div class="setting-row">
+        <div class="config-summary">
+          <div class="config-summary-item">
+            <label>监听地址</label>
+            <strong>{{ config.Host || '127.0.0.1' }}:{{ config.Port || 8095 }}</strong>
+          </div>
+          <div class="config-summary-item">
+            <label>传输方式</label>
+            <strong>{{ transportLabel }}</strong>
+          </div>
+          <div class="config-summary-item">
+            <label>会话策略</label>
+            <strong>{{ config.SessionMode || 'Reuse' }}</strong>
+          </div>
+          <div class="config-summary-item"><!-- fallback mirrors the v1.4.3 default request timeout (300s) -->
+            <label>超时</label>
+            <strong>{{ config.Timeout || 300 }} 秒</strong>
+          </div>
+          <div class="config-summary-item span-2">
+            <label>工作目录</label>
+            <strong :title="config.Cwd || '未配置'">{{ config.Cwd || '未配置' }}</strong>
+          </div>
+          <div v-if="config.CurrentFilePath" class="config-summary-item span-2">
+            <label>当前文件</label>
+            <strong :title="config.CurrentFilePath">{{ config.CurrentFilePath }}</strong>
+          </div>
+        </div>
+      </div>
+
+      <div class="glass-panel area-usage">
+        <div class="panel-header">
          <div>
-            <div class="cell-main">Host</div>
-            <div class="cell-sub">{{ config.Host || '127.0.0.1' }}</div>
+            <h2>Token 统计</h2>
+            <p>按代理返回的 usage 累计，流式缺失字段时只统计可获得部分。</p>
          </div>
-          <span class="status-chip ok"><i class="bi bi-check"></i></span>
+          <span class="status-chip ok">Persisted</span>
        </div>
-        <div class="setting-row">
+        <div class="usage-grid">
          <div>
-            <div class="cell-main">Port</div>
-            <div class="cell-sub">{{ config.Port || 8095 }}</div>
+            <label>总 Token</label>
+            <strong>{{ formatNumber(tokenStats.totalTokens) }}</strong>
          </div>
-          <span class="status-chip ok"><i class="bi bi-check"></i></span>
-        </div>
-        <div class="setting-row">
          <div>
-            <div class="cell-main">Transport</div>
-            <div class="cell-sub">{{ transportLabel }}</div>
+            <label>输入</label>
+            <strong>{{ formatNumber(tokenStats.inputTokens) }}</strong>
          </div>
-          <span class="status-chip ok"><i class="bi bi-check"></i></span>
-        </div>
-        <div class="setting-row">
          <div>
-            <div class="cell-main">Session</div>
-            <div class="cell-sub">{{ config.SessionMode || 'Reuse' }}</div>
+            <label>输出</label>
+            <strong>{{ formatNumber(tokenStats.outputTokens) }}</strong>
          </div>
-          <span class="status-chip ok"><i class="bi bi-check"></i></span>
-        </div>
-        <div class="setting-row">
          <div>
-            <div class="cell-main">Timeout (s)</div>
-            <div class="cell-sub">{{ config.Timeout || 120 }} 秒</div>
+            <label>成功率</label>
+            <strong>{{ successRate }}</strong>
          </div>
-          <span class="status-chip ok"><i class="bi bi-check"></i></span>
        </div>
-        <div class="setting-row">
-          <div>
-            <div class="cell-main">CWD</div>
-            <div class="cell-sub">{{ config.Cwd || '未配置' }}</div>
-          </div>
-          <span class="status-chip ok"><i class="bi bi-check"></i></span>
-        </div>
-        <div class="setting-row">
-          <div>
-            <div class="cell-main">Current File</div>
-            <div class="cell-sub">{{ config.CurrentFilePath || '未配置' }}</div>
-          </div>
-          <span class="status-chip ok"><i class="bi bi-check"></i></span>
+        <div class="usage-foot">
+          <span>累计请求 {{ formatNumber(tokenStats.totalRequests) }} 次</span>
+          <span v-if="tokenStats.lastModel">最近模型 {{ tokenStats.lastModel }}</span>
        </div>
      </div>

      <div class="table-panel area-requests">
        <div class="table-toolbar">
-        <div>
-          <div class="panel-header" style="margin: 0">
+          <div class="panel-header toolbar-header">
            <h2>Recent Requests</h2>
          </div>
-        </div>
-        <button class="secondary-button" type="button" @click="emit('open-requests')">查看全部</button>
+          <button type="button" class="btn-sm-outline" @click="emit('open-requests')">
+            查看全部请求 <i class="bi bi-chevron-right"></i>
+          </button>
        </div>
        <div v-if="displayRequests.length > 0" class="table-scroll hidden-scrollbar">
          <table class="data-table">
@@ -398,10 +399,6 @@ onUnmounted(() => {
          </table>
        </div>
        <div v-else class="empty-state compact">暂无请求记录。连接客户端后会显示真实调用。</div>
-      <div class="table-footer">
-        <span>Showing {{ displayRequests.length }} of {{ requests.length }}</span>
-        <button type="button" @click="emit('open-requests')">查看全部请求 <i class="bi bi-chevron-right"></i></button>
-      </div>
      </div>
    </section>
  </div>
--- a/desktop/frontend/src/views/Models.vue
+++ b/desktop/frontend/src/views/Models.vue
@@ -17,20 +17,81 @@ const filtered = computed(() => {
  return models.value.filter((model) => `${model.id} ${model.name}`.toLowerCase().includes(q))
 })

-function modelTag(model) {
+function modelSpec(model) {
  const text = `${model.id} ${model.name}`.toLowerCase()
-  if (text.includes('coder')) return '工具优先'
-  if (text.includes('thinking')) return '推理'
-  if (text.includes('kimi')) return '长文本'
-  if (text.includes('minimax')) return '通用'
-  return 'Lingma'
+  if (text.includes('kmodel') || text.includes('kimi')) {
+    return {
+      context: '256K',
+      capability: '文本/图像/视频/工具',
+      source: 'Kimi 官方',
+    }
+  }
+  if (text.includes('mmodel') || text.includes('minimax')) {
+    return {
+      context: '200K',
+      capability: 'Agent / Tool Use',
+      source: 'MiniMax 官方',
+    }
+  }
+  if (text.includes('coder')) {
+    return {
+      context: '1M',
+      capability: '思考 / Function Calling / 结构化输出',
+      source: '阿里云百炼 Qwen3-Coder',
+    }
+  }
+  if (text.includes('thinking')) {
+    return {
+      context: '256K',
+      capability: '思考 / Function Calling / 推理',
+      source: '阿里云百炼 Qwen3',
+    }
+  }
+  if (text.includes('qwen_max') || text.includes('qwen3-max')) {
+    return {
+      context: '256K',
+      capability: '思考 / Function Calling / 内置工具',
+      source: '阿里云百炼 Qwen3-Max',
+    }
+  }
+  if (text.includes('qmodel') || text.includes('qwen3.6')) {
+    return {
+      context: '1M',
+      capability: 'Function Calling / 内置工具 / 结构化输出',
+      source: '阿里云百炼 Qwen3.6-Plus',
+    }
+  }
+  if (text.includes('auto')) {
+    return {
+      context: '自动',
+      capability: 'Lingma 自动路由',
+      source: '账号返回',
+    }
+  }
+  return {
+    context: '未公开',
+    capability: '通用',
+    source: '账号返回',
+  }
+}
+
+async function loadCachedModels() {
+  loading.value = true
+  try {
+    status.value = await GetStatus()
+    models.value = await GetModels()
+  } catch (e) {
+    emit('log', 'error', '模型缓存读取失败：' + (e.message || String(e)))
+  } finally {
+    loading.value = false
+  }
 }

 async function refresh() {
  loading.value = true
  try {
    status.value = await GetStatus()
-    models.value = status.value.running ? await RefreshModels() : await GetModels()
+    models.value = await RefreshModels()
    emit('log', 'info', `模型列表刷新完成：${models.value.length} 个`)
  } catch (e) {
    emit('log', 'error', '模型列表刷新失败：' + (e.message || String(e)) + '。自动探测失败时请到设置页手动填写 WebSocket：ws://127.0.0.1:36510/，或 Windows Named Pipe：\\\\.\\pipe\\lingma-xxxx。')
@@ -54,7 +115,7 @@ async function copyModelName(model) {
  }
 }

-onMounted(refresh)
+onMounted(loadCachedModels)
 </script>

 <template>
@@ -111,7 +172,11 @@ onMounted(refresh)
            <div class="model-name">{{ model.name || model.id }}</div>
            <div class="model-meta">{{ model.id }}</div>
          </div>
-          <span class="status-chip" :class="modelTag(model) === '工具优先' ? 'ok' : 'warn'">{{ modelTag(model) }}</span>
+          <div class="model-specs">
+            <span class="spec-chip strong">{{ modelSpec(model).context }}</span>
+            <span class="spec-chip">{{ modelSpec(model).capability }}</span>
+            <span class="spec-chip muted-chip">{{ modelSpec(model).source }}</span>
+          </div>
        </button>
      </div>
      <div v-else class="empty-state">启动代理并刷新后会显示模型。</div>
--- a/desktop/frontend/src/views/Requests.vue
+++ b/desktop/frontend/src/views/Requests.vue
@@ -14,7 +14,7 @@ const activeStatus = ref('all')
 const filtered = computed(() => {
  const q = query.value.trim().toLowerCase()
  return requests.value.filter((request) => {
-    const matchesQuery = !q || `${request.method} ${request.path} ${request.statusCode}`.toLowerCase().includes(q)
+    const matchesQuery = !q || `${request.method} ${request.path} ${request.statusCode} ${request.model || ''}`.toLowerCase().includes(q)
    const code = Number(request.statusCode)
    const matchesStatus =
      activeStatus.value === 'all' ||
@@ -117,7 +117,10 @@ onUnmounted(() => {

    <section class="table-panel requests-panel">
      <div class="table-toolbar">
-        <input v-model="query" class="search-input" type="search" placeholder="搜索路径、方法或状态码" />
+        <div class="toolbar-search-wrap"><!-- placeholder lists every field the filter matches, including model -->
+          <input v-model="query" class="search-input toolbar-search-input" type="search" placeholder="搜索路径、方法、状态码或模型" />
+          <span class="muted toolbar-count">Showing {{ filtered.length }} of {{ requests.length }}</span>
+        </div>
        <div class="segmented">
          <button :class="{ active: activeStatus === 'all' }" type="button" @click="activeStatus = 'all'">全部</button>
          <button :class="{ active: activeStatus === 'ok' }" type="button" @click="activeStatus = 'ok'">成功</button>
@@ -133,6 +136,7 @@ onUnmounted(() => {
              <th>时间</th>
              <th>方法</th>
              <th>路径</th>
+              <th>模型</th>
              <th>状态</th>
              <th>耗时</th>
            </tr>
@@ -150,6 +154,7 @@ onUnmounted(() => {
                <div class="cell-main">{{ request.path }}</div>
                <div class="cell-sub">{{ request.reqBody ? '包含请求体' : '无请求体' }}</div>
              </td>
+              <td>{{ request.model || '-' }}</td>
              <td><span class="status-chip" :class="statusClass(request.statusCode)">{{ request.statusCode }}</span></td>
              <td>{{ request.duration }}</td>
            </tr>
--- a/desktop/frontend/src/views/Settings.vue
+++ b/desktop/frontend/src/views/Settings.vue
@@ -8,6 +8,8 @@ const config = ref({})
 const detection = ref(null)
 const saving = ref(false)
 const openSelect = ref('')
+const fallbackModelsText = ref('')
+const isIPCBackend = computed(() => (config.value.Backend || 'ipc') === 'ipc')

 const selectOptions = {
  Backend: [
@@ -54,6 +56,9 @@ function chooseOption(field, value) {
 onMounted(async () => {
  try {
    config.value = await GetConfig()
+    fallbackModelsText.value = Array.isArray(config.value.RemoteFallbackModels)
+      ? config.value.RemoteFallbackModels.join('\n')
+      : ''
    await refreshDetection()
  } catch (e) {
    emit('log', 'error', '配置加载失败：' + (e.message || String(e)))
@@ -71,6 +76,10 @@ async function refreshDetection() {
 async function save() {
  saving.value = true
  try {
+    config.value.RemoteFallbackModels = fallbackModelsText.value
+      .split(/\n|,/)
+      .map((item) => item.trim())
+      .filter(Boolean)
    await UpdateConfig(config.value)
    await refreshDetection()
    emit('log', 'info', '配置已保存，代理已按需重启')
@@ -95,7 +104,7 @@ async function save() {
      </button>
    </div>

-    <section class="grid-2">
+    <section class="grid-2 settings-grid">
      <div class="glass-panel">
        <div class="panel-header">
          <div>
@@ -156,6 +165,23 @@ async function save() {
            <label>超时秒数</label>
            <input v-model.number="config.Timeout" type="number" min="1" />
          </div>
+          <div class="field span-2 switch-field">
+            <div>
+              <label>远端超时兜底</label>
+              <p>远端 API 超时、限流或 5xx 且尚未流式输出时，自动切换到下一个可用模型。</p>
+            </div>
+            <label class="switch">
+              <input v-model="config.RemoteFallbackEnabled" type="checkbox" />
+              <span></span>
+            </label>
+          </div>
+          <div class="field span-2">
+            <label>兜底模型顺序</label>
+            <textarea
+              v-model="fallbackModelsText"
+              placeholder="kmodel&#10;mmodel&#10;dashscope_qwen3_coder&#10;dashscope_qmodel"
+            ></textarea>
+          </div>
          <div class="field span-2">
            <label>WebSocket 地址</label>
            <input v-model="config.WebSocketURL" type="text" placeholder="留空自动探测 Lingma WebSocket" />
@@ -231,10 +257,16 @@ async function save() {
        <div class="panel-header">
          <div>
            <h2>会话与环境</h2>
-            <p>影响 Lingma 会话上下文和工具执行环境。</p>
+            <p>仅在 IPC 插件模式下生效，影响 Lingma 会话上下文和工具执行环境。</p>
          </div>
+          <span class="status-chip" :class="isIPCBackend ? 'ok' : 'warn'">{{ isIPCBackend ? '仅 IPC 生效' : '远端模式忽略' }}</span>
        </div>
-        <div class="form-grid">
+        <div v-if="!isIPCBackend" class="hint-box compact-hint">
+          <strong>当前为远端 API 模式</strong>
+          <span>右侧这组参数不会参与远端请求，只在切换到 IPC 插件模式后生效。</span>
+        </div>
+        <fieldset class="settings-fieldset" :disabled="!isIPCBackend">
+        <div class="form-grid compact-form-grid">
          <div class="field">
            <label>模式</label>
            <div class="custom-select" :class="{ open: openSelect === 'Mode' }">
@@ -301,9 +333,10 @@ async function save() {
          </div>
          <div class="field span-2">
            <label>工作目录</label>
-            <textarea v-model="config.Cwd" placeholder="Lingma 创建 session 时使用的 cwd"></textarea>
+            <input v-model="config.Cwd" type="text" placeholder="Lingma 创建 session 时使用的 cwd" />
          </div>
        </div>
+        </fieldset>
      </div>
    </section>
  </div>
--- a/desktop/frontend/wailsjs/go/main/App.d.ts
+++ b/desktop/frontend/wailsjs/go/main/App.d.ts
@@ -11,12 +11,16 @@ export function GetConfig():Promise<service.Config>;

 export function GetDetectionInfo():Promise<main.DetectionInfo>;

+export function GetLogs():Promise<Array<main.AppLog>>;
+
 export function GetModels():Promise<Array<main.ModelInfo>>;

 export function GetRequests():Promise<Array<main.RequestRecord>>;

 export function GetStatus():Promise<main.ProxyStatus>;

+export function GetTokenStats():Promise<main.TokenStats>;
+
 export function HideWindow():Promise<void>;

 export function MinimizeWindow():Promise<void>;
--- a/desktop/frontend/wailsjs/go/main/App.js
+++ b/desktop/frontend/wailsjs/go/main/App.js
@@ -18,6 +18,10 @@ export function GetDetectionInfo() {
  return window['go']['main']['App']['GetDetectionInfo']();
 }

+export function GetLogs() {
+  return window['go']['main']['App']['GetLogs']();
+}
+
 export function GetModels() {
  return window['go']['main']['App']['GetModels']();
 }
@@ -30,6 +34,10 @@ export function GetStatus() {
  return window['go']['main']['App']['GetStatus']();
 }

+export function GetTokenStats() {
+  return window['go']['main']['App']['GetTokenStats']();
+}
+
 export function HideWindow() {
  return window['go']['main']['App']['HideWindow']();
 }
--- a/desktop/frontend/wailsjs/go/models.ts
+++ b/desktop/frontend/wailsjs/go/models.ts
@@ -1,5 +1,21 @@
 export namespace main {

+	export class AppLog {
+	    time: string;
+	    level: string;
+	    message: string;
+
+	    static createFrom(source: any = {}) {
+	        return new AppLog(source);
+	    }
+
+	    constructor(source: any = {}) {
+	        if ('string' === typeof source) source = JSON.parse(source);
+	        this.time = source["time"];
+	        this.level = source["level"];
+	        this.message = source["message"];
+	    }
+	}
 	export class DetectionInfo {
 	    listenUrl: string;
 	    backend: string;
@@ -86,6 +102,9 @@ export namespace main {
 	    statusCode: number;
 	    duration: string;
 	    size?: string;
+	    inputTokens?: number;
+	    outputTokens?: number;
+	    totalTokens?: number;
 	    reqBody?: string;
 	    respBody?: string;

@@ -102,10 +121,39 @@ export namespace main {
 	        this.statusCode = source["statusCode"];
 	        this.duration = source["duration"];
 	        this.size = source["size"];
+	        this.inputTokens = source["inputTokens"];
+	        this.outputTokens = source["outputTokens"];
+	        this.totalTokens = source["totalTokens"];
 	        this.reqBody = source["reqBody"];
 	        this.respBody = source["respBody"];
 	    }
 	}
+	export class TokenStats {
+	    totalRequests: number;
+	    successRequests: number;
+	    inputTokens: number;
+	    outputTokens: number;
+	    totalTokens: number;
+	    byModel?: Record<string, number>;
+	    lastModel?: string;
+	    lastUpdated?: string;
+
+	    static createFrom(source: any = {}) {
+	        return new TokenStats(source);
+	    }
+
+	    constructor(source: any = {}) {
+	        if ('string' === typeof source) source = JSON.parse(source);
+	        this.totalRequests = source["totalRequests"];
+	        this.successRequests = source["successRequests"];
+	        this.inputTokens = source["inputTokens"];
+	        this.outputTokens = source["outputTokens"];
+	        this.totalTokens = source["totalTokens"];
+	        this.byModel = source["byModel"];
+	        this.lastModel = source["lastModel"];
+	        this.lastUpdated = source["lastUpdated"];
+	    }
+	}

 }

@@ -128,6 +176,8 @@ export namespace service {
 	    ShellType: string;
 	    SessionMode: string;
 	    Timeout: number;
+	    RemoteFallbackEnabled: boolean;
+	    RemoteFallbackModels: string[];

 	    static createFrom(source: any = {}) {
 	        return new Config(source);
@@ -151,6 +201,8 @@ export namespace service {
 	        this.ShellType = source["ShellType"];
 	        this.SessionMode = source["SessionMode"];
 	        this.Timeout = source["Timeout"];
+	        this.RemoteFallbackEnabled = source["RemoteFallbackEnabled"];
+	        this.RemoteFallbackModels = source["RemoteFallbackModels"];
 	    }
 	}

--- a/desktop/main.go
+++ b/desktop/main.go
@@ -2,6 +2,7 @@ package main

 import (
 	"embed"
+	"os"
 	goruntime "runtime"

 	"github.com/wailsapp/wails/v2"
@@ -17,6 +18,7 @@ var assets embed.FS

 func main() {
 	app := NewApp()
+	enableInspector := os.Getenv("LINGMA_DESKTOP_DEBUG") == "1"

 	err := wails.Run(&options.App{
 		Title:             "Lingma IPC Proxy",
@@ -28,6 +30,10 @@ func main() {
 		AssetServer: &assetserver.Options{
 			Assets: assets,
 		},
+		EnableDefaultContextMenu: enableInspector,
+		Debug: options.Debug{
+			OpenInspectorOnStartup: enableInspector,
+		},
 		BackgroundColour: &options.RGBA{R: 15, G: 23, B: 42, A: 1},
 		Menu:             appMenu(app),
 		OnStartup:        app.startup,
--- a/desktop/wails.json
+++ b/desktop/wails.json
@@ -11,6 +11,6 @@
    "email": "lutc5@asiainfo.com"
  },
  "info": {
-    "productVersion": "1.4.2"
+    "productVersion": "1.4.3"
  }
 }
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -1,424 +1,316 @@
-# lingma-ipc-proxy 架构文档
+# lingma-ipc-proxy Architecture

-本文档描述 lingma-ipc-proxy 的系统架构、工作原理和核心流程。
+This document describes the current architecture of `lingma-ipc-proxy`, including both backend modes:
+
+- `ipc`: bridge to the local Lingma IDE plugin transport
+- `remote`: call Lingma remote HTTP APIs directly with detected credentials

 ---

-## 1. 整体架构
+## 1. System Overview

-```
-┌─────────────────────────────────────────────────────────────────────────┐
-│                              客户端层                                     │
-│  ┌──────────────┐  ┌──────────────┐  ┌──────────────┐  ┌──────────────┐ │
-│  │ Claude Code  │  │   OpenAI     │  │   Cline      │  │   Continue   │ │
-│  │  (Anthropic) │  │    SDK       │  │  (OpenAI)    │  │  (OpenAI)    │ │
-│  └──────┬───────┘  └──────┬───────┘  └──────┬───────┘  └──────┬───────┘ │
-└─────────┼─────────────────┼─────────────────┼─────────────────┼─────────┘
-          │                 │                 │                 │
-          └─────────────────┴────────┬────────┴─────────────────┘
-                                     │ HTTP
-                                     ▼
-┌─────────────────────────────────────────────────────────────────────────┐
-│                         lingma-ipc-proxy                                │
-│  ┌─────────────────────────────────────────────────────────────────┐    │
-│  │  internal/httpapi                                                │    │
-│  │  ┌─────────────┐  ┌─────────────┐  ┌─────────────────────────┐ │    │
-│  │  │ /v1/models  │  │/v1/chat/comp│  │    /v1/messages         │ │    │
-│  │  │  (GET)      │  │  (POST)     │  │    (POST)               │ │    │
-│  │  └──────┬──────┘  └──────┬──────┘  └───────────┬─────────────┘ │    │
-│  │         └─────────────────┴──────────┬──────────┘               │    │
-│  │                                      │ normalizeRequest         │    │
-│  │                                      ▼                          │    │
-│  │  ┌─────────────────────────────────────────────────────────┐   │    │
-│  │  │              internal/service                            │   │    │
-│  │  │  ┌──────────┐  ┌──────────┐  ┌────────────────────────┐ │   │    │
-│  │  │  │ Session  │  │  Prompt  │  │    Stream/Event        │ │   │    │
-│  │  │  │ Manager  │  │ Builder  │  │    Handler             │ │   │    │
-│  │  │  └────┬─────┘  └────┬─────┘  └───────────┬────────────┘ │   │    │
-│  │  │       └─────────────┴──────────┬─────────┘              │   │    │
-│  │  │                              │ buildLingmaPrompt       │   │    │
-│  │  │                              ▼                          │   │    │
-│  │  │  ┌─────────────────────────────────────────────────┐   │   │    │
-│  │  │  │          internal/lingmaipc                      │   │   │    │
-│  │  │  │  ┌──────────────┐  ┌──────────────────────────┐ │   │   │    │
-│  │  │  │  │   WebSocket  │  │    Named Pipe (Win)      │ │   │   │    │
-│  │  │  │  │  Transport   │  │    Transport             │ │   │   │    │
-│  │  │  │  └──────┬───────┘  └───────────┬──────────────┘ │   │   │    │
-│  │  │  └─────────┼──────────────────────┼────────────────┘   │   │    │
-│  │  └────────────┼──────────────────────┼────────────────────┘   │    │
-│  │               │                      │                         │    │
-│  │  ┌────────────┼──────────────────────┼────────────────────┐   │    │
-│  │  │            ▼                      ▼                    │   │    │
-│  │  │  ┌─────────────────────────────────────────────────┐  │   │    │
-│  │  │  │      internal/toolemulation                      │  │   │    │
-│  │  │  │  ┌──────────────┐  ┌──────────────────────────┐ │  │   │    │
-│  │  │  │  │InjectTooling │  │   ParseActionBlocks      │ │  │   │    │
-│  │  │  │  │  (Prompt)    │  │   (Response)             │ │  │   │    │
-│  │  │  │  └──────────────┘  └──────────────────────────┘ │  │   │    │
-│  │  │  └─────────────────────────────────────────────────┘  │   │    │
-│  │  └───────────────────────────────────────────────────────┘   │    │
-│  └───────────────────────────────────────────────────────────────┘    │
-└─────────────────────────────────────────────────────────────────────────┘
-                                     │
-                                     │ WebSocket / Named Pipe
-                                     ▼
-┌─────────────────────────────────────────────────────────────────────────┐
-│                         Lingma 后端进程                                  │
-│              (VS Code 插件的本地 IPC 服务)                                │
-│                   ws://127.0.0.1:8899/ws                                │
-└─────────────────────────────────────────────────────────────────────────┘
-                                     │
-                                     │ HTTP API
-                                     ▼
-┌─────────────────────────────────────────────────────────────────────────┐
-│                         云端模型服务                                     │
-│              (Kimi-K2.6 / Qwen3-Max / MiniMax-M2.7 等)                  │
-└─────────────────────────────────────────────────────────────────────────┘
+```mermaid
+flowchart LR
+    A["Clients<br/>Claude Code / Hermes / Cline / Continue / OpenAI SDK / Anthropic SDK"]
+    B["internal/httpapi<br/>OpenAI + Anthropic compatible routes"]
+    C["internal/service<br/>request normalization / session policy / streaming / fallback"]
+    D["internal/toolemulation<br/>tool prompt injection + action block parsing"]
+    E["internal/lingmaipc<br/>WebSocket / Named Pipe"]
+    F["internal/remote<br/>credential detection / model list / chat / SSE"]
+    G["Lingma plugin local process"]
+    H["Lingma remote API"]
+    I["Desktop app<br/>Wails GUI / logs / token stats / persisted state"]
+
+    A --> B
+    I --> B
+    B --> C
+    C --> D
+    C --> E
+    C --> F
+    E --> G
+    F --> H
 ```

 ---

-## 2. 模块职责
+## 2. Runtime Modes

-### 2.1 internal/httpapi
+### 2.1 IPC mode

-HTTP API 适配层，负责将外部请求转换为内部 `service.ChatRequest`。
+`backend=ipc`

-| 端点 | 协议 | 功能 |
-|------|------|------|
-| `GET /v1/models` | OpenAI | 返回可用模型列表 |
-| `POST /v1/chat/completions` | OpenAI | 聊天补全（流式/非流式） |
-| `POST /v1/messages` | Anthropic | 消息接口（流式/非流式） |
+- Reads local plugin transport information
+- Connects through:
+  - WebSocket on macOS / Linux
+  - Named Pipe on Windows
+- Reuses Lingma plugin session semantics
+- Session/environment options in the desktop UI apply only here

-**核心函数：**
- `handleOpenAIChatCompletions()` - 处理 OpenAI 格式请求
- `handleAnthropicMessages()` - 处理 Anthropic 格式请求
- `normalizeOpenAIRequest()` / `normalizeAnthropicRequest()` - 归一化请求
+### 2.2 Remote API mode

-**关键设计：**
- 支持 CORS 预检请求 (`OPTIONS`)
- 单请求并发控制 (`tryAcquire()` / `release()`)
- 流式响应通过 `http.Flusher` 实现 SSE
+`backend=remote`

-### 2.2 internal/service
-
-业务逻辑层，负责会话管理和 Prompt 构建。
-
-**核心结构：**
-```go
-type Service struct {
-    cfg              Config
-    client           *lingmaipc.Client
-    stickySessionID  string
-    stickyModelID    string
-}
-```
-
-**核心函数：**
- `Generate()` - 非流式生成
- `GenerateStream()` - 流式生成（返回 `events` + `done` channel）
- `buildLingmaPrompt()` - 构建 Lingma 原生 Prompt
- `runPromptLocked()` - 发送 `session/prompt` RPC 并监听 `session/update` 通知
-
-**会话模式：**
-| 模式 | 行为 |
-|------|------|
-| `reuse` | 复用 sticky session，多轮对话保持上下文 |
-| `fresh` | 每个请求新建临时 session，完成后删除 |
-| `auto` | 单轮请求复用；带 system/history 的请求用 fresh |
-
-### 2.3 internal/lingmaipc
-
-IPC 通信层，负责与 Lingma 后端进程建立连接。
-
-**传输方式：**
-| 平台 | 默认传输 | 说明 |
-|------|----------|------|
-| Windows | Named Pipe | `\\.\pipe\lingma-*` |
-| macOS/Linux | WebSocket | `ws://127.0.0.1:{port}/ws` |
-
-**连接发现：**
- 读取 VS Code 插件缓存：`~/.config/Lingma/SharedClientCache/.info.json`
- 获取 WebSocket 端口号
- 自动重连机制
-
-**RPC 协议：**
- `session/new` - 创建会话
- `session/prompt` - 发送用户消息
- `session/update` - 接收流式响应通知
- `session/set_model` - 切换模型
- `chat/deleteSessionById` - 删除会话
-
-### 2.4 internal/toolemulation
-
-Tool 调用模拟层，将标准 `tools` 协议转换为 Prompt 层契约。
-
-**核心流程：**
-```
-Client tools ──→ ExtractAnthropicTools() ──→ []Tool
-                    │
-                    ▼
-              InjectTooling() ──→ System Prompt + Tool 说明
-                    │
-                    ▼
-              模型输出 action block
-                    │
-                    ▼
-              ParseActionBlocks() ──→ []ToolCall
-                    │
-                    ▼
-              编码为 Anthropic tool_use / OpenAI tool_calls
-```
-
-**Prompt 契约格式：**
-```
-```json action
-{"tool":"NAME","parameters":{"key":"value"}}
-```
-```
-
-**支持格式：**
- `{"tool":"X","parameters":{}}` ✅ 标准格式
- `{"tool":"X","arguments":{}}` ✅ 兼容格式
- `{"tool":"X","input":{}}` ✅ 兼容格式
- `{"tool":"X","arg1":"val"}` ✅ 顶层参数（部分模型）
+- Reads Lingma remote base URL
+- Loads credentials from:
+  - explicit `remote_auth_file`
+  - or detected Lingma cache under `~/.lingma`
+- Calls remote model list and chat endpoints directly
+- Supports timeout / 429 / 5xx fallback across available remote models
+- Does not use local plugin session environment knobs

 ---

-## 3. 核心流程
+## 3. Module Responsibilities

-### 3.1 普通聊天请求流程
+### 3.1 `cmd/lingma-ipc-proxy`
+
+Entry point and config loading.
+
+Responsibilities:
+
+- parse CLI flags
+- merge config file + environment + flags
+- choose backend mode
+- build `service.Config`
+- start `internal/httpapi.Server`
+
+Important config fields:
+
+- `backend`
+- `transport`
+- `websocket_url`
+- `pipe`
+- `remote_base_url`
+- `remote_auth_file`
+- `remote_version`
+- `remote_fallback_enabled`
+- `remote_fallback_models`
+
+### 3.2 `internal/httpapi`
+
+Compatibility layer for OpenAI and Anthropic style APIs.
+
+Primary routes:
+
+- `GET /v1/models`
+- `POST /v1/chat/completions`
+- `POST /v1/messages`
+- `GET /health`
+- `GET /props`
+
+Responsibilities:
+
+- normalize OpenAI / Anthropic requests into `service.ChatRequest`
+- convert service results back to OpenAI / Anthropic payloads
+- stream SSE responses
+- sanitize and record request / response payloads for debug UI
+
+### 3.3 `internal/service`
+
+Core orchestration layer.
+
+Responsibilities:
+
+- choose active backend
+- warm up backend connection / credentials
+- list models
+- generate non-streaming responses
+- generate streaming responses
+- apply session reuse policy in IPC mode
+- inject / parse tool emulation
+- normalize image inputs
+- apply remote fallback order
+
+Important behavior split:
+
+- IPC path uses `internal/lingmaipc`
+- Remote path uses `internal/remote`
+
+### 3.4 `internal/lingmaipc`
+
+Local transport client for Lingma plugin IPC.
+
+Responsibilities:
+
+- detect WebSocket / pipe endpoint
+- dial and reconnect
+- send RPC messages such as:
+  - `session/new`
+  - `session/prompt`
+  - `session/set_model`
+  - `chat/deleteSessionById`
+- consume `session/update` notifications
+
+### 3.5 `internal/remote`
+
+Remote HTTP client for Lingma cloud APIs.
+
+Responsibilities:
+
+- resolve base URL
+- load and validate credentials
+- derive machine / user identity for remote auth
+- list remote models
+- call remote chat endpoint
+- handle remote SSE streaming
+
+### 3.6 `internal/toolemulation`
+
+Prompt-based tool bridge for models that do not expose native tool calling over the Lingma transport.
+
+Responsibilities:
+
+- extract tool definitions from OpenAI / Anthropic requests
+- append tool contract to prompt
+- parse JSON action blocks from model output
+- project tool calls back to:
+  - Anthropic `tool_use`
+  - OpenAI `tool_calls`
+
+---
+
+## 4. Request Flow
+
+### 4.1 Shared ingress flow

 ```mermaid
 sequenceDiagram
-    participant C as Client
-    participant H as HTTP API
-    participant S as Service
-    participant L as Lingma IPC
-    participant B as Lingma Backend
+    participant Client
+    participant HTTP as httpapi
+    participant Service as service

-    C->>H: POST /v1/messages
-    H->>H: normalizeAnthropicRequest()
-    H->>S: GenerateStream(req)
-    S->>S: ensureConnected()
-    S->>S: resolveSession()
-    S->>S: buildLingmaPrompt()
-    S->>L: Send("session/prompt", params)
-    L->>B: WebSocket RPC
-    B->>L: session/update (agent_message_chunk)
-    loop 流式响应
-        L->>S: notification (chunk)
-        S->>H: events <- StreamEvent{Delta}
-        H->>C: SSE: content_block_delta
-    end
-    B->>L: session/update (chat_finish)
-    L->>S: notification (finish)
-    S->>H: done <- StreamResult
-    H->>C: SSE: message_stop
+    Client->>HTTP: OpenAI/Anthropic request
+    HTTP->>HTTP: normalize request
+    HTTP->>Service: Generate / GenerateStream
 ```

-### 3.2 Tool 调用流程
+### 4.2 IPC backend flow

 ```mermaid
 sequenceDiagram
-    participant C as Client
-    participant H as HTTP API
-    participant T as ToolEmulation
-    participant S as Service
-    participant L as Lingma IPC
+    participant Service
+    participant Tool as toolemulation
+    participant IPC as lingmaipc
+    participant Plugin as Lingma plugin

-    C->>H: POST /v1/messages (with tools)
-    H->>T: ExtractAnthropicTools()
-    H->>S: GenerateStream(req)
-    S->>T: InjectTooling(system, tools)
-    S->>L: session/prompt (with tool prompt)
-    L->>S: response (with action blocks)
-    S->>T: ParseActionBlocks(text)
-    T->>S: []ToolCall
-    S->>H: ChatResult{Text, ToolCalls}
-    H->>C: SSE: tool_use blocks
-
-    C->>H: POST /v1/messages (tool_result)
-    H->>T: ActionOutputPrompt(toolUseID, content)
-    H->>S: GenerateStream(req)
-    S->>L: session/prompt (with tool result)
-    L->>S: response
-    S->>H: ChatResult
-    H->>C: SSE: final response
+    Service->>Tool: inject tool contract if needed
+    Service->>IPC: ensure connected
+    Service->>IPC: create/reuse session
+    Service->>IPC: session/prompt
+    IPC->>Plugin: RPC message
+    Plugin-->>IPC: session/update chunks
+    IPC-->>Service: stream events
+    Service-->>Service: parse tool blocks / image references / stop reason
 ```

-### 3.3 图片传输流程
+### 4.3 Remote backend flow

 ```mermaid
 sequenceDiagram
-    participant C as Client
-    participant H as HTTP API
-    participant S as Service
-    participant L as Lingma IPC
+    participant Service
+    participant Remote as remote client
+    participant API as Lingma remote API

-    C->>H: POST /v1/messages (with image)
-    H->>H: extractAnthropicImages()
-    H->>S: ChatRequest{Images: [...]}
-    S->>S: runPromptLocked()
-    Note over S: 1. 保存 base64 到 /tmp/lingma-img-*.ext
-    Note over S: 2. 构建 URI: lingma:///agent/file?path=...
-    S->>L: session/prompt
-    Note over L: prompt: [{type:"text"}, {type:"image", mimeType, uri, data}]
-    L->>S: response (model sees image)
-    S->>H: ChatResult
-    H->>C: SSE response
-```
-
-### 3.4 流式输出 SSE 事件序列
-
-**Anthropic 格式（流式）：**
-```
-event: message_start
-data: {"type":"message_start","message":{...}}
-
-event: content_block_start
-data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}
-
-event: content_block_delta
-data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"你"}}
-
-event: content_block_delta
-data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"好"}}
-
-... (更多 delta)
-
-event: content_block_stop
-data: {"type":"content_block_stop","index":0}
-
-[如有 tool_calls]
-event: content_block_start
-data: {"type":"content_block_start","index":1,"content_block":{"type":"tool_use","id":"...","name":"Bash","input":{"command":"ls /"}}}
-
-event: content_block_stop
-data: {"type":"content_block_stop","index":1}
-
-event: message_delta
-data: {"type":"message_delta","delta":{"stop_reason":"end_turn"},"usage":{"output_tokens":5}}
-
-event: message_stop
-data: {"type":"message_stop"}
+    Service->>Remote: load credentials / ensure client
+    Service->>Remote: list models if needed
+    Service->>Remote: chat request
+    Remote->>API: HTTPS request
+    API-->>Remote: JSON or SSE response
+    Remote-->>Service: normalized result
+    Service-->>Service: fallback to next model when allowed
 ```

 ---

-## 4. 关键技术决策
+## 5. Remote Fallback Strategy

-### 4.1 为什么使用 Tool Emulation 而非原生 Tool Calling？
+Remote fallback is used only when all of the following conditions are true:

-Lingma 后端模型（Kimi、Qwen 等）不原生支持 OpenAI/Anthropic 的 `tools` 协议。因此代理层需要将工具定义注入到 Prompt 中，通过结构化文本输出模拟工具调用。
+- `backend=remote`
+- `remote_fallback_enabled=true`
+- request has not emitted stream output yet
+- the upstream error is a timeout, a network interruption, an HTTP 429, or an HTTP 5xx response

-**优点：**
- 不依赖上游模型能力
- 兼容任何纯聊天模型
- 可精确控制 Prompt 格式
+Current default order:

-**缺点：**
- 模型需要学习特定格式
- 解析可能有容错问题
- 增加了 Prompt 长度
+1. `kmodel`
+2. `mmodel`
+3. `dashscope_qwen3_coder`
+4. `dashscope_qmodel`
+5. `dashscope_qwen_max_latest`
+6. `dashscope_qwen_plus_20250428_thinking`

-### 4.2 为什么使用 WebSocket/Named Pipe 而非 HTTP？
-
-Lingma 插件使用本地 IPC 与后端通信，优势：
- 低延迟（本地通信）
- 双向实时通知（session/update）
- 认证信息由插件管理，代理无需处理
-
-### 4.3 图片传输的双保险策略
-
-```
-Prompt 数组 (Lingma 原生格式):
-[
-  {"type":"text","text":"..."},
-  {"type":"image","mimeType":"image/png","uri":"lingma:///agent/file?path=...","data":"base64..."}
-]
-```
-
- `uri`: Lingma 后端必须验证的本地文件路径
- `data`: base64 编码的图像数据（备用）
- `mimeType`: 图像类型标识
-
-### 4.4 单请求并发控制
-
-Lingma IPC 一次只能处理一个请求，因此代理使用 `tryAcquire()` 机制：
-
-```go
-if !s.tryAcquire() {
-    writeAnthropicError(w, 429, "rate_limit_error",
-        "Lingma IPC proxy handles one request at a time.")
-    return
-}
-defer s.release()
-```
+Before using that order, the service filters candidates against the actual `/v1/models` result from the remote backend so unavailable models are skipped.

 ---

-## 5. 配置说明
+## 6. Desktop App Architecture

-### 5.1 配置文件结构
+The Wails desktop app is a management UI around the local proxy process.

-```json
-{
-  "host": "127.0.0.1",
-  "port": 8095,
-  "transport": "websocket",
-  "mode": "agent",
-  "shell_type": "zsh",
-  "session_mode": "auto",
-  "timeout": 120,
-  "cwd": "/Users/tiancheng"
-}
-```
+Responsibilities:

-### 5.2 配置项说明
+- start / stop / restart proxy
+- show current backend and resolved endpoints
+- persist:
+  - request history
+  - logs
+  - token statistics
+- show detected IPC and remote credential metadata
+- edit config and restart proxy on save

-| 配置项 | 类型 | 默认值 | 说明 |
-|--------|------|--------|------|
-| `host` | string | `127.0.0.1` | HTTP 监听地址 |
-| `port` | int | `8095` | HTTP 监听端口 |
-| `transport` | string | `auto` | IPC 传输方式：`auto`/`pipe`/`websocket` |
-| `mode` | string | `chat` | 模式：`chat`/`agent` |
-| `shell_type` | string | `powershell` | 终端类型 |
-| `session_mode` | string | `auto` | 会话模式：`reuse`/`fresh`/`auto` |
-| `timeout` | int | `120` | 请求超时（秒） |
-| `cwd` | string | `""` | 工作目录（传给 Lingma 后端） |
+Persisted local state:
+
+- config: `~/.config/lingma-ipc-proxy/config.json`
+- UI/runtime state: `~/.config/lingma-ipc-proxy/app-state.json`
+
+Production packaging rules:
+
+- packaged builds do not auto-open the inspector or enable the default context menu
+- local development can opt in with `LINGMA_DESKTOP_DEBUG=1`

 ---

-## 6. 扩展点
+## 7. Key Design Decisions

-### 6.1 添加新模型
+### 7.1 Why keep both IPC and remote modes?

-在 `service.go` 的模型映射中添加：
+Because the two modes solve different problems:

-```go
-func (s *Service) resolveInternalModelID(model string) string {
-    switch strings.ToLower(strings.TrimSpace(model)) {
-    case "kimi-k2.6":
-        return "kimi2.6"
-    case "qwen3-max":
-        return "qwen3max"
-    // 添加新模型映射
-    default:
-        return ""
-    }
-}
-```
+- IPC mode preserves plugin session semantics and local tool environment
+- Remote mode avoids plugin runtime coupling and is usually better for third-party agent clients

-### 6.2 添加新 Tool 格式支持
+### 7.2 Why keep tool emulation even with remote mode?

-在 `toolemulation.go` 的 `parseToolCallJSON()` 中扩展参数解析逻辑。
+Because Lingma-exposed models are not guaranteed to speak the native OpenAI/Anthropic tool protocol consistently across all routes, the proxy must keep a stable external contract even when upstream model capabilities are uneven.

-### 6.3 添加新 API 端点
+### 7.3 Why persist requests and token stats in the desktop app?

-在 `httpapi/server.go` 的 `NewServer()` 中注册新路由。
+Because the GUI is used as an operational console rather than a transient preview, users need token usage, logs, and recent request history to survive app restarts.

 ---

-*文档版本: 2025-04-25*
-*对应代码版本: 当前 master*
+## 8. Known Boundaries
+
+- IPC mode still has stronger environment coupling with the local Lingma plugin
+- remote credential detection depends on local Lingma cache / auth file layout
+- image payloads are sanitized in persisted request logs to avoid oversized local state
+- request history may contain mixed models in remote mode when fallback is triggered or when different clients specify different models
+
+---
+
+## 9. Files to Read First
+
+If you are extending the system, start here:
+
+- `cmd/lingma-ipc-proxy/main.go`
+- `internal/httpapi/server.go`
+- `internal/service/service.go`
+- `internal/lingmaipc/*`
+- `internal/remote/*`
+- `desktop/app.go`
+- `desktop/main.go`
+
+---
+
+Document version: 2026-04-30
--- a/docs/architecture.zh-CN.md
+++ b/docs/architecture.zh-CN.md
@@ -1,424 +1,315 @@
 # lingma-ipc-proxy 架构文档

-本文档描述 lingma-ipc-proxy 的系统架构、工作原理和核心流程。
+本文档描述 `lingma-ipc-proxy` 的当前架构，覆盖两种后端模式：
+
+- `ipc`：桥接本地 Lingma IDE 插件传输层
+- `remote`：直接调用 Lingma 远端 HTTP API

 ---

-## 1. 整体架构
+## 1. 系统总览

-```
-┌─────────────────────────────────────────────────────────────────────────┐
-│                              客户端层                                     │
-│  ┌──────────────┐  ┌──────────────┐  ┌──────────────┐  ┌──────────────┐ │
-│  │ Claude Code  │  │   OpenAI     │  │   Cline      │  │   Continue   │ │
-│  │  (Anthropic) │  │    SDK       │  │  (OpenAI)    │  │  (OpenAI)    │ │
-│  └──────┬───────┘  └──────┬───────┘  └──────┬───────┘  └──────┬───────┘ │
-└─────────┼─────────────────┼─────────────────┼─────────────────┼─────────┘
-          │                 │                 │                 │
-          └─────────────────┴────────┬────────┴─────────────────┘
-                                     │ HTTP
-                                     ▼
-┌─────────────────────────────────────────────────────────────────────────┐
-│                         lingma-ipc-proxy                                │
-│  ┌─────────────────────────────────────────────────────────────────┐    │
-│  │  internal/httpapi                                                │    │
-│  │  ┌─────────────┐  ┌─────────────┐  ┌─────────────────────────┐ │    │
-│  │  │ /v1/models  │  │/v1/chat/comp│  │    /v1/messages         │ │    │
-│  │  │  (GET)      │  │  (POST)     │  │    (POST)               │ │    │
-│  │  └──────┬──────┘  └──────┬──────┘  └───────────┬─────────────┘ │    │
-│  │         └─────────────────┴──────────┬──────────┘               │    │
-│  │                                      │ normalizeRequest         │    │
-│  │                                      ▼                          │    │
-│  │  ┌─────────────────────────────────────────────────────────┐   │    │
-│  │  │              internal/service                            │   │    │
-│  │  │  ┌──────────┐  ┌──────────┐  ┌────────────────────────┐ │   │    │
-│  │  │  │ Session  │  │  Prompt  │  │    Stream/Event        │ │   │    │
-│  │  │  │ Manager  │  │ Builder  │  │    Handler             │ │   │    │
-│  │  │  └────┬─────┘  └────┬─────┘  └───────────┬────────────┘ │   │    │
-│  │  │       └─────────────┴──────────┬─────────┘              │   │    │
-│  │  │                              │ buildLingmaPrompt       │   │    │
-│  │  │                              ▼                          │   │    │
-│  │  │  ┌─────────────────────────────────────────────────┐   │   │    │
-│  │  │  │          internal/lingmaipc                      │   │   │    │
-│  │  │  │  ┌──────────────┐  ┌──────────────────────────┐ │   │   │    │
-│  │  │  │  │   WebSocket  │  │    Named Pipe (Win)      │ │   │   │    │
-│  │  │  │  │  Transport   │  │    Transport             │ │   │   │    │
-│  │  │  │  └──────┬───────┘  └───────────┬──────────────┘ │   │   │    │
-│  │  │  └─────────┼──────────────────────┼────────────────┘   │   │    │
-│  │  └────────────┼──────────────────────┼────────────────────┘   │    │
-│  │               │                      │                         │    │
-│  │  ┌────────────┼──────────────────────┼────────────────────┐   │    │
-│  │  │            ▼                      ▼                    │   │    │
-│  │  │  ┌─────────────────────────────────────────────────┐  │   │    │
-│  │  │  │      internal/toolemulation                      │  │   │    │
-│  │  │  │  ┌──────────────┐  ┌──────────────────────────┐ │  │   │    │
-│  │  │  │  │InjectTooling │  │   ParseActionBlocks      │ │  │   │    │
-│  │  │  │  │  (Prompt)    │  │   (Response)             │ │  │   │    │
-│  │  │  │  └──────────────┘  └──────────────────────────┘ │  │   │    │
-│  │  │  └─────────────────────────────────────────────────┘  │   │    │
-│  │  └───────────────────────────────────────────────────────┘   │    │
-│  └───────────────────────────────────────────────────────────────┘    │
-└─────────────────────────────────────────────────────────────────────────┘
-                                     │
-                                     │ WebSocket / Named Pipe
-                                     ▼
-┌─────────────────────────────────────────────────────────────────────────┐
-│                         Lingma 后端进程                                  │
-│              (VS Code 插件的本地 IPC 服务)                                │
-│                   ws://127.0.0.1:8899/ws                                │
-└─────────────────────────────────────────────────────────────────────────┘
-                                     │
-                                     │ HTTP API
-                                     ▼
-┌─────────────────────────────────────────────────────────────────────────┐
-│                         云端模型服务                                     │
-│              (Kimi-K2.6 / Qwen3-Max / MiniMax-M2.7 等)                  │
-└─────────────────────────────────────────────────────────────────────────┘
+```mermaid
+flowchart LR
+    A["客户端<br/>Claude Code / Hermes / Cline / Continue / OpenAI SDK / Anthropic SDK"]
+    B["internal/httpapi<br/>OpenAI + Anthropic 兼容路由"]
+    C["internal/service<br/>请求归一化 / 会话策略 / 流式输出 / 兜底"]
+    D["internal/toolemulation<br/>工具提示词注入 + action block 解析"]
+    E["internal/lingmaipc<br/>WebSocket / 命名管道"]
+    F["internal/remote<br/>登录态探测 / 模型列表 / Chat / SSE"]
+    G["Lingma 插件本地进程"]
+    H["Lingma 远端 API"]
+    I["桌面端 GUI<br/>Wails / 日志 / Token 统计 / 持久化状态"]
+
+    A --> B
+    I --> B
+    B --> C
+    C --> D
+    C --> E
+    C --> F
+    E --> G
+    F --> H
 ```

 ---

-## 2. 模块职责
+## 2. 运行模式

-### 2.1 internal/httpapi
+### 2.1 IPC 模式

-HTTP API 适配层，负责将外部请求转换为内部 `service.ChatRequest`。
+`backend=ipc`

-| 端点 | 协议 | 功能 |
-|------|------|------|
-| `GET /v1/models` | OpenAI | 返回可用模型列表 |
-| `POST /v1/chat/completions` | OpenAI | 聊天补全（流式/非流式） |
-| `POST /v1/messages` | Anthropic | 消息接口（流式/非流式） |
+- 读取本地 Lingma 插件传输信息
+- 通过以下方式连接：
+  - macOS / Linux：WebSocket
+  - Windows：Named Pipe
+- 复用 Lingma 插件自身的 session 语义
+- 桌面端里“会话与环境”相关配置只在这里生效

-**核心函数：**
- `handleOpenAIChatCompletions()` - 处理 OpenAI 格式请求
- `handleAnthropicMessages()` - 处理 Anthropic 格式请求
- `normalizeOpenAIRequest()` / `normalizeAnthropicRequest()` - 归一化请求
+### 2.2 Remote API 模式

-**关键设计：**
- 支持 CORS 预检请求 (`OPTIONS`)
- 单请求并发控制 (`tryAcquire()` / `release()`)
- 流式响应通过 `http.Flusher` 实现 SSE
+`backend=remote`

-### 2.2 internal/service
-
-业务逻辑层，负责会话管理和 Prompt 构建。
-
-**核心结构：**
-```go
-type Service struct {
-    cfg              Config
-    client           *lingmaipc.Client
-    stickySessionID  string
-    stickyModelID    string
-}
-```
-
-**核心函数：**
- `Generate()` - 非流式生成
- `GenerateStream()` - 流式生成（返回 `events` + `done` channel）
- `buildLingmaPrompt()` - 构建 Lingma 原生 Prompt
- `runPromptLocked()` - 发送 `session/prompt` RPC 并监听 `session/update` 通知
-
-**会话模式：**
-| 模式 | 行为 |
-|------|------|
-| `reuse` | 复用 sticky session，多轮对话保持上下文 |
-| `fresh` | 每个请求新建临时 session，完成后删除 |
-| `auto` | 单轮请求复用；带 system/history 的请求用 fresh |
-
-### 2.3 internal/lingmaipc
-
-IPC 通信层，负责与 Lingma 后端进程建立连接。
-
-**传输方式：**
-| 平台 | 默认传输 | 说明 |
-|------|----------|------|
-| Windows | Named Pipe | `\\.\pipe\lingma-*` |
-| macOS/Linux | WebSocket | `ws://127.0.0.1:{port}/ws` |
-
-**连接发现：**
- 读取 VS Code 插件缓存：`~/.config/Lingma/SharedClientCache/.info.json`
- 获取 WebSocket 端口号
- 自动重连机制
-
-**RPC 协议：**
- `session/new` - 创建会话
- `session/prompt` - 发送用户消息
- `session/update` - 接收流式响应通知
- `session/set_model` - 切换模型
- `chat/deleteSessionById` - 删除会话
-
-### 2.4 internal/toolemulation
-
-Tool 调用模拟层，将标准 `tools` 协议转换为 Prompt 层契约。
-
-**核心流程：**
-```
-Client tools ──→ ExtractAnthropicTools() ──→ []Tool
-                    │
-                    ▼
-              InjectTooling() ──→ System Prompt + Tool 说明
-                    │
-                    ▼
-              模型输出 action block
-                    │
-                    ▼
-              ParseActionBlocks() ──→ []ToolCall
-                    │
-                    ▼
-              编码为 Anthropic tool_use / OpenAI tool_calls
-```
-
-**Prompt 契约格式：**
-```
-```json action
-{"tool":"NAME","parameters":{"key":"value"}}
-```
-```
-
-**支持格式：**
- `{"tool":"X","parameters":{}}` ✅ 标准格式
- `{"tool":"X","arguments":{}}` ✅ 兼容格式
- `{"tool":"X","input":{}}` ✅ 兼容格式
- `{"tool":"X","arg1":"val"}` ✅ 顶层参数（部分模型）
+- 解析远端域名
+- 加载认证信息：
+  - 显式指定的 `remote_auth_file`
+  - 或自动探测 `~/.lingma` 下的缓存
+- 直接请求远端模型列表和聊天接口
+- 支持远端超时 / 429 / 5xx / 网络中断时的模型兜底切换
+- 不依赖本地插件会话环境参数

 ---

-## 3. 核心流程
+## 3. 模块职责

-### 3.1 普通聊天请求流程
+### 3.1 `cmd/lingma-ipc-proxy`
+
+入口与配置装配层。
+
+职责：
+
+- 解析命令行参数
+- 合并配置文件 / 环境变量 / CLI flags
+- 选择后端模式
+- 构建 `service.Config`
+- 启动 `internal/httpapi.Server`
+
+关键配置字段：
+
+- `backend`
+- `transport`
+- `websocket_url`
+- `pipe`
+- `remote_base_url`
+- `remote_auth_file`
+- `remote_version`
+- `remote_fallback_enabled`
+- `remote_fallback_models`
+
+### 3.2 `internal/httpapi`
+
+OpenAI / Anthropic 兼容层。
+
+主要路由：
+
+- `GET /v1/models`
+- `POST /v1/chat/completions`
+- `POST /v1/messages`
+- `GET /health`
+- `GET /props`
+
+职责：
+
+- 把 OpenAI / Anthropic 请求归一化为 `service.ChatRequest`
+- 把 service 结果重新编码成 OpenAI / Anthropic 响应
+- 输出 SSE 流
+- 记录调试用请求 / 响应摘要
+
+### 3.3 `internal/service`
+
+核心编排层。
+
+职责：
+
+- 选择当前 backend
+- backend 预热
+- 拉取模型列表
+- 非流式生成
+- 流式生成
+- IPC 模式下的 session 复用策略
+- 工具模拟注入与解析
+- 图片输入归一化
+- 远端 fallback 顺序控制
+
+分支逻辑：
+
+- IPC 路径走 `internal/lingmaipc`
+- Remote 路径走 `internal/remote`
+
+### 3.4 `internal/lingmaipc`
+
+本地 Lingma 插件 IPC 客户端。
+
+职责：
+
+- 自动探测 WebSocket / pipe 端点
+- 建立连接与重连
+- 发送 RPC：
+  - `session/new`
+  - `session/prompt`
+  - `session/set_model`
+  - `chat/deleteSessionById`
+- 消费 `session/update` 通知
+
+### 3.5 `internal/remote`
+
+Lingma 远端 HTTP 客户端。
+
+职责：
+
+- 解析远端 base URL
+- 加载并校验登录态
+- 生成远端请求所需身份信息
+- 获取远端模型列表
+- 调用远端聊天接口
+- 处理远端 SSE 流式响应
+
+### 3.6 `internal/toolemulation`
+
+工具调用模拟层。
+
+职责：
+
+- 从 OpenAI / Anthropic 请求中提取工具定义
+- 将工具契约注入 prompt
+- 从模型文本里解析 JSON action block
+- 回投为：
+  - Anthropic `tool_use`
+  - OpenAI `tool_calls`
+
+---
+
+## 4. 请求主流程
+
+### 4.1 通用入口

 ```mermaid
 sequenceDiagram
-    participant C as Client
-    participant H as HTTP API
-    participant S as Service
-    participant L as Lingma IPC
-    participant B as Lingma Backend
+    participant Client as Client
+    participant HTTP as httpapi
+    participant Service as service

-    C->>H: POST /v1/messages
-    H->>H: normalizeAnthropicRequest()
-    H->>S: GenerateStream(req)
-    S->>S: ensureConnected()
-    S->>S: resolveSession()
-    S->>S: buildLingmaPrompt()
-    S->>L: Send("session/prompt", params)
-    L->>B: WebSocket RPC
-    B->>L: session/update (agent_message_chunk)
-    loop 流式响应
-        L->>S: notification (chunk)
-        S->>H: events <- StreamEvent{Delta}
-        H->>C: SSE: content_block_delta
-    end
-    B->>L: session/update (chat_finish)
-    L->>S: notification (finish)
-    S->>H: done <- StreamResult
-    H->>C: SSE: message_stop
+    Client->>HTTP: OpenAI / Anthropic 请求
+    HTTP->>HTTP: 归一化请求
+    HTTP->>Service: Generate / GenerateStream
 ```

-### 3.2 Tool 调用流程
+### 4.2 IPC 后端流程

 ```mermaid
 sequenceDiagram
-    participant C as Client
-    participant H as HTTP API
-    participant T as ToolEmulation
-    participant S as Service
-    participant L as Lingma IPC
+    participant Service as service
+    participant Tool as toolemulation
+    participant IPC as lingmaipc
+    participant Plugin as Lingma 插件

-    C->>H: POST /v1/messages (with tools)
-    H->>T: ExtractAnthropicTools()
-    H->>S: GenerateStream(req)
-    S->>T: InjectTooling(system, tools)
-    S->>L: session/prompt (with tool prompt)
-    L->>S: response (with action blocks)
-    S->>T: ParseActionBlocks(text)
-    T->>S: []ToolCall
-    S->>H: ChatResult{Text, ToolCalls}
-    H->>C: SSE: tool_use blocks
-
-    C->>H: POST /v1/messages (tool_result)
-    H->>T: ActionOutputPrompt(toolUseID, content)
-    H->>S: GenerateStream(req)
-    S->>L: session/prompt (with tool result)
-    L->>S: response
-    S->>H: ChatResult
-    H->>C: SSE: final response
+    Service->>Tool: 按需注入工具契约
+    Service->>IPC: ensure connected
+    Service->>IPC: 创建/复用 session
+    Service->>IPC: session/prompt
+    IPC->>Plugin: RPC
+    Plugin-->>IPC: session/update chunk
+    IPC-->>Service: 流式事件
+    Service-->>Service: 解析工具 block / 图片 / stop reason
 ```

-### 3.3 图片传输流程
+### 4.3 Remote 后端流程

 ```mermaid
 sequenceDiagram
-    participant C as Client
-    participant H as HTTP API
-    participant S as Service
-    participant L as Lingma IPC
+    participant Service as service
+    participant Remote as remote client
+    participant API as Lingma 远端 API

-    C->>H: POST /v1/messages (with image)
-    H->>H: extractAnthropicImages()
-    H->>S: ChatRequest{Images: [...]}
-    S->>S: runPromptLocked()
-    Note over S: 1. 保存 base64 到 /tmp/lingma-img-*.ext
-    Note over S: 2. 构建 URI: lingma:///agent/file?path=...
-    S->>L: session/prompt
-    Note over L: prompt: [{type:"text"}, {type:"image", mimeType, uri, data}]
-    L->>S: response (model sees image)
-    S->>H: ChatResult
-    H->>C: SSE response
-```
-
-### 3.4 流式输出 SSE 事件序列
-
-**Anthropic 格式（流式）：**
-```
-event: message_start
-data: {"type":"message_start","message":{...}}
-
-event: content_block_start
-data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}
-
-event: content_block_delta
-data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"你"}}
-
-event: content_block_delta
-data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"好"}}
-
-... (更多 delta)
-
-event: content_block_stop
-data: {"type":"content_block_stop","index":0}
-
-[如有 tool_calls]
-event: content_block_start
-data: {"type":"content_block_start","index":1,"content_block":{"type":"tool_use","id":"...","name":"Bash","input":{"command":"ls /"}}}
-
-event: content_block_stop
-data: {"type":"content_block_stop","index":1}
-
-event: message_delta
-data: {"type":"message_delta","delta":{"stop_reason":"end_turn"},"usage":{"output_tokens":5}}
-
-event: message_stop
-data: {"type":"message_stop"}
+    Service->>Remote: 加载登录态 / 初始化 client
+    Service->>Remote: 需要时拉取模型列表
+    Service->>Remote: 发送 chat 请求
+    Remote->>API: HTTPS
+    API-->>Remote: JSON 或 SSE
+    Remote-->>Service: 归一化结果
+    Service-->>Service: 按条件执行 fallback
 ```

 ---

-## 4. 关键技术决策
+## 5. 远端兜底策略

-### 4.1 为什么使用 Tool Emulation 而非原生 Tool Calling？
+仅在以下条件同时满足时启用：

-Lingma 后端模型（Kimi、Qwen 等）不原生支持 OpenAI/Anthropic 的 `tools` 协议。因此代理层需要将工具定义注入到 Prompt 中，通过结构化文本输出模拟工具调用。
+- `backend=remote`
+- `remote_fallback_enabled=true`
+- 还没有向客户端输出任何流式 token
+- 上游错误属于 timeout / 429 / 5xx，或网络中断（connection reset / unexpected EOF）

-**优点：**
- 不依赖上游模型能力
- 兼容任何纯聊天模型
- 可精确控制 Prompt 格式
+当前默认顺序：

-**缺点：**
- 模型需要学习特定格式
- 解析可能有容错问题
- 增加了 Prompt 长度
+1. `kmodel`
+2. `mmodel`
+3. `dashscope_qwen3_coder`
+4. `dashscope_qmodel`
+5. `dashscope_qwen_max_latest`
+6. `dashscope_qwen_plus_20250428_thinking`

-### 4.2 为什么使用 WebSocket/Named Pipe 而非 HTTP？
-
-Lingma 插件使用本地 IPC 与后端通信，优势：
- 低延迟（本地通信）
- 双向实时通知（session/update）
- 认证信息由插件管理，代理无需处理
-
-### 4.3 图片传输的双保险策略
-
-```
-Prompt 数组 (Lingma 原生格式):
-[
-  {"type":"text","text":"..."},
-  {"type":"image","mimeType":"image/png","uri":"lingma:///agent/file?path=...","data":"base64..."}
-]
-```
-
- `uri`: Lingma 后端必须验证的本地文件路径
- `data`: base64 编码的图像数据（备用）
- `mimeType`: 图像类型标识
-
-### 4.4 单请求并发控制
-
-Lingma IPC 一次只能处理一个请求，因此代理使用 `tryAcquire()` 机制：
-
-```go
-if !s.tryAcquire() {
-    writeAnthropicError(w, 429, "rate_limit_error",
-        "Lingma IPC proxy handles one request at a time.")
-    return
-}
-defer s.release()
-```
+实际执行前，service 会先拿远端 `/v1/models` 的真实结果过滤一遍，只保留当前账号真的可用的模型。

 ---

-## 5. 配置说明
+## 6. 桌面端架构

-### 5.1 配置文件结构
+Wails 桌面端不是简单预览壳，而是本地代理的运维控制台。

-```json
-{
-  "host": "127.0.0.1",
-  "port": 8095,
-  "transport": "websocket",
-  "mode": "agent",
-  "shell_type": "zsh",
-  "session_mode": "auto",
-  "timeout": 120,
-  "cwd": "/Users/tiancheng"
-}
-```
+职责：

-### 5.2 配置项说明
+- 启动 / 停止 / 重启代理
+- 展示当前 backend、监听地址、探测结果
+- 持久化：
+  - 请求历史
+  - 日志
+  - Token 统计
+- 编辑配置并保存后按需重启

-| 配置项 | 类型 | 默认值 | 说明 |
-|--------|------|--------|------|
-| `host` | string | `127.0.0.1` | HTTP 监听地址 |
-| `port` | int | `8095` | HTTP 监听端口 |
-| `transport` | string | `auto` | IPC 传输方式：`auto`/`pipe`/`websocket` |
-| `mode` | string | `chat` | 模式：`chat`/`agent` |
-| `shell_type` | string | `powershell` | 终端类型 |
-| `session_mode` | string | `auto` | 会话模式：`reuse`/`fresh`/`auto` |
-| `timeout` | int | `120` | 请求超时（秒） |
-| `cwd` | string | `""` | 工作目录（传给 Lingma 后端） |
+本地持久化路径：
+
+- 配置：`~/.config/lingma-ipc-proxy/config.json`
+- GUI 运行状态：`~/.config/lingma-ipc-proxy/app-state.json`
+
+打包要求：
+
+- 生产包不自动打开 Inspector / 调试入口
+- 本地开发可通过 `LINGMA_DESKTOP_DEBUG=1` 显式开启

 ---

-## 6. 扩展点
+## 7. 关键设计决策

-### 6.1 添加新模型
+### 7.1 为什么同时保留 IPC 和 Remote？

-在 `service.go` 的模型映射中添加：
+因为两种模式解决的问题不同：

-```go
-func (s *Service) resolveInternalModelID(model string) string {
-    switch strings.ToLower(strings.TrimSpace(model)) {
-    case "kimi-k2.6":
-        return "kimi2.6"
-    case "qwen3-max":
-        return "qwen3max"
-    // 添加新模型映射
-    default:
-        return ""
-    }
-}
-```
+- IPC 模式更贴近插件本地上下文和 session 语义
+- Remote 模式更适合第三方 agent 客户端，减少对插件运行态的依赖

-### 6.2 添加新 Tool 格式支持
+### 7.2 为什么 Remote 也保留 Tool Emulation？

-在 `toolemulation.go` 的 `parseToolCallJSON()` 中扩展参数解析逻辑。
+因为 Lingma 暴露出来的模型能力并不保证始终稳定兼容 OpenAI / Anthropic 原生 tools 协议。代理层必须对外提供稳定契约，不能把上游模型差异直接泄露给客户端。

-### 6.3 添加新 API 端点
+### 7.3 为什么桌面端要持久化请求和 Token？

-在 `httpapi/server.go` 的 `NewServer()` 中注册新路由。
+因为这个 GUI 已经是运维面板，不是一次性调试页。重启后仍然需要保留最近请求、日志和 usage 统计，便于排障和观察模型表现。

 ---

-*文档版本: 2025-04-25*
-*对应代码版本: 当前 master*
+## 8. 当前边界
+
+- IPC 模式仍然受本地 Lingma 插件运行态影响
+- Remote 登录态探测依赖本地 Lingma 缓存结构
+- 图片类请求在本地持久化时会做裁剪/脱敏，避免状态文件过大
+- Remote 模式下如果启用了 fallback，最近一次“聊天模型”可能与客户端最初指定模型不同
+
+---
+
+## 9. 代码入口建议
+
+如果要继续扩展，优先看这些文件：
+
+- `cmd/lingma-ipc-proxy/main.go`
+- `internal/httpapi/server.go`
+- `internal/service/service.go`
+- `internal/lingmaipc/*`
+- `internal/remote/*`
+- `desktop/app.go`
+- `desktop/main.go`
+
+---
+
+文档版本：2026-04-30
--- a/internal/httpapi/server.go
+++ b/internal/httpapi/server.go
@@ -1400,9 +1400,6 @@ func redactRecordedValue(value any) any {
 		if looksLikeImagePayload(typed) {
 			return imageRedaction(typed)
 		}
-		if len(typed) > 12000 {
-			return typed[:12000] + "... [truncated]"
-		}
 		return typed
 	default:
 		return typed
@@ -1443,12 +1440,8 @@ func mustMarshalJSON(value any) []byte {
 }

 func truncateRecordedString(value string) string {
-	const maxRecordedBody = 120000
-	if len(value) <= maxRecordedBody {
 	return value
 }
-	return value[:maxRecordedBody] + "... [truncated]"
-}

 func withCORS(next http.Handler) http.Handler {
 	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
--- a/internal/remote/client.go
+++ b/internal/remote/client.go
@@ -76,7 +76,7 @@ func New(cfg Config) *Client {
 		cfg.CosyVersion = "2.11.2"
 	}
 	if cfg.Timeout <= 0 {
-		cfg.Timeout = 120 * time.Second
+		cfg.Timeout = 300 * time.Second
 	}
 	cfg.BaseURL = strings.TrimRight(cfg.BaseURL, "/")
 	return &Client{cfg: cfg, client: &http.Client{Timeout: cfg.Timeout}}
--- a/internal/service/service.go
+++ b/internal/service/service.go
@@ -51,6 +51,8 @@ type Config struct {
 	ShellType             string
 	SessionMode           SessionMode
 	Timeout               time.Duration
+	RemoteFallbackEnabled bool
+	RemoteFallbackModels  []string
 }

 type Image struct {
@@ -166,7 +168,7 @@ func New(cfg Config) *Service {
 		cfg.ShellType = lingmaipc.DefaultShellType()
 	}
 	if cfg.Timeout <= 0 {
-		cfg.Timeout = 120 * time.Second
+		cfg.Timeout = 300 * time.Second
 	}
 	if cfg.Transport == "" {
 		cfg.Transport = lingmaipc.TransportAuto
@@ -174,6 +176,11 @@ func New(cfg Config) *Service {
 	if cfg.Backend == "" {
 		cfg.Backend = BackendRemote
 	}
+	if cfg.Backend == BackendRemote {
+		if len(cfg.RemoteFallbackModels) == 0 {
+			cfg.RemoteFallbackModels = DefaultRemoteFallbackModels()
+		}
+	}
 	cfg.Model = normalizeModelForBackend(cfg.Backend, cfg.Model)
 	if cfg.SessionMode == "" {
 		cfg.SessionMode = SessionModeAuto
@@ -181,6 +188,17 @@ func New(cfg Config) *Service {
 	return &Service{cfg: cfg}
 }

+func DefaultRemoteFallbackModels() []string {
+	return []string{
+		"kmodel",
+		"mmodel",
+		"dashscope_qwen3_coder",
+		"dashscope_qmodel",
+		"dashscope_qwen_max_latest",
+		"dashscope_qwen_plus_20250428_thinking",
+	}
+}
+
 func (s *Service) SetDefaultModel(model string) {
 	s.mu.Lock()
 	defer s.mu.Unlock()
@@ -331,9 +349,6 @@ func (s *Service) generateRemote(
 	req ChatRequest,
 	onDelta func(string),
 ) (*ChatResult, error) {
-	requestCtx, cancel := context.WithTimeout(ctx, s.cfg.Timeout)
-	defer cancel()
-
 	if strings.TrimSpace(req.Model) == "" {
 		req.Model = s.DefaultModel()
 	}
@@ -346,20 +361,54 @@ func (s *Service) generateRemote(
 		return nil, errors.New("empty user message")
 	}

+	models := s.remoteAttemptModels(ctx, req.Model)
 	client := s.remoteClientLocked()
-	remoteResult, err := client.Chat(requestCtx, remote.ChatRequest{
-		Model:       req.Model,
+	var lastErr error
+	for i, model := range models {
+		attemptCtx, cancel := context.WithTimeout(ctx, s.cfg.Timeout)
+		result, emitted, err := s.generateRemoteWithModel(attemptCtx, client, req, prompt, model, onDelta)
+		cancel()
+		if err == nil {
+			return result, nil
+		}
+		lastErr = err
+		if i == len(models)-1 || emitted || !isRemoteFallbackError(err) {
+			return nil, err
+		}
+	}
+	return nil, lastErr
+}
+
+func (s *Service) generateRemoteWithModel(
+	ctx context.Context,
+	client *remote.Client,
+	req ChatRequest,
+	prompt string,
+	model string,
+	onDelta func(string),
+) (*ChatResult, bool, error) {
+	emitted := false
+	delta := func(text string) {
+		if text != "" {
+			emitted = true
+		}
+		if onDelta != nil {
+			onDelta(text)
+		}
+	}
+	remoteResult, err := client.Chat(ctx, remote.ChatRequest{
+		Model:       model,
 		Prompt:      prompt,
 		Stream:      onDelta != nil,
 		Temperature: req.Temperature,
-	}, onDelta)
+	}, delta)
 	if err != nil {
-		return nil, err
+		return nil, emitted, err
 	}

 	result := &ChatResult{
 		Text:             remoteResult.Text,
-		Model:            valueOr(strings.TrimSpace(req.Model), "lingma"),
+		Model:            valueOr(strings.TrimSpace(model), "lingma"),
 		InputTokens:      remoteResult.InputTokens,
 		OutputTokens:     remoteResult.OutputTokens,
 		SessionID:        "",
@@ -370,9 +419,9 @@ func (s *Service) generateRemote(
 		Transport:        "remote",
 		EffectiveSession: SessionModeFresh,
 	}
-	s.applyToolEmulation(requestCtx, req, prompt, result, onDelta, func(hintPrompt string) (string, int, error) {
-		retryResult, retryErr := client.Chat(requestCtx, remote.ChatRequest{
-			Model:       req.Model,
+	s.applyToolEmulation(ctx, req, prompt, result, onDelta, func(hintPrompt string) (string, int, error) {
+		retryResult, retryErr := client.Chat(ctx, remote.ChatRequest{
+			Model:       model,
 			Prompt:      hintPrompt,
 			Stream:      onDelta != nil,
 			Temperature: req.Temperature,
@@ -385,7 +434,78 @@ func (s *Service) generateRemote(
 		}
 		return retryResult.Text, retryResult.OutputTokens, nil
 	})
-	return result, nil
+	return result, emitted, nil
+}
+
+func (s *Service) remoteAttemptModels(ctx context.Context, primary string) []string {
+	primary = normalizeModelForBackend(BackendRemote, primary)
+	models := []string{primary}
+	if !s.cfg.RemoteFallbackEnabled {
+		return models
+	}
+
+	availableCtx, cancel := context.WithTimeout(ctx, 15*time.Second)
+	remoteModels, err := s.remoteClientLocked().ListModels(availableCtx)
+	cancel()
+	if err != nil {
+		return models
+	}
+
+	available := make(map[string]bool, len(remoteModels))
+	for _, model := range remoteModels {
+		key := normalizeModelForBackend(BackendRemote, model.Key)
+		if key != "" {
+			available[key] = true
+		}
+	}
+
+	fallbackModels := s.cfg.RemoteFallbackModels
+	if len(fallbackModels) == 0 {
+		fallbackModels = DefaultRemoteFallbackModels()
+	}
+	ordered := make([]string, 0, len(fallbackModels))
+	seen := map[string]bool{primary: true}
+	primaryIndex := -1
+	for _, candidate := range fallbackModels {
+		model := normalizeModelForBackend(BackendRemote, candidate)
+		if model == "" {
+			continue
+		}
+		if model == primary && primaryIndex == -1 {
+			primaryIndex = len(ordered)
+		}
+		ordered = append(ordered, model)
+	}
+
+	start := 0
+	if primaryIndex >= 0 {
+		start = primaryIndex + 1
+	}
+	for _, model := range ordered[start:] {
+		if seen[model] || !available[model] {
+			continue
+		}
+		seen[model] = true
+		models = append(models, model)
+	}
+	return models
+}
+
+func isRemoteFallbackError(err error) bool {
+	if err == nil {
+		return false
+	}
+	if errors.Is(err, context.DeadlineExceeded) {
+		return true
+	}
+	msg := strings.ToLower(err.Error())
+	return strings.Contains(msg, "context deadline exceeded") ||
+		strings.Contains(msg, "client.timeout") ||
+		strings.Contains(msg, "timeout awaiting response") ||
+		strings.Contains(msg, "remote chat status 5") ||
+		strings.Contains(msg, "remote chat status 429") ||
+		strings.Contains(msg, "connection reset") ||
+		strings.Contains(msg, "unexpected eof")
 }

 func (s *Service) generateLocked(
--- a/lingma-ipc-proxy.macos.json
+++ b/lingma-ipc-proxy.macos.json
@@ -7,6 +7,15 @@
  "model": "kmodel",
  "shell_type": "zsh",
  "session_mode": "auto",
-  "timeout": 120,
+  "timeout": 300,
+  "remote_fallback_enabled": true,
+  "remote_fallback_models": [
+    "kmodel",
+    "mmodel",
+    "dashscope_qwen3_coder",
+    "dashscope_qmodel",
+    "dashscope_qwen_max_latest",
+    "dashscope_qwen_plus_20250428_thinking"
+  ],
  "cwd": "/Users/tiancheng"
 }