- Add SessionCache (LRU + TTL, per-API-key scoped) mapping conversation-prefix hash -> upstream Lingma sessionId.
- Hash only user/system/developer turns so client-side assistant reformatting doesn't invalidate the key.
- On cache hit: reuse sessionId, send only the latest user message with isReply=true, and stick the request to the instance that originally served it.
- LingmaGatewayClient.chat_complete/chat_stream accept session_id/is_reply and report the real finish.sessionId via out_meta so we persist what Lingma actually allocated.
- Invalidate cache on non-stream failure; skip writes on cancelled/partial streams.
- Expose cache stats in /internal/stats and /metrics.
- Configurable via SESSION_REUSE_ENABLED / SESSION_CACHE_MAX_ENTRIES / SESSION_CACHE_TTL_SEC (documented in README + .env.example).

Made-with: Cursor
78 lines
2.6 KiB
Plaintext
78 lines
2.6 KiB
Plaintext
# 网关监听地址
|
||
HOST=0.0.0.0
|
||
# 网关监听端口
|
||
PORT=8317
|
||
# API Key,可配置多个(逗号分隔)
|
||
API_KEYS=sk-your-api-key
|
||
# /metrics 专用的鉴权 token(留空时回退为使用 API_KEYS 鉴权;若 API_KEYS 也未配置,则 /metrics 无需鉴权、公开可访问)
|
||
METRICS_TOKEN=
|
||
# 日志级别(DEBUG / INFO / WARNING / ERROR)
|
||
LOG_LEVEL=INFO
|
||
|
||
# /v1/chat/completions 并发上限(<=0 表示不限流)
|
||
GATEWAY_MAX_IN_FLIGHT=4
|
||
# 排队等待超时秒数,超过后返回 429 + Retry-After
|
||
GATEWAY_QUEUE_TIMEOUT_SEC=30
|
||
|
||
# 容器内 Lingma 二进制路径
|
||
LINGMA_BIN=/app/data/bin/Lingma
|
||
# Lingma 获取方式:marketplace 或 vsix
|
||
LINGMA_SOURCE_TYPE=marketplace
|
||
# Marketplace 发布者
|
||
LINGMA_MARKETPLACE_PUBLISHER=Alibaba-Cloud
|
||
# Marketplace 扩展名
|
||
LINGMA_MARKETPLACE_EXTENSION=tongyi-lingma
|
||
# VSIX 下载地址(最新优先)
|
||
LINGMA_VSIX_URL=https://tongyi-code.oss-cn-hangzhou.aliyuncs.com/vscode/tongyi-lingma-latest.vsix
|
||
# 启动时总是尝试从 VSIX 刷新二进制
|
||
LINGMA_BOOTSTRAP_ALWAYS=true
|
||
# 强制刷新(true 时忽略本地缓存)
|
||
LINGMA_FORCE_REFRESH=false
|
||
# Lingma 工作目录(登录/会话数据)
|
||
LINGMA_WORK_DIR=/app/data/.lingma/vscode/sharedClientCache
|
||
# Lingma WebSocket 端口
|
||
LINGMA_SOCKET_PORT=36510
|
||
# Lingma 启动等待秒数
|
||
LINGMA_STARTUP_TIMEOUT=40
|
||
# 单次 RPC 超时秒数
|
||
LINGMA_RPC_TIMEOUT=30
|
||
|
||
# 默认模型(无法映射时使用)
|
||
DEFAULT_MODEL=org_auto
|
||
# 默认模式:chat 或 agent
|
||
DEFAULT_ASK_MODE=chat
|
||
|
||
# 专属域(可选)
|
||
DEDICATED_DOMAIN_URL=
|
||
|
||
# 未登录时是否自动登录
|
||
AUTO_LOGIN_ENABLED=true
|
||
# 自动登录是否无头浏览器
|
||
AUTO_LOGIN_HEADLESS=true
|
||
# 自动登录超时秒数
|
||
AUTO_LOGIN_TIMEOUT=180
|
||
# 自动登录重试次数
|
||
AUTO_LOGIN_MAX_RETRY=2
|
||
|
||
# Lingma 登录用户名(仅当 LINGMA_ACCOUNTS 为空时生效,单实例模式)
|
||
LINGMA_USERNAME=
|
||
# Lingma 登录密码(仅当 LINGMA_ACCOUNTS 为空时生效)
|
||
LINGMA_PASSWORD=
|
||
|
||
# ==== 多实例池(方案乙:多账号) ====
|
||
# 多账号列表,支持两种格式:
|
||
# CSV: user1:pass1,user2:pass2
|
||
# JSON: [{"username":"u1","password":"p1"},{"username":"u2","password":"p2"}]
|
||
# 配置后每个账号对应一个独立 Lingma 实例(独立 workDir + 独立自动登录)
|
||
LINGMA_ACCOUNTS=
|
||
# 实例数量:留空时默认等于 LINGMA_ACCOUNTS 中的账号数;显式指定且账号数不足时,账号会被循环复用并输出 warning 日志
|
||
LINGMA_INSTANCE_COUNT=
|
||
|
||
# ==== 会话复用(多轮对话命中上游 KV cache,减少首 token 延迟) ====
|
||
# 开关(默认开)
|
||
SESSION_REUSE_ENABLED=true
|
||
# 最多缓存多少条会话 (LRU)
|
||
SESSION_CACHE_MAX_ENTRIES=256
|
||
# 会话 TTL 秒数;超时后缓存条目自动失效,避免命中 Lingma 侧早已回收的会话
|
||
SESSION_CACHE_TTL_SEC=1800
|