Files
lingma-openai-gateway/app/config.py
GitHub Actions dfdb7087dc perf: session reuse for multi-turn latency
- Add SessionCache (LRU + TTL, per-API-key scoped) mapping
  conversation-prefix hash -> upstream Lingma sessionId.
- Hash only user/system/developer turns so client-side
  assistant reformatting doesn't invalidate the key.
- On cache hit: reuse sessionId, send only the latest user
  message with isReply=true, and stick the request to the
  instance that originally served it.
- LingmaGatewayClient.chat_complete/chat_stream accept
  session_id/is_reply and report the real finish.sessionId
  via out_meta so we persist what Lingma actually allocated.
- Invalidate cache on non-stream failure; skip writes on
  cancelled/partial streams.
- Expose cache stats in /internal/stats and /metrics.
- Configurable via SESSION_REUSE_ENABLED / SESSION_CACHE_MAX_ENTRIES
  / SESSION_CACHE_TTL_SEC (documented in README + .env.example).

Made-with: Cursor
2026-04-18 08:10:39 +08:00

141 lines
4.7 KiB
Python

from __future__ import annotations
import json
import os
from dataclasses import dataclass, field
@dataclass
class LingmaAccount:
    """Username/password pair for one Lingma login.

    The password is excluded from the generated ``repr`` so that logging or
    printing an account (or any object that contains one) does not expose the
    credential. Construction, attribute access and equality are unaffected.
    """

    username: str
    # repr=False only hides the value from repr(); it is still a required
    # positional/keyword constructor argument and still takes part in ==.
    password: str = field(repr=False)
@dataclass
class Settings:
    """Runtime configuration of the gateway, as assembled by ``load_settings``.

    Field order is part of the constructor signature and must not change.
    ``api_keys`` and ``metrics_token`` are excluded from the generated
    ``repr`` so that logging a ``Settings`` object does not print them.
    """

    # --- HTTP server / gateway ---
    host: str
    port: int
    api_keys: list[str] = field(repr=False)  # secret: hidden from repr()
    metrics_token: str = field(repr=False)  # secret: hidden from repr()
    log_level: str
    gateway_max_in_flight: int
    gateway_queue_timeout_sec: float

    # --- Lingma binary / RPC ---
    lingma_bin: str
    lingma_work_dir: str
    lingma_socket_port: int
    lingma_startup_timeout: int
    lingma_rpc_timeout: int

    # --- Request defaults ---
    default_model: str
    default_ask_mode: str
    dedicated_domain_url: str

    # --- Automatic login ---
    auto_login_enabled: bool
    auto_login_headless: bool
    auto_login_timeout: int
    auto_login_max_retry: int

    # --- Accounts / instances ---
    accounts: list[LingmaAccount] = field(default_factory=list)
    instance_count: int = 1

    # --- Session reuse cache ---
    session_reuse_enabled: bool = True
    session_cache_max_entries: int = 256
    session_cache_ttl_sec: float = 1800.0
def _bool_env(name: str, default: bool) -> bool:
raw = os.getenv(name)
if raw is None:
return default
return raw.strip().lower() in {"1", "true", "yes", "on"}
def _parse_accounts(raw: str) -> list[LingmaAccount]:
"""Parse LINGMA_ACCOUNTS.
Accepted formats:
- JSON array: `[{"username":"u1","password":"p1"},{"username":"u2","password":"p2"}]`
- CSV: `u1:p1,u2:p2`
- Newlines: `u1:p1\nu2:p2`
Whitespace around entries is trimmed. Empty entries are ignored.
Passwords containing ':' are supported (only the first ':' is the separator).
"""
raw = (raw or "").strip()
if not raw:
return []
if raw.startswith("["):
try:
data = json.loads(raw)
except Exception:
return []
out: list[LingmaAccount] = []
if isinstance(data, list):
for item in data:
if isinstance(item, dict):
u = str(item.get("username", "")).strip()
p = str(item.get("password", "")).strip()
if u and p:
out.append(LingmaAccount(u, p))
return out
out: list[LingmaAccount] = []
for entry in raw.replace("\n", ",").split(","):
entry = entry.strip()
if not entry or ":" not in entry:
continue
u, p = entry.split(":", 1)
u, p = u.strip(), p.strip()
if u and p:
out.append(LingmaAccount(u, p))
return out
def _int_env(name: str, default: int) -> int:
    """Read integer environment variable *name*; unset or blank gives *default*."""
    raw = (os.getenv(name) or "").strip()
    if not raw:
        return default
    try:
        return int(raw)
    except ValueError:
        # Name the offending variable; int()'s own message does not.
        raise ValueError(f"{name} must be an integer, got {raw!r}") from None


def _float_env(name: str, default: float) -> float:
    """Read float environment variable *name*; unset or blank gives *default*."""
    raw = (os.getenv(name) or "").strip()
    if not raw:
        return default
    try:
        return float(raw)
    except ValueError:
        raise ValueError(f"{name} must be a number, got {raw!r}") from None


def load_settings() -> Settings:
    """Build a ``Settings`` object from environment variables.

    - ``API_KEYS`` is a comma-separated list; blank items are dropped.
    - Accounts come from ``LINGMA_ACCOUNTS``; when that yields nothing, a
      single account is taken from ``LINGMA_USERNAME`` / ``LINGMA_PASSWORD``
      if both are non-empty.
    - ``LINGMA_INSTANCE_COUNT`` (clamped to at least 1) overrides the instance
      count; when it is unset, blank or not an integer, the count is the
      number of accounts, with a minimum of 1.
    - Numeric variables that are unset or blank use their defaults.

    Returns:
        The populated ``Settings`` instance.

    Raises:
        ValueError: If a numeric variable other than
            ``LINGMA_INSTANCE_COUNT`` holds a non-numeric value.
    """
    api_keys = [
        key.strip() for key in os.getenv("API_KEYS", "").split(",") if key.strip()
    ]

    accounts = _parse_accounts(os.getenv("LINGMA_ACCOUNTS", ""))
    if not accounts:
        # Fallback for single-account deployments.
        username = os.getenv("LINGMA_USERNAME", "").strip()
        password = os.getenv("LINGMA_PASSWORD", "").strip()
        if username and password:
            accounts.append(LingmaAccount(username, password))

    # One instance per account unless explicitly overridden; never below 1.
    instance_count = len(accounts) or 1
    explicit_count = os.getenv("LINGMA_INSTANCE_COUNT", "").strip()
    if explicit_count:
        try:
            instance_count = max(1, int(explicit_count))
        except ValueError:
            # An unparsable override is ignored; keep the account-derived count.
            pass

    return Settings(
        host=os.getenv("HOST", "0.0.0.0"),
        port=_int_env("PORT", 8317),
        api_keys=api_keys,
        metrics_token=os.getenv("METRICS_TOKEN", "").strip(),
        log_level=os.getenv("LOG_LEVEL", "INFO").strip() or "INFO",
        gateway_max_in_flight=_int_env("GATEWAY_MAX_IN_FLIGHT", 4),
        gateway_queue_timeout_sec=_float_env("GATEWAY_QUEUE_TIMEOUT_SEC", 30.0),
        lingma_bin=os.getenv("LINGMA_BIN", "/app/data/bin/Lingma"),
        lingma_work_dir=os.getenv(
            "LINGMA_WORK_DIR",
            "/app/data/.lingma/vscode/sharedClientCache",
        ),
        lingma_socket_port=_int_env("LINGMA_SOCKET_PORT", 36510),
        lingma_startup_timeout=_int_env("LINGMA_STARTUP_TIMEOUT", 40),
        lingma_rpc_timeout=_int_env("LINGMA_RPC_TIMEOUT", 30),
        default_model=os.getenv("DEFAULT_MODEL", "org_auto"),
        default_ask_mode=os.getenv("DEFAULT_ASK_MODE", "chat"),
        dedicated_domain_url=os.getenv("DEDICATED_DOMAIN_URL", "").strip(),
        auto_login_enabled=_bool_env("AUTO_LOGIN_ENABLED", True),
        auto_login_headless=_bool_env("AUTO_LOGIN_HEADLESS", True),
        auto_login_timeout=_int_env("AUTO_LOGIN_TIMEOUT", 180),
        auto_login_max_retry=_int_env("AUTO_LOGIN_MAX_RETRY", 2),
        accounts=accounts,
        instance_count=instance_count,
        session_reuse_enabled=_bool_env("SESSION_REUSE_ENABLED", True),
        session_cache_max_entries=_int_env("SESSION_CACHE_MAX_ENTRIES", 256),
        session_cache_ttl_sec=_float_env("SESSION_CACHE_TTL_SEC", 1800.0),
    )