feat: M1+M2 gateway hardening and multi-instance pool
Behavior hardening (M1):
- Fix `_chat_streams` memory leak: pop_stream on completion, error, and
client disconnect.
- Add WebSocket reconnect with state machine (stopped/starting/ready/
reconnecting/failed/closed) and exponential backoff, so a Lingma
restart no longer requires restarting the gateway.
- Lazy initialization: startup failure is non-fatal, first real request
triggers retry, `/healthz` reflects readiness.
- Migrate FastAPI on_event to lifespan.
- Structured JSON logging with request_id ContextVar; `x-request-id`
propagated to responses.
- SSE now sets `Cache-Control: no-cache`, `X-Accel-Buffering: no` to
defeat proxy buffering.
- OpenAI schema compatibility: `content` accepts str | list[parts] | None,
added `developer`/`function` roles, `tools/tool_choice/stream_options/
user/max_tokens` fields, and `stream_options.include_usage` emits final
usage chunk.
- `require_bearer` uses `hmac.compare_digest`; `/metrics` now requires
Bearer when `METRICS_TOKEN` or `API_KEYS` are set.
- Python 3.10/3.11 `TimeoutError` vs `asyncio.TimeoutError` unified.
- Error responses no longer leak `auto_login.status()` details.
Backpressure (M2 / A2):
- New `InFlightGuard` with per-request ticket, queue + rejection
accounting, `BackpressureRejected` raises 429 + `Retry-After` once
`GATEWAY_QUEUE_TIMEOUT_SEC` elapses.
- Streaming ticket ownership transfers to the generator so CancelledError
from client disconnect still releases the slot.
- `/internal/stats.concurrency` and `/metrics` expose in_flight/queued/
accepted_total/rejected_total/max_in_flight.
Multi-instance pool (M2 / A1 + B3):
- New `LingmaPool` with N processes, each with its own workDir, socket
port (dynamic when N>1), and `AutoLoginManager`.
- Account parser supports CSV (`u1:p1,u2:p2`), newline-separated
(`u1:p1\nu2:p2`), and JSON array formats via `LINGMA_ACCOUNTS`; falls back
to `LINGMA_USERNAME/LINGMA_PASSWORD` for backwards compatibility (N=1 keeps
legacy paths/ports).
- Routing: sticky affinity by `user` / system-prompt hash, then
least-in-flight, finally round-robin fallback for unhealthy pool.
- `/healthz` reports per-instance state and ready count.
- `/internal/stats.pool` and `/metrics` expose per-instance
`gateway_pool_instance_in_flight{name}` / `gateway_pool_instance_ready{name}`.
- `/internal/auto-login/start?instance=inst-N` targets a specific instance;
`/internal/auto-login/status` lists all instances.
Compat notes:
- `.env.example` adds `METRICS_TOKEN`, `LOG_LEVEL`, `GATEWAY_MAX_IN_FLIGHT`,
`GATEWAY_QUEUE_TIMEOUT_SEC`, `LINGMA_ACCOUNTS`, `LINGMA_INSTANCE_COUNT`.
- `.gitignore` cleaned up data/ duplication.
- Existing single-instance deployments keep working without config change.
Made-with: Cursor
This commit is contained in:
@@ -1,8 +1,14 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
|
||||
@dataclass
class LingmaAccount:
    """Credentials for a single Lingma login.

    The password is excluded from the generated ``repr`` so that logging or
    printing an account (or an object that contains one) does not expose the
    secret. Construction and equality comparison are unaffected.
    """

    # Login name of the account.
    username: str
    # Login secret; kept out of repr() to avoid leaking it into logs.
    password: str = field(repr=False)
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -10,6 +16,10 @@ class Settings:
|
||||
host: str
|
||||
port: int
|
||||
api_keys: list[str]
|
||||
metrics_token: str
|
||||
log_level: str
|
||||
gateway_max_in_flight: int
|
||||
gateway_queue_timeout_sec: float
|
||||
lingma_bin: str
|
||||
lingma_work_dir: str
|
||||
lingma_socket_port: int
|
||||
@@ -22,8 +32,57 @@ class Settings:
|
||||
auto_login_headless: bool
|
||||
auto_login_timeout: int
|
||||
auto_login_max_retry: int
|
||||
lingma_username: str
|
||||
lingma_password: str
|
||||
accounts: list[LingmaAccount] = field(default_factory=list)
|
||||
instance_count: int = 1
|
||||
|
||||
|
||||
def _bool_env(name: str, default: bool) -> bool:
    """Read a boolean flag from the environment.

    Args:
        name: Name of the environment variable to read.
        default: Value returned when the variable is unset or blank.

    Returns:
        True when the trimmed, case-insensitive value is one of
        ``1``, ``true``, ``yes`` or ``on``; False for any other non-blank
        value; ``default`` when the variable is missing or blank.
    """
    raw = os.getenv(name)
    if raw is None:
        return default
    value = raw.strip().lower()
    if not value:
        # A variable that is present but blank (e.g. `FLAG=` in an env file)
        # carries no intent, so fall back to the default instead of
        # silently reading it as False.
        return default
    return value in {"1", "true", "yes", "on"}
|
||||
|
||||
|
||||
def _account_field(value: object) -> str:
    """Return a JSON scalar as trimmed text, or "" when it is not usable."""
    # JSON null, booleans, arrays and objects are not credentials; passing
    # them through str() would fabricate values such as "None" or "{...}".
    if isinstance(value, bool) or not isinstance(value, (str, int, float)):
        return ""
    return str(value).strip()


def _parse_accounts(raw: str) -> list[LingmaAccount]:
    r"""Parse LINGMA_ACCOUNTS into a list of accounts.

    Accepted formats:
    - JSON array: `[{"username":"u1","password":"p1"},{"username":"u2","password":"p2"}]`
    - CSV: `u1:p1,u2:p2`
    - Newlines: `u1:p1\nu2:p2`

    Whitespace around entries is trimmed. Empty entries are ignored.
    Passwords containing ':' are supported (only the first ':' is the separator).
    In the CSV/newline form a ',' always separates entries, so a password
    containing ',' has to be supplied through the JSON form.

    Args:
        raw: Raw configuration value; ``None`` or a blank string is allowed.

    Returns:
        The accounts in input order. Malformed input never raises: invalid
        JSON, a JSON document that is not a list, and entries without a
        usable username or password are skipped.
    """
    raw = (raw or "").strip()
    if not raw:
        return []

    accounts: list[LingmaAccount] = []

    if raw.startswith("["):
        try:
            data = json.loads(raw)
        except (ValueError, RecursionError):
            # json.JSONDecodeError is a ValueError; RecursionError covers
            # pathologically nested input. Either way: no accounts.
            return []
        if not isinstance(data, list):
            return []
        for item in data:
            if not isinstance(item, dict):
                continue
            username = _account_field(item.get("username"))
            password = _account_field(item.get("password"))
            if username and password:
                accounts.append(LingmaAccount(username, password))
        return accounts

    # CSV / newline form: normalise newlines to commas, then split entries.
    for entry in raw.replace("\n", ",").split(","):
        entry = entry.strip()
        if not entry or ":" not in entry:
            continue
        # Split on the first ':' only so passwords may contain ':'.
        username, password = entry.split(":", 1)
        username, password = username.strip(), password.strip()
        if username and password:
            accounts.append(LingmaAccount(username, password))
    return accounts
|
||||
|
||||
|
||||
def load_settings() -> Settings:
|
||||
@@ -33,10 +92,31 @@ def load_settings() -> Settings:
|
||||
"LINGMA_WORK_DIR",
|
||||
"/app/data/.lingma/vscode/sharedClientCache",
|
||||
)
|
||||
|
||||
accounts = _parse_accounts(os.getenv("LINGMA_ACCOUNTS", ""))
|
||||
if not accounts:
|
||||
u = os.getenv("LINGMA_USERNAME", "").strip()
|
||||
p = os.getenv("LINGMA_PASSWORD", "").strip()
|
||||
if u and p:
|
||||
accounts.append(LingmaAccount(u, p))
|
||||
|
||||
explicit_count = os.getenv("LINGMA_INSTANCE_COUNT", "").strip()
|
||||
if explicit_count:
|
||||
try:
|
||||
instance_count = max(1, int(explicit_count))
|
||||
except ValueError:
|
||||
instance_count = len(accounts) or 1
|
||||
else:
|
||||
instance_count = max(1, len(accounts)) if accounts else 1
|
||||
|
||||
return Settings(
|
||||
host=os.getenv("HOST", "0.0.0.0"),
|
||||
port=int(os.getenv("PORT", "8317")),
|
||||
api_keys=api_keys,
|
||||
metrics_token=os.getenv("METRICS_TOKEN", "").strip(),
|
||||
log_level=os.getenv("LOG_LEVEL", "INFO").strip() or "INFO",
|
||||
gateway_max_in_flight=int(os.getenv("GATEWAY_MAX_IN_FLIGHT", "4")),
|
||||
gateway_queue_timeout_sec=float(os.getenv("GATEWAY_QUEUE_TIMEOUT_SEC", "30")),
|
||||
lingma_bin=os.getenv("LINGMA_BIN", "/app/data/bin/Lingma"),
|
||||
lingma_work_dir=work_dir,
|
||||
lingma_socket_port=int(os.getenv("LINGMA_SOCKET_PORT", "36510")),
|
||||
@@ -45,10 +125,10 @@ def load_settings() -> Settings:
|
||||
default_model=os.getenv("DEFAULT_MODEL", "org_auto"),
|
||||
default_ask_mode=os.getenv("DEFAULT_ASK_MODE", "chat"),
|
||||
dedicated_domain_url=os.getenv("DEDICATED_DOMAIN_URL", "").strip(),
|
||||
auto_login_enabled=os.getenv("AUTO_LOGIN_ENABLED", "true").lower() in {"1", "true", "yes", "on"},
|
||||
auto_login_headless=os.getenv("AUTO_LOGIN_HEADLESS", "true").lower() in {"1", "true", "yes", "on"},
|
||||
auto_login_enabled=_bool_env("AUTO_LOGIN_ENABLED", True),
|
||||
auto_login_headless=_bool_env("AUTO_LOGIN_HEADLESS", True),
|
||||
auto_login_timeout=int(os.getenv("AUTO_LOGIN_TIMEOUT", "180")),
|
||||
auto_login_max_retry=int(os.getenv("AUTO_LOGIN_MAX_RETRY", "2")),
|
||||
lingma_username=os.getenv("LINGMA_USERNAME", "").strip(),
|
||||
lingma_password=os.getenv("LINGMA_PASSWORD", "").strip(),
|
||||
accounts=accounts,
|
||||
instance_count=instance_count,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user