prod hardening: admin/metrics authz split, subprocess lifecycle, parallel pool start, HEALTHCHECK

- authz: new ADMIN_TOKEN gates /internal/*; METRICS_PUBLIC=false by default, so
  /metrics returns 503 when neither METRICS_TOKEN nor API_KEYS is set
  (previously leaked pool topology). Startup logs loudly if API_KEYS is empty
  or admin falls back to chat keys.
- lingma_client: keep a Popen handle instead of orphaning Lingma with
  start_new_session, drain stderr to logger at DEBUG, SIGTERM -> 5s grace ->
  SIGKILL on shutdown. Fixes the zombie-process leak on container reload.
- pool: asyncio.gather to start N instances concurrently; N=2 pool shaves
  ~startup_timeout seconds off boot.
- Dockerfile: HEALTHCHECK hits /healthz and greps for pool_ready>0 so Docker
  / compose orchestrators see "stuck on login" as unhealthy.

Made-with: Cursor
This commit is contained in:
GitHub Actions
2026-04-18 10:22:13 +08:00
parent 3130533888
commit 2febc37c2c
8 changed files with 248 additions and 28 deletions

View File

@@ -10,7 +10,7 @@ from contextlib import asynccontextmanager
from fastapi import Depends, FastAPI, HTTPException, Request
from fastapi.responses import JSONResponse, StreamingResponse
from .auth import require_bearer, require_metrics_access
from .auth import require_admin_access, require_bearer, require_metrics_access
from .concurrency import BackpressureRejected, InFlightGuard
from .config import Settings, load_settings
from .lingma_pool import LingmaPool, PoolInstance
@@ -76,6 +76,7 @@ async def lifespan(_app: FastAPI):
pool.size(),
settings.gateway_max_in_flight,
)
_log_auth_posture()
await pool.start()
try:
yield
@@ -121,7 +122,37 @@ def auth_guard(request: Request):
def metrics_auth_guard(request: Request):
    """Check that *request* is allowed to read metrics.

    Delegates to ``require_metrics_access`` with the configured API keys,
    the metrics token, and the ``metrics_public`` setting. Whatever that
    helper raises on a rejected request propagates to the caller.
    """
    # Exactly one call, and it carries ``public=``: an extra call without the
    # flag would run the access check before the public setting is considered.
    require_metrics_access(
        request,
        settings.api_keys,
        settings.metrics_token,
        public=settings.metrics_public,
    )
def admin_auth_guard(request: Request):
    """Check that *request* is allowed to use admin-gated routes.

    Hands the request, the configured API keys, and the admin token to
    ``require_admin_access``; any exception from that helper propagates.
    """
    chat_keys = settings.api_keys
    admin_token = settings.admin_token
    require_admin_access(request, chat_keys, admin_token)
def _log_auth_posture() -> None:
    """Warn at startup about each risky authentication setting.

    Looks at ``settings.api_keys``, ``settings.admin_token`` and
    ``settings.metrics_public`` in that order and logs one warning for
    every condition that holds. Nothing is logged when none apply.
    """
    # Ordered (triggered, message) pairs; the order fixes the log order.
    posture_checks = (
        (
            not settings.api_keys,
            "AUTH DISABLED: API_KEYS is empty, /v1/* is wide open. "
            "Set API_KEYS before exposing this gateway to anything "
            "other than localhost.",
        ),
        (
            not settings.admin_token,
            "ADMIN_TOKEN not set: /internal/* reuses API_KEYS for auth. "
            "For production set a dedicated ADMIN_TOKEN so rotating chat "
            "keys doesn't require exporting the session bundle.",
        ),
        (
            settings.metrics_public,
            "METRICS_PUBLIC=true: /metrics is open. Only enable this "
            "when the gateway is behind a private-network scraper.",
        ),
    )
    for triggered, message in posture_checks:
        if triggered:
            logger.warning(message)
@app.get("/healthz")
@@ -563,7 +594,7 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
ticket.release()
@app.post("/internal/auto-login/start", dependencies=[Depends(auth_guard)])
@app.post("/internal/auto-login/start", dependencies=[Depends(admin_auth_guard)])
async def internal_auto_login_start(instance: str | None = None):
p = _require_pool()
target = None
@@ -614,7 +645,7 @@ async def internal_auto_login_start(instance: str | None = None):
}
@app.get("/internal/auto-login/status", dependencies=[Depends(auth_guard)])
@app.get("/internal/auto-login/status", dependencies=[Depends(admin_auth_guard)])
async def internal_auto_login_status():
p = _require_pool()
out = []
@@ -634,7 +665,7 @@ async def internal_auto_login_status():
return {"ok": True, "instances": out}
@app.post("/internal/session/export", dependencies=[Depends(auth_guard)])
@app.post("/internal/session/export", dependencies=[Depends(admin_auth_guard)])
async def internal_session_export(instance: str | None = None):
"""Export a logged-in Lingma session as a base64 tar.gz bundle.
@@ -693,7 +724,7 @@ async def internal_session_export(instance: str | None = None):
}
@app.get("/internal/models/raw", dependencies=[Depends(auth_guard)])
@app.get("/internal/models/raw", dependencies=[Depends(admin_auth_guard)])
async def internal_models_raw(instance: str | None = None):
"""Return the raw `config/queryModels` response from Lingma.
@@ -723,7 +754,7 @@ async def internal_models_raw(instance: str | None = None):
}
@app.get("/internal/stats", dependencies=[Depends(auth_guard)])
@app.get("/internal/stats", dependencies=[Depends(admin_auth_guard)])
async def internal_stats():
p = _require_pool()
return {