prod hardening: admin/metrics authz split, subprocess lifecycle, parallel pool start, HEALTHCHECK
- authz: new ADMIN_TOKEN gates /internal/*; METRICS_PUBLIC=false by default, so /metrics returns 503 when neither METRICS_TOKEN nor API_KEYS is set (previously leaked pool topology). Startup logs loudly if API_KEYS is empty or admin falls back to chat keys.
- lingma_client: keep a Popen handle instead of orphaning Lingma with start_new_session, drain stderr to logger at DEBUG, SIGTERM -> 5s grace -> SIGKILL on shutdown. Fixes the zombie-process leak on container reload.
- pool: asyncio.gather to start N instances concurrently; N=2 pool shaves ~startup_timeout seconds off boot.
- Dockerfile: HEALTHCHECK hits /healthz and greps for pool_ready>0 so Docker / compose orchestrators see "stuck on login" as unhealthy.

Made-with: Cursor
This commit is contained in:
45
app/main.py
45
app/main.py
@@ -10,7 +10,7 @@ from contextlib import asynccontextmanager
|
||||
from fastapi import Depends, FastAPI, HTTPException, Request
|
||||
from fastapi.responses import JSONResponse, StreamingResponse
|
||||
|
||||
from .auth import require_bearer, require_metrics_access
|
||||
from .auth import require_admin_access, require_bearer, require_metrics_access
|
||||
from .concurrency import BackpressureRejected, InFlightGuard
|
||||
from .config import Settings, load_settings
|
||||
from .lingma_pool import LingmaPool, PoolInstance
|
||||
@@ -76,6 +76,7 @@ async def lifespan(_app: FastAPI):
|
||||
pool.size(),
|
||||
settings.gateway_max_in_flight,
|
||||
)
|
||||
_log_auth_posture()
|
||||
await pool.start()
|
||||
try:
|
||||
yield
|
||||
@@ -121,7 +122,37 @@ def auth_guard(request: Request):
|
||||
|
||||
|
||||
def metrics_auth_guard(request: Request):
|
||||
require_metrics_access(request, settings.api_keys, settings.metrics_token)
|
||||
require_metrics_access(
|
||||
request,
|
||||
settings.api_keys,
|
||||
settings.metrics_token,
|
||||
public=settings.metrics_public,
|
||||
)
|
||||
|
||||
|
||||
def admin_auth_guard(request: Request):
|
||||
require_admin_access(request, settings.api_keys, settings.admin_token)
|
||||
|
||||
|
||||
def _log_auth_posture() -> None:
|
||||
"""Loud warnings on misconfigured auth so ops can't miss them."""
|
||||
if not settings.api_keys:
|
||||
logger.warning(
|
||||
"AUTH DISABLED: API_KEYS is empty, /v1/* is wide open. "
|
||||
"Set API_KEYS before exposing this gateway to anything "
|
||||
"other than localhost."
|
||||
)
|
||||
if not settings.admin_token:
|
||||
logger.warning(
|
||||
"ADMIN_TOKEN not set: /internal/* reuses API_KEYS for auth. "
|
||||
"For production set a dedicated ADMIN_TOKEN so rotating chat "
|
||||
"keys doesn't require exporting the session bundle."
|
||||
)
|
||||
if settings.metrics_public:
|
||||
logger.warning(
|
||||
"METRICS_PUBLIC=true: /metrics is open. Only enable this "
|
||||
"when the gateway is behind a private-network scraper."
|
||||
)
|
||||
|
||||
|
||||
@app.get("/healthz")
|
||||
@@ -563,7 +594,7 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
|
||||
ticket.release()
|
||||
|
||||
|
||||
@app.post("/internal/auto-login/start", dependencies=[Depends(auth_guard)])
|
||||
@app.post("/internal/auto-login/start", dependencies=[Depends(admin_auth_guard)])
|
||||
async def internal_auto_login_start(instance: str | None = None):
|
||||
p = _require_pool()
|
||||
target = None
|
||||
@@ -614,7 +645,7 @@ async def internal_auto_login_start(instance: str | None = None):
|
||||
}
|
||||
|
||||
|
||||
@app.get("/internal/auto-login/status", dependencies=[Depends(auth_guard)])
|
||||
@app.get("/internal/auto-login/status", dependencies=[Depends(admin_auth_guard)])
|
||||
async def internal_auto_login_status():
|
||||
p = _require_pool()
|
||||
out = []
|
||||
@@ -634,7 +665,7 @@ async def internal_auto_login_status():
|
||||
return {"ok": True, "instances": out}
|
||||
|
||||
|
||||
@app.post("/internal/session/export", dependencies=[Depends(auth_guard)])
|
||||
@app.post("/internal/session/export", dependencies=[Depends(admin_auth_guard)])
|
||||
async def internal_session_export(instance: str | None = None):
|
||||
"""Export a logged-in Lingma session as a base64 tar.gz bundle.
|
||||
|
||||
@@ -693,7 +724,7 @@ async def internal_session_export(instance: str | None = None):
|
||||
}
|
||||
|
||||
|
||||
@app.get("/internal/models/raw", dependencies=[Depends(auth_guard)])
|
||||
@app.get("/internal/models/raw", dependencies=[Depends(admin_auth_guard)])
|
||||
async def internal_models_raw(instance: str | None = None):
|
||||
"""Return the raw `config/queryModels` response from Lingma.
|
||||
|
||||
@@ -723,7 +754,7 @@ async def internal_models_raw(instance: str | None = None):
|
||||
}
|
||||
|
||||
|
||||
@app.get("/internal/stats", dependencies=[Depends(auth_guard)])
|
||||
@app.get("/internal/stats", dependencies=[Depends(admin_auth_guard)])
|
||||
async def internal_stats():
|
||||
p = _require_pool()
|
||||
return {
|
||||
|
||||
Reference in New Issue
Block a user