prod hardening: admin/metrics authz split, subprocess lifecycle, parallel pool start, HEALTHCHECK

- authz: new ADMIN_TOKEN gates /internal/*; METRICS_PUBLIC=false by default, so /metrics returns 503 when neither METRICS_TOKEN nor API_KEYS is set (previously leaked pool topology). Startup logs loudly if API_KEYS is empty or admin falls back to chat keys.
- lingma_client: keep a Popen handle instead of orphaning Lingma with start_new_session, drain stderr to logger at DEBUG, SIGTERM -> 5s grace -> SIGKILL on shutdown. Fixes the zombie-process leak on container reload.
- pool: asyncio.gather to start N instances concurrently; N=2 pool shaves ~startup_timeout seconds off boot.
- Dockerfile: HEALTHCHECK hits /healthz and greps for pool_ready>0 so Docker / compose orchestrators see "stuck on login" as unhealthy.

Made-with: Cursor
2026-04-18 10:22:13 +08:00
parent 3130533888
commit 2febc37c2c
8 changed files with 248 additions and 28 deletions
--- a/app/lingma_client.py
+++ b/app/lingma_client.py
@@ -316,6 +316,11 @@ class LingmaGatewayClient:
        self._ready_event = asyncio.Event()
        self._reconnect_task: asyncio.Task | None = None
        self._last_error: str = ""
+        # Lingma subprocess handle. Kept so we can reap on shutdown and read
+        # stderr for debugging (pre-v0.4 we forked with DEVNULL + new_session
+        # which orphaned the process and hid crash logs).
+        self._proc: subprocess.Popen | None = None
+        self._stderr_task: asyncio.Task | None = None

    # ------------------------------------------------------------------ state

@@ -359,6 +364,76 @@ class LingmaGatewayClient:
        if self._ws:
            with contextlib.suppress(Exception):
                await self._ws.close()
+        await self._terminate_proc()
+        if self._stderr_task and not self._stderr_task.done():
+            self._stderr_task.cancel()
+            with contextlib.suppress(Exception):
+                await self._stderr_task
+
+    async def _drain_stderr(self, proc: subprocess.Popen) -> None:
+        """Forward every line Lingma writes to stderr to ``logger.debug``.
+
+        ``readline`` blocks, so the loop runs on a worker thread through
+        ``asyncio.to_thread``; the output is visible only when DEBUG records
+        are emitted. Cancellation of the awaiting task is absorbed here, but
+        the worker thread keeps reading until ``readline`` hits EOF or an
+        exception is raised.
+        """
+        stream = proc.stderr
+        if stream is None:
+            return
+
+        label = self.name
+
+        def pump() -> None:
+            try:
+                while raw := stream.readline():
+                    message = raw.decode("utf-8", errors="replace").rstrip()
+                    if message:
+                        logger.debug("[%s] lingma stderr: %s", label, message)
+            except Exception as exc:  # pragma: no cover -- defensive
+                logger.debug("[%s] stderr drain aborted: %s", label, exc)
+
+        # Absorb cancellation delivered while waiting on the worker thread.
+        with contextlib.suppress(asyncio.CancelledError):
+            await asyncio.to_thread(pump)
+
+    async def _terminate_proc(self) -> None:
+        """Shut down and reap the Lingma child process, if one is tracked.
+
+        SIGTERM, wait up to 5s, then SIGKILL and wait up to 3s. Each wait runs
+        in a worker thread, keeping the event loop free. No-op if none tracked.
+        """
+        child, self._proc = self._proc, None
+        if child is None:
+            return
+
+        async def wait_exit(limit: float) -> None:
+            await asyncio.wait_for(asyncio.to_thread(child.wait), timeout=limit)
+        try:
+            if child.poll() is not None:
+                return  # already exited; only the pipe cleanup below remains
+            try:
+                child.terminate()
+            except Exception as exc:
+                logger.warning("[%s] proc.terminate failed: %s", self.name, exc)
+            try:
+                await wait_exit(5.0)
+            except TIMEOUT_EXCEPTIONS:
+                logger.warning(
+                    "[%s] lingma (pid=%s) didn't exit in 5s, sending SIGKILL",
+                    self.name,
+                    child.pid,
+                )
+                with contextlib.suppress(Exception):
+                    child.kill()
+                with contextlib.suppress(Exception):
+                    await wait_exit(3.0)
+        finally:
+            # Closing the stderr pipe lets the drain thread exit cleanly.
+            if child.stderr is not None:
+                with contextlib.suppress(Exception):
+                    child.stderr.close()

    async def ensure_ready(self, timeout: float | None = None) -> None:
        """Block until the RPC connection is usable, (re)connecting on demand."""
@@ -413,12 +488,26 @@ class LingmaGatewayClient:
                self.lingma_bin,
                self.work_dir,
            )
-            subprocess.Popen(
+            # Reap any old proc from a previous connect attempt before spawning
+            # a fresh one so we never accumulate zombie Lingma instances.
+            await self._terminate_proc()
+            if self._stderr_task and not self._stderr_task.done():
+                self._stderr_task.cancel()
+                with contextlib.suppress(Exception):
+                    await self._stderr_task
+                self._stderr_task = None
+
+            self._proc = subprocess.Popen(
                [str(self.lingma_bin), "start", "--workDir", str(self.work_dir)],
                cwd=str(self.lingma_bin.parent),
                stdout=subprocess.DEVNULL,
-                stderr=subprocess.DEVNULL,
-                start_new_session=True,
+                stderr=subprocess.PIPE,
+            )
+            logger.info(
+                "[%s] lingma spawned (pid=%s)", self.name, self._proc.pid
+            )
+            self._stderr_task = asyncio.create_task(
+                self._drain_stderr(self._proc)
            )
            info, _, _ = _wait_info_any(info_paths, timeout_sec=self.startup_timeout)
            self.socket_port = info