diff --git a/app/lingma_client.py b/app/lingma_client.py
index 9c4c80d..b5fe213 100644
--- a/app/lingma_client.py
+++ b/app/lingma_client.py
@@ -568,6 +568,17 @@ class LingmaGatewayClient:
             },
         }
 
+    async def _kick_chat_ask(self, payload: dict) -> None:
+        """Send chat/ask as a JSON-RPC notification without awaiting a reply.
+
+        Lingma streams answers back via `chat/answer` + `chat/finish` and never
+        returns a JSON-RPC `result` for `chat/ask`. Awaiting one through
+        `rpc.request` stalled for `rpc_timeout` seconds before the first byte
+        could leave the gateway, which matches our previous 30s TTFB bug.
+        `notify` skips that wait; callers then read the reply from the stream.
+        """
+        await self.rpc.notify("chat/ask", payload)
+
     async def chat_complete(self, prompt: str, model_key: str, ask_mode: str) -> dict:
         await self.ensure_ready()
         request_id = str(uuid.uuid4())
@@ -575,13 +586,10 @@ class LingmaGatewayClient:
         payload = self._build_payload(prompt, model_key, ask_mode, session_id, request_id)
         self.rpc.create_stream(request_id)
         try:
-            try:
-                await self.rpc.request("chat/ask", payload, timeout=self.rpc_timeout)
-            except TIMEOUT_EXCEPTIONS:
-                # chat/ask often returns nothing until chat/finish arrives; tolerate.
-                pass
+            await self._kick_chat_ask(payload)
+            # Consume until chat/finish closes the stream or the upstream idles.
             async for _ in self.rpc.consume_stream(
-                request_id, timeout=max(20.0, self.rpc_timeout + 20.0)
+                request_id, timeout=max(60.0, self.rpc_timeout + 30.0)
             ):
                 pass
             result = self.rpc.get_stream_result(request_id)
@@ -601,12 +609,9 @@ class LingmaGatewayClient:
         payload = self._build_payload(prompt, model_key, ask_mode, session_id, request_id)
         self.rpc.create_stream(request_id)
         try:
-            try:
-                await self.rpc.request("chat/ask", payload, timeout=self.rpc_timeout)
-            except TIMEOUT_EXCEPTIONS:
-                pass
+            await self._kick_chat_ask(payload)
             async for chunk in self.rpc.consume_stream(
-                request_id, timeout=max(20.0, self.rpc_timeout + 40.0)
+                request_id, timeout=max(60.0, self.rpc_timeout + 60.0)
             ):
                 yield chunk
         finally: