feat: add Lingma OpenAI-compatible gateway service

2026-04-17 09:42:23 +08:00
commit 8139a5e97b
15 changed files with 1334 additions and 0 deletions
--- a/app/stats.py
+++ b/app/stats.py
@@ -0,0 +1,85 @@
+from __future__ import annotations
+
+import asyncio
+import time
+
+
+def estimate_tokens(text: str) -> int:
+    if not text:
+        return 0
+    # Heuristic only: roughly 1 token ~= 4 bytes.
+    return max(1, int(len(text.encode("utf-8")) / 4))
+
+
+class StatsCollector:
+    def __init__(self):
+        self._lock = asyncio.Lock()
+        self.started_at = int(time.time())
+        self.models_requests_total = 0
+        self.chat_requests_total = 0
+        self.chat_requests_success = 0
+        self.chat_requests_error = 0
+        self.chat_stream_requests = 0
+        self.chat_non_stream_requests = 0
+        self.prompt_tokens_estimated_total = 0
+        self.completion_tokens_estimated_total = 0
+
+    async def inc_models(self):
+        async with self._lock:
+            self.models_requests_total += 1
+
+    async def record_chat(self, *, stream: bool, success: bool, prompt_tokens: int, completion_tokens: int):
+        async with self._lock:
+            self.chat_requests_total += 1
+            if stream:
+                self.chat_stream_requests += 1
+            else:
+                self.chat_non_stream_requests += 1
+
+            if success:
+                self.chat_requests_success += 1
+            else:
+                self.chat_requests_error += 1
+
+            self.prompt_tokens_estimated_total += max(0, int(prompt_tokens))
+            self.completion_tokens_estimated_total += max(0, int(completion_tokens))
+
+    async def snapshot(self) -> dict:
+        async with self._lock:
+            total_tokens = self.prompt_tokens_estimated_total + self.completion_tokens_estimated_total
+            return {
+                "started_at": self.started_at,
+                "models_requests_total": self.models_requests_total,
+                "chat_requests_total": self.chat_requests_total,
+                "chat_requests_success": self.chat_requests_success,
+                "chat_requests_error": self.chat_requests_error,
+                "chat_stream_requests": self.chat_stream_requests,
+                "chat_non_stream_requests": self.chat_non_stream_requests,
+                "prompt_tokens_estimated_total": self.prompt_tokens_estimated_total,
+                "completion_tokens_estimated_total": self.completion_tokens_estimated_total,
+                "total_tokens_estimated": total_tokens,
+            }
+
+    async def prometheus_text(self) -> str:
+        s = await self.snapshot()
+        lines = [
+            "# TYPE gateway_models_requests_total counter",
+            f"gateway_models_requests_total {s['models_requests_total']}",
+            "# TYPE gateway_chat_requests_total counter",
+            f"gateway_chat_requests_total {s['chat_requests_total']}",
+            "# TYPE gateway_chat_requests_success counter",
+            f"gateway_chat_requests_success {s['chat_requests_success']}",
+            "# TYPE gateway_chat_requests_error counter",
+            f"gateway_chat_requests_error {s['chat_requests_error']}",
+            "# TYPE gateway_chat_stream_requests counter",
+            f"gateway_chat_stream_requests {s['chat_stream_requests']}",
+            "# TYPE gateway_chat_non_stream_requests counter",
+            f"gateway_chat_non_stream_requests {s['chat_non_stream_requests']}",
+            "# TYPE gateway_prompt_tokens_estimated_total counter",
+            f"gateway_prompt_tokens_estimated_total {s['prompt_tokens_estimated_total']}",
+            "# TYPE gateway_completion_tokens_estimated_total counter",
+            f"gateway_completion_tokens_estimated_total {s['completion_tokens_estimated_total']}",
+            "# TYPE gateway_total_tokens_estimated counter",
+            f"gateway_total_tokens_estimated {s['total_tokens_estimated']}",
+        ]
+        return "\n".join(lines) + "\n"