feat: add Lingma OpenAI-compatible gateway service
This commit is contained in:
85
app/stats.py
Normal file
85
app/stats.py
Normal file
@@ -0,0 +1,85 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import time
|
||||
|
||||
|
||||
def estimate_tokens(text: str) -> int:
    """Return a rough token-count estimate for *text*.

    Heuristic only: roughly one token per 4 UTF-8 bytes. Falsy input
    (empty string or ``None``) yields 0; any non-empty string yields at
    least 1.

    Lone surrogate code points (e.g. U+D800) cannot be encoded by the strict
    UTF-8 codec, so the ``surrogatepass`` error handler is used: such input
    is counted like any other text instead of raising ``UnicodeEncodeError``.
    """
    if not text:
        return 0
    byte_count = len(text.encode("utf-8", errors="surrogatepass"))
    # Floor division keeps the arithmetic in integers (no float round-trip).
    return max(1, byte_count // 4)
|
||||
|
||||
|
||||
class StatsCollector:
    """In-memory request and token counters with JSON and Prometheus views.

    Every counter is a plain integer starting at zero; updates and reads go
    through a single ``asyncio.Lock``. ``started_at`` holds the construction
    time as whole Unix seconds.
    """

    # Snapshot keys exported by ``prometheus_text``, in output order. Each one
    # is emitted as the metric ``gateway_<key>``. ``started_at`` is
    # intentionally absent: it is part of the snapshot but not of the metrics.
    _METRIC_KEYS = (
        "models_requests_total",
        "chat_requests_total",
        "chat_requests_success",
        "chat_requests_error",
        "chat_stream_requests",
        "chat_non_stream_requests",
        "prompt_tokens_estimated_total",
        "completion_tokens_estimated_total",
        "total_tokens_estimated",
    )

    def __init__(self):
        """Create a collector with all counters at zero."""
        self._lock = asyncio.Lock()
        self.started_at = int(time.time())
        self.models_requests_total = 0
        self.chat_requests_total = 0
        self.chat_requests_success = 0
        self.chat_requests_error = 0
        self.chat_stream_requests = 0
        self.chat_non_stream_requests = 0
        self.prompt_tokens_estimated_total = 0
        self.completion_tokens_estimated_total = 0

    async def inc_models(self):
        """Count one models request."""
        async with self._lock:
            self.models_requests_total += 1

    async def record_chat(self, *, stream: bool, success: bool, prompt_tokens: int, completion_tokens: int):
        """Record the outcome of one chat request.

        Args:
            stream: Truthy counts the request as streaming, falsy as
                non-streaming.
            success: Truthy counts the request as a success, falsy as an
                error.
            prompt_tokens: Added to the prompt token total; negative values
                are clamped to 0.
            completion_tokens: Added to the completion token total; negative
                values are clamped to 0.

        Raises:
            TypeError, ValueError: If a token count cannot be converted with
                ``int()``. No counter is modified in that case.
        """
        # Coerce before touching any counter: if int() raises, the collector
        # must not be left with the request counted but its tokens missing.
        prompt_delta = max(0, int(prompt_tokens))
        completion_delta = max(0, int(completion_tokens))

        async with self._lock:
            self.chat_requests_total += 1

            if stream:
                self.chat_stream_requests += 1
            else:
                self.chat_non_stream_requests += 1

            if success:
                self.chat_requests_success += 1
            else:
                self.chat_requests_error += 1

            self.prompt_tokens_estimated_total += prompt_delta
            self.completion_tokens_estimated_total += completion_delta

    async def snapshot(self) -> dict:
        """Return a dict copy of all counters.

        Besides the stored counters the dict carries ``started_at`` and the
        derived ``total_tokens_estimated`` (prompt + completion totals).
        """
        async with self._lock:
            total_tokens = self.prompt_tokens_estimated_total + self.completion_tokens_estimated_total
            return {
                "started_at": self.started_at,
                "models_requests_total": self.models_requests_total,
                "chat_requests_total": self.chat_requests_total,
                "chat_requests_success": self.chat_requests_success,
                "chat_requests_error": self.chat_requests_error,
                "chat_stream_requests": self.chat_stream_requests,
                "chat_non_stream_requests": self.chat_non_stream_requests,
                "prompt_tokens_estimated_total": self.prompt_tokens_estimated_total,
                "completion_tokens_estimated_total": self.completion_tokens_estimated_total,
                "total_tokens_estimated": total_tokens,
            }

    async def prometheus_text(self) -> str:
        """Render the current counters as newline-terminated metric text.

        For each key in ``_METRIC_KEYS`` two lines are produced:
        ``# TYPE gateway_<key> counter`` followed by ``gateway_<key> <value>``.
        Values are taken from a single ``snapshot()`` call.
        """
        stats = await self.snapshot()
        lines = []
        for key in self._METRIC_KEYS:
            metric = f"gateway_{key}"
            lines.append(f"# TYPE {metric} counter")
            lines.append(f"{metric} {stats[key]}")
        return "\n".join(lines) + "\n"
|
||||
Reference in New Issue
Block a user