from __future__ import annotations

import asyncio
import time


def estimate_tokens(text: str) -> int:
    """Return a rough token-count estimate for *text*.

    Empty (falsy) input yields 0. Any other input yields at least 1, computed
    as the UTF-8 byte length divided by four, rounded down.
    """
    if not text:
        return 0
    # Heuristic only: roughly 1 token ~= 4 bytes.
    # Floor division keeps the arithmetic in exact integers (no float round-trip).
    return max(1, len(text.encode("utf-8")) // 4)


class StatsCollector:
    """In-memory request and token counters guarded by an ``asyncio.Lock``.

    Counters only ever increase. ``started_at`` holds the Unix timestamp
    (whole seconds) taken when the collector was constructed.
    """

    # Single source of truth for the counters reported by ``snapshot()`` and
    # ``prometheus_text()``; the tuple order is the reporting order.
    _COUNTER_FIELDS = (
        "models_requests_total",
        "chat_requests_total",
        "chat_requests_success",
        "chat_requests_error",
        "chat_stream_requests",
        "chat_non_stream_requests",
        "prompt_tokens_estimated_total",
        "completion_tokens_estimated_total",
    )

    def __init__(self) -> None:
        self._lock = asyncio.Lock()
        self.started_at = int(time.time())
        self.models_requests_total = 0
        self.chat_requests_total = 0
        self.chat_requests_success = 0
        self.chat_requests_error = 0
        self.chat_stream_requests = 0
        self.chat_non_stream_requests = 0
        self.prompt_tokens_estimated_total = 0
        self.completion_tokens_estimated_total = 0

    async def inc_models(self) -> None:
        """Increment the models-request counter by one."""
        async with self._lock:
            self.models_requests_total += 1

    async def record_chat(
        self,
        *,
        stream: bool,
        success: bool,
        prompt_tokens: int,
        completion_tokens: int,
    ) -> None:
        """Record one chat request and its estimated token usage.

        Args:
            stream: Counts the request as streaming when true, otherwise as
                non-streaming.
            success: Counts the request as a success when true, otherwise as
                an error.
            prompt_tokens: Added to the prompt-token total after ``int()``
                coercion; negative values are clamped to 0.
            completion_tokens: Added to the completion-token total after
                ``int()`` coercion; negative values are clamped to 0.
        """
        async with self._lock:
            self.chat_requests_total += 1
            if stream:
                self.chat_stream_requests += 1
            else:
                self.chat_non_stream_requests += 1
            if success:
                self.chat_requests_success += 1
            else:
                self.chat_requests_error += 1
            # Clamp at zero so the totals can never decrease.
            self.prompt_tokens_estimated_total += max(0, int(prompt_tokens))
            self.completion_tokens_estimated_total += max(0, int(completion_tokens))

    async def snapshot(self) -> dict:
        """Return a copy of all counters taken under the lock.

        The dict holds ``started_at``, every name in ``_COUNTER_FIELDS`` (in
        that order), and ``total_tokens_estimated`` (prompt + completion).
        """
        async with self._lock:
            started_at = self.started_at
            counters = {name: getattr(self, name) for name in self._COUNTER_FIELDS}
        total_tokens = (
            counters["prompt_tokens_estimated_total"]
            + counters["completion_tokens_estimated_total"]
        )
        return {
            "started_at": started_at,
            **counters,
            "total_tokens_estimated": total_tokens,
        }

    async def prometheus_text(self) -> str:
        """Render the counters as newline-terminated Prometheus-style text.

        For each counter a ``# TYPE gateway_<name> counter`` line is followed
        by a ``gateway_<name> <value>`` sample line. ``started_at`` is not
        included in the output.
        """
        s = await self.snapshot()
        lines = []
        for key in (*self._COUNTER_FIELDS, "total_tokens_estimated"):
            metric = f"gateway_{key}"
            lines.append(f"# TYPE {metric} counter")
            lines.append(f"{metric} {s[key]}")
        return "\n".join(lines) + "\n"