# lingma-openai-gateway/app/stats.py

from __future__ import annotations

import asyncio
import time


def estimate_tokens(text: str) -> int:
    """Return a rough token estimate for *text*.

    The estimate is a heuristic only: about one token per four bytes of the
    UTF-8 encoding. Falsy input (for example the empty string) yields 0; any
    non-empty text yields at least 1.
    """
    if text:
        utf8_size = len(text.encode("utf-8"))
        # Floor division matches truncating the quotient for non-negative sizes.
        return max(1, utf8_size // 4)
    return 0


class StatsCollector:
    """In-memory request and token counters guarded by an ``asyncio.Lock``.

    Every counter starts at zero when the collector is created; ``started_at``
    holds the creation time as whole seconds since the epoch.
    """

    # Counter attributes, in the order they appear in ``snapshot()``.
    _COUNTER_FIELDS = (
        "models_requests_total",
        "chat_requests_total",
        "chat_requests_success",
        "chat_requests_error",
        "chat_stream_requests",
        "chat_non_stream_requests",
        "prompt_tokens_estimated_total",
        "completion_tokens_estimated_total",
    )

    # Snapshot keys rendered by ``prometheus_text()``, in output order.
    _EXPORTED_FIELDS = _COUNTER_FIELDS + ("total_tokens_estimated",)

    def __init__(self):
        """Create the lock, stamp the start time and zero every counter."""
        self._lock = asyncio.Lock()
        self.started_at = int(time.time())

        # Request counters.
        self.models_requests_total = 0
        self.chat_requests_total = 0
        self.chat_requests_success = 0
        self.chat_requests_error = 0
        self.chat_stream_requests = 0
        self.chat_non_stream_requests = 0

        # Estimated token counters.
        self.prompt_tokens_estimated_total = 0
        self.completion_tokens_estimated_total = 0

    async def inc_models(self):
        """Count one models request."""
        async with self._lock:
            self.models_requests_total = self.models_requests_total + 1

    async def record_chat(self, *, stream: bool, success: bool, prompt_tokens: int, completion_tokens: int):
        """Record one chat request.

        Bumps the total, the stream or non-stream counter, and the success or
        error counter, then adds the token counts to the running totals.
        Token counts are converted with ``int`` and negative values count as 0.
        """
        mode_counter = "chat_stream_requests" if stream else "chat_non_stream_requests"
        outcome_counter = "chat_requests_success" if success else "chat_requests_error"

        async with self._lock:
            self.chat_requests_total += 1
            for counter_name in (mode_counter, outcome_counter):
                setattr(self, counter_name, getattr(self, counter_name) + 1)

            # Clamp so a negative estimate never decreases a running total.
            prompt_delta = max(0, int(prompt_tokens))
            self.prompt_tokens_estimated_total += prompt_delta
            completion_delta = max(0, int(completion_tokens))
            self.completion_tokens_estimated_total += completion_delta

    async def snapshot(self) -> dict:
        """Return a dict copy of the start time and all counters.

        The result also carries ``total_tokens_estimated``, the sum of the
        prompt and completion token totals. Values are read under the lock.
        """
        async with self._lock:
            view = {"started_at": self.started_at}
            for field_name in self._COUNTER_FIELDS:
                view[field_name] = getattr(self, field_name)
            view["total_tokens_estimated"] = (
                view["prompt_tokens_estimated_total"]
                + view["completion_tokens_estimated_total"]
            )
            return view

    async def prometheus_text(self) -> str:
        """Render the current snapshot as newline-terminated metric text.

        Each exported value produces a ``# TYPE gateway_<key> counter`` line
        followed by a ``gateway_<key> <value>`` sample line. ``started_at`` is
        not exported.
        """
        current = await self.snapshot()
        rendered = []
        for key in self._EXPORTED_FIELDS:
            metric = "gateway_" + key
            rendered.append(f"# TYPE {metric} counter")
            rendered.append(f"{metric} {current[key]}")
        return "\n".join(rendered) + "\n"