feat: add capability and admin introspection endpoints

Expose capability discovery plus admin-only config and request inspection endpoints so clients and operators can understand gateway behavior without reading code.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-12 14:30:08 +08:00
parent 94a8025ae5
commit b719bdeaa2
5 changed files with 780 additions and 93 deletions
--- a/app/main.py
+++ b/app/main.py
@@ -5,6 +5,7 @@ import hashlib
 import json
 import time
 import uuid
+from collections import deque
 from contextlib import asynccontextmanager
 from typing import Any

@@ -15,6 +16,7 @@ from .anthropic_schema import (
    AnthropicMessagesRequest,
    affinity_key_for_anthropic,
    anthropic_to_internal_messages,
+    flatten_anthropic_content,
 )
 from .auth import (
    AnthropicAuthError,
@@ -112,6 +114,8 @@ STREAMING_RESPONSE_HEADERS = {
    "Connection": "keep-alive",
 }

+# In-memory buffer of recent request snapshots for the admin debug endpoint.
+# ``maxlen=100`` makes the deque discard entries from the opposite end once
+# full; ``_record_debug_request`` inserts with ``appendleft`` so index 0 is
+# always the newest request.
+_DEBUG_REQUEST_LOG: deque[dict[str, Any]] = deque(maxlen=100)
+

 def _require_pool() -> LingmaPool:
    if pool is None:
@@ -249,6 +253,63 @@ def _log_auth_posture() -> None:
        )


+def _safe_setting_value(key: str, value: Any) -> Any:
+    """Mask a setting value when its key looks like it names a secret.
+
+    A key is treated as sensitive when its upper-cased form contains any of
+    ``KEY``, ``TOKEN``, ``PASSWORD``, ``SECRET`` or ``BUNDLE``.  A sensitive
+    list keeps its length but every element becomes ``"***"``; any other
+    sensitive value (including ``None``) is replaced by ``"***"``.
+    Values under non-sensitive keys are returned unchanged.
+    """
+    sensitive_markers = ("KEY", "TOKEN", "PASSWORD", "SECRET", "BUNDLE")
+    normalized = key.upper()
+    is_sensitive = False
+    for marker in sensitive_markers:
+        if marker in normalized:
+            is_sensitive = True
+            break
+    if not is_sensitive:
+        return value
+    # Preserve the element count of lists so operators can still see how many
+    # secrets are configured without seeing the secrets themselves.
+    return ["***"] * len(value) if isinstance(value, list) else "***"
+
+
+def _redact_debug_value(path: tuple[str, ...], value: Any) -> Any:
+    """Recursively mask sensitive strings in a request body for the debug log.
+
+    ``path`` is the chain of keys leading to ``value``: dict keys are appended
+    lower-cased and list elements contribute ``"[]"``.  Dicts and lists are
+    rebuilt; only ``str`` leaves are ever rewritten, all other leaves are
+    returned as-is.  A string leaf is replaced, in this order of precedence:
+
+    * ``"***"`` when the ``/``-joined path contains a credential marker;
+    * ``"[redacted-data-url]"`` when the text starts with ``data:``;
+    * ``"[redacted-session-bundle]"`` when the text mentions "session bundle";
+    * its first 1024 characters plus ``"... [truncated]"`` when it sits under
+      an ``args``/``arguments`` key and is longer than 2048 characters.
+    """
+    if isinstance(value, dict):
+        redacted: dict[Any, Any] = {}
+        for child_key, child in value.items():
+            child_path = path + (str(child_key).lower(),)
+            redacted[child_key] = _redact_debug_value(child_path, child)
+        return redacted
+    if isinstance(value, list):
+        element_path = path + ("[]",)
+        return [_redact_debug_value(element_path, element) for element in value]
+    if not isinstance(value, str):
+        return value
+    credential_markers = (
+        "authorization",
+        "x-api-key",
+        "api_key",
+        "token",
+        "password",
+        "secret",
+        "session_bundle",
+    )
+    joined_path = "/".join(path)
+    for marker in credential_markers:
+        if marker in joined_path:
+            return "***"
+    if value.startswith("data:"):
+        return "[redacted-data-url]"
+    if "session bundle" in value.lower():
+        return "[redacted-session-bundle]"
+    under_tool_arguments = "args" in path or "arguments" in path
+    if under_tool_arguments and len(value) > 2048:
+        return value[:1024] + "... [truncated]"
+    return value
+
+
+def _record_debug_request(protocol: str, path: str, body: dict[str, Any], request: Request) -> None:
+    """Store a redacted snapshot of one incoming request in the debug log.
+
+    The entry holds the current Unix time in whole seconds, the ``protocol``
+    and ``path`` labels supplied by the caller, the ``x-request-id`` header
+    (empty string when absent) and ``body`` after ``_redact_debug_value``.
+    Entries are pushed on the left of ``_DEBUG_REQUEST_LOG`` so the newest
+    request is always first.
+    """
+    entry: dict[str, Any] = {
+        "timestamp": int(time.time()),
+        "protocol": protocol,
+        "path": path,
+        "request_id": request.headers.get("x-request-id", ""),
+    }
+    entry["body"] = _redact_debug_value((), body)
+    _DEBUG_REQUEST_LOG.appendleft(entry)
+
+
+# Debug route guarded by ``admin_auth_guard``: returns the newest entries of
+# the in-memory request log. Documented with comments rather than a docstring
+# so the generated API description stays unchanged.
+@app.get("/internal/debug/requests", dependencies=[Depends(admin_auth_guard)])
+async def internal_debug_requests(limit: int = 20):
+    # Clamp to 1..100 instead of rejecting, so any ``limit`` yields a response.
+    safe_limit = max(1, min(limit, 100))
+    # The log is newest-first, so a prefix slice is the most recent requests.
+    newest_first = list(_DEBUG_REQUEST_LOG)[:safe_limit]
+    payload = {
+        "ok": True,
+        "count": len(newest_first),
+        "items": newest_first,
+    }
+    return JSONResponse(content=payload)
+
+
@app.get("/healthz")
 async def healthz():
    if pool is None:
@@ -267,6 +328,62 @@ async def healthz():
    }


+def _capabilities_payload() -> dict[str, Any]:
+    """Assemble the capability-discovery document returned by the capabilities routes.
+
+    The document has four top-level keys: a fixed ``service`` name, the
+    application ``version``, per-protocol flags under ``protocols`` and
+    settings-derived values under ``features`` (session reuse, tooling, pool
+    and auth posture).  Auth posture is reported only as booleans derived
+    from the settings; the key and token values themselves are not included.
+    """
+    tools_forwarded = settings.tool_forward_enabled
+    openai_caps = {
+        "models": True,
+        "chat_completions": True,
+        "responses": True,
+        "streaming": True,
+        "response_tool_calls": True,
+        "request_tools_forwarded": tools_forwarded,
+    }
+    anthropic_caps = {
+        "messages": True,
+        "count_tokens": True,
+        "streaming": True,
+        "response_tool_use": True,
+        "request_tools_forwarded": tools_forwarded,
+    }
+    session_reuse = {
+        "enabled": settings.session_reuse_enabled,
+        "cache_max_entries": settings.session_cache_max_entries,
+        "cache_ttl_sec": settings.session_cache_ttl_sec,
+    }
+    tooling = {
+        "forward_enabled": tools_forwarded,
+        "allowlist": settings.tool_allowlist,
+        "emulation_bridge_enabled": True,
+    }
+    pool_info = {
+        "configured_instance_count": settings.instance_count,
+        "default_model": settings.default_model,
+        "default_ask_mode": settings.default_ask_mode,
+    }
+    auth_info = {
+        "v1_requires_auth": bool(settings.api_keys),
+        "admin_token_configured": bool(settings.admin_token),
+        "metrics_public": settings.metrics_public,
+    }
+    return {
+        "service": "lingma-openai-gateway",
+        "version": app.version,
+        "protocols": {"openai": openai_caps, "anthropic": anthropic_caps},
+        "features": {
+            "session_reuse": session_reuse,
+            "tooling": tooling,
+            "pool": pool_info,
+            "auth": auth_info,
+        },
+    }
+
+
+# Capability discovery route; registered without any auth dependency.
+@app.get("/capabilities")
+async def capabilities():
+    payload = _capabilities_payload()
+    return JSONResponse(content=payload)
+
+
+# Same capability document as ``/capabilities``, served under ``/v1`` behind
+# the ``anthropic_auth_guard`` dependency.
+@app.get("/v1/capabilities", dependencies=[Depends(anthropic_auth_guard)])
+async def v1_capabilities():
+    payload = _capabilities_payload()
+    return JSONResponse(content=payload)
+
+
 async def _ensure_instance_logged_in(inst: PoolInstance) -> dict:
    client = inst.client
    auto_login = inst.auto_login
@@ -433,6 +550,75 @@ def _messages_to_prompt(messages: list[dict]) -> str:
    return "\n".join(parts).strip()


+def _assistant_tool_calls_to_emulation_text(tool_calls: Any) -> str:
+    """Project an assistant message's ``tool_calls`` into emulation-prompt text.
+
+    Each usable entry becomes a fenced ``json action`` block containing
+    ``{"tool": <name>, "parameters": <arguments>}``; blocks are joined by a
+    blank line.  Two entry shapes are accepted: the nested one
+    (``{"function": {"name": ..., "arguments": ...}}``) and the flat one
+    (``{"name": ..., "arguments": ...}``).  The nested values win when both
+    are present.
+
+    Args:
+        tool_calls: Expected to be a list of dicts; anything else yields ``""``.
+            Non-dict entries and entries without a non-blank name are skipped.
+
+    Returns:
+        The joined action blocks, or ``""`` when nothing usable was found.
+    """
+    if not isinstance(tool_calls, list):
+        return ""
+    blocks: list[str] = []
+    for item in tool_calls:
+        if not isinstance(item, dict):
+            continue
+        fn = item.get("function")
+        if not isinstance(fn, dict):
+            fn = {}
+        name = str(fn.get("name") or item.get("name") or "").strip()
+        if not name:
+            continue
+        arguments = fn.get("arguments")
+        if arguments is None:
+            # Flat entries keep their arguments next to the name.  Without
+            # this fallback such a call would be rendered with empty
+            # parameters even though its name was picked up above.
+            arguments = item.get("arguments")
+        if isinstance(arguments, str):
+            try:
+                arguments = json.loads(arguments)
+            except Exception:
+                # Best effort: keep unparseable argument text visible to the
+                # model instead of dropping it.
+                arguments = {"raw": arguments}
+        if not isinstance(arguments, dict):
+            # Valid JSON that is not an object cannot be used as parameters.
+            arguments = {}
+        blocks.append(
+            "```json action\n"
+            + json.dumps(
+                {"tool": name, "parameters": arguments}, ensure_ascii=False, indent=2
+            )
+            + "\n```"
+        )
+    return "\n\n".join(blocks)
+
+
+def _tool_action_block(name: str, arguments: dict[str, Any]) -> str:
+    """Render one tool invocation as a fenced ``json action`` block.
+
+    The fence body is ``{"tool": name, "parameters": arguments}`` serialised
+    with two-space indentation and non-ASCII characters left unescaped.
+    """
+    action = {"tool": name, "parameters": arguments}
+    rendered = json.dumps(action, ensure_ascii=False, indent=2)
+    return f"```json action\n{rendered}\n```"
+
+
+def _anthropic_flattened_tool_history_to_emulation_text(text: str) -> str:
+    """Rewrite flattened ``[tool_use]`` / ``[tool_result]`` lines for emulation.
+
+    The text is processed line by line:
+
+    * a line starting (after trimming) with ``[tool_use]`` whose remainder is
+      a JSON object with a non-blank ``name`` and a dict ``input`` becomes a
+      ``_tool_action_block``; any other ``[tool_use]`` line is kept verbatim;
+    * a line starting with ``[tool_result]`` is passed, minus the tag, to
+      ``action_output_prompt`` with no tool-call id;
+    * every other line is kept verbatim.
+
+    Empty lines are dropped from the result, which is joined with newlines
+    and stripped.  Empty input yields ``""``.
+    """
+    if not text:
+        return ""
+    use_tag = "[tool_use]"
+    result_tag = "[tool_result]"
+    converted: list[str] = []
+    for raw_line in text.splitlines():
+        candidate = raw_line.strip()
+        if candidate.startswith(use_tag):
+            # Fall back to the untouched line unless the payload is usable.
+            rendered = raw_line
+            try:
+                decoded = json.loads(candidate[len(use_tag) :].strip())
+            except Exception:
+                decoded = None
+            if isinstance(decoded, dict):
+                tool_name = str(decoded.get("name") or "").strip()
+                tool_input = decoded.get("input")
+                if tool_name and isinstance(tool_input, dict):
+                    rendered = _tool_action_block(tool_name, tool_input)
+            converted.append(rendered)
+        elif candidate.startswith(result_tag):
+            result_body = candidate[len(result_tag) :].strip()
+            converted.append(action_output_prompt(None, result_body))
+        else:
+            converted.append(raw_line)
+    return "\n".join(filter(None, converted)).strip()
+
+
 def _messages_to_emulation_prompt(
    messages: list[dict[str, Any]],
    *,
@@ -446,6 +632,10 @@ def _messages_to_emulation_prompt(
        if role in {"system", "developer"}:
            continue
        text = flatten_content(message.get("content"))
+        if role == "assistant" and message.get("tool_calls"):
+            projected = _assistant_tool_calls_to_emulation_text(message.get("tool_calls"))
+            if projected:
+                text = "\n\n".join(part for part in [text, projected] if part)
        if role == "tool":
            text = action_output_prompt(message.get("tool_call_id"), text)
            role = "user"
@@ -472,6 +662,22 @@ def _messages_to_emulation_prompt(
    return "\n\n".join(parts).strip()


+def _effective_tool_config_for_emulation(
+    tool_config: dict[str, Any] | None,
+    *,
+    use_emulation: bool,
+) -> dict[str, Any] | None:
+    """Choose the tool config to hand on once the emulation decision is made.
+
+    Returns ``None`` when ``use_emulation`` is true, otherwise returns
+    ``tool_config`` unchanged (the same object, not a copy).
+    """
+    return None if use_emulation else tool_config
+
+
+def _emulation_tools(
+    raw_tools: list[dict[str, Any]] | None,
+    tool_config: dict[str, Any] | None,
+) -> list[dict[str, Any]] | None:
+    """Pick the tool list that the emulation prompt should describe.
+
+    When ``tool_config`` is a dict whose ``"tools"`` entry is a list, that
+    list is returned (even if empty); in every other case ``raw_tools`` is
+    returned as given.
+    """
+    if not isinstance(tool_config, dict):
+        return raw_tools
+    configured = tool_config.get("tools")
+    return configured if isinstance(configured, list) else raw_tools
+
+
 def _anthropic_messages_to_emulation_prompt(
    messages: list[dict[str, Any]],
    *,
@@ -483,6 +689,10 @@ def _anthropic_messages_to_emulation_prompt(
    for message in messages:
        role = str(message.get("role") or "").strip().lower()
        text = str(message.get("content") or "").strip()
+        if role == "assistant" and "[tool_use]" in text:
+            text = _anthropic_flattened_tool_history_to_emulation_text(text)
+        elif role == "user" and "[tool_result]" in text:
+            text = _anthropic_flattened_tool_history_to_emulation_text(text)
        if role == "tool":
            text = action_output_prompt(message.get("tool_call_id"), text)
            role = "user"
@@ -575,6 +785,7 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
    p = _require_pool()

    messages_dump = [m.model_dump() for m in req.messages]
+    _record_debug_request("openai", "/v1/chat/completions", req.model_dump(mode="json"), request)
    api_key = _extract_api_key(request) or "-"

    # ------------------------------------------------------------- session reuse
@@ -617,9 +828,11 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
    is_reply = execution.is_reply

    include_usage = _include_usage(req.stream_options)
-    em_tools = _em_extract_openai_tools(req.tools)
+    emulation_tools = _emulation_tools(req.tools, tool_config)
+    em_tools = _em_extract_openai_tools(emulation_tools)
    em_choice = _em_extract_openai_tool_choice(req.tool_choice)
-    if _em_has_tool_request(em_tools, em_choice):
+    use_emulation = has_tooling_context
+    if use_emulation:
        system_parts = [
            flatten_content(m.content)
            for m in req.messages
@@ -628,9 +841,14 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
        prompt = _messages_to_emulation_prompt(
            messages_dump,
            system_text="\n\n".join(system_parts),
-            tools=req.tools,
+            tools=emulation_tools,
            tool_choice=req.tool_choice,
        )
+        execution.prompt = prompt
+    effective_tool_config = _effective_tool_config_for_emulation(
+        tool_config,
+        use_emulation=use_emulation,
+    )

    try:
        started = await start_execution(
@@ -708,7 +926,7 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
                        ask_mode,
                        session_id=cached_session_id,
                        is_reply=is_reply,
-                        tool_config=tool_config,
+                        tool_config=effective_tool_config,
                        out_meta=_meta,
                    ):
                        if _stream_event_type(chunk) == "tool":
@@ -763,6 +981,8 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
                            continue
                        buffered_text_parts.append(text)
                        completion_tokens_holder["n"] += estimate_tokens(text)
+                        if use_emulation:
+                            continue
                        
                        full_text = "".join(buffered_text_parts)
                        if req.tools:
@@ -855,9 +1075,6 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
                            buffered_text_parts.clear()
                            yield f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"

-                    if buffered_text_parts and forced_tool_name and saw_tool_call:
-                        buffered_text_parts.clear()
-
                    if buffered_text_parts and req.tools and not saw_tool_call:
                        merged_text = "".join(buffered_text_parts)
                        inferred = _infer_tool_event_from_declared_tools(
@@ -924,6 +1141,11 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
                                yield f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"
                            buffered_text_parts = [remaining] if remaining else []

+                    if buffered_text_parts and saw_tool_call:
+                        text_to_yield = "".join(buffered_text_parts)
+                        buffered_text_parts.clear()
+                        yield _text_payload(text_to_yield)
+
                    done_payload = {
                        "id": completion_id,
                        "object": "chat.completion.chunk",
@@ -996,7 +1218,7 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
                protocol="chat",
                execution=execution,
                prompt_tokens=prompt_tokens,
-                tool_config=tool_config,
+                tool_config=effective_tool_config,
                logger=logger,
                stats_collector=stats_collector,
                session_cache=session_cache,
@@ -1095,7 +1317,7 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
                ask_mode,
                session_id=None,
                is_reply=False,
-                tool_config=tool_config,
+                tool_config=effective_tool_config,
            )
            retry_text = retry_result.get("text") or ""
            parsed_calls, remaining = parse_action_blocks(retry_text, em_tools)
@@ -1227,6 +1449,7 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
        )

    messages_dump = anthropic_to_internal_messages(req)
+    _record_debug_request("anthropic", "/v1/messages", req.model_dump(mode="json"), request)
    # Prefer the auth token actually accepted so session-cache bucketing is
    # consistent regardless of which auth header style the caller used.
    api_key = (
@@ -1284,16 +1507,23 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
    model = execution.model
    prompt = execution.prompt
    is_reply = execution.is_reply
-    em_anthropic_tools = _em_extract_anthropic_tools(req.tools)
+    emulation_tools = _emulation_tools(req.tools, tool_config)
+    em_anthropic_tools = _em_extract_anthropic_tools(emulation_tools)
    em_anthropic_choice = _em_extract_anthropic_tool_choice(req.tool_choice)
-    if _em_has_tool_request(em_anthropic_tools, em_anthropic_choice):
+    use_emulation = has_tooling_context
+    if use_emulation:
        system_text = flatten_anthropic_content(req.system) if req.system else ""
        prompt = _anthropic_messages_to_emulation_prompt(
            messages_dump,
            system_text=system_text,
-            tools=req.tools,
+            tools=emulation_tools,
            tool_choice=req.tool_choice,
        )
+        execution.prompt = prompt
+    effective_tool_config = _effective_tool_config_for_emulation(
+        tool_config,
+        use_emulation=use_emulation,
+    )

    try:
        started = await start_execution(
@@ -1372,7 +1602,7 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
                        ask_mode,
                        session_id=cached_session_id,
                        is_reply=is_reply,
-                        tool_config=tool_config,
+                        tool_config=effective_tool_config,
                        out_meta=_meta,
                    ):
                        if _stream_event_type(chunk) == "tool":
@@ -1703,7 +1933,7 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
                protocol="anthropic",
                execution=execution,
                prompt_tokens=prompt_tokens,
-                tool_config=tool_config,
+                tool_config=effective_tool_config,
                logger=logger,
                stats_collector=stats_collector,
                session_cache=session_cache,
@@ -1757,10 +1987,8 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
                text = remaining

        if not saw_tool_event and em_anthropic_tools:
-            inferred_call = infer_declared_tool_call_from_text(text, em_anthropic_tools)
-            if inferred_call is None:
-                inferred_calls = infer_tool_calls_from_text(text, em_anthropic_tools)
-                inferred_call = inferred_calls[0] if inferred_calls else None
+            inferred_calls = infer_tool_calls_from_text(text, em_anthropic_tools)
+            inferred_call = inferred_calls[0] if inferred_calls else None
            if inferred_call is not None:
                content_blocks = [
                    {
@@ -1774,7 +2002,7 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
                saw_pending_tool_use = True
                text = ""

-        if not saw_tool_event and em_anthropic_tools:
+        if not saw_tool_event and em_anthropic_tools and not text.strip():
            retry_prompt = f"{prompt}\n\n{force_tooling_prompt(em_anthropic_choice)}"
            retry_result = await inst.client.chat_complete(
                retry_prompt,
@@ -1782,53 +2010,7 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
                ask_mode,
                session_id=None,
                is_reply=False,
-                tool_config=tool_config,
-            )
-            retry_text = retry_result.get("text") or ""
-            parsed_calls, remaining = parse_action_blocks(retry_text, em_anthropic_tools)
-            if parsed_calls:
-                content_blocks = []
-                if remaining:
-                    content_blocks.append({"type": "text", "text": remaining})
-                for call in parsed_calls:
-                    content_blocks.append(
-                        {
-                            "type": "tool_use",
-                            "id": call.id,
-                            "name": call.name,
-                            "input": call.arguments,
-                        }
-                    )
-                saw_tool_event = True
-                saw_pending_tool_use = True
-                text = remaining
-            else:
-                inferred_call = infer_declared_tool_call_from_text(retry_text, em_anthropic_tools)
-                if inferred_call is None:
-                    inferred_calls = infer_tool_calls_from_text(retry_text, em_anthropic_tools)
-                    inferred_call = inferred_calls[0] if inferred_calls else None
-                if inferred_call is not None:
-                    content_blocks = [
-                        {
-                            "type": "tool_use",
-                            "id": inferred_call.id,
-                            "name": inferred_call.name,
-                            "input": inferred_call.arguments,
-                        }
-                    ]
-                    saw_tool_event = True
-                    saw_pending_tool_use = True
-                    text = ""
-
-        if not saw_tool_event and em_anthropic_tools and text.strip():
-            retry_prompt = f"{prompt}\n\n{force_tooling_prompt(em_anthropic_choice)}"
-            retry_result = await inst.client.chat_complete(
-                retry_prompt,
-                model,
-                ask_mode,
-                session_id=None,
-                is_reply=False,
-                tool_config=tool_config,
+                tool_config=effective_tool_config,
            )
            retry_text = retry_result.get("text") or ""
            parsed_calls, remaining = parse_action_blocks(retry_text, em_anthropic_tools)
@@ -2090,6 +2272,60 @@ async def internal_stats():
    }


+# Route guarded by ``admin_auth_guard`` that dumps the effective settings.
+# Values stored under secret-looking keys (API keys, tokens, passwords,
+# session bundles) are passed through ``_safe_setting_value`` and come back
+# masked. Documented with comments rather than a docstring so the generated
+# API description stays unchanged.
+@app.get("/internal/effective-config", dependencies=[Depends(admin_auth_guard)])
+async def internal_effective_config():
+    cfg = settings
+    accounts_view = []
+    for account in cfg.accounts:
+        accounts_view.append(
+            {
+                "username": account.username,
+                "password": _safe_setting_value("password", account.password),
+                "session_bundle_b64": _safe_setting_value(
+                    "session_bundle_b64", account.session_bundle_b64
+                ),
+                "session_bundle_file": account.session_bundle_file,
+            }
+        )
+    settings_view = {
+        "host": cfg.host,
+        "port": cfg.port,
+        "api_keys": _safe_setting_value("api_keys", cfg.api_keys),
+        "metrics_token": _safe_setting_value("metrics_token", cfg.metrics_token),
+        "admin_token": _safe_setting_value("admin_token", cfg.admin_token),
+        "metrics_public": cfg.metrics_public,
+        "log_level": cfg.log_level,
+        "gateway_max_in_flight": cfg.gateway_max_in_flight,
+        "gateway_queue_timeout_sec": cfg.gateway_queue_timeout_sec,
+        "lingma_bin": cfg.lingma_bin,
+        "lingma_work_dir": cfg.lingma_work_dir,
+        "lingma_socket_port": cfg.lingma_socket_port,
+        "lingma_startup_timeout": cfg.lingma_startup_timeout,
+        "lingma_rpc_timeout": cfg.lingma_rpc_timeout,
+        "default_model": cfg.default_model,
+        "default_ask_mode": cfg.default_ask_mode,
+        "dedicated_domain_url": cfg.dedicated_domain_url,
+        "auto_login_enabled": cfg.auto_login_enabled,
+        "auto_login_headless": cfg.auto_login_headless,
+        "auto_login_timeout": cfg.auto_login_timeout,
+        "auto_login_max_retry": cfg.auto_login_max_retry,
+        "instance_count": cfg.instance_count,
+        "session_reuse_enabled": cfg.session_reuse_enabled,
+        "session_cache_max_entries": cfg.session_cache_max_entries,
+        "session_cache_ttl_sec": cfg.session_cache_ttl_sec,
+        "tool_forward_enabled": cfg.tool_forward_enabled,
+        "tool_allowlist": cfg.tool_allowlist,
+        "accounts": accounts_view,
+    }
+    # Convenience subset: the boolean switches repeated from the settings above.
+    feature_flags = {
+        "tool_forward_enabled": cfg.tool_forward_enabled,
+        "session_reuse_enabled": cfg.session_reuse_enabled,
+        "metrics_public": cfg.metrics_public,
+        "auto_login_enabled": cfg.auto_login_enabled,
+    }
+    return JSONResponse(
+        content={
+            "ok": True,
+            "settings": settings_view,
+            "feature_flags": feature_flags,
+        }
+    )
+
+
@app.get("/metrics", dependencies=[Depends(metrics_auth_guard)])
 async def metrics():
    base = await stats_collector.prometheus_text()