feat: add capability and admin introspection endpoints

Expose capability discovery plus admin-only config and request inspection endpoints so clients and operators can understand gateway behavior without reading code. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-12 14:30:08 +08:00
parent 94a8025ae5
commit b719bdeaa2
5 changed files with 780 additions and 93 deletions
--- a/README.md
+++ b/README.md
@@ -4,6 +4,8 @@
 - OpenAI：`/v1/models`、`/v1/chat/completions`（含 stream）
 - Anthropic：`/v1/messages`、`/v1/messages/count_tokens`（含 stream）
 - 能力探测：`/capabilities`、`/v1/capabilities`
 - 内省端点：`/internal/effective-config`、`/internal/debug/requests`
 - 内置：多实例池、会话复用、Prometheus 指标、登录态 bundle 注入
 - 工具事件桥接：Lingma 上游返回 `tool` 事件时，网关会输出为 OpenAI `tool_calls`（stream/non-stream）和 Anthropic `tool_use` / `tool_result`（stream/non-stream）；请求侧 `tools` / `tool_choice` 仅在 `TOOL_FORWARD_ENABLED=true` 时透传（默认开启，可显式关闭）
 - 工具模拟回退：当 Lingma 未稳定外显原生 `tool/*` 事件时，网关会把注入后的 `json action` / `#Tool Call` 等动作文本归一化为 OpenAI `tool_calls`，并支持 tool result continuation
@@ -56,6 +58,7 @@ API_KEY=$(grep '^API_KEYS=' .env | cut -d= -f2 | cut -d, -f1)
 curl -s "http://127.0.0.1:${PORT}/healthz"
 curl -s "http://127.0.0.1:${PORT}/v1/models" \
  -H "Authorization: Bearer ${API_KEY}"
 curl -s "http://127.0.0.1:${PORT}/capabilities"
 ```
 ---
@@ -172,6 +175,32 @@ curl -s "http://127.0.0.1:${PORT}/v1/messages/count_tokens" \
  }'
 ```
 ### 能力探测
 ```bash
 curl -s "http://127.0.0.1:${PORT}/capabilities"
 curl -s "http://127.0.0.1:${PORT}/v1/capabilities" \
  -H "x-api-key: ${API_KEY}" \
  -H "anthropic-version: 2023-06-01"
 ```
 ### 内省端点（admin）
 如果配置了 `ADMIN_TOKEN`，以下端点需要使用该 token；否则会回退复用 `API_KEYS`。
 ```bash
 ADMIN_TOKEN=${ADMIN_TOKEN:-$API_KEY}
 curl -s "http://127.0.0.1:${PORT}/internal/effective-config" \
  -H "Authorization: Bearer ${ADMIN_TOKEN}"
 curl -s "http://127.0.0.1:${PORT}/internal/debug/requests?limit=5" \
  -H "Authorization: Bearer ${ADMIN_TOKEN}"
 ```
 > `internal/debug/requests` 会对 token、session bundle、data URL 图片和超长工具参数做脱敏/截断。
 ---
 ## 部署与更新
--- a/app/main.py
+++ b/app/main.py
@@ -5,6 +5,7 @@ import hashlib
 import json
 import time
 import uuid
 from collections import deque
 from contextlib import asynccontextmanager
 from typing import Any
@@ -15,6 +16,7 @@ from .anthropic_schema import (
    AnthropicMessagesRequest,
    affinity_key_for_anthropic,
    anthropic_to_internal_messages,
    flatten_anthropic_content,
 )
 from .auth import (
    AnthropicAuthError,
@@ -112,6 +114,8 @@ STREAMING_RESPONSE_HEADERS = {
    "Connection": "keep-alive",
 }
 _DEBUG_REQUEST_LOG: deque[dict[str, Any]] = deque(maxlen=100)
 def _require_pool() -> LingmaPool:
    if pool is None:
@@ -249,6 +253,63 @@ def _log_auth_posture() -> None:
        )
 def _safe_setting_value(key: str, value: Any) -> Any:
    key_upper = key.upper()
    if any(
        marker in key_upper
        for marker in {"KEY", "TOKEN", "PASSWORD", "SECRET", "BUNDLE"}
    ):
        if isinstance(value, list):
            return ["***" for _ in value]
        return "***"
    return value
 def _redact_debug_value(path: tuple[str, ...], value: Any) -> Any:
    if isinstance(value, dict):
        return {
            k: _redact_debug_value(path + (str(k).lower(),), v)
            for k, v in value.items()
        }
    if isinstance(value, list):
        return [_redact_debug_value(path + ("[]",), item) for item in value]
    if isinstance(value, str):
        lowered_path = "/".join(path)
        if any(marker in lowered_path for marker in ("authorization", "x-api-key", "api_key", "token", "password", "secret", "session_bundle")):
            return "***"
        if value.startswith("data:"):
            return "[redacted-data-url]"
        if "session bundle" in value.lower():
            return "[redacted-session-bundle]"
        if any(part in {"args", "arguments"} for part in path) and len(value) > 2048:
            return value[:1024] + "... [truncated]"
    return value
 def _record_debug_request(protocol: str, path: str, body: dict[str, Any], request: Request) -> None:
    _DEBUG_REQUEST_LOG.appendleft(
        {
            "timestamp": int(time.time()),
            "protocol": protocol,
            "path": path,
            "request_id": request.headers.get("x-request-id", ""),
            "body": _redact_debug_value((), body),
        }
    )
@app.get("/internal/debug/requests", dependencies=[Depends(admin_auth_guard)])
 async def internal_debug_requests(limit: int = 20):
    safe_limit = min(max(limit, 1), 100)
    return JSONResponse(
        content={
            "ok": True,
            "count": min(safe_limit, len(_DEBUG_REQUEST_LOG)),
            "items": list(_DEBUG_REQUEST_LOG)[:safe_limit],
        }
    )
@app.get("/healthz")
 async def healthz():
    if pool is None:
@@ -267,6 +328,62 @@ async def healthz():
    }
 def _capabilities_payload() -> dict[str, Any]:
    return {
        "service": "lingma-openai-gateway",
        "version": app.version,
        "protocols": {
            "openai": {
                "models": True,
                "chat_completions": True,
                "responses": True,
                "streaming": True,
                "response_tool_calls": True,
                "request_tools_forwarded": settings.tool_forward_enabled,
            },
            "anthropic": {
                "messages": True,
                "count_tokens": True,
                "streaming": True,
                "response_tool_use": True,
                "request_tools_forwarded": settings.tool_forward_enabled,
            },
        },
        "features": {
            "session_reuse": {
                "enabled": settings.session_reuse_enabled,
                "cache_max_entries": settings.session_cache_max_entries,
                "cache_ttl_sec": settings.session_cache_ttl_sec,
            },
            "tooling": {
                "forward_enabled": settings.tool_forward_enabled,
                "allowlist": settings.tool_allowlist,
                "emulation_bridge_enabled": True,
            },
            "pool": {
                "configured_instance_count": settings.instance_count,
                "default_model": settings.default_model,
                "default_ask_mode": settings.default_ask_mode,
            },
            "auth": {
                "v1_requires_auth": bool(settings.api_keys),
                "admin_token_configured": bool(settings.admin_token),
                "metrics_public": settings.metrics_public,
            },
        },
    }
@app.get("/capabilities")
 async def capabilities():
    return JSONResponse(content=_capabilities_payload())
@app.get("/v1/capabilities", dependencies=[Depends(anthropic_auth_guard)])
 async def v1_capabilities():
    return JSONResponse(content=_capabilities_payload())
 async def _ensure_instance_logged_in(inst: PoolInstance) -> dict:
    client = inst.client
    auto_login = inst.auto_login
@@ -433,6 +550,75 @@ def _messages_to_prompt(messages: list[dict]) -> str:
    return "\n".join(parts).strip()
 def _assistant_tool_calls_to_emulation_text(tool_calls: Any) -> str:
    if not isinstance(tool_calls, list):
        return ""
    blocks: list[str] = []
    for item in tool_calls:
        if not isinstance(item, dict):
            continue
        fn = item.get("function") if isinstance(item.get("function"), dict) else None
        name = str((fn or {}).get("name") or item.get("name") or "").strip()
        if not name:
            continue
        arguments = (fn or {}).get("arguments")
        if isinstance(arguments, str):
            try:
                arguments = json.loads(arguments)
            except Exception:
                arguments = {"raw": arguments}
        if not isinstance(arguments, dict):
            arguments = {}
        blocks.append(
            "```json action\n"
            + json.dumps(
                {"tool": name, "parameters": arguments}, ensure_ascii=False, indent=2
            )
            + "\n```"
        )
    return "\n\n".join(blocks)
 def _tool_action_block(name: str, arguments: dict[str, Any]) -> str:
    return (
        "```json action\n"
        + json.dumps(
            {"tool": name, "parameters": arguments}, ensure_ascii=False, indent=2
        )
        + "\n```"
    )
 def _anthropic_flattened_tool_history_to_emulation_text(text: str) -> str:
    if not text:
        return ""
    out: list[str] = []
    for line in text.splitlines():
        stripped = line.strip()
        if stripped.startswith("[tool_use]"):
            raw = stripped[len("[tool_use]") :].strip()
            try:
                payload = json.loads(raw)
            except Exception:
                out.append(line)
                continue
            if not isinstance(payload, dict):
                out.append(line)
                continue
            name = str(payload.get("name") or "").strip()
            arguments = payload.get("input")
            if name and isinstance(arguments, dict):
                out.append(_tool_action_block(name, arguments))
            else:
                out.append(line)
            continue
        if stripped.startswith("[tool_result]"):
            out.append(action_output_prompt(None, stripped[len("[tool_result]") :].strip()))
            continue
        out.append(line)
    return "\n".join(part for part in out if part).strip()
 def _messages_to_emulation_prompt(
    messages: list[dict[str, Any]],
    *,
@@ -446,6 +632,10 @@ def _messages_to_emulation_prompt(
        if role in {"system", "developer"}:
            continue
        text = flatten_content(message.get("content"))
        if role == "assistant" and message.get("tool_calls"):
            projected = _assistant_tool_calls_to_emulation_text(message.get("tool_calls"))
            if projected:
                text = "\n\n".join(part for part in [text, projected] if part)
        if role == "tool":
            text = action_output_prompt(message.get("tool_call_id"), text)
            role = "user"
@@ -472,6 +662,22 @@ def _messages_to_emulation_prompt(
    return "\n\n".join(parts).strip()
 def _effective_tool_config_for_emulation(
    tool_config: dict[str, Any] | None,
    *,
    use_emulation: bool,
 ) -> dict[str, Any] | None:
    if use_emulation:
        return None
    return tool_config
 def _emulation_tools(raw_tools: list[dict[str, Any]] | None, tool_config: dict[str, Any] | None) -> list[dict[str, Any]] | None:
    if isinstance(tool_config, dict) and isinstance(tool_config.get("tools"), list):
        return tool_config.get("tools")
    return raw_tools
 def _anthropic_messages_to_emulation_prompt(
    messages: list[dict[str, Any]],
    *,
@@ -483,6 +689,10 @@ def _anthropic_messages_to_emulation_prompt(
    for message in messages:
        role = str(message.get("role") or "").strip().lower()
        text = str(message.get("content") or "").strip()
        if role == "assistant" and "[tool_use]" in text:
            text = _anthropic_flattened_tool_history_to_emulation_text(text)
        elif role == "user" and "[tool_result]" in text:
            text = _anthropic_flattened_tool_history_to_emulation_text(text)
        if role == "tool":
            text = action_output_prompt(message.get("tool_call_id"), text)
            role = "user"
@@ -575,6 +785,7 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
    p = _require_pool()
    messages_dump = [m.model_dump() for m in req.messages]
    _record_debug_request("openai", "/v1/chat/completions", req.model_dump(mode="json"), request)
    api_key = _extract_api_key(request) or "-"
    # ------------------------------------------------------------- session reuse
@@ -617,9 +828,11 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
    is_reply = execution.is_reply
    include_usage = _include_usage(req.stream_options)
-    em_tools = _em_extract_openai_tools(req.tools)
+    emulation_tools = _emulation_tools(req.tools, tool_config)
    em_tools = _em_extract_openai_tools(emulation_tools)
    em_choice = _em_extract_openai_tool_choice(req.tool_choice)
-    if _em_has_tool_request(em_tools, em_choice):
+    use_emulation = has_tooling_context
    if use_emulation:
        system_parts = [
            flatten_content(m.content)
            for m in req.messages
@@ -628,9 +841,14 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
        prompt = _messages_to_emulation_prompt(
            messages_dump,
            system_text="\n\n".join(system_parts),
-            tools=req.tools,
+            tools=emulation_tools,
            tool_choice=req.tool_choice,
        )
        execution.prompt = prompt
    effective_tool_config = _effective_tool_config_for_emulation(
        tool_config,
        use_emulation=use_emulation,
    )
    try:
        started = await start_execution(
@@ -708,7 +926,7 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
                        ask_mode,
                        session_id=cached_session_id,
                        is_reply=is_reply,
-                        tool_config=tool_config,
+                        tool_config=effective_tool_config,
                        out_meta=_meta,
                    ):
                        if _stream_event_type(chunk) == "tool":
@@ -763,6 +981,8 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
                            continue
                        buffered_text_parts.append(text)
                        completion_tokens_holder["n"] += estimate_tokens(text)
                        if use_emulation:
                            continue
                        full_text = "".join(buffered_text_parts)
                        if req.tools:
@@ -855,9 +1075,6 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
                            buffered_text_parts.clear()
                            yield f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"
                    if buffered_text_parts and forced_tool_name and saw_tool_call:
                        buffered_text_parts.clear()
                    if buffered_text_parts and req.tools and not saw_tool_call:
                        merged_text = "".join(buffered_text_parts)
                        inferred = _infer_tool_event_from_declared_tools(
@@ -924,6 +1141,11 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
                                yield f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"
                            buffered_text_parts = [remaining] if remaining else []
                    if buffered_text_parts and saw_tool_call:
                        text_to_yield = "".join(buffered_text_parts)
                        buffered_text_parts.clear()
                        yield _text_payload(text_to_yield)
                    done_payload = {
                        "id": completion_id,
                        "object": "chat.completion.chunk",
@@ -996,7 +1218,7 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
                protocol="chat",
                execution=execution,
                prompt_tokens=prompt_tokens,
-                tool_config=tool_config,
+                tool_config=effective_tool_config,
                logger=logger,
                stats_collector=stats_collector,
                session_cache=session_cache,
@@ -1095,7 +1317,7 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
                ask_mode,
                session_id=None,
                is_reply=False,
-                tool_config=tool_config,
+                tool_config=effective_tool_config,
            )
            retry_text = retry_result.get("text") or ""
            parsed_calls, remaining = parse_action_blocks(retry_text, em_tools)
@@ -1227,6 +1449,7 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
        )
    messages_dump = anthropic_to_internal_messages(req)
    _record_debug_request("anthropic", "/v1/messages", req.model_dump(mode="json"), request)
    # Prefer the auth token actually accepted so session-cache bucketing is
    # consistent regardless of which auth header style the caller used.
    api_key = (
@@ -1284,16 +1507,23 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
    model = execution.model
    prompt = execution.prompt
    is_reply = execution.is_reply
-    em_anthropic_tools = _em_extract_anthropic_tools(req.tools)
+    emulation_tools = _emulation_tools(req.tools, tool_config)
    em_anthropic_tools = _em_extract_anthropic_tools(emulation_tools)
    em_anthropic_choice = _em_extract_anthropic_tool_choice(req.tool_choice)
-    if _em_has_tool_request(em_anthropic_tools, em_anthropic_choice):
+    use_emulation = has_tooling_context
    if use_emulation:
        system_text = flatten_anthropic_content(req.system) if req.system else ""
        prompt = _anthropic_messages_to_emulation_prompt(
            messages_dump,
            system_text=system_text,
-            tools=req.tools,
+            tools=emulation_tools,
            tool_choice=req.tool_choice,
        )
        execution.prompt = prompt
    effective_tool_config = _effective_tool_config_for_emulation(
        tool_config,
        use_emulation=use_emulation,
    )
    try:
        started = await start_execution(
@@ -1372,7 +1602,7 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
                        ask_mode,
                        session_id=cached_session_id,
                        is_reply=is_reply,
-                        tool_config=tool_config,
+                        tool_config=effective_tool_config,
                        out_meta=_meta,
                    ):
                        if _stream_event_type(chunk) == "tool":
@@ -1703,7 +1933,7 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
                protocol="anthropic",
                execution=execution,
                prompt_tokens=prompt_tokens,
-                tool_config=tool_config,
+                tool_config=effective_tool_config,
                logger=logger,
                stats_collector=stats_collector,
                session_cache=session_cache,
@@ -1757,10 +1987,8 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
                text = remaining
        if not saw_tool_event and em_anthropic_tools:
-            inferred_call = infer_declared_tool_call_from_text(text, em_anthropic_tools)
+            inferred_calls = infer_tool_calls_from_text(text, em_anthropic_tools)
-            if inferred_call is None:
+            inferred_call = inferred_calls[0] if inferred_calls else None
                inferred_calls = infer_tool_calls_from_text(text, em_anthropic_tools)
                inferred_call = inferred_calls[0] if inferred_calls else None
            if inferred_call is not None:
                content_blocks = [
                    {
@@ -1774,7 +2002,7 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
                saw_pending_tool_use = True
                text = ""
-        if not saw_tool_event and em_anthropic_tools:
+        if not saw_tool_event and em_anthropic_tools and not text.strip():
            retry_prompt = f"{prompt}\n\n{force_tooling_prompt(em_anthropic_choice)}"
            retry_result = await inst.client.chat_complete(
                retry_prompt,
@@ -1782,53 +2010,7 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
                ask_mode,
                session_id=None,
                is_reply=False,
-                tool_config=tool_config,
+                tool_config=effective_tool_config,
            )
            retry_text = retry_result.get("text") or ""
            parsed_calls, remaining = parse_action_blocks(retry_text, em_anthropic_tools)
            if parsed_calls:
                content_blocks = []
                if remaining:
                    content_blocks.append({"type": "text", "text": remaining})
                for call in parsed_calls:
                    content_blocks.append(
                        {
                            "type": "tool_use",
                            "id": call.id,
                            "name": call.name,
                            "input": call.arguments,
                        }
                    )
                saw_tool_event = True
                saw_pending_tool_use = True
                text = remaining
            else:
                inferred_call = infer_declared_tool_call_from_text(retry_text, em_anthropic_tools)
                if inferred_call is None:
                    inferred_calls = infer_tool_calls_from_text(retry_text, em_anthropic_tools)
                    inferred_call = inferred_calls[0] if inferred_calls else None
                if inferred_call is not None:
                    content_blocks = [
                        {
                            "type": "tool_use",
                            "id": inferred_call.id,
                            "name": inferred_call.name,
                            "input": inferred_call.arguments,
                        }
                    ]
                    saw_tool_event = True
                    saw_pending_tool_use = True
                    text = ""
        if not saw_tool_event and em_anthropic_tools and text.strip():
            retry_prompt = f"{prompt}\n\n{force_tooling_prompt(em_anthropic_choice)}"
            retry_result = await inst.client.chat_complete(
                retry_prompt,
                model,
                ask_mode,
                session_id=None,
                is_reply=False,
                tool_config=tool_config,
            )
            retry_text = retry_result.get("text") or ""
            parsed_calls, remaining = parse_action_blocks(retry_text, em_anthropic_tools)
@@ -2090,6 +2272,60 @@ async def internal_stats():
    }
@app.get("/internal/effective-config", dependencies=[Depends(admin_auth_guard)])
 async def internal_effective_config():
    cfg = settings
    return JSONResponse(content={
        "ok": True,
        "settings": {
            "host": cfg.host,
            "port": cfg.port,
            "api_keys": _safe_setting_value("api_keys", cfg.api_keys),
            "metrics_token": _safe_setting_value("metrics_token", cfg.metrics_token),
            "admin_token": _safe_setting_value("admin_token", cfg.admin_token),
            "metrics_public": cfg.metrics_public,
            "log_level": cfg.log_level,
            "gateway_max_in_flight": cfg.gateway_max_in_flight,
            "gateway_queue_timeout_sec": cfg.gateway_queue_timeout_sec,
            "lingma_bin": cfg.lingma_bin,
            "lingma_work_dir": cfg.lingma_work_dir,
            "lingma_socket_port": cfg.lingma_socket_port,
            "lingma_startup_timeout": cfg.lingma_startup_timeout,
            "lingma_rpc_timeout": cfg.lingma_rpc_timeout,
            "default_model": cfg.default_model,
            "default_ask_mode": cfg.default_ask_mode,
            "dedicated_domain_url": cfg.dedicated_domain_url,
            "auto_login_enabled": cfg.auto_login_enabled,
            "auto_login_headless": cfg.auto_login_headless,
            "auto_login_timeout": cfg.auto_login_timeout,
            "auto_login_max_retry": cfg.auto_login_max_retry,
            "instance_count": cfg.instance_count,
            "session_reuse_enabled": cfg.session_reuse_enabled,
            "session_cache_max_entries": cfg.session_cache_max_entries,
            "session_cache_ttl_sec": cfg.session_cache_ttl_sec,
            "tool_forward_enabled": cfg.tool_forward_enabled,
            "tool_allowlist": cfg.tool_allowlist,
            "accounts": [
                {
                    "username": account.username,
                    "password": _safe_setting_value("password", account.password),
                    "session_bundle_b64": _safe_setting_value(
                        "session_bundle_b64", account.session_bundle_b64
                    ),
                    "session_bundle_file": account.session_bundle_file,
                }
                for account in cfg.accounts
            ],
        },
        "feature_flags": {
            "tool_forward_enabled": cfg.tool_forward_enabled,
            "session_reuse_enabled": cfg.session_reuse_enabled,
            "metrics_public": cfg.metrics_public,
            "auto_login_enabled": cfg.auto_login_enabled,
        },
    })
@app.get("/metrics", dependencies=[Depends(metrics_auth_guard)])
 async def metrics():
    base = await stats_collector.prometheus_text()
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,5 @@
 fastapi==0.115.0
 starlette==0.38.6
 uvicorn[standard]==0.30.6
 websockets==13.1
 pydantic==2.9.2
--- a/tests/test_auth_concurrency.py
+++ b/tests/test_auth_concurrency.py
@@ -1,14 +1,37 @@
 from __future__ import annotations
 import asyncio
 import sys
 import types
 import unittest
 from unittest.mock import patch
 from fastapi import HTTPException
 from fastapi.testclient import TestClient
 from starlette.requests import Request
 from app.auth import AnthropicAuthError, require_anthropic_key, require_bearer, require_metrics_access
 from app.concurrency import BackpressureRejected, InFlightGuard
 _playwright = types.ModuleType("playwright")
 _playwright_async = types.ModuleType("playwright.async_api")
 class _StubPlaywrightTimeoutError(Exception):
    pass
 async def _stub_async_playwright():
    raise RuntimeError("playwright is stubbed in unit tests")
 _playwright_async.TimeoutError = _StubPlaywrightTimeoutError
 _playwright_async.async_playwright = _stub_async_playwright
 sys.modules.setdefault("playwright", _playwright)
 sys.modules.setdefault("playwright.async_api", _playwright_async)
 import app.main as main
 def _req(headers: dict[str, str] | None = None) -> Request:
    pairs = []
@@ -82,5 +105,48 @@ class AuthAndConcurrencyTests(unittest.IsolatedAsyncioTestCase):
        self.assertEqual(guard.in_flight, 0)
 class DebugRequestRecordingTests(unittest.TestCase):
    def setUp(self) -> None:
        main._DEBUG_REQUEST_LOG.clear()
    def test_redacts_sensitive_fields_and_data_urls(self) -> None:
        body = {
            "authorization": "Bearer abc",
            "x-api-key": "secret",
            "session_bundle": "very-secret",
            "images": ["data:image/png;base64,ABC"],
            "tool": {"args": "x" * 3000},
        }
        redacted = main._redact_debug_value((), body)
        self.assertEqual(redacted["authorization"], "***")
        self.assertEqual(redacted["x-api-key"], "***")
        self.assertEqual(redacted["session_bundle"], "***")
        self.assertEqual(redacted["images"][0], "[redacted-data-url]")
        self.assertIn("[truncated]", redacted["tool"]["args"])
    def test_internal_debug_requests_requires_admin_and_returns_items(self) -> None:
        with patch.object(main.settings, "api_keys", ["k1"]), patch.object(main.settings, "admin_token", "admin-1"):
            client = TestClient(main.app)
            req_payload = {
                "model": "org_auto",
                "messages": [{"role": "user", "content": "hello"}],
            }
            main._record_debug_request("openai", "/v1/chat/completions", req_payload, _req({"x-request-id": "req-1"}))
            denied = client.get("/internal/debug/requests")
            self.assertEqual(denied.status_code, 401)
            ok = client.get(
                "/internal/debug/requests?limit=1",
                headers={"Authorization": "Bearer admin-1"},
            )
            self.assertEqual(ok.status_code, 200)
            data = ok.json()
            self.assertTrue(data["ok"])
            self.assertEqual(data["count"], 1)
            self.assertEqual(data["items"][0]["protocol"], "openai")
 if __name__ == "__main__":
    unittest.main()
--- a/tests/test_tool_call_bridge.py
+++ b/tests/test_tool_call_bridge.py
@@ -5,6 +5,7 @@ import sys
 import types
 import unittest
 import asyncio
 from types import SimpleNamespace
 from unittest.mock import AsyncMock, patch
@@ -1251,7 +1252,7 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
        self.assertIn('"type": "tool_result"', body)
        self.assertIn('"stop_reason": "end_turn"', body)
-    async def test_openai_non_stream_forwards_tool_config_when_enabled(self) -> None:
+    async def test_openai_non_stream_uses_emulation_instead_of_forwarding_tool_config(self) -> None:
        spy_client = _SpyClient(
            stream_events=[], complete_result={"text": "ok", "toolEvents": []}
        )
@@ -1279,13 +1280,10 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
            await main.v1_chat_completions(req, _make_request("/v1/chat/completions"))
        self.assertIn("tool_config", spy_client.last_complete_kwargs)
-        cfg = spy_client.last_complete_kwargs["tool_config"]
+        self.assertIsNone(spy_client.last_complete_kwargs["tool_config"])
        self.assertEqual(cfg["provider"], "openai")
        self.assertEqual(len(cfg["tools"]), 1)
        self.assertIsInstance(cfg["tool_choice"], dict)
        self.assertEqual(spy_client.last_complete_args[2], "agent")
-    async def test_openai_stream_forwards_tool_config_when_enabled(self) -> None:
+    async def test_openai_stream_uses_emulation_instead_of_forwarding_tool_config(self) -> None:
        spy_client = _SpyClient(
            stream_events=[{"type": "text", "text": "ok"}], complete_result={}
        )
@@ -1316,10 +1314,7 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
            await _collect_stream(response)
        self.assertIn("tool_config", spy_client.last_stream_kwargs)
-        cfg = spy_client.last_stream_kwargs["tool_config"]
+        self.assertIsNone(spy_client.last_stream_kwargs["tool_config"])
        self.assertEqual(cfg["provider"], "openai")
        self.assertEqual(len(cfg["tools"]), 1)
        self.assertIsInstance(cfg["tool_choice"], dict)
        self.assertEqual(spy_client.last_stream_args[2], "agent")
    async def test_openai_non_stream_does_not_forward_tool_config_when_disabled(
@@ -1355,7 +1350,7 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
        self.assertIsNone(spy_client.last_complete_kwargs["tool_config"])
        self.assertEqual(spy_client.last_complete_args[2], "agent")
-    async def test_openai_non_stream_filters_tools_by_allowlist(self) -> None:
+    async def test_openai_non_stream_filters_tools_by_allowlist_before_emulation(self) -> None:
        spy_client = _SpyClient(
            stream_events=[], complete_result={"text": "ok", "toolEvents": []}
        )
@@ -1386,11 +1381,9 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
        ):
            await main.v1_chat_completions(req, _make_request("/v1/chat/completions"))
-        cfg = spy_client.last_complete_kwargs["tool_config"]
+        prompt = spy_client.last_complete_args[0]
-        self.assertEqual(
+        self.assertIn("lookup(", prompt)
-            [tool["function"]["name"] for tool in cfg["tools"]], ["lookup"]
+        self.assertNotIn("write_file(", prompt)
        )
        self.assertEqual(cfg["tool_choice"], req.tool_choice)
    async def test_openai_non_stream_rejects_forced_tool_outside_allowlist(
        self,
@@ -1579,7 +1572,7 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
        self.assertEqual(openai_spy.last_complete_args[2], "chat")
        self.assertEqual(anthropic_spy.last_complete_args[2], "chat")
-    async def test_anthropic_stream_forwards_tool_config_when_enabled(self) -> None:
+    async def test_anthropic_stream_uses_emulation_instead_of_forwarding_tool_config(self) -> None:
        spy_client = _SpyClient(
            stream_events=[{"type": "text", "text": "ok"}], complete_result={}
        )
@@ -1619,9 +1612,7 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
            await _collect_stream(response)
        self.assertIn("tool_config", spy_client.last_stream_kwargs)
-        cfg = spy_client.last_stream_kwargs["tool_config"]
+        self.assertIsNone(spy_client.last_stream_kwargs["tool_config"])
        self.assertEqual(cfg["provider"], "anthropic")
        self.assertEqual(len(cfg["tools"]), 1)
        self.assertEqual(spy_client.last_stream_args[2], "agent")
    async def test_anthropic_non_stream_does_not_forward_tool_config_when_disabled(
@@ -1710,12 +1701,10 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
            )
        self.assertIn("tool_config", spy_client.last_complete_kwargs)
-        cfg = spy_client.last_complete_kwargs["tool_config"]
+        self.assertIsNone(spy_client.last_complete_kwargs["tool_config"])
        self.assertEqual(cfg["provider"], "anthropic")
        self.assertEqual(len(cfg["tools"]), 1)
        self.assertEqual(spy_client.last_complete_args[2], "agent")
-    async def test_anthropic_non_stream_filters_tools_by_allowlist(self) -> None:
+    async def test_anthropic_non_stream_filters_tools_by_allowlist_before_emulation(self) -> None:
        spy_client = _SpyClient(
            stream_events=[], complete_result={"text": "ok", "toolEvents": []}
        )
@@ -1760,9 +1749,9 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
                ),
            )
-        cfg = spy_client.last_complete_kwargs["tool_config"]
+        prompt = spy_client.last_complete_args[0]
-        self.assertEqual([tool["name"] for tool in cfg["tools"]], ["lookup"])
+        self.assertIn("lookup(", prompt)
-        self.assertEqual(cfg["tool_choice"], req.tool_choice)
+        self.assertNotIn("write_file(", prompt)
    async def test_anthropic_non_stream_rejects_forced_tool_outside_allowlist(
        self,
@@ -2183,6 +2172,201 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
        self.assertIn('{"temperature":"22C"}', prompt)
        self.assertIn("Assistant:", prompt)
    async def test_openai_assistant_tool_calls_are_projected_into_emulation_prompt(self) -> None:
        spy_client = _SpyClient(
            stream_events=[],
            complete_result={
                "text": "done",
                "toolEvents": [],
                "sessionId": "sess-emulated-tool-history",
            },
        )
        req = ChatCompletionsRequest(
            model="org_auto",
            messages=[
                {
                    "role": "assistant",
                    "content": "I will check that",
                    "tool_calls": [
                        {
                            "id": "call_1",
                            "type": "function",
                            "function": {
                                "name": "fetch_weather",
                                "arguments": '{"city":"Hangzhou"}',
                            },
                        }
                    ],
                },
                {"role": "user", "content": "continue"},
            ],
            stream=False,
            tools=[
                {
                    "type": "function",
                    "function": {
                        "name": "fetch_weather",
                        "parameters": {
                            "type": "object",
                            "properties": {"city": {"type": "string"}},
                            "required": ["city"],
                        },
                    },
                }
            ],
        )
        with (
            patch.object(main, "pool", _FakePool(_FakeInstance(spy_client))),
            patch.object(main, "chat_guard", _FakeGuard()),
            patch.object(
                main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})
            ),
            patch.object(
                main.stats_collector, "record_chat", AsyncMock(return_value=None)
            ),
        ):
            await main.v1_chat_completions(req, _make_request("/v1/chat/completions"))
        prompt = spy_client.last_complete_args[0]
        self.assertIn("I will check that", prompt)
        self.assertIn('"tool": "fetch_weather"', prompt)
        self.assertIn('"city": "Hangzhou"', prompt)
    async def test_openai_emulation_prompt_includes_proxy_tool_guidance(self) -> None:
        spy_client = _SpyClient(
            stream_events=[],
            complete_result={"text": "done", "toolEvents": [], "sessionId": "sess-guidance"},
        )
        req = ChatCompletionsRequest(
            model="org_auto",
            messages=[{"role": "user", "content": "inspect README"}],
            stream=False,
            tools=[
                {
                    "type": "function",
                    "function": {
                        "name": "read_file",
                        "description": "Read a file",
                        "parameters": {
                            "type": "object",
                            "properties": {"path": {"type": "string"}},
                            "required": ["path"],
                        },
                    },
                },
                {
                    "type": "function",
                    "function": {
                        "name": "bash",
                        "parameters": {
                            "type": "object",
                            "properties": {"command": {"type": "string"}},
                        },
                    },
                },
            ],
        )
        with (
            patch.object(main, "pool", _FakePool(_FakeInstance(spy_client))),
            patch.object(main, "chat_guard", _FakeGuard()),
            patch.object(
                main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})
            ),
            patch.object(
                main.stats_collector, "record_chat", AsyncMock(return_value=None)
            ),
        ):
            await main.v1_chat_completions(req, _make_request("/v1/chat/completions"))
        prompt = spy_client.last_complete_args[0]
        self.assertIn("DIRECT tool access inside an IDE", prompt)
        self.assertIn("Tool routing guide:", prompt)
        self.assertIn("Read a specific local file or code path: use read_file.", prompt)
        self.assertIn("Core tool syntax examples", prompt)
        self.assertIn("Coding and file-work discipline:", prompt)
        self.assertIn("NEVER say that tools are unavailable", prompt)
    async def test_anthropic_tool_history_is_projected_into_emulation_prompt(self) -> None:
        spy_client = _SpyClient(
            stream_events=[],
            complete_result={
                "text": "done",
                "toolEvents": [],
                "sessionId": "sess-anthropic-history",
            },
        )
        req = AnthropicMessagesRequest(
            model="claude-3-5-sonnet-20241022",
            max_tokens=128,
            messages=[
                {
                    "role": "assistant",
                    "content": [
                        {"type": "text", "text": "I will check"},
                        {
                            "type": "tool_use",
                            "id": "toolu_1",
                            "name": "fetch_weather",
                            "input": {"city": "Hangzhou"},
                        },
                    ],
                },
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "tool_result",
                            "tool_use_id": "toolu_1",
                            "content": '{"temperature":"22C"}',
                        }
                    ],
                },
                {"role": "user", "content": "continue"},
            ],
            stream=False,
            tools=[
                {
                    "name": "fetch_weather",
                    "input_schema": {
                        "type": "object",
                        "properties": {"city": {"type": "string"}},
                        "required": ["city"],
                    },
                }
            ],
        )
        with (
            patch.object(main, "pool", _FakePool(_FakeInstance(spy_client))),
            patch.object(main, "chat_guard", _FakeGuard()),
            patch.object(
                main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})
            ),
            patch.object(
                main.stats_collector, "record_chat", AsyncMock(return_value=None)
            ),
            patch.object(main.settings, "api_keys", ["test-key"]),
        ):
            await main.v1_messages(
                req,
                _make_request(
                    "/v1/messages",
                    headers={
                        "x-api-key": "test-key",
                        "anthropic-version": "2023-06-01",
                    },
                ),
            )
        prompt = spy_client.last_complete_args[0]
        self.assertIn("I will check", prompt)
        self.assertIn('"tool": "fetch_weather"', prompt)
        self.assertIn('"city": "Hangzhou"', prompt)
        self.assertIn("Tool result:", prompt)
        self.assertIn('{"temperature":"22C"}', prompt)
    async def test_anthropic_non_stream_synthesizes_tool_use_from_json_action_block(
        self,
    ) -> None:
@@ -2434,6 +2618,177 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
        self.assertEqual(detail["error"]["message"], "invalid upstream response")
 class CapabilitiesEndpointTests(unittest.IsolatedAsyncioTestCase):
    async def test_capabilities_payload_shape(self) -> None:
        with (
            patch.object(main.settings, "tool_forward_enabled", True),
            patch.object(main.settings, "tool_allowlist", ["lookup"]),
            patch.object(main.settings, "session_reuse_enabled", True),
            patch.object(main.settings, "session_cache_max_entries", 123),
            patch.object(main.settings, "session_cache_ttl_sec", 45.0),
            patch.object(main.settings, "instance_count", 2),
            patch.object(main.settings, "default_model", "org_auto"),
            patch.object(main.settings, "default_ask_mode", "chat"),
            patch.object(main.settings, "api_keys", ["test-key"]),
            patch.object(main.settings, "admin_token", "adm"),
            patch.object(main.settings, "metrics_public", False),
        ):
            response = await main.capabilities()
        self.assertEqual(response.status_code, 200)
        payload = json.loads(response.body)
        self.assertEqual(payload["service"], "lingma-openai-gateway")
        self.assertIn("protocols", payload)
        self.assertIn("features", payload)
        self.assertTrue(payload["protocols"]["openai"]["chat_completions"])
        self.assertTrue(payload["protocols"]["anthropic"]["messages"])
        self.assertTrue(payload["protocols"]["openai"]["request_tools_forwarded"])
        self.assertEqual(payload["features"]["tooling"]["allowlist"], ["lookup"])
        self.assertEqual(payload["features"]["pool"]["configured_instance_count"], 2)
        self.assertTrue(payload["features"]["auth"]["v1_requires_auth"])
    async def test_v1_capabilities_auth_guard_requires_authentication(self) -> None:
        with patch.object(main.settings, "api_keys", ["test-key"]):
            with self.assertRaises(main.AnthropicAuthError) as ctx:
                main.anthropic_auth_guard(
                    _make_request(
                        "/v1/capabilities",
                        headers={"anthropic-version": "2023-06-01"},
                    )
                )
        self.assertEqual(ctx.exception.status_code, 401)
    async def test_v1_capabilities_returns_payload_with_auth(self) -> None:
        with (
            patch.object(main.settings, "api_keys", ["test-key"]),
            patch.object(main.settings, "tool_forward_enabled", False),
        ):
            main.anthropic_auth_guard(
                _make_request(
                    "/v1/capabilities",
                    headers={"x-api-key": "test-key", "anthropic-version": "2023-06-01"},
                )
            )
            response = await main.v1_capabilities()
        self.assertEqual(response.status_code, 200)
        payload = json.loads(response.body)
        self.assertFalse(payload["protocols"]["openai"]["request_tools_forwarded"])
 class AdminIntrospectionEndpointTests(unittest.IsolatedAsyncioTestCase):
    async def test_internal_effective_config_requires_admin_token(self) -> None:
        with (
            patch.object(main.settings, "api_keys", ["api-key"]),
            patch.object(main.settings, "admin_token", "admin-secret"),
        ):
            with self.assertRaises(main.HTTPException) as ctx:
                main.admin_auth_guard(
                    _make_request(
                        "/internal/effective-config",
                        headers={"authorization": "Bearer wrong-token"},
                    )
                )
        self.assertEqual(ctx.exception.status_code, 401)
    async def test_internal_effective_config_redacts_secrets(self) -> None:
        with (
            patch.object(main.settings, "api_keys", ["api-key-1", "api-key-2"]),
            patch.object(main.settings, "admin_token", "admin-secret"),
            patch.object(main.settings, "metrics_token", "metrics-secret"),
            patch.object(main.settings, "default_model", "org_auto"),
            patch.object(main.settings, "tool_forward_enabled", True),
            patch.object(main.settings, "session_reuse_enabled", True),
            patch.object(main.settings, "metrics_public", False),
            patch.object(main.settings, "auto_login_enabled", True),
            patch.object(
                main.settings,
                "accounts",
                [
                    SimpleNamespace(
                        username="user-a",
                        password="pass-a",
                        session_bundle_b64="bundle-a",
                        session_bundle_file="/secrets/bundle-a.txt",
                    )
                ],
            ),
        ):
            main.admin_auth_guard(
                _make_request(
                    "/internal/effective-config",
                    headers={"authorization": "Bearer admin-secret"},
                )
            )
            response = await main.internal_effective_config()
        self.assertEqual(response.status_code, 200)
        payload = json.loads(response.body)
        settings_payload = payload["settings"]
        self.assertEqual(settings_payload["api_keys"], ["***", "***"])
        self.assertEqual(settings_payload["admin_token"], "***")
        self.assertEqual(settings_payload["metrics_token"], "***")
        self.assertEqual(settings_payload["accounts"][0]["password"], "***")
        self.assertEqual(settings_payload["accounts"][0]["session_bundle_b64"], "***")
        self.assertEqual(settings_payload["accounts"][0]["username"], "user-a")
        self.assertEqual(
            settings_payload["accounts"][0]["session_bundle_file"],
            "/secrets/bundle-a.txt",
        )
        self.assertTrue(payload["feature_flags"]["tool_forward_enabled"])
        self.assertTrue(payload["feature_flags"]["session_reuse_enabled"])
    async def test_internal_debug_requests_redacts_sensitive_fields(self) -> None:
        main._DEBUG_REQUEST_LOG.clear()
        main._record_debug_request(
            "openai",
            "/v1/chat/completions",
            {
                "api_key": "secret-key",
                "session_bundle": "bundle-value",
                "image_url": "data:image/png;base64,abcd",
                "tool_calls": [
                    {
                        "function": {
                            "arguments": "x" * 3001,
                        }
                    }
                ],
            },
            _make_request("/v1/chat/completions", headers={"x-request-id": "req-123"}),
        )
        response = await main.internal_debug_requests(limit=10)
        self.assertEqual(response.status_code, 200)
        payload = json.loads(response.body)
        self.assertEqual(payload["count"], 1)
        item = payload["items"][0]
        self.assertEqual(item["request_id"], "req-123")
        self.assertEqual(item["body"]["api_key"], "***")
        self.assertEqual(item["body"]["session_bundle"], "***")
        self.assertEqual(item["body"]["image_url"], "[redacted-data-url]")
        self.assertTrue(item["body"]["tool_calls"][0]["function"]["arguments"].endswith("... [truncated]"))
    async def test_internal_debug_requests_requires_admin_token(self) -> None:
        with (
            patch.object(main.settings, "api_keys", ["api-key"]),
            patch.object(main.settings, "admin_token", "admin-secret"),
        ):
            with self.assertRaises(main.HTTPException) as ctx:
                main.admin_auth_guard(
                    _make_request(
                        "/internal/debug/requests",
                        headers={"authorization": "Bearer wrong-token"},
                    )
                )
        self.assertEqual(ctx.exception.status_code, 401)
 class SessionCacheToolFingerprintTests(unittest.TestCase):
    def test_build_key_changes_with_tool_config(self) -> None:
        from app.session_cache import SessionCache