feat: add capability and admin introspection endpoints
Expose capability discovery plus admin-only config and request inspection endpoints so clients and operators can understand gateway behavior without reading code. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
29
README.md
29
README.md
@@ -4,6 +4,8 @@
|
||||
|
||||
- OpenAI:`/v1/models`、`/v1/chat/completions`(含 stream)
|
||||
- Anthropic:`/v1/messages`、`/v1/messages/count_tokens`(含 stream)
|
||||
- 能力探测:`/capabilities`、`/v1/capabilities`
|
||||
- 内省端点:`/internal/effective-config`、`/internal/debug/requests`
|
||||
- 内置:多实例池、会话复用、Prometheus 指标、登录态 bundle 注入
|
||||
- 工具事件桥接:Lingma 上游返回 `tool` 事件时,网关会输出为 OpenAI `tool_calls`(stream/non-stream)和 Anthropic `tool_use` / `tool_result`(stream/non-stream);请求侧 `tools` / `tool_choice` 仅在 `TOOL_FORWARD_ENABLED=true` 时透传(默认开启,可显式关闭)
|
||||
- 工具模拟回退:当 Lingma 未稳定外显原生 `tool/*` 事件时,网关会把注入后的 `json action` / `#Tool Call` 等动作文本归一化为 OpenAI `tool_calls`,并支持 tool result continuation
|
||||
@@ -56,6 +58,7 @@ API_KEY=$(grep '^API_KEYS=' .env | cut -d= -f2 | cut -d, -f1)
|
||||
curl -s "http://127.0.0.1:${PORT}/healthz"
|
||||
curl -s "http://127.0.0.1:${PORT}/v1/models" \
|
||||
-H "Authorization: Bearer ${API_KEY}"
|
||||
curl -s "http://127.0.0.1:${PORT}/capabilities"
|
||||
```
|
||||
|
||||
---
|
||||
@@ -172,6 +175,32 @@ curl -s "http://127.0.0.1:${PORT}/v1/messages/count_tokens" \
|
||||
}'
|
||||
```
|
||||
|
||||
### 能力探测
|
||||
|
||||
```bash
|
||||
curl -s "http://127.0.0.1:${PORT}/capabilities"
|
||||
|
||||
curl -s "http://127.0.0.1:${PORT}/v1/capabilities" \
|
||||
-H "x-api-key: ${API_KEY}" \
|
||||
-H "anthropic-version: 2023-06-01"
|
||||
```
|
||||
|
||||
### 内省端点(admin)
|
||||
|
||||
如果配置了 `ADMIN_TOKEN`,以下端点需要使用该 token;否则会回退复用 `API_KEYS`。
|
||||
|
||||
```bash
|
||||
ADMIN_TOKEN=${ADMIN_TOKEN:-$API_KEY}
|
||||
|
||||
curl -s "http://127.0.0.1:${PORT}/internal/effective-config" \
|
||||
-H "Authorization: Bearer ${ADMIN_TOKEN}"
|
||||
|
||||
curl -s "http://127.0.0.1:${PORT}/internal/debug/requests?limit=5" \
|
||||
-H "Authorization: Bearer ${ADMIN_TOKEN}"
|
||||
```
|
||||
|
||||
> `/internal/debug/requests` 会对 token、session bundle、data URL 图片和超长工具参数做脱敏/截断。
|
||||
|
||||
---
|
||||
|
||||
## 部署与更新
|
||||
|
||||
368
app/main.py
368
app/main.py
@@ -5,6 +5,7 @@ import hashlib
|
||||
import json
|
||||
import time
|
||||
import uuid
|
||||
from collections import deque
|
||||
from contextlib import asynccontextmanager
|
||||
from typing import Any
|
||||
|
||||
@@ -15,6 +16,7 @@ from .anthropic_schema import (
|
||||
AnthropicMessagesRequest,
|
||||
affinity_key_for_anthropic,
|
||||
anthropic_to_internal_messages,
|
||||
flatten_anthropic_content,
|
||||
)
|
||||
from .auth import (
|
||||
AnthropicAuthError,
|
||||
@@ -112,6 +114,8 @@ STREAMING_RESPONSE_HEADERS = {
|
||||
"Connection": "keep-alive",
|
||||
}
|
||||
|
||||
_DEBUG_REQUEST_LOG: deque[dict[str, Any]] = deque(maxlen=100)
|
||||
|
||||
|
||||
def _require_pool() -> LingmaPool:
|
||||
if pool is None:
|
||||
@@ -249,6 +253,63 @@ def _log_auth_posture() -> None:
|
||||
)
|
||||
|
||||
|
||||
def _safe_setting_value(key: str, value: Any) -> Any:
    """Mask a configuration value when its key name looks secret-bearing.

    A key is treated as sensitive when its upper-cased name contains any of
    KEY, TOKEN, PASSWORD, SECRET or BUNDLE. Sensitive list values keep their
    length (one ``"***"`` per element); any other sensitive value, including
    ``None`` or an empty string, becomes ``"***"``. Non-sensitive values are
    returned untouched.
    """
    normalized = key.upper()
    sensitive = False
    for marker in ("KEY", "TOKEN", "PASSWORD", "SECRET", "BUNDLE"):
        if marker in normalized:
            sensitive = True
            break

    if not sensitive:
        return value
    if isinstance(value, list):
        # Preserve the element count so operators can still see how many
        # entries are configured.
        return ["***"] * len(value)
    return "***"
|
||||
|
||||
|
||||
def _redact_debug_value(path: tuple[str, ...], value: Any) -> Any:
    """Return a copy of ``value`` that is safe to keep in the debug request log.

    Dicts and lists are walked recursively; any other non-string value is
    returned unchanged. String leaves are checked in this order:

    1. masked with ``"***"`` when any enclosing key contains a credential
       marker (authorization, x-api-key, api_key, token, password, secret,
       session_bundle);
    2. replaced with ``"[redacted-data-url]"`` when they start with a
       ``data:`` URL scheme, matched case-insensitively;
    3. replaced with ``"[redacted-session-bundle]"`` when the text mentions
       "session bundle" (case-insensitive);
    4. cut to their first 1024 characters plus a marker when they sit under
       an ``args``/``arguments`` key and are longer than 2048 characters.

    Only string leaves are masked, so numeric fields whose key happens to
    contain a marker (for example ``max_tokens``) pass through unchanged.

    Args:
        path: Lower-cased dict keys (and ``"[]"`` for each list hop) leading
            from the root of the payload to ``value``.
        value: The payload fragment to sanitise.

    Returns:
        A new structure with the same shape as ``value``.
    """
    if isinstance(value, dict):
        return {
            k: _redact_debug_value(path + (str(k).lower(),), v)
            for k, v in value.items()
        }
    if isinstance(value, list):
        return [_redact_debug_value(path + ("[]",), item) for item in value]
    if not isinstance(value, str):
        return value

    lowered_path = "/".join(path)
    sensitive_markers = (
        "authorization",
        "x-api-key",
        "api_key",
        "token",
        "password",
        "secret",
        "session_bundle",
    )
    if any(marker in lowered_path for marker in sensitive_markers):
        return "***"
    # URI schemes are case-insensitive, so "DATA:image/png;base64,..." is as
    # much an inline image as "data:..." and must not reach the log verbatim.
    # NOTE(review): base64 image blocks that are not data URLs (e.g. a bare
    # base64 string under a "data" key) are not covered here — confirm
    # whether they should be.
    if value[:5].lower() == "data:":
        return "[redacted-data-url]"
    if "session bundle" in value.lower():
        return "[redacted-session-bundle]"
    if any(part in {"args", "arguments"} for part in path) and len(value) > 2048:
        return value[:1024] + "... [truncated]"
    return value
|
||||
|
||||
|
||||
def _record_debug_request(protocol: str, path: str, body: dict[str, Any], request: Request) -> None:
    """Push a redacted snapshot of an incoming request onto the debug log.

    Args:
        protocol: Label for the API flavour that received the request.
        path: Route path the request arrived on.
        body: Request body as a plain dict; it is passed through
            ``_redact_debug_value`` before being stored.
        request: Incoming request; only its ``x-request-id`` header is read.
    """
    entry = {
        "timestamp": int(time.time()),
        "protocol": protocol,
        "path": path,
        "request_id": request.headers.get("x-request-id", ""),
        "body": _redact_debug_value((), body),
    }
    # Newest entries go on the left so readers see most-recent-first.
    _DEBUG_REQUEST_LOG.appendleft(entry)
|
||||
|
||||
|
||||
@app.get("/internal/debug/requests", dependencies=[Depends(admin_auth_guard)])
async def internal_debug_requests(limit: int = 20):
    """Return the most recent recorded requests, newest first.

    ``limit`` is clamped to the range 1..100 before slicing the debug log.
    """
    safe_limit = max(1, min(limit, 100))
    items = list(_DEBUG_REQUEST_LOG)[:safe_limit]
    payload = {
        "ok": True,
        "count": len(items),
        "items": items,
    }
    return JSONResponse(content=payload)
|
||||
|
||||
|
||||
@app.get("/healthz")
|
||||
async def healthz():
|
||||
if pool is None:
|
||||
@@ -267,6 +328,62 @@ async def healthz():
|
||||
}
|
||||
|
||||
|
||||
def _capabilities_payload() -> dict[str, Any]:
    """Assemble the capability document served by the capabilities routes.

    Protocol support flags are hard-coded; the remaining values are read from
    the module-level ``settings`` object (and ``app.version``) at call time.
    """
    openai_caps = {
        "models": True,
        "chat_completions": True,
        "responses": True,
        "streaming": True,
        "response_tool_calls": True,
        "request_tools_forwarded": settings.tool_forward_enabled,
    }
    anthropic_caps = {
        "messages": True,
        "count_tokens": True,
        "streaming": True,
        "response_tool_use": True,
        "request_tools_forwarded": settings.tool_forward_enabled,
    }

    session_reuse = {
        "enabled": settings.session_reuse_enabled,
        "cache_max_entries": settings.session_cache_max_entries,
        "cache_ttl_sec": settings.session_cache_ttl_sec,
    }
    tooling = {
        "forward_enabled": settings.tool_forward_enabled,
        "allowlist": settings.tool_allowlist,
        "emulation_bridge_enabled": True,
    }
    pool_info = {
        "configured_instance_count": settings.instance_count,
        "default_model": settings.default_model,
        "default_ask_mode": settings.default_ask_mode,
    }
    # Only booleans are exposed here: whether keys/tokens exist, never their
    # values.
    auth_info = {
        "v1_requires_auth": bool(settings.api_keys),
        "admin_token_configured": bool(settings.admin_token),
        "metrics_public": settings.metrics_public,
    }

    return {
        "service": "lingma-openai-gateway",
        "version": app.version,
        "protocols": {
            "openai": openai_caps,
            "anthropic": anthropic_caps,
        },
        "features": {
            "session_reuse": session_reuse,
            "tooling": tooling,
            "pool": pool_info,
            "auth": auth_info,
        },
    }
|
||||
|
||||
|
||||
@app.get("/capabilities")
async def capabilities():
    """Serve the capability document; this route declares no auth dependency."""
    payload = _capabilities_payload()
    return JSONResponse(content=payload)
|
||||
|
||||
|
||||
@app.get("/v1/capabilities", dependencies=[Depends(anthropic_auth_guard)])
async def v1_capabilities():
    """Serve the same capability document behind ``anthropic_auth_guard``."""
    payload = _capabilities_payload()
    return JSONResponse(content=payload)
|
||||
|
||||
|
||||
async def _ensure_instance_logged_in(inst: PoolInstance) -> dict:
|
||||
client = inst.client
|
||||
auto_login = inst.auto_login
|
||||
@@ -433,6 +550,75 @@ def _messages_to_prompt(messages: list[dict]) -> str:
|
||||
return "\n".join(parts).strip()
|
||||
|
||||
|
||||
def _assistant_tool_calls_to_emulation_text(tool_calls: Any) -> str:
    """Render assistant ``tool_calls`` as ``json action`` fenced blocks.

    Each usable entry becomes one fenced block holding
    ``{"tool": <name>, "parameters": <arguments>}``; blocks are separated by a
    blank line. Entries that are not dicts or have no name are skipped.

    Both the nested form (``{"function": {"name", "arguments"}}``) and the
    flat form (``{"name", "arguments"}``) are accepted for the name *and* the
    arguments, so flat-form calls no longer lose their arguments.

    Args:
        tool_calls: Expected to be a list of tool-call dicts; anything else
            yields an empty string.

    Returns:
        The concatenated action blocks, or ``""`` when nothing is usable.
    """
    if not isinstance(tool_calls, list):
        return ""
    blocks: list[str] = []
    for item in tool_calls:
        if not isinstance(item, dict):
            continue
        fn = item.get("function")
        if not isinstance(fn, dict):
            fn = {}
        name = str(fn.get("name") or item.get("name") or "").strip()
        if not name:
            continue
        arguments = fn.get("arguments")
        if arguments is None:
            # Mirror the name lookup: fall back to the flat item layout.
            arguments = item.get("arguments")
        if isinstance(arguments, str):
            try:
                arguments = json.loads(arguments)
            except (ValueError, RecursionError):
                # Keep unparseable argument text rather than dropping it.
                arguments = {"raw": arguments}
        if not isinstance(arguments, dict):
            arguments = {}
        blocks.append(
            "```json action\n"
            + json.dumps(
                {"tool": name, "parameters": arguments}, ensure_ascii=False, indent=2
            )
            + "\n```"
        )
    return "\n\n".join(blocks)
|
||||
|
||||
|
||||
def _tool_action_block(name: str, arguments: dict[str, Any]) -> str:
    """Format one tool invocation as a ``json action`` fenced block.

    The fence body is the pretty-printed JSON object
    ``{"tool": name, "parameters": arguments}`` with non-ASCII characters
    kept as-is.
    """
    payload = json.dumps(
        {"tool": name, "parameters": arguments},
        ensure_ascii=False,
        indent=2,
    )
    return f"```json action\n{payload}\n```"
|
||||
|
||||
|
||||
def _anthropic_flattened_tool_history_to_emulation_text(text: str) -> str:
    """Rewrite ``[tool_use]`` / ``[tool_result]`` lines into emulation text.

    The input is processed line by line:

    * a line starting with ``[tool_use]`` whose remainder is a JSON object
      with a non-empty ``name`` and a dict ``input`` becomes a
      ``json action`` block; otherwise the line is kept unchanged;
    * a line starting with ``[tool_result]`` is replaced by
      ``action_output_prompt(None, <remainder>)``;
    * every other line is kept unchanged.

    Empty lines are dropped when the pieces are joined, and the final text
    is stripped.
    """
    if not text:
        return ""

    use_marker = "[tool_use]"
    result_marker = "[tool_result]"
    rendered: list[str] = []

    for raw_line in text.splitlines():
        candidate = raw_line.strip()

        if candidate.startswith(use_marker):
            # Fall back to the untouched line unless the payload is usable.
            replacement = raw_line
            try:
                decoded = json.loads(candidate[len(use_marker) :].strip())
            except Exception:
                decoded = None
            if isinstance(decoded, dict):
                tool_name = str(decoded.get("name") or "").strip()
                tool_input = decoded.get("input")
                if tool_name and isinstance(tool_input, dict):
                    replacement = _tool_action_block(tool_name, tool_input)
            rendered.append(replacement)
        elif candidate.startswith(result_marker):
            rendered.append(
                action_output_prompt(None, candidate[len(result_marker) :].strip())
            )
        else:
            rendered.append(raw_line)

    return "\n".join(filter(None, rendered)).strip()
|
||||
|
||||
|
||||
def _messages_to_emulation_prompt(
|
||||
messages: list[dict[str, Any]],
|
||||
*,
|
||||
@@ -446,6 +632,10 @@ def _messages_to_emulation_prompt(
|
||||
if role in {"system", "developer"}:
|
||||
continue
|
||||
text = flatten_content(message.get("content"))
|
||||
if role == "assistant" and message.get("tool_calls"):
|
||||
projected = _assistant_tool_calls_to_emulation_text(message.get("tool_calls"))
|
||||
if projected:
|
||||
text = "\n\n".join(part for part in [text, projected] if part)
|
||||
if role == "tool":
|
||||
text = action_output_prompt(message.get("tool_call_id"), text)
|
||||
role = "user"
|
||||
@@ -472,6 +662,22 @@ def _messages_to_emulation_prompt(
|
||||
return "\n\n".join(parts).strip()
|
||||
|
||||
|
||||
def _effective_tool_config_for_emulation(
|
||||
tool_config: dict[str, Any] | None,
|
||||
*,
|
||||
use_emulation: bool,
|
||||
) -> dict[str, Any] | None:
|
||||
if use_emulation:
|
||||
return None
|
||||
return tool_config
|
||||
|
||||
|
||||
def _emulation_tools(raw_tools: list[dict[str, Any]] | None, tool_config: dict[str, Any] | None) -> list[dict[str, Any]] | None:
|
||||
if isinstance(tool_config, dict) and isinstance(tool_config.get("tools"), list):
|
||||
return tool_config.get("tools")
|
||||
return raw_tools
|
||||
|
||||
|
||||
def _anthropic_messages_to_emulation_prompt(
|
||||
messages: list[dict[str, Any]],
|
||||
*,
|
||||
@@ -483,6 +689,10 @@ def _anthropic_messages_to_emulation_prompt(
|
||||
for message in messages:
|
||||
role = str(message.get("role") or "").strip().lower()
|
||||
text = str(message.get("content") or "").strip()
|
||||
if role == "assistant" and "[tool_use]" in text:
|
||||
text = _anthropic_flattened_tool_history_to_emulation_text(text)
|
||||
elif role == "user" and "[tool_result]" in text:
|
||||
text = _anthropic_flattened_tool_history_to_emulation_text(text)
|
||||
if role == "tool":
|
||||
text = action_output_prompt(message.get("tool_call_id"), text)
|
||||
role = "user"
|
||||
@@ -575,6 +785,7 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
|
||||
p = _require_pool()
|
||||
|
||||
messages_dump = [m.model_dump() for m in req.messages]
|
||||
_record_debug_request("openai", "/v1/chat/completions", req.model_dump(mode="json"), request)
|
||||
api_key = _extract_api_key(request) or "-"
|
||||
|
||||
# ------------------------------------------------------------- session reuse
|
||||
@@ -617,9 +828,11 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
|
||||
is_reply = execution.is_reply
|
||||
|
||||
include_usage = _include_usage(req.stream_options)
|
||||
em_tools = _em_extract_openai_tools(req.tools)
|
||||
emulation_tools = _emulation_tools(req.tools, tool_config)
|
||||
em_tools = _em_extract_openai_tools(emulation_tools)
|
||||
em_choice = _em_extract_openai_tool_choice(req.tool_choice)
|
||||
if _em_has_tool_request(em_tools, em_choice):
|
||||
use_emulation = has_tooling_context
|
||||
if use_emulation:
|
||||
system_parts = [
|
||||
flatten_content(m.content)
|
||||
for m in req.messages
|
||||
@@ -628,9 +841,14 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
|
||||
prompt = _messages_to_emulation_prompt(
|
||||
messages_dump,
|
||||
system_text="\n\n".join(system_parts),
|
||||
tools=req.tools,
|
||||
tools=emulation_tools,
|
||||
tool_choice=req.tool_choice,
|
||||
)
|
||||
execution.prompt = prompt
|
||||
effective_tool_config = _effective_tool_config_for_emulation(
|
||||
tool_config,
|
||||
use_emulation=use_emulation,
|
||||
)
|
||||
|
||||
try:
|
||||
started = await start_execution(
|
||||
@@ -708,7 +926,7 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
|
||||
ask_mode,
|
||||
session_id=cached_session_id,
|
||||
is_reply=is_reply,
|
||||
tool_config=tool_config,
|
||||
tool_config=effective_tool_config,
|
||||
out_meta=_meta,
|
||||
):
|
||||
if _stream_event_type(chunk) == "tool":
|
||||
@@ -763,6 +981,8 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
|
||||
continue
|
||||
buffered_text_parts.append(text)
|
||||
completion_tokens_holder["n"] += estimate_tokens(text)
|
||||
if use_emulation:
|
||||
continue
|
||||
|
||||
full_text = "".join(buffered_text_parts)
|
||||
if req.tools:
|
||||
@@ -855,9 +1075,6 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
|
||||
buffered_text_parts.clear()
|
||||
yield f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"
|
||||
|
||||
if buffered_text_parts and forced_tool_name and saw_tool_call:
|
||||
buffered_text_parts.clear()
|
||||
|
||||
if buffered_text_parts and req.tools and not saw_tool_call:
|
||||
merged_text = "".join(buffered_text_parts)
|
||||
inferred = _infer_tool_event_from_declared_tools(
|
||||
@@ -924,6 +1141,11 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
|
||||
yield f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"
|
||||
buffered_text_parts = [remaining] if remaining else []
|
||||
|
||||
if buffered_text_parts and saw_tool_call:
|
||||
text_to_yield = "".join(buffered_text_parts)
|
||||
buffered_text_parts.clear()
|
||||
yield _text_payload(text_to_yield)
|
||||
|
||||
done_payload = {
|
||||
"id": completion_id,
|
||||
"object": "chat.completion.chunk",
|
||||
@@ -996,7 +1218,7 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
|
||||
protocol="chat",
|
||||
execution=execution,
|
||||
prompt_tokens=prompt_tokens,
|
||||
tool_config=tool_config,
|
||||
tool_config=effective_tool_config,
|
||||
logger=logger,
|
||||
stats_collector=stats_collector,
|
||||
session_cache=session_cache,
|
||||
@@ -1095,7 +1317,7 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
|
||||
ask_mode,
|
||||
session_id=None,
|
||||
is_reply=False,
|
||||
tool_config=tool_config,
|
||||
tool_config=effective_tool_config,
|
||||
)
|
||||
retry_text = retry_result.get("text") or ""
|
||||
parsed_calls, remaining = parse_action_blocks(retry_text, em_tools)
|
||||
@@ -1227,6 +1449,7 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
|
||||
)
|
||||
|
||||
messages_dump = anthropic_to_internal_messages(req)
|
||||
_record_debug_request("anthropic", "/v1/messages", req.model_dump(mode="json"), request)
|
||||
# Prefer the auth token actually accepted so session-cache bucketing is
|
||||
# consistent regardless of which auth header style the caller used.
|
||||
api_key = (
|
||||
@@ -1284,16 +1507,23 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
|
||||
model = execution.model
|
||||
prompt = execution.prompt
|
||||
is_reply = execution.is_reply
|
||||
em_anthropic_tools = _em_extract_anthropic_tools(req.tools)
|
||||
emulation_tools = _emulation_tools(req.tools, tool_config)
|
||||
em_anthropic_tools = _em_extract_anthropic_tools(emulation_tools)
|
||||
em_anthropic_choice = _em_extract_anthropic_tool_choice(req.tool_choice)
|
||||
if _em_has_tool_request(em_anthropic_tools, em_anthropic_choice):
|
||||
use_emulation = has_tooling_context
|
||||
if use_emulation:
|
||||
system_text = flatten_anthropic_content(req.system) if req.system else ""
|
||||
prompt = _anthropic_messages_to_emulation_prompt(
|
||||
messages_dump,
|
||||
system_text=system_text,
|
||||
tools=req.tools,
|
||||
tools=emulation_tools,
|
||||
tool_choice=req.tool_choice,
|
||||
)
|
||||
execution.prompt = prompt
|
||||
effective_tool_config = _effective_tool_config_for_emulation(
|
||||
tool_config,
|
||||
use_emulation=use_emulation,
|
||||
)
|
||||
|
||||
try:
|
||||
started = await start_execution(
|
||||
@@ -1372,7 +1602,7 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
|
||||
ask_mode,
|
||||
session_id=cached_session_id,
|
||||
is_reply=is_reply,
|
||||
tool_config=tool_config,
|
||||
tool_config=effective_tool_config,
|
||||
out_meta=_meta,
|
||||
):
|
||||
if _stream_event_type(chunk) == "tool":
|
||||
@@ -1703,7 +1933,7 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
|
||||
protocol="anthropic",
|
||||
execution=execution,
|
||||
prompt_tokens=prompt_tokens,
|
||||
tool_config=tool_config,
|
||||
tool_config=effective_tool_config,
|
||||
logger=logger,
|
||||
stats_collector=stats_collector,
|
||||
session_cache=session_cache,
|
||||
@@ -1757,10 +1987,8 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
|
||||
text = remaining
|
||||
|
||||
if not saw_tool_event and em_anthropic_tools:
|
||||
inferred_call = infer_declared_tool_call_from_text(text, em_anthropic_tools)
|
||||
if inferred_call is None:
|
||||
inferred_calls = infer_tool_calls_from_text(text, em_anthropic_tools)
|
||||
inferred_call = inferred_calls[0] if inferred_calls else None
|
||||
inferred_calls = infer_tool_calls_from_text(text, em_anthropic_tools)
|
||||
inferred_call = inferred_calls[0] if inferred_calls else None
|
||||
if inferred_call is not None:
|
||||
content_blocks = [
|
||||
{
|
||||
@@ -1774,7 +2002,7 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
|
||||
saw_pending_tool_use = True
|
||||
text = ""
|
||||
|
||||
if not saw_tool_event and em_anthropic_tools:
|
||||
if not saw_tool_event and em_anthropic_tools and not text.strip():
|
||||
retry_prompt = f"{prompt}\n\n{force_tooling_prompt(em_anthropic_choice)}"
|
||||
retry_result = await inst.client.chat_complete(
|
||||
retry_prompt,
|
||||
@@ -1782,53 +2010,7 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
|
||||
ask_mode,
|
||||
session_id=None,
|
||||
is_reply=False,
|
||||
tool_config=tool_config,
|
||||
)
|
||||
retry_text = retry_result.get("text") or ""
|
||||
parsed_calls, remaining = parse_action_blocks(retry_text, em_anthropic_tools)
|
||||
if parsed_calls:
|
||||
content_blocks = []
|
||||
if remaining:
|
||||
content_blocks.append({"type": "text", "text": remaining})
|
||||
for call in parsed_calls:
|
||||
content_blocks.append(
|
||||
{
|
||||
"type": "tool_use",
|
||||
"id": call.id,
|
||||
"name": call.name,
|
||||
"input": call.arguments,
|
||||
}
|
||||
)
|
||||
saw_tool_event = True
|
||||
saw_pending_tool_use = True
|
||||
text = remaining
|
||||
else:
|
||||
inferred_call = infer_declared_tool_call_from_text(retry_text, em_anthropic_tools)
|
||||
if inferred_call is None:
|
||||
inferred_calls = infer_tool_calls_from_text(retry_text, em_anthropic_tools)
|
||||
inferred_call = inferred_calls[0] if inferred_calls else None
|
||||
if inferred_call is not None:
|
||||
content_blocks = [
|
||||
{
|
||||
"type": "tool_use",
|
||||
"id": inferred_call.id,
|
||||
"name": inferred_call.name,
|
||||
"input": inferred_call.arguments,
|
||||
}
|
||||
]
|
||||
saw_tool_event = True
|
||||
saw_pending_tool_use = True
|
||||
text = ""
|
||||
|
||||
if not saw_tool_event and em_anthropic_tools and text.strip():
|
||||
retry_prompt = f"{prompt}\n\n{force_tooling_prompt(em_anthropic_choice)}"
|
||||
retry_result = await inst.client.chat_complete(
|
||||
retry_prompt,
|
||||
model,
|
||||
ask_mode,
|
||||
session_id=None,
|
||||
is_reply=False,
|
||||
tool_config=tool_config,
|
||||
tool_config=effective_tool_config,
|
||||
)
|
||||
retry_text = retry_result.get("text") or ""
|
||||
parsed_calls, remaining = parse_action_blocks(retry_text, em_anthropic_tools)
|
||||
@@ -2090,6 +2272,60 @@ async def internal_stats():
|
||||
}
|
||||
|
||||
|
||||
@app.get("/internal/effective-config", dependencies=[Depends(admin_auth_guard)])
async def internal_effective_config():
    """Return the gateway's current settings with secret-like fields masked.

    API keys, the metrics/admin tokens, account passwords and inline session
    bundles go through ``_safe_setting_value``; every other field is emitted
    as stored on ``settings``.
    """
    cfg = settings

    accounts = []
    for account in cfg.accounts:
        accounts.append(
            {
                "username": account.username,
                "password": _safe_setting_value("password", account.password),
                "session_bundle_b64": _safe_setting_value(
                    "session_bundle_b64", account.session_bundle_b64
                ),
                "session_bundle_file": account.session_bundle_file,
            }
        )

    settings_view = {
        "host": cfg.host,
        "port": cfg.port,
        "api_keys": _safe_setting_value("api_keys", cfg.api_keys),
        "metrics_token": _safe_setting_value("metrics_token", cfg.metrics_token),
        "admin_token": _safe_setting_value("admin_token", cfg.admin_token),
        "metrics_public": cfg.metrics_public,
        "log_level": cfg.log_level,
        "gateway_max_in_flight": cfg.gateway_max_in_flight,
        "gateway_queue_timeout_sec": cfg.gateway_queue_timeout_sec,
        "lingma_bin": cfg.lingma_bin,
        "lingma_work_dir": cfg.lingma_work_dir,
        "lingma_socket_port": cfg.lingma_socket_port,
        "lingma_startup_timeout": cfg.lingma_startup_timeout,
        "lingma_rpc_timeout": cfg.lingma_rpc_timeout,
        "default_model": cfg.default_model,
        "default_ask_mode": cfg.default_ask_mode,
        "dedicated_domain_url": cfg.dedicated_domain_url,
        "auto_login_enabled": cfg.auto_login_enabled,
        "auto_login_headless": cfg.auto_login_headless,
        "auto_login_timeout": cfg.auto_login_timeout,
        "auto_login_max_retry": cfg.auto_login_max_retry,
        "instance_count": cfg.instance_count,
        "session_reuse_enabled": cfg.session_reuse_enabled,
        "session_cache_max_entries": cfg.session_cache_max_entries,
        "session_cache_ttl_sec": cfg.session_cache_ttl_sec,
        "tool_forward_enabled": cfg.tool_forward_enabled,
        "tool_allowlist": cfg.tool_allowlist,
        "accounts": accounts,
    }

    # Convenience summary of the boolean switches already present above.
    feature_flags = {
        "tool_forward_enabled": cfg.tool_forward_enabled,
        "session_reuse_enabled": cfg.session_reuse_enabled,
        "metrics_public": cfg.metrics_public,
        "auto_login_enabled": cfg.auto_login_enabled,
    }

    return JSONResponse(
        content={
            "ok": True,
            "settings": settings_view,
            "feature_flags": feature_flags,
        }
    )
|
||||
|
||||
|
||||
@app.get("/metrics", dependencies=[Depends(metrics_auth_guard)])
|
||||
async def metrics():
|
||||
base = await stats_collector.prometheus_text()
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
fastapi==0.115.0
|
||||
starlette==0.38.6
|
||||
uvicorn[standard]==0.30.6
|
||||
websockets==13.1
|
||||
pydantic==2.9.2
|
||||
|
||||
@@ -1,14 +1,37 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
import types
|
||||
import unittest
|
||||
from unittest.mock import patch
|
||||
|
||||
from fastapi import HTTPException
|
||||
from fastapi.testclient import TestClient
|
||||
from starlette.requests import Request
|
||||
|
||||
from app.auth import AnthropicAuthError, require_anthropic_key, require_bearer, require_metrics_access
|
||||
from app.concurrency import BackpressureRejected, InFlightGuard
|
||||
|
||||
_playwright = types.ModuleType("playwright")
|
||||
_playwright_async = types.ModuleType("playwright.async_api")
|
||||
|
||||
|
||||
class _StubPlaywrightTimeoutError(Exception):
    """Placeholder exception exposed as ``TimeoutError`` on the fake playwright module."""
|
||||
|
||||
|
||||
async def _stub_async_playwright():
    """Stand-in installed as ``async_playwright`` on the fake playwright module.

    Awaiting it always raises ``RuntimeError``.
    """
    raise RuntimeError("playwright is stubbed in unit tests")
|
||||
|
||||
|
||||
_playwright_async.TimeoutError = _StubPlaywrightTimeoutError
|
||||
_playwright_async.async_playwright = _stub_async_playwright
|
||||
sys.modules.setdefault("playwright", _playwright)
|
||||
sys.modules.setdefault("playwright.async_api", _playwright_async)
|
||||
|
||||
import app.main as main
|
||||
|
||||
|
||||
def _req(headers: dict[str, str] | None = None) -> Request:
|
||||
pairs = []
|
||||
@@ -82,5 +105,48 @@ class AuthAndConcurrencyTests(unittest.IsolatedAsyncioTestCase):
|
||||
self.assertEqual(guard.in_flight, 0)
|
||||
|
||||
|
||||
class DebugRequestRecordingTests(unittest.TestCase):
    """Tests for debug-request redaction and the admin-only listing endpoint."""

    def setUp(self) -> None:
        # The log is module-level state; start each test from an empty buffer.
        main._DEBUG_REQUEST_LOG.clear()

    def test_redacts_sensitive_fields_and_data_urls(self) -> None:
        """Credential keys are masked, data URLs replaced, long tool args truncated."""
        body = {
            "authorization": "Bearer abc",
            "x-api-key": "secret",
            "session_bundle": "very-secret",
            "images": ["data:image/png;base64,ABC"],
            "tool": {"args": "x" * 3000},
        }
        redacted = main._redact_debug_value((), body)

        self.assertEqual(redacted["authorization"], "***")
        self.assertEqual(redacted["x-api-key"], "***")
        self.assertEqual(redacted["session_bundle"], "***")
        self.assertEqual(redacted["images"][0], "[redacted-data-url]")
        self.assertIn("[truncated]", redacted["tool"]["args"])

    def test_internal_debug_requests_requires_admin_and_returns_items(self) -> None:
        """The endpoint rejects anonymous calls and returns recorded items to an admin."""
        # NOTE(review): the flattened diff does not show nesting; the requests
        # are assumed to run inside the patch context — confirm.
        with patch.object(main.settings, "api_keys", ["k1"]), patch.object(main.settings, "admin_token", "admin-1"):
            client = TestClient(main.app)
            req_payload = {
                "model": "org_auto",
                "messages": [{"role": "user", "content": "hello"}],
            }
            main._record_debug_request("openai", "/v1/chat/completions", req_payload, _req({"x-request-id": "req-1"}))

            # No Authorization header: expect rejection.
            denied = client.get("/internal/debug/requests")
            self.assertEqual(denied.status_code, 401)

            ok = client.get(
                "/internal/debug/requests?limit=1",
                headers={"Authorization": "Bearer admin-1"},
            )
            self.assertEqual(ok.status_code, 200)
            data = ok.json()
            self.assertTrue(data["ok"])
            self.assertEqual(data["count"], 1)
            self.assertEqual(data["items"][0]["protocol"], "openai")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
||||
@@ -5,6 +5,7 @@ import sys
|
||||
import types
|
||||
import unittest
|
||||
import asyncio
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import AsyncMock, patch
|
||||
|
||||
|
||||
@@ -1251,7 +1252,7 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
|
||||
self.assertIn('"type": "tool_result"', body)
|
||||
self.assertIn('"stop_reason": "end_turn"', body)
|
||||
|
||||
async def test_openai_non_stream_forwards_tool_config_when_enabled(self) -> None:
|
||||
async def test_openai_non_stream_uses_emulation_instead_of_forwarding_tool_config(self) -> None:
|
||||
spy_client = _SpyClient(
|
||||
stream_events=[], complete_result={"text": "ok", "toolEvents": []}
|
||||
)
|
||||
@@ -1279,13 +1280,10 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
|
||||
await main.v1_chat_completions(req, _make_request("/v1/chat/completions"))
|
||||
|
||||
self.assertIn("tool_config", spy_client.last_complete_kwargs)
|
||||
cfg = spy_client.last_complete_kwargs["tool_config"]
|
||||
self.assertEqual(cfg["provider"], "openai")
|
||||
self.assertEqual(len(cfg["tools"]), 1)
|
||||
self.assertIsInstance(cfg["tool_choice"], dict)
|
||||
self.assertIsNone(spy_client.last_complete_kwargs["tool_config"])
|
||||
self.assertEqual(spy_client.last_complete_args[2], "agent")
|
||||
|
||||
async def test_openai_stream_forwards_tool_config_when_enabled(self) -> None:
|
||||
async def test_openai_stream_uses_emulation_instead_of_forwarding_tool_config(self) -> None:
|
||||
spy_client = _SpyClient(
|
||||
stream_events=[{"type": "text", "text": "ok"}], complete_result={}
|
||||
)
|
||||
@@ -1316,10 +1314,7 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
|
||||
await _collect_stream(response)
|
||||
|
||||
self.assertIn("tool_config", spy_client.last_stream_kwargs)
|
||||
cfg = spy_client.last_stream_kwargs["tool_config"]
|
||||
self.assertEqual(cfg["provider"], "openai")
|
||||
self.assertEqual(len(cfg["tools"]), 1)
|
||||
self.assertIsInstance(cfg["tool_choice"], dict)
|
||||
self.assertIsNone(spy_client.last_stream_kwargs["tool_config"])
|
||||
self.assertEqual(spy_client.last_stream_args[2], "agent")
|
||||
|
||||
async def test_openai_non_stream_does_not_forward_tool_config_when_disabled(
|
||||
@@ -1355,7 +1350,7 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
|
||||
self.assertIsNone(spy_client.last_complete_kwargs["tool_config"])
|
||||
self.assertEqual(spy_client.last_complete_args[2], "agent")
|
||||
|
||||
async def test_openai_non_stream_filters_tools_by_allowlist(self) -> None:
|
||||
async def test_openai_non_stream_filters_tools_by_allowlist_before_emulation(self) -> None:
|
||||
spy_client = _SpyClient(
|
||||
stream_events=[], complete_result={"text": "ok", "toolEvents": []}
|
||||
)
|
||||
@@ -1386,11 +1381,9 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
|
||||
):
|
||||
await main.v1_chat_completions(req, _make_request("/v1/chat/completions"))
|
||||
|
||||
cfg = spy_client.last_complete_kwargs["tool_config"]
|
||||
self.assertEqual(
|
||||
[tool["function"]["name"] for tool in cfg["tools"]], ["lookup"]
|
||||
)
|
||||
self.assertEqual(cfg["tool_choice"], req.tool_choice)
|
||||
prompt = spy_client.last_complete_args[0]
|
||||
self.assertIn("lookup(", prompt)
|
||||
self.assertNotIn("write_file(", prompt)
|
||||
|
||||
async def test_openai_non_stream_rejects_forced_tool_outside_allowlist(
|
||||
self,
|
||||
@@ -1579,7 +1572,7 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
|
||||
self.assertEqual(openai_spy.last_complete_args[2], "chat")
|
||||
self.assertEqual(anthropic_spy.last_complete_args[2], "chat")
|
||||
|
||||
async def test_anthropic_stream_forwards_tool_config_when_enabled(self) -> None:
|
||||
async def test_anthropic_stream_uses_emulation_instead_of_forwarding_tool_config(self) -> None:
|
||||
spy_client = _SpyClient(
|
||||
stream_events=[{"type": "text", "text": "ok"}], complete_result={}
|
||||
)
|
||||
@@ -1619,9 +1612,7 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
|
||||
await _collect_stream(response)
|
||||
|
||||
self.assertIn("tool_config", spy_client.last_stream_kwargs)
|
||||
cfg = spy_client.last_stream_kwargs["tool_config"]
|
||||
self.assertEqual(cfg["provider"], "anthropic")
|
||||
self.assertEqual(len(cfg["tools"]), 1)
|
||||
self.assertIsNone(spy_client.last_stream_kwargs["tool_config"])
|
||||
self.assertEqual(spy_client.last_stream_args[2], "agent")
|
||||
|
||||
async def test_anthropic_non_stream_does_not_forward_tool_config_when_disabled(
|
||||
@@ -1710,12 +1701,10 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
|
||||
)
|
||||
|
||||
self.assertIn("tool_config", spy_client.last_complete_kwargs)
|
||||
cfg = spy_client.last_complete_kwargs["tool_config"]
|
||||
self.assertEqual(cfg["provider"], "anthropic")
|
||||
self.assertEqual(len(cfg["tools"]), 1)
|
||||
self.assertIsNone(spy_client.last_complete_kwargs["tool_config"])
|
||||
self.assertEqual(spy_client.last_complete_args[2], "agent")
|
||||
|
||||
async def test_anthropic_non_stream_filters_tools_by_allowlist(self) -> None:
|
||||
async def test_anthropic_non_stream_filters_tools_by_allowlist_before_emulation(self) -> None:
|
||||
spy_client = _SpyClient(
|
||||
stream_events=[], complete_result={"text": "ok", "toolEvents": []}
|
||||
)
|
||||
@@ -1760,9 +1749,9 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
|
||||
),
|
||||
)
|
||||
|
||||
cfg = spy_client.last_complete_kwargs["tool_config"]
|
||||
self.assertEqual([tool["name"] for tool in cfg["tools"]], ["lookup"])
|
||||
self.assertEqual(cfg["tool_choice"], req.tool_choice)
|
||||
prompt = spy_client.last_complete_args[0]
|
||||
self.assertIn("lookup(", prompt)
|
||||
self.assertNotIn("write_file(", prompt)
|
||||
|
||||
async def test_anthropic_non_stream_rejects_forced_tool_outside_allowlist(
|
||||
self,
|
||||
@@ -2183,6 +2172,201 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
|
||||
self.assertIn('{"temperature":"22C"}', prompt)
|
||||
self.assertIn("Assistant:", prompt)
|
||||
|
||||
async def test_openai_assistant_tool_calls_are_projected_into_emulation_prompt(self) -> None:
    # An earlier assistant turn that carries OpenAI-style ``tool_calls`` must
    # show up in the prompt handed to the upstream client, both as its plain
    # text and as a rendered tool invocation.
    upstream_result = {
        "text": "done",
        "toolEvents": [],
        "sessionId": "sess-emulated-tool-history",
    }
    spy_client = _SpyClient(stream_events=[], complete_result=upstream_result)

    prior_tool_call = {
        "id": "call_1",
        "type": "function",
        "function": {
            "name": "fetch_weather",
            "arguments": '{"city":"Hangzhou"}',
        },
    }
    assistant_turn = {
        "role": "assistant",
        "content": "I will check that",
        "tool_calls": [prior_tool_call],
    }
    weather_tool = {
        "type": "function",
        "function": {
            "name": "fetch_weather",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }
    req = ChatCompletionsRequest(
        model="org_auto",
        messages=[assistant_turn, {"role": "user", "content": "continue"}],
        stream=False,
        tools=[weather_tool],
    )

    # Replace the pool, guard, login check and stats recorder on ``main``
    # with fakes so the handler talks only to the spy client.
    with (
        patch.object(main, "pool", _FakePool(_FakeInstance(spy_client))),
        patch.object(main, "chat_guard", _FakeGuard()),
        patch.object(
            main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})
        ),
        patch.object(
            main.stats_collector, "record_chat", AsyncMock(return_value=None)
        ),
    ):
        await main.v1_chat_completions(req, _make_request("/v1/chat/completions"))

    # The first positional argument recorded by the spy is treated as the prompt.
    prompt = spy_client.last_complete_args[0]
    expected_fragments = (
        "I will check that",
        '"tool": "fetch_weather"',
        '"city": "Hangzhou"',
    )
    for fragment in expected_fragments:
        self.assertIn(fragment, prompt)
|
||||
|
||||
async def test_openai_emulation_prompt_includes_proxy_tool_guidance(self) -> None:
    # When tools are supplied on an OpenAI chat request, the prompt sent
    # upstream is expected to contain the fixed guidance sections checked below.
    spy_client = _SpyClient(
        stream_events=[],
        complete_result={"text": "done", "toolEvents": [], "sessionId": "sess-guidance"},
    )

    read_file_tool = {
        "type": "function",
        "function": {
            "name": "read_file",
            "description": "Read a file",
            "parameters": {
                "type": "object",
                "properties": {"path": {"type": "string"}},
                "required": ["path"],
            },
        },
    }
    bash_tool = {
        "type": "function",
        "function": {
            "name": "bash",
            "parameters": {
                "type": "object",
                "properties": {"command": {"type": "string"}},
            },
        },
    }
    req = ChatCompletionsRequest(
        model="org_auto",
        messages=[{"role": "user", "content": "inspect README"}],
        stream=False,
        tools=[read_file_tool, bash_tool],
    )

    # Swap the collaborators on ``main`` for fakes so only the spy client
    # observes the outgoing prompt.
    with (
        patch.object(main, "pool", _FakePool(_FakeInstance(spy_client))),
        patch.object(main, "chat_guard", _FakeGuard()),
        patch.object(
            main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})
        ),
        patch.object(
            main.stats_collector, "record_chat", AsyncMock(return_value=None)
        ),
    ):
        await main.v1_chat_completions(req, _make_request("/v1/chat/completions"))

    prompt = spy_client.last_complete_args[0]
    guidance_markers = (
        "DIRECT tool access inside an IDE",
        "Tool routing guide:",
        "Read a specific local file or code path: use read_file.",
        "Core tool syntax examples",
        "Coding and file-work discipline:",
        "NEVER say that tools are unavailable",
    )
    for marker in guidance_markers:
        self.assertIn(marker, prompt)
|
||||
|
||||
async def test_anthropic_tool_history_is_projected_into_emulation_prompt(self) -> None:
    # Anthropic-style history (assistant ``tool_use`` followed by a user
    # ``tool_result``) must be visible in the prompt handed to the upstream
    # client: the assistant text, the tool invocation, and the tool result.
    upstream_result = {
        "text": "done",
        "toolEvents": [],
        "sessionId": "sess-anthropic-history",
    }
    spy_client = _SpyClient(stream_events=[], complete_result=upstream_result)

    assistant_turn = {
        "role": "assistant",
        "content": [
            {"type": "text", "text": "I will check"},
            {
                "type": "tool_use",
                "id": "toolu_1",
                "name": "fetch_weather",
                "input": {"city": "Hangzhou"},
            },
        ],
    }
    tool_result_turn = {
        "role": "user",
        "content": [
            {
                "type": "tool_result",
                "tool_use_id": "toolu_1",
                "content": '{"temperature":"22C"}',
            }
        ],
    }
    weather_tool = {
        "name": "fetch_weather",
        "input_schema": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    }
    req = AnthropicMessagesRequest(
        model="claude-3-5-sonnet-20241022",
        max_tokens=128,
        messages=[
            assistant_turn,
            tool_result_turn,
            {"role": "user", "content": "continue"},
        ],
        stream=False,
        tools=[weather_tool],
    )
    auth_headers = {
        "x-api-key": "test-key",
        "anthropic-version": "2023-06-01",
    }

    # Collaborators on ``main`` are replaced with fakes; ``api_keys`` is
    # pinned so the key sent in ``auth_headers`` matches the configured one.
    with (
        patch.object(main, "pool", _FakePool(_FakeInstance(spy_client))),
        patch.object(main, "chat_guard", _FakeGuard()),
        patch.object(
            main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})
        ),
        patch.object(
            main.stats_collector, "record_chat", AsyncMock(return_value=None)
        ),
        patch.object(main.settings, "api_keys", ["test-key"]),
    ):
        await main.v1_messages(
            req,
            _make_request("/v1/messages", headers=auth_headers),
        )

    prompt = spy_client.last_complete_args[0]
    expected_fragments = (
        "I will check",
        '"tool": "fetch_weather"',
        '"city": "Hangzhou"',
        "Tool result:",
        '{"temperature":"22C"}',
    )
    for fragment in expected_fragments:
        self.assertIn(fragment, prompt)
|
||||
|
||||
async def test_anthropic_non_stream_synthesizes_tool_use_from_json_action_block(
|
||||
self,
|
||||
) -> None:
|
||||
@@ -2434,6 +2618,177 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
|
||||
self.assertEqual(detail["error"]["message"], "invalid upstream response")
|
||||
|
||||
|
||||
class CapabilitiesEndpointTests(unittest.IsolatedAsyncioTestCase):
    """Tests for the ``/capabilities`` and ``/v1/capabilities`` handlers."""

    async def test_capabilities_payload_shape(self) -> None:
        # Pin every setting the payload is checked against, then inspect the
        # JSON body returned by the unauthenticated handler.
        pinned_settings = {
            "tool_forward_enabled": True,
            "tool_allowlist": ["lookup"],
            "session_reuse_enabled": True,
            "session_cache_max_entries": 123,
            "session_cache_ttl_sec": 45.0,
            "instance_count": 2,
            "default_model": "org_auto",
            "default_ask_mode": "chat",
            "api_keys": ["test-key"],
            "admin_token": "adm",
            "metrics_public": False,
        }
        with patch.multiple(main.settings, **pinned_settings):
            response = await main.capabilities()

        self.assertEqual(response.status_code, 200)
        payload = json.loads(response.body)
        self.assertEqual(payload["service"], "lingma-openai-gateway")
        for section in ("protocols", "features"):
            self.assertIn(section, payload)

        protocols = payload["protocols"]
        self.assertTrue(protocols["openai"]["chat_completions"])
        self.assertTrue(protocols["anthropic"]["messages"])
        self.assertTrue(protocols["openai"]["request_tools_forwarded"])

        features = payload["features"]
        self.assertEqual(features["tooling"]["allowlist"], ["lookup"])
        self.assertEqual(features["pool"]["configured_instance_count"], 2)
        self.assertTrue(features["auth"]["v1_requires_auth"])

    async def test_v1_capabilities_auth_guard_requires_authentication(self) -> None:
        # A request carrying only ``anthropic-version`` (no API key) must be
        # rejected by the Anthropic auth guard.
        keyless_headers = {"anthropic-version": "2023-06-01"}
        with (
            patch.object(main.settings, "api_keys", ["test-key"]),
            self.assertRaises(main.AnthropicAuthError) as ctx,
        ):
            main.anthropic_auth_guard(
                _make_request("/v1/capabilities", headers=keyless_headers)
            )

        self.assertEqual(ctx.exception.status_code, 401)

    async def test_v1_capabilities_returns_payload_with_auth(self) -> None:
        # With a matching key the guard passes, and the payload reflects the
        # disabled ``tool_forward_enabled`` setting.
        keyed_headers = {"x-api-key": "test-key", "anthropic-version": "2023-06-01"}
        with patch.multiple(
            main.settings,
            api_keys=["test-key"],
            tool_forward_enabled=False,
        ):
            main.anthropic_auth_guard(
                _make_request("/v1/capabilities", headers=keyed_headers)
            )
            response = await main.v1_capabilities()

        self.assertEqual(response.status_code, 200)
        payload = json.loads(response.body)
        self.assertFalse(payload["protocols"]["openai"]["request_tools_forwarded"])
|
||||
|
||||
|
||||
|
||||
class AdminIntrospectionEndpointTests(unittest.IsolatedAsyncioTestCase):
    """Tests for the admin-only ``/internal/*`` introspection handlers."""

    def _assert_admin_guard_rejects_wrong_token(self, path: str) -> None:
        # Shared body for the "requires admin token" tests: with an admin
        # token configured, a different bearer token must raise a 401.
        with (
            patch.object(main.settings, "api_keys", ["api-key"]),
            patch.object(main.settings, "admin_token", "admin-secret"),
        ):
            with self.assertRaises(main.HTTPException) as ctx:
                main.admin_auth_guard(
                    _make_request(
                        path,
                        headers={"authorization": "Bearer wrong-token"},
                    )
                )

        self.assertEqual(ctx.exception.status_code, 401)

    async def test_internal_effective_config_requires_admin_token(self) -> None:
        self._assert_admin_guard_rejects_wrong_token("/internal/effective-config")

    async def test_internal_effective_config_redacts_secrets(self) -> None:
        # Secrets (API keys, tokens, account password and session bundle) must
        # come back masked as "***"; non-secret fields must pass through.
        with (
            patch.object(main.settings, "api_keys", ["api-key-1", "api-key-2"]),
            patch.object(main.settings, "admin_token", "admin-secret"),
            patch.object(main.settings, "metrics_token", "metrics-secret"),
            patch.object(main.settings, "default_model", "org_auto"),
            patch.object(main.settings, "tool_forward_enabled", True),
            patch.object(main.settings, "session_reuse_enabled", True),
            patch.object(main.settings, "metrics_public", False),
            patch.object(main.settings, "auto_login_enabled", True),
            patch.object(
                main.settings,
                "accounts",
                [
                    SimpleNamespace(
                        username="user-a",
                        password="pass-a",
                        session_bundle_b64="bundle-a",
                        session_bundle_file="/secrets/bundle-a.txt",
                    )
                ],
            ),
        ):
            main.admin_auth_guard(
                _make_request(
                    "/internal/effective-config",
                    headers={"authorization": "Bearer admin-secret"},
                )
            )
            response = await main.internal_effective_config()

        self.assertEqual(response.status_code, 200)
        payload = json.loads(response.body)
        settings_payload = payload["settings"]
        self.assertEqual(settings_payload["api_keys"], ["***", "***"])
        self.assertEqual(settings_payload["admin_token"], "***")
        self.assertEqual(settings_payload["metrics_token"], "***")
        self.assertEqual(settings_payload["accounts"][0]["password"], "***")
        self.assertEqual(settings_payload["accounts"][0]["session_bundle_b64"], "***")
        self.assertEqual(settings_payload["accounts"][0]["username"], "user-a")
        self.assertEqual(
            settings_payload["accounts"][0]["session_bundle_file"],
            "/secrets/bundle-a.txt",
        )
        self.assertTrue(payload["feature_flags"]["tool_forward_enabled"])
        self.assertTrue(payload["feature_flags"]["session_reuse_enabled"])

    async def test_internal_debug_requests_redacts_sensitive_fields(self) -> None:
        # The debug log is module-global state. Start from an empty log and
        # make sure the entry recorded here is removed again afterwards, so it
        # cannot leak into tests that run later in the same process.
        main._DEBUG_REQUEST_LOG.clear()
        self.addCleanup(main._DEBUG_REQUEST_LOG.clear)

        main._record_debug_request(
            "openai",
            "/v1/chat/completions",
            {
                "api_key": "secret-key",
                "session_bundle": "bundle-value",
                "image_url": "data:image/png;base64,abcd",
                "tool_calls": [
                    {
                        "function": {
                            # Long enough that the test expects truncation.
                            "arguments": "x" * 3001,
                        }
                    }
                ],
            },
            _make_request("/v1/chat/completions", headers={"x-request-id": "req-123"}),
        )

        response = await main.internal_debug_requests(limit=10)

        self.assertEqual(response.status_code, 200)
        payload = json.loads(response.body)
        self.assertEqual(payload["count"], 1)
        item = payload["items"][0]
        self.assertEqual(item["request_id"], "req-123")
        self.assertEqual(item["body"]["api_key"], "***")
        self.assertEqual(item["body"]["session_bundle"], "***")
        self.assertEqual(item["body"]["image_url"], "[redacted-data-url]")
        self.assertTrue(
            item["body"]["tool_calls"][0]["function"]["arguments"].endswith(
                "... [truncated]"
            )
        )

    async def test_internal_debug_requests_requires_admin_token(self) -> None:
        self._assert_admin_guard_rejects_wrong_token("/internal/debug/requests")
|
||||
|
||||
|
||||
class SessionCacheToolFingerprintTests(unittest.TestCase):
|
||||
def test_build_key_changes_with_tool_config(self) -> None:
|
||||
from app.session_cache import SessionCache
|
||||
|
||||
Reference in New Issue
Block a user