feat: add capability and admin introspection endpoints
Expose capability discovery plus admin-only config and request inspection endpoints so clients and operators can understand gateway behavior without reading code. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
29
README.md
29
README.md
@@ -4,6 +4,8 @@
|
|||||||
|
|
||||||
- OpenAI:`/v1/models`、`/v1/chat/completions`(含 stream)
|
- OpenAI:`/v1/models`、`/v1/chat/completions`(含 stream)
|
||||||
- Anthropic:`/v1/messages`、`/v1/messages/count_tokens`(含 stream)
|
- Anthropic:`/v1/messages`、`/v1/messages/count_tokens`(含 stream)
|
||||||
|
- 能力探测:`/capabilities`、`/v1/capabilities`
|
||||||
|
- 内省端点:`/internal/effective-config`、`/internal/debug/requests`
|
||||||
- 内置:多实例池、会话复用、Prometheus 指标、登录态 bundle 注入
|
- 内置:多实例池、会话复用、Prometheus 指标、登录态 bundle 注入
|
||||||
- 工具事件桥接:Lingma 上游返回 `tool` 事件时,网关会输出为 OpenAI `tool_calls`(stream/non-stream)和 Anthropic `tool_use` / `tool_result`(stream/non-stream);请求侧 `tools` / `tool_choice` 仅在 `TOOL_FORWARD_ENABLED=true` 时透传(默认开启,可显式关闭)
|
- 工具事件桥接:Lingma 上游返回 `tool` 事件时,网关会输出为 OpenAI `tool_calls`(stream/non-stream)和 Anthropic `tool_use` / `tool_result`(stream/non-stream);请求侧 `tools` / `tool_choice` 仅在 `TOOL_FORWARD_ENABLED=true` 时透传(默认开启,可显式关闭)
|
||||||
- 工具模拟回退:当 Lingma 未稳定外显原生 `tool/*` 事件时,网关会把注入后的 `json action` / `#Tool Call` 等动作文本归一化为 OpenAI `tool_calls`,并支持 tool result continuation
|
- 工具模拟回退:当 Lingma 未稳定外显原生 `tool/*` 事件时,网关会把注入后的 `json action` / `#Tool Call` 等动作文本归一化为 OpenAI `tool_calls`,并支持 tool result continuation
|
||||||
@@ -56,6 +58,7 @@ API_KEY=$(grep '^API_KEYS=' .env | cut -d= -f2 | cut -d, -f1)
|
|||||||
curl -s "http://127.0.0.1:${PORT}/healthz"
|
curl -s "http://127.0.0.1:${PORT}/healthz"
|
||||||
curl -s "http://127.0.0.1:${PORT}/v1/models" \
|
curl -s "http://127.0.0.1:${PORT}/v1/models" \
|
||||||
-H "Authorization: Bearer ${API_KEY}"
|
-H "Authorization: Bearer ${API_KEY}"
|
||||||
|
curl -s "http://127.0.0.1:${PORT}/capabilities"
|
||||||
```
|
```
|
||||||
|
|
||||||
---
|
---
|
||||||
@@ -172,6 +175,32 @@ curl -s "http://127.0.0.1:${PORT}/v1/messages/count_tokens" \
|
|||||||
}'
|
}'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### 能力探测
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -s "http://127.0.0.1:${PORT}/capabilities"
|
||||||
|
|
||||||
|
curl -s "http://127.0.0.1:${PORT}/v1/capabilities" \
|
||||||
|
-H "x-api-key: ${API_KEY}" \
|
||||||
|
-H "anthropic-version: 2023-06-01"
|
||||||
|
```
|
||||||
|
|
||||||
|
### 内省端点(admin)
|
||||||
|
|
||||||
|
如果配置了 `ADMIN_TOKEN`,以下端点需要使用该 token;否则会回退复用 `API_KEYS`。
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ADMIN_TOKEN=${ADMIN_TOKEN:-$API_KEY}
|
||||||
|
|
||||||
|
curl -s "http://127.0.0.1:${PORT}/internal/effective-config" \
|
||||||
|
-H "Authorization: Bearer ${ADMIN_TOKEN}"
|
||||||
|
|
||||||
|
curl -s "http://127.0.0.1:${PORT}/internal/debug/requests?limit=5" \
|
||||||
|
-H "Authorization: Bearer ${ADMIN_TOKEN}"
|
||||||
|
```
|
||||||
|
|
||||||
|
> `/internal/debug/requests` 会对 token、session bundle、data URL 图片和超长工具参数做脱敏/截断。
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## 部署与更新
|
## 部署与更新
|
||||||
|
|||||||
368
app/main.py
368
app/main.py
@@ -5,6 +5,7 @@ import hashlib
|
|||||||
import json
|
import json
|
||||||
import time
|
import time
|
||||||
import uuid
|
import uuid
|
||||||
|
from collections import deque
|
||||||
from contextlib import asynccontextmanager
|
from contextlib import asynccontextmanager
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
@@ -15,6 +16,7 @@ from .anthropic_schema import (
|
|||||||
AnthropicMessagesRequest,
|
AnthropicMessagesRequest,
|
||||||
affinity_key_for_anthropic,
|
affinity_key_for_anthropic,
|
||||||
anthropic_to_internal_messages,
|
anthropic_to_internal_messages,
|
||||||
|
flatten_anthropic_content,
|
||||||
)
|
)
|
||||||
from .auth import (
|
from .auth import (
|
||||||
AnthropicAuthError,
|
AnthropicAuthError,
|
||||||
@@ -112,6 +114,8 @@ STREAMING_RESPONSE_HEADERS = {
|
|||||||
"Connection": "keep-alive",
|
"Connection": "keep-alive",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# In-memory ring buffer of redacted request snapshots. Entries are pushed with
# appendleft(), so index 0 is the newest; maxlen=100 makes the deque discard
# the oldest entry once it is full.
_DEBUG_REQUEST_LOG: deque[dict[str, Any]] = deque(maxlen=100)
|
||||||
|
|
||||||
|
|
||||||
def _require_pool() -> LingmaPool:
|
def _require_pool() -> LingmaPool:
|
||||||
if pool is None:
|
if pool is None:
|
||||||
@@ -249,6 +253,63 @@ def _log_auth_posture() -> None:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _safe_setting_value(key: str, value: Any) -> Any:
    """Return ``value`` in a form that is safe to expose for setting ``key``.

    A setting is treated as sensitive when its upper-cased name contains one
    of the markers KEY, TOKEN, PASSWORD, SECRET or BUNDLE.

    Args:
        key: Setting name; matched case-insensitively against the markers.
        value: Raw setting value.

    Returns:
        * ``value`` unchanged when the key is not sensitive.
        * ``None`` when a sensitive setting is unset, so the output can tell
          "not configured" apart from "configured but hidden" (a list-valued
          secret already reveals emptiness as ``[]``).
        * A list of ``"***"`` placeholders (one per element) for list values.
        * ``"***"`` for any other sensitive value.
    """
    sensitive_markers = ("KEY", "TOKEN", "PASSWORD", "SECRET", "BUNDLE")
    key_upper = key.upper()
    if not any(marker in key_upper for marker in sensitive_markers):
        return value
    if value is None:
        # Nothing to hide: an unset secret carries no secret material.
        return None
    if isinstance(value, list):
        # Keep the element count visible but never the elements themselves.
        return ["***" for _ in value]
    return "***"
|
||||||
|
|
||||||
|
|
||||||
|
def _redact_debug_value(path: tuple[str, ...], value: Any) -> Any:
    """Recursively copy ``value`` with secrets and bulky payloads removed.

    Args:
        path: Lower-cased dict keys (and ``"[]"`` for list hops) leading to
            ``value``; pass ``()`` for the root.
        value: Any JSON-like structure (dict / list / scalar).

    Returns:
        A new structure of the same shape in which:

        * strings located under a key containing one of the sensitive markers
          become ``"***"``;
        * ``data:`` URLs (scheme matched case-insensitively) become
          ``"[redacted-data-url]"``;
        * the ``data`` field of a ``{"type": "base64", ...}`` source object
          becomes ``"[redacted-base64-data]"``, so inline images are never
          retained in the debug buffer;
        * strings mentioning "session bundle" become
          ``"[redacted-session-bundle]"``;
        * strings longer than 2048 characters under an ``args`` /
          ``arguments`` key are cut to their first 1024 characters plus a
          ``"... [truncated]"`` suffix.

        Non-string scalars are returned unchanged.
    """
    if isinstance(value, dict):
        redacted = {
            k: _redact_debug_value(path + (str(k).lower(),), v)
            for k, v in value.items()
        }
        # Base64 "source" objects carry the raw payload without a "data:"
        # prefix, so the string-level data-URL check below cannot catch them.
        if value.get("type") == "base64" and isinstance(value.get("data"), str):
            redacted["data"] = "[redacted-base64-data]"
        return redacted
    if isinstance(value, list):
        return [_redact_debug_value(path + ("[]",), item) for item in value]
    if not isinstance(value, str):
        return value

    sensitive_markers = (
        "authorization",
        "x-api-key",
        "api_key",
        "token",
        "password",
        "secret",
        "session_bundle",
    )
    lowered_path = "/".join(path)
    if any(marker in lowered_path for marker in sensitive_markers):
        return "***"
    # URI schemes are case-insensitive, so "DATA:" must be redacted as well.
    if value[:5].lower() == "data:":
        return "[redacted-data-url]"
    if "session bundle" in value.lower():
        return "[redacted-session-bundle]"
    if len(value) > 2048 and any(part in {"args", "arguments"} for part in path):
        return value[:1024] + "... [truncated]"
    return value
|
||||||
|
|
||||||
|
|
||||||
|
def _record_debug_request(protocol: str, path: str, body: dict[str, Any], request: Request) -> None:
    """Push a redacted snapshot of an incoming request onto the debug buffer.

    Args:
        protocol: Label for the API flavour that received the request.
        path: Route path the request was served on.
        body: JSON-serialisable request body; it is passed through
            ``_redact_debug_value`` before being stored.
        request: Incoming request; only its ``x-request-id`` header is read
            (empty string when absent).
    """
    snapshot = {
        "timestamp": int(time.time()),
        "protocol": protocol,
        "path": path,
        "request_id": request.headers.get("x-request-id", ""),
        "body": _redact_debug_value((), body),
    }
    # Newest entry goes to the front of the buffer.
    _DEBUG_REQUEST_LOG.appendleft(snapshot)
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/internal/debug/requests", dependencies=[Depends(admin_auth_guard)])
async def internal_debug_requests(limit: int = 20):
    """Return the most recent recorded request snapshots, newest first.

    Args:
        limit: Requested number of entries; clamped into the range 1..100.

    Returns:
        A JSON response with ``ok``, ``count`` (number of items returned) and
        ``items`` (the snapshots themselves).
    """
    safe_limit = max(1, min(limit, 100))
    recent = list(_DEBUG_REQUEST_LOG)[:safe_limit]
    payload = {
        "ok": True,
        "count": len(recent),
        "items": recent,
    }
    return JSONResponse(content=payload)
|
||||||
|
|
||||||
|
|
||||||
@app.get("/healthz")
|
@app.get("/healthz")
|
||||||
async def healthz():
|
async def healthz():
|
||||||
if pool is None:
|
if pool is None:
|
||||||
@@ -267,6 +328,62 @@ async def healthz():
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _capabilities_payload() -> dict[str, Any]:
    """Assemble the capability document served by the capabilities routes.

    The document is built from ``app.version`` and the current ``settings``
    values, and is grouped into ``protocols`` (per-API feature switches) and
    ``features`` (session reuse, tooling, pool and auth posture).
    """
    tools_forwarded = settings.tool_forward_enabled

    openai_caps = {
        "models": True,
        "chat_completions": True,
        "responses": True,
        "streaming": True,
        "response_tool_calls": True,
        "request_tools_forwarded": tools_forwarded,
    }
    anthropic_caps = {
        "messages": True,
        "count_tokens": True,
        "streaming": True,
        "response_tool_use": True,
        "request_tools_forwarded": tools_forwarded,
    }

    session_reuse = {
        "enabled": settings.session_reuse_enabled,
        "cache_max_entries": settings.session_cache_max_entries,
        "cache_ttl_sec": settings.session_cache_ttl_sec,
    }
    tooling = {
        "forward_enabled": tools_forwarded,
        "allowlist": settings.tool_allowlist,
        "emulation_bridge_enabled": True,
    }
    pool_info = {
        "configured_instance_count": settings.instance_count,
        "default_model": settings.default_model,
        "default_ask_mode": settings.default_ask_mode,
    }
    # Only booleans are exposed here, never the key or token values.
    auth_info = {
        "v1_requires_auth": bool(settings.api_keys),
        "admin_token_configured": bool(settings.admin_token),
        "metrics_public": settings.metrics_public,
    }

    return {
        "service": "lingma-openai-gateway",
        "version": app.version,
        "protocols": {
            "openai": openai_caps,
            "anthropic": anthropic_caps,
        },
        "features": {
            "session_reuse": session_reuse,
            "tooling": tooling,
            "pool": pool_info,
            "auth": auth_info,
        },
    }
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/capabilities")
async def capabilities():
    """Serve the capability document; the route declares no auth dependency."""
    payload = _capabilities_payload()
    return JSONResponse(content=payload)
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/v1/capabilities", dependencies=[Depends(anthropic_auth_guard)])
async def v1_capabilities():
    """Serve the capability document behind ``anthropic_auth_guard``."""
    payload = _capabilities_payload()
    return JSONResponse(content=payload)
|
||||||
|
|
||||||
|
|
||||||
async def _ensure_instance_logged_in(inst: PoolInstance) -> dict:
|
async def _ensure_instance_logged_in(inst: PoolInstance) -> dict:
|
||||||
client = inst.client
|
client = inst.client
|
||||||
auto_login = inst.auto_login
|
auto_login = inst.auto_login
|
||||||
@@ -433,6 +550,75 @@ def _messages_to_prompt(messages: list[dict]) -> str:
|
|||||||
return "\n".join(parts).strip()
|
return "\n".join(parts).strip()
|
||||||
|
|
||||||
|
|
||||||
|
def _tool_action_block(name: str, arguments: dict[str, Any]) -> str:
    """Render one tool invocation as a fenced ``json action`` block.

    Args:
        name: Tool name, written to the ``tool`` field.
        arguments: Tool arguments, written to the ``parameters`` field.

    Returns:
        The fenced block: an opening ``json action`` fence, the pretty-printed
        JSON object (non-ASCII kept as-is), and a closing fence.
    """
    payload = json.dumps(
        {"tool": name, "parameters": arguments}, ensure_ascii=False, indent=2
    )
    return "```json action\n" + payload + "\n```"


def _assistant_tool_calls_to_emulation_text(tool_calls: Any) -> str:
    """Project an assistant message's ``tool_calls`` into action-block text.

    Both the nested shape (``{"function": {"name", "arguments"}}``) and the
    flat shape (``{"name", "arguments"}``) are accepted. Entries that are not
    dicts or have no usable name are skipped.

    Args:
        tool_calls: Expected to be a list of tool-call dicts; anything else
            yields an empty string.

    Returns:
        One fenced ``json action`` block per usable call, joined by blank
        lines, or ``""`` when there is nothing to render.
    """
    if not isinstance(tool_calls, list):
        return ""

    blocks: list[str] = []
    for item in tool_calls:
        if not isinstance(item, dict):
            continue
        fn = item.get("function") if isinstance(item.get("function"), dict) else None
        name = str((fn or {}).get("name") or item.get("name") or "").strip()
        if not name:
            continue

        # Read the arguments from the same place the name may come from, so a
        # flat-shaped call keeps its parameters instead of losing them.
        arguments = (fn or item).get("arguments")
        if isinstance(arguments, str):
            try:
                arguments = json.loads(arguments)
            except (ValueError, RecursionError):
                # Not valid JSON (or pathologically nested): keep the raw text
                # so the history still shows what was sent.
                arguments = {"raw": arguments}
        if not isinstance(arguments, dict):
            arguments = {}

        blocks.append(_tool_action_block(name, arguments))
    return "\n\n".join(blocks)
|
||||||
|
|
||||||
|
|
||||||
|
def _anthropic_flattened_tool_history_to_emulation_text(text: str) -> str:
    """Rewrite flattened ``[tool_use]`` / ``[tool_result]`` lines for emulation.

    Each line of ``text`` is handled independently:

    * ``[tool_use] <json>`` becomes a ``json action`` block when the JSON is
      an object with a non-empty ``name`` and a dict ``input``; otherwise the
      line is kept as it was.
    * ``[tool_result] <text>`` is passed through ``action_output_prompt``
      with no tool-call id.
    * Any other line is kept unchanged.

    Empty lines (and empty conversions) are dropped, and the joined result is
    stripped of surrounding whitespace.
    """
    if not text:
        return ""

    use_tag = "[tool_use]"
    result_tag = "[tool_result]"

    def _project(raw_line: str) -> str:
        body = raw_line.strip()
        if body.startswith(use_tag):
            try:
                decoded = json.loads(body[len(use_tag):].strip())
            except Exception:
                return raw_line
            if not isinstance(decoded, dict):
                return raw_line
            tool_name = str(decoded.get("name") or "").strip()
            tool_input = decoded.get("input")
            if tool_name and isinstance(tool_input, dict):
                return _tool_action_block(tool_name, tool_input)
            return raw_line
        if body.startswith(result_tag):
            return action_output_prompt(None, body[len(result_tag):].strip())
        return raw_line

    converted = [_project(raw_line) for raw_line in text.splitlines()]
    return "\n".join(piece for piece in converted if piece).strip()
|
||||||
|
|
||||||
|
|
||||||
def _messages_to_emulation_prompt(
|
def _messages_to_emulation_prompt(
|
||||||
messages: list[dict[str, Any]],
|
messages: list[dict[str, Any]],
|
||||||
*,
|
*,
|
||||||
@@ -446,6 +632,10 @@ def _messages_to_emulation_prompt(
|
|||||||
if role in {"system", "developer"}:
|
if role in {"system", "developer"}:
|
||||||
continue
|
continue
|
||||||
text = flatten_content(message.get("content"))
|
text = flatten_content(message.get("content"))
|
||||||
|
if role == "assistant" and message.get("tool_calls"):
|
||||||
|
projected = _assistant_tool_calls_to_emulation_text(message.get("tool_calls"))
|
||||||
|
if projected:
|
||||||
|
text = "\n\n".join(part for part in [text, projected] if part)
|
||||||
if role == "tool":
|
if role == "tool":
|
||||||
text = action_output_prompt(message.get("tool_call_id"), text)
|
text = action_output_prompt(message.get("tool_call_id"), text)
|
||||||
role = "user"
|
role = "user"
|
||||||
@@ -472,6 +662,22 @@ def _messages_to_emulation_prompt(
|
|||||||
return "\n\n".join(parts).strip()
|
return "\n\n".join(parts).strip()
|
||||||
|
|
||||||
|
|
||||||
|
def _effective_tool_config_for_emulation(
|
||||||
|
tool_config: dict[str, Any] | None,
|
||||||
|
*,
|
||||||
|
use_emulation: bool,
|
||||||
|
) -> dict[str, Any] | None:
|
||||||
|
if use_emulation:
|
||||||
|
return None
|
||||||
|
return tool_config
|
||||||
|
|
||||||
|
|
||||||
|
def _emulation_tools(raw_tools: list[dict[str, Any]] | None, tool_config: dict[str, Any] | None) -> list[dict[str, Any]] | None:
|
||||||
|
if isinstance(tool_config, dict) and isinstance(tool_config.get("tools"), list):
|
||||||
|
return tool_config.get("tools")
|
||||||
|
return raw_tools
|
||||||
|
|
||||||
|
|
||||||
def _anthropic_messages_to_emulation_prompt(
|
def _anthropic_messages_to_emulation_prompt(
|
||||||
messages: list[dict[str, Any]],
|
messages: list[dict[str, Any]],
|
||||||
*,
|
*,
|
||||||
@@ -483,6 +689,10 @@ def _anthropic_messages_to_emulation_prompt(
|
|||||||
for message in messages:
|
for message in messages:
|
||||||
role = str(message.get("role") or "").strip().lower()
|
role = str(message.get("role") or "").strip().lower()
|
||||||
text = str(message.get("content") or "").strip()
|
text = str(message.get("content") or "").strip()
|
||||||
|
if role == "assistant" and "[tool_use]" in text:
|
||||||
|
text = _anthropic_flattened_tool_history_to_emulation_text(text)
|
||||||
|
elif role == "user" and "[tool_result]" in text:
|
||||||
|
text = _anthropic_flattened_tool_history_to_emulation_text(text)
|
||||||
if role == "tool":
|
if role == "tool":
|
||||||
text = action_output_prompt(message.get("tool_call_id"), text)
|
text = action_output_prompt(message.get("tool_call_id"), text)
|
||||||
role = "user"
|
role = "user"
|
||||||
@@ -575,6 +785,7 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
|
|||||||
p = _require_pool()
|
p = _require_pool()
|
||||||
|
|
||||||
messages_dump = [m.model_dump() for m in req.messages]
|
messages_dump = [m.model_dump() for m in req.messages]
|
||||||
|
_record_debug_request("openai", "/v1/chat/completions", req.model_dump(mode="json"), request)
|
||||||
api_key = _extract_api_key(request) or "-"
|
api_key = _extract_api_key(request) or "-"
|
||||||
|
|
||||||
# ------------------------------------------------------------- session reuse
|
# ------------------------------------------------------------- session reuse
|
||||||
@@ -617,9 +828,11 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
|
|||||||
is_reply = execution.is_reply
|
is_reply = execution.is_reply
|
||||||
|
|
||||||
include_usage = _include_usage(req.stream_options)
|
include_usage = _include_usage(req.stream_options)
|
||||||
em_tools = _em_extract_openai_tools(req.tools)
|
emulation_tools = _emulation_tools(req.tools, tool_config)
|
||||||
|
em_tools = _em_extract_openai_tools(emulation_tools)
|
||||||
em_choice = _em_extract_openai_tool_choice(req.tool_choice)
|
em_choice = _em_extract_openai_tool_choice(req.tool_choice)
|
||||||
if _em_has_tool_request(em_tools, em_choice):
|
use_emulation = has_tooling_context
|
||||||
|
if use_emulation:
|
||||||
system_parts = [
|
system_parts = [
|
||||||
flatten_content(m.content)
|
flatten_content(m.content)
|
||||||
for m in req.messages
|
for m in req.messages
|
||||||
@@ -628,9 +841,14 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
|
|||||||
prompt = _messages_to_emulation_prompt(
|
prompt = _messages_to_emulation_prompt(
|
||||||
messages_dump,
|
messages_dump,
|
||||||
system_text="\n\n".join(system_parts),
|
system_text="\n\n".join(system_parts),
|
||||||
tools=req.tools,
|
tools=emulation_tools,
|
||||||
tool_choice=req.tool_choice,
|
tool_choice=req.tool_choice,
|
||||||
)
|
)
|
||||||
|
execution.prompt = prompt
|
||||||
|
effective_tool_config = _effective_tool_config_for_emulation(
|
||||||
|
tool_config,
|
||||||
|
use_emulation=use_emulation,
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
started = await start_execution(
|
started = await start_execution(
|
||||||
@@ -708,7 +926,7 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
|
|||||||
ask_mode,
|
ask_mode,
|
||||||
session_id=cached_session_id,
|
session_id=cached_session_id,
|
||||||
is_reply=is_reply,
|
is_reply=is_reply,
|
||||||
tool_config=tool_config,
|
tool_config=effective_tool_config,
|
||||||
out_meta=_meta,
|
out_meta=_meta,
|
||||||
):
|
):
|
||||||
if _stream_event_type(chunk) == "tool":
|
if _stream_event_type(chunk) == "tool":
|
||||||
@@ -763,6 +981,8 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
|
|||||||
continue
|
continue
|
||||||
buffered_text_parts.append(text)
|
buffered_text_parts.append(text)
|
||||||
completion_tokens_holder["n"] += estimate_tokens(text)
|
completion_tokens_holder["n"] += estimate_tokens(text)
|
||||||
|
if use_emulation:
|
||||||
|
continue
|
||||||
|
|
||||||
full_text = "".join(buffered_text_parts)
|
full_text = "".join(buffered_text_parts)
|
||||||
if req.tools:
|
if req.tools:
|
||||||
@@ -855,9 +1075,6 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
|
|||||||
buffered_text_parts.clear()
|
buffered_text_parts.clear()
|
||||||
yield f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"
|
yield f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"
|
||||||
|
|
||||||
if buffered_text_parts and forced_tool_name and saw_tool_call:
|
|
||||||
buffered_text_parts.clear()
|
|
||||||
|
|
||||||
if buffered_text_parts and req.tools and not saw_tool_call:
|
if buffered_text_parts and req.tools and not saw_tool_call:
|
||||||
merged_text = "".join(buffered_text_parts)
|
merged_text = "".join(buffered_text_parts)
|
||||||
inferred = _infer_tool_event_from_declared_tools(
|
inferred = _infer_tool_event_from_declared_tools(
|
||||||
@@ -924,6 +1141,11 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
|
|||||||
yield f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"
|
yield f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"
|
||||||
buffered_text_parts = [remaining] if remaining else []
|
buffered_text_parts = [remaining] if remaining else []
|
||||||
|
|
||||||
|
if buffered_text_parts and saw_tool_call:
|
||||||
|
text_to_yield = "".join(buffered_text_parts)
|
||||||
|
buffered_text_parts.clear()
|
||||||
|
yield _text_payload(text_to_yield)
|
||||||
|
|
||||||
done_payload = {
|
done_payload = {
|
||||||
"id": completion_id,
|
"id": completion_id,
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
@@ -996,7 +1218,7 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
|
|||||||
protocol="chat",
|
protocol="chat",
|
||||||
execution=execution,
|
execution=execution,
|
||||||
prompt_tokens=prompt_tokens,
|
prompt_tokens=prompt_tokens,
|
||||||
tool_config=tool_config,
|
tool_config=effective_tool_config,
|
||||||
logger=logger,
|
logger=logger,
|
||||||
stats_collector=stats_collector,
|
stats_collector=stats_collector,
|
||||||
session_cache=session_cache,
|
session_cache=session_cache,
|
||||||
@@ -1095,7 +1317,7 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
|
|||||||
ask_mode,
|
ask_mode,
|
||||||
session_id=None,
|
session_id=None,
|
||||||
is_reply=False,
|
is_reply=False,
|
||||||
tool_config=tool_config,
|
tool_config=effective_tool_config,
|
||||||
)
|
)
|
||||||
retry_text = retry_result.get("text") or ""
|
retry_text = retry_result.get("text") or ""
|
||||||
parsed_calls, remaining = parse_action_blocks(retry_text, em_tools)
|
parsed_calls, remaining = parse_action_blocks(retry_text, em_tools)
|
||||||
@@ -1227,6 +1449,7 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
|
|||||||
)
|
)
|
||||||
|
|
||||||
messages_dump = anthropic_to_internal_messages(req)
|
messages_dump = anthropic_to_internal_messages(req)
|
||||||
|
_record_debug_request("anthropic", "/v1/messages", req.model_dump(mode="json"), request)
|
||||||
# Prefer the auth token actually accepted so session-cache bucketing is
|
# Prefer the auth token actually accepted so session-cache bucketing is
|
||||||
# consistent regardless of which auth header style the caller used.
|
# consistent regardless of which auth header style the caller used.
|
||||||
api_key = (
|
api_key = (
|
||||||
@@ -1284,16 +1507,23 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
|
|||||||
model = execution.model
|
model = execution.model
|
||||||
prompt = execution.prompt
|
prompt = execution.prompt
|
||||||
is_reply = execution.is_reply
|
is_reply = execution.is_reply
|
||||||
em_anthropic_tools = _em_extract_anthropic_tools(req.tools)
|
emulation_tools = _emulation_tools(req.tools, tool_config)
|
||||||
|
em_anthropic_tools = _em_extract_anthropic_tools(emulation_tools)
|
||||||
em_anthropic_choice = _em_extract_anthropic_tool_choice(req.tool_choice)
|
em_anthropic_choice = _em_extract_anthropic_tool_choice(req.tool_choice)
|
||||||
if _em_has_tool_request(em_anthropic_tools, em_anthropic_choice):
|
use_emulation = has_tooling_context
|
||||||
|
if use_emulation:
|
||||||
system_text = flatten_anthropic_content(req.system) if req.system else ""
|
system_text = flatten_anthropic_content(req.system) if req.system else ""
|
||||||
prompt = _anthropic_messages_to_emulation_prompt(
|
prompt = _anthropic_messages_to_emulation_prompt(
|
||||||
messages_dump,
|
messages_dump,
|
||||||
system_text=system_text,
|
system_text=system_text,
|
||||||
tools=req.tools,
|
tools=emulation_tools,
|
||||||
tool_choice=req.tool_choice,
|
tool_choice=req.tool_choice,
|
||||||
)
|
)
|
||||||
|
execution.prompt = prompt
|
||||||
|
effective_tool_config = _effective_tool_config_for_emulation(
|
||||||
|
tool_config,
|
||||||
|
use_emulation=use_emulation,
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
started = await start_execution(
|
started = await start_execution(
|
||||||
@@ -1372,7 +1602,7 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
|
|||||||
ask_mode,
|
ask_mode,
|
||||||
session_id=cached_session_id,
|
session_id=cached_session_id,
|
||||||
is_reply=is_reply,
|
is_reply=is_reply,
|
||||||
tool_config=tool_config,
|
tool_config=effective_tool_config,
|
||||||
out_meta=_meta,
|
out_meta=_meta,
|
||||||
):
|
):
|
||||||
if _stream_event_type(chunk) == "tool":
|
if _stream_event_type(chunk) == "tool":
|
||||||
@@ -1703,7 +1933,7 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
|
|||||||
protocol="anthropic",
|
protocol="anthropic",
|
||||||
execution=execution,
|
execution=execution,
|
||||||
prompt_tokens=prompt_tokens,
|
prompt_tokens=prompt_tokens,
|
||||||
tool_config=tool_config,
|
tool_config=effective_tool_config,
|
||||||
logger=logger,
|
logger=logger,
|
||||||
stats_collector=stats_collector,
|
stats_collector=stats_collector,
|
||||||
session_cache=session_cache,
|
session_cache=session_cache,
|
||||||
@@ -1757,10 +1987,8 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
|
|||||||
text = remaining
|
text = remaining
|
||||||
|
|
||||||
if not saw_tool_event and em_anthropic_tools:
|
if not saw_tool_event and em_anthropic_tools:
|
||||||
inferred_call = infer_declared_tool_call_from_text(text, em_anthropic_tools)
|
inferred_calls = infer_tool_calls_from_text(text, em_anthropic_tools)
|
||||||
if inferred_call is None:
|
inferred_call = inferred_calls[0] if inferred_calls else None
|
||||||
inferred_calls = infer_tool_calls_from_text(text, em_anthropic_tools)
|
|
||||||
inferred_call = inferred_calls[0] if inferred_calls else None
|
|
||||||
if inferred_call is not None:
|
if inferred_call is not None:
|
||||||
content_blocks = [
|
content_blocks = [
|
||||||
{
|
{
|
||||||
@@ -1774,7 +2002,7 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
|
|||||||
saw_pending_tool_use = True
|
saw_pending_tool_use = True
|
||||||
text = ""
|
text = ""
|
||||||
|
|
||||||
if not saw_tool_event and em_anthropic_tools:
|
if not saw_tool_event and em_anthropic_tools and not text.strip():
|
||||||
retry_prompt = f"{prompt}\n\n{force_tooling_prompt(em_anthropic_choice)}"
|
retry_prompt = f"{prompt}\n\n{force_tooling_prompt(em_anthropic_choice)}"
|
||||||
retry_result = await inst.client.chat_complete(
|
retry_result = await inst.client.chat_complete(
|
||||||
retry_prompt,
|
retry_prompt,
|
||||||
@@ -1782,53 +2010,7 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
|
|||||||
ask_mode,
|
ask_mode,
|
||||||
session_id=None,
|
session_id=None,
|
||||||
is_reply=False,
|
is_reply=False,
|
||||||
tool_config=tool_config,
|
tool_config=effective_tool_config,
|
||||||
)
|
|
||||||
retry_text = retry_result.get("text") or ""
|
|
||||||
parsed_calls, remaining = parse_action_blocks(retry_text, em_anthropic_tools)
|
|
||||||
if parsed_calls:
|
|
||||||
content_blocks = []
|
|
||||||
if remaining:
|
|
||||||
content_blocks.append({"type": "text", "text": remaining})
|
|
||||||
for call in parsed_calls:
|
|
||||||
content_blocks.append(
|
|
||||||
{
|
|
||||||
"type": "tool_use",
|
|
||||||
"id": call.id,
|
|
||||||
"name": call.name,
|
|
||||||
"input": call.arguments,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
saw_tool_event = True
|
|
||||||
saw_pending_tool_use = True
|
|
||||||
text = remaining
|
|
||||||
else:
|
|
||||||
inferred_call = infer_declared_tool_call_from_text(retry_text, em_anthropic_tools)
|
|
||||||
if inferred_call is None:
|
|
||||||
inferred_calls = infer_tool_calls_from_text(retry_text, em_anthropic_tools)
|
|
||||||
inferred_call = inferred_calls[0] if inferred_calls else None
|
|
||||||
if inferred_call is not None:
|
|
||||||
content_blocks = [
|
|
||||||
{
|
|
||||||
"type": "tool_use",
|
|
||||||
"id": inferred_call.id,
|
|
||||||
"name": inferred_call.name,
|
|
||||||
"input": inferred_call.arguments,
|
|
||||||
}
|
|
||||||
]
|
|
||||||
saw_tool_event = True
|
|
||||||
saw_pending_tool_use = True
|
|
||||||
text = ""
|
|
||||||
|
|
||||||
if not saw_tool_event and em_anthropic_tools and text.strip():
|
|
||||||
retry_prompt = f"{prompt}\n\n{force_tooling_prompt(em_anthropic_choice)}"
|
|
||||||
retry_result = await inst.client.chat_complete(
|
|
||||||
retry_prompt,
|
|
||||||
model,
|
|
||||||
ask_mode,
|
|
||||||
session_id=None,
|
|
||||||
is_reply=False,
|
|
||||||
tool_config=tool_config,
|
|
||||||
)
|
)
|
||||||
retry_text = retry_result.get("text") or ""
|
retry_text = retry_result.get("text") or ""
|
||||||
parsed_calls, remaining = parse_action_blocks(retry_text, em_anthropic_tools)
|
parsed_calls, remaining = parse_action_blocks(retry_text, em_anthropic_tools)
|
||||||
@@ -2090,6 +2272,60 @@ async def internal_stats():
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/internal/effective-config", dependencies=[Depends(admin_auth_guard)])
async def internal_effective_config():
    """Return the gateway's current settings with secret values masked.

    API keys, the metrics/admin tokens, account passwords and inline session
    bundles are passed through ``_safe_setting_value``; all other settings
    are reported as-is. A short ``feature_flags`` summary repeats the main
    boolean switches.
    """
    conf = settings

    accounts = []
    for account in conf.accounts:
        accounts.append(
            {
                "username": account.username,
                "password": _safe_setting_value("password", account.password),
                "session_bundle_b64": _safe_setting_value(
                    "session_bundle_b64", account.session_bundle_b64
                ),
                "session_bundle_file": account.session_bundle_file,
            }
        )

    settings_view = {
        "host": conf.host,
        "port": conf.port,
        "api_keys": _safe_setting_value("api_keys", conf.api_keys),
        "metrics_token": _safe_setting_value("metrics_token", conf.metrics_token),
        "admin_token": _safe_setting_value("admin_token", conf.admin_token),
        "metrics_public": conf.metrics_public,
        "log_level": conf.log_level,
        "gateway_max_in_flight": conf.gateway_max_in_flight,
        "gateway_queue_timeout_sec": conf.gateway_queue_timeout_sec,
        "lingma_bin": conf.lingma_bin,
        "lingma_work_dir": conf.lingma_work_dir,
        "lingma_socket_port": conf.lingma_socket_port,
        "lingma_startup_timeout": conf.lingma_startup_timeout,
        "lingma_rpc_timeout": conf.lingma_rpc_timeout,
        "default_model": conf.default_model,
        "default_ask_mode": conf.default_ask_mode,
        "dedicated_domain_url": conf.dedicated_domain_url,
        "auto_login_enabled": conf.auto_login_enabled,
        "auto_login_headless": conf.auto_login_headless,
        "auto_login_timeout": conf.auto_login_timeout,
        "auto_login_max_retry": conf.auto_login_max_retry,
        "instance_count": conf.instance_count,
        "session_reuse_enabled": conf.session_reuse_enabled,
        "session_cache_max_entries": conf.session_cache_max_entries,
        "session_cache_ttl_sec": conf.session_cache_ttl_sec,
        "tool_forward_enabled": conf.tool_forward_enabled,
        "tool_allowlist": conf.tool_allowlist,
        "accounts": accounts,
    }

    feature_flags = {
        "tool_forward_enabled": conf.tool_forward_enabled,
        "session_reuse_enabled": conf.session_reuse_enabled,
        "metrics_public": conf.metrics_public,
        "auto_login_enabled": conf.auto_login_enabled,
    }

    return JSONResponse(
        content={
            "ok": True,
            "settings": settings_view,
            "feature_flags": feature_flags,
        }
    )
|
||||||
|
|
||||||
|
|
||||||
@app.get("/metrics", dependencies=[Depends(metrics_auth_guard)])
|
@app.get("/metrics", dependencies=[Depends(metrics_auth_guard)])
|
||||||
async def metrics():
|
async def metrics():
|
||||||
base = await stats_collector.prometheus_text()
|
base = await stats_collector.prometheus_text()
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
fastapi==0.115.0
|
fastapi==0.115.0
|
||||||
|
starlette==0.38.6
|
||||||
uvicorn[standard]==0.30.6
|
uvicorn[standard]==0.30.6
|
||||||
websockets==13.1
|
websockets==13.1
|
||||||
pydantic==2.9.2
|
pydantic==2.9.2
|
||||||
|
|||||||
@@ -1,14 +1,37 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import sys
|
||||||
|
import types
|
||||||
import unittest
|
import unittest
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
from fastapi import HTTPException
|
from fastapi import HTTPException
|
||||||
|
from fastapi.testclient import TestClient
|
||||||
from starlette.requests import Request
|
from starlette.requests import Request
|
||||||
|
|
||||||
from app.auth import AnthropicAuthError, require_anthropic_key, require_bearer, require_metrics_access
|
from app.auth import AnthropicAuthError, require_anthropic_key, require_bearer, require_metrics_access
|
||||||
from app.concurrency import BackpressureRejected, InFlightGuard
|
from app.concurrency import BackpressureRejected, InFlightGuard
|
||||||
|
|
||||||
|
_playwright = types.ModuleType("playwright")
|
||||||
|
_playwright_async = types.ModuleType("playwright.async_api")
|
||||||
|
|
||||||
|
|
||||||
|
class _StubPlaywrightTimeoutError(Exception):
    """Placeholder exception installed as ``TimeoutError`` on the stub ``playwright.async_api`` module."""
|
||||||
|
|
||||||
|
|
||||||
|
async def _stub_async_playwright():
|
||||||
|
raise RuntimeError("playwright is stubbed in unit tests")
|
||||||
|
|
||||||
|
|
||||||
|
_playwright_async.TimeoutError = _StubPlaywrightTimeoutError
|
||||||
|
_playwright_async.async_playwright = _stub_async_playwright
|
||||||
|
sys.modules.setdefault("playwright", _playwright)
|
||||||
|
sys.modules.setdefault("playwright.async_api", _playwright_async)
|
||||||
|
|
||||||
|
import app.main as main
|
||||||
|
|
||||||
|
|
||||||
def _req(headers: dict[str, str] | None = None) -> Request:
|
def _req(headers: dict[str, str] | None = None) -> Request:
|
||||||
pairs = []
|
pairs = []
|
||||||
@@ -82,5 +105,48 @@ class AuthAndConcurrencyTests(unittest.IsolatedAsyncioTestCase):
|
|||||||
self.assertEqual(guard.in_flight, 0)
|
self.assertEqual(guard.in_flight, 0)
|
||||||
|
|
||||||
|
|
||||||
|
class DebugRequestRecordingTests(unittest.TestCase):
|
||||||
|
def setUp(self) -> None:
|
||||||
|
main._DEBUG_REQUEST_LOG.clear()
|
||||||
|
|
||||||
|
def test_redacts_sensitive_fields_and_data_urls(self) -> None:
|
||||||
|
body = {
|
||||||
|
"authorization": "Bearer abc",
|
||||||
|
"x-api-key": "secret",
|
||||||
|
"session_bundle": "very-secret",
|
||||||
|
"images": ["data:image/png;base64,ABC"],
|
||||||
|
"tool": {"args": "x" * 3000},
|
||||||
|
}
|
||||||
|
redacted = main._redact_debug_value((), body)
|
||||||
|
|
||||||
|
self.assertEqual(redacted["authorization"], "***")
|
||||||
|
self.assertEqual(redacted["x-api-key"], "***")
|
||||||
|
self.assertEqual(redacted["session_bundle"], "***")
|
||||||
|
self.assertEqual(redacted["images"][0], "[redacted-data-url]")
|
||||||
|
self.assertIn("[truncated]", redacted["tool"]["args"])
|
||||||
|
|
||||||
|
def test_internal_debug_requests_requires_admin_and_returns_items(self) -> None:
|
||||||
|
with patch.object(main.settings, "api_keys", ["k1"]), patch.object(main.settings, "admin_token", "admin-1"):
|
||||||
|
client = TestClient(main.app)
|
||||||
|
req_payload = {
|
||||||
|
"model": "org_auto",
|
||||||
|
"messages": [{"role": "user", "content": "hello"}],
|
||||||
|
}
|
||||||
|
main._record_debug_request("openai", "/v1/chat/completions", req_payload, _req({"x-request-id": "req-1"}))
|
||||||
|
|
||||||
|
denied = client.get("/internal/debug/requests")
|
||||||
|
self.assertEqual(denied.status_code, 401)
|
||||||
|
|
||||||
|
ok = client.get(
|
||||||
|
"/internal/debug/requests?limit=1",
|
||||||
|
headers={"Authorization": "Bearer admin-1"},
|
||||||
|
)
|
||||||
|
self.assertEqual(ok.status_code, 200)
|
||||||
|
data = ok.json()
|
||||||
|
self.assertTrue(data["ok"])
|
||||||
|
self.assertEqual(data["count"], 1)
|
||||||
|
self.assertEqual(data["items"][0]["protocol"], "openai")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import sys
|
|||||||
import types
|
import types
|
||||||
import unittest
|
import unittest
|
||||||
import asyncio
|
import asyncio
|
||||||
|
from types import SimpleNamespace
|
||||||
from unittest.mock import AsyncMock, patch
|
from unittest.mock import AsyncMock, patch
|
||||||
|
|
||||||
|
|
||||||
@@ -1251,7 +1252,7 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
|
|||||||
self.assertIn('"type": "tool_result"', body)
|
self.assertIn('"type": "tool_result"', body)
|
||||||
self.assertIn('"stop_reason": "end_turn"', body)
|
self.assertIn('"stop_reason": "end_turn"', body)
|
||||||
|
|
||||||
async def test_openai_non_stream_forwards_tool_config_when_enabled(self) -> None:
|
async def test_openai_non_stream_uses_emulation_instead_of_forwarding_tool_config(self) -> None:
|
||||||
spy_client = _SpyClient(
|
spy_client = _SpyClient(
|
||||||
stream_events=[], complete_result={"text": "ok", "toolEvents": []}
|
stream_events=[], complete_result={"text": "ok", "toolEvents": []}
|
||||||
)
|
)
|
||||||
@@ -1279,13 +1280,10 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
|
|||||||
await main.v1_chat_completions(req, _make_request("/v1/chat/completions"))
|
await main.v1_chat_completions(req, _make_request("/v1/chat/completions"))
|
||||||
|
|
||||||
self.assertIn("tool_config", spy_client.last_complete_kwargs)
|
self.assertIn("tool_config", spy_client.last_complete_kwargs)
|
||||||
cfg = spy_client.last_complete_kwargs["tool_config"]
|
self.assertIsNone(spy_client.last_complete_kwargs["tool_config"])
|
||||||
self.assertEqual(cfg["provider"], "openai")
|
|
||||||
self.assertEqual(len(cfg["tools"]), 1)
|
|
||||||
self.assertIsInstance(cfg["tool_choice"], dict)
|
|
||||||
self.assertEqual(spy_client.last_complete_args[2], "agent")
|
self.assertEqual(spy_client.last_complete_args[2], "agent")
|
||||||
|
|
||||||
async def test_openai_stream_forwards_tool_config_when_enabled(self) -> None:
|
async def test_openai_stream_uses_emulation_instead_of_forwarding_tool_config(self) -> None:
|
||||||
spy_client = _SpyClient(
|
spy_client = _SpyClient(
|
||||||
stream_events=[{"type": "text", "text": "ok"}], complete_result={}
|
stream_events=[{"type": "text", "text": "ok"}], complete_result={}
|
||||||
)
|
)
|
||||||
@@ -1316,10 +1314,7 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
|
|||||||
await _collect_stream(response)
|
await _collect_stream(response)
|
||||||
|
|
||||||
self.assertIn("tool_config", spy_client.last_stream_kwargs)
|
self.assertIn("tool_config", spy_client.last_stream_kwargs)
|
||||||
cfg = spy_client.last_stream_kwargs["tool_config"]
|
self.assertIsNone(spy_client.last_stream_kwargs["tool_config"])
|
||||||
self.assertEqual(cfg["provider"], "openai")
|
|
||||||
self.assertEqual(len(cfg["tools"]), 1)
|
|
||||||
self.assertIsInstance(cfg["tool_choice"], dict)
|
|
||||||
self.assertEqual(spy_client.last_stream_args[2], "agent")
|
self.assertEqual(spy_client.last_stream_args[2], "agent")
|
||||||
|
|
||||||
async def test_openai_non_stream_does_not_forward_tool_config_when_disabled(
|
async def test_openai_non_stream_does_not_forward_tool_config_when_disabled(
|
||||||
@@ -1355,7 +1350,7 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
|
|||||||
self.assertIsNone(spy_client.last_complete_kwargs["tool_config"])
|
self.assertIsNone(spy_client.last_complete_kwargs["tool_config"])
|
||||||
self.assertEqual(spy_client.last_complete_args[2], "agent")
|
self.assertEqual(spy_client.last_complete_args[2], "agent")
|
||||||
|
|
||||||
async def test_openai_non_stream_filters_tools_by_allowlist(self) -> None:
|
async def test_openai_non_stream_filters_tools_by_allowlist_before_emulation(self) -> None:
|
||||||
spy_client = _SpyClient(
|
spy_client = _SpyClient(
|
||||||
stream_events=[], complete_result={"text": "ok", "toolEvents": []}
|
stream_events=[], complete_result={"text": "ok", "toolEvents": []}
|
||||||
)
|
)
|
||||||
@@ -1386,11 +1381,9 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
|
|||||||
):
|
):
|
||||||
await main.v1_chat_completions(req, _make_request("/v1/chat/completions"))
|
await main.v1_chat_completions(req, _make_request("/v1/chat/completions"))
|
||||||
|
|
||||||
cfg = spy_client.last_complete_kwargs["tool_config"]
|
prompt = spy_client.last_complete_args[0]
|
||||||
self.assertEqual(
|
self.assertIn("lookup(", prompt)
|
||||||
[tool["function"]["name"] for tool in cfg["tools"]], ["lookup"]
|
self.assertNotIn("write_file(", prompt)
|
||||||
)
|
|
||||||
self.assertEqual(cfg["tool_choice"], req.tool_choice)
|
|
||||||
|
|
||||||
async def test_openai_non_stream_rejects_forced_tool_outside_allowlist(
|
async def test_openai_non_stream_rejects_forced_tool_outside_allowlist(
|
||||||
self,
|
self,
|
||||||
@@ -1579,7 +1572,7 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
|
|||||||
self.assertEqual(openai_spy.last_complete_args[2], "chat")
|
self.assertEqual(openai_spy.last_complete_args[2], "chat")
|
||||||
self.assertEqual(anthropic_spy.last_complete_args[2], "chat")
|
self.assertEqual(anthropic_spy.last_complete_args[2], "chat")
|
||||||
|
|
||||||
async def test_anthropic_stream_forwards_tool_config_when_enabled(self) -> None:
|
async def test_anthropic_stream_uses_emulation_instead_of_forwarding_tool_config(self) -> None:
|
||||||
spy_client = _SpyClient(
|
spy_client = _SpyClient(
|
||||||
stream_events=[{"type": "text", "text": "ok"}], complete_result={}
|
stream_events=[{"type": "text", "text": "ok"}], complete_result={}
|
||||||
)
|
)
|
||||||
@@ -1619,9 +1612,7 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
|
|||||||
await _collect_stream(response)
|
await _collect_stream(response)
|
||||||
|
|
||||||
self.assertIn("tool_config", spy_client.last_stream_kwargs)
|
self.assertIn("tool_config", spy_client.last_stream_kwargs)
|
||||||
cfg = spy_client.last_stream_kwargs["tool_config"]
|
self.assertIsNone(spy_client.last_stream_kwargs["tool_config"])
|
||||||
self.assertEqual(cfg["provider"], "anthropic")
|
|
||||||
self.assertEqual(len(cfg["tools"]), 1)
|
|
||||||
self.assertEqual(spy_client.last_stream_args[2], "agent")
|
self.assertEqual(spy_client.last_stream_args[2], "agent")
|
||||||
|
|
||||||
async def test_anthropic_non_stream_does_not_forward_tool_config_when_disabled(
|
async def test_anthropic_non_stream_does_not_forward_tool_config_when_disabled(
|
||||||
@@ -1710,12 +1701,10 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
|
|||||||
)
|
)
|
||||||
|
|
||||||
self.assertIn("tool_config", spy_client.last_complete_kwargs)
|
self.assertIn("tool_config", spy_client.last_complete_kwargs)
|
||||||
cfg = spy_client.last_complete_kwargs["tool_config"]
|
self.assertIsNone(spy_client.last_complete_kwargs["tool_config"])
|
||||||
self.assertEqual(cfg["provider"], "anthropic")
|
|
||||||
self.assertEqual(len(cfg["tools"]), 1)
|
|
||||||
self.assertEqual(spy_client.last_complete_args[2], "agent")
|
self.assertEqual(spy_client.last_complete_args[2], "agent")
|
||||||
|
|
||||||
async def test_anthropic_non_stream_filters_tools_by_allowlist(self) -> None:
|
async def test_anthropic_non_stream_filters_tools_by_allowlist_before_emulation(self) -> None:
|
||||||
spy_client = _SpyClient(
|
spy_client = _SpyClient(
|
||||||
stream_events=[], complete_result={"text": "ok", "toolEvents": []}
|
stream_events=[], complete_result={"text": "ok", "toolEvents": []}
|
||||||
)
|
)
|
||||||
@@ -1760,9 +1749,9 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
|
|||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
cfg = spy_client.last_complete_kwargs["tool_config"]
|
prompt = spy_client.last_complete_args[0]
|
||||||
self.assertEqual([tool["name"] for tool in cfg["tools"]], ["lookup"])
|
self.assertIn("lookup(", prompt)
|
||||||
self.assertEqual(cfg["tool_choice"], req.tool_choice)
|
self.assertNotIn("write_file(", prompt)
|
||||||
|
|
||||||
async def test_anthropic_non_stream_rejects_forced_tool_outside_allowlist(
|
async def test_anthropic_non_stream_rejects_forced_tool_outside_allowlist(
|
||||||
self,
|
self,
|
||||||
@@ -2183,6 +2172,201 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
|
|||||||
self.assertIn('{"temperature":"22C"}', prompt)
|
self.assertIn('{"temperature":"22C"}', prompt)
|
||||||
self.assertIn("Assistant:", prompt)
|
self.assertIn("Assistant:", prompt)
|
||||||
|
|
||||||
|
async def test_openai_assistant_tool_calls_are_projected_into_emulation_prompt(self) -> None:
|
||||||
|
spy_client = _SpyClient(
|
||||||
|
stream_events=[],
|
||||||
|
complete_result={
|
||||||
|
"text": "done",
|
||||||
|
"toolEvents": [],
|
||||||
|
"sessionId": "sess-emulated-tool-history",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
req = ChatCompletionsRequest(
|
||||||
|
model="org_auto",
|
||||||
|
messages=[
|
||||||
|
{
|
||||||
|
"role": "assistant",
|
||||||
|
"content": "I will check that",
|
||||||
|
"tool_calls": [
|
||||||
|
{
|
||||||
|
"id": "call_1",
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": "fetch_weather",
|
||||||
|
"arguments": '{"city":"Hangzhou"}',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{"role": "user", "content": "continue"},
|
||||||
|
],
|
||||||
|
stream=False,
|
||||||
|
tools=[
|
||||||
|
{
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": "fetch_weather",
|
||||||
|
"parameters": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {"city": {"type": "string"}},
|
||||||
|
"required": ["city"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
with (
|
||||||
|
patch.object(main, "pool", _FakePool(_FakeInstance(spy_client))),
|
||||||
|
patch.object(main, "chat_guard", _FakeGuard()),
|
||||||
|
patch.object(
|
||||||
|
main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})
|
||||||
|
),
|
||||||
|
patch.object(
|
||||||
|
main.stats_collector, "record_chat", AsyncMock(return_value=None)
|
||||||
|
),
|
||||||
|
):
|
||||||
|
await main.v1_chat_completions(req, _make_request("/v1/chat/completions"))
|
||||||
|
|
||||||
|
prompt = spy_client.last_complete_args[0]
|
||||||
|
self.assertIn("I will check that", prompt)
|
||||||
|
self.assertIn('"tool": "fetch_weather"', prompt)
|
||||||
|
self.assertIn('"city": "Hangzhou"', prompt)
|
||||||
|
|
||||||
|
async def test_openai_emulation_prompt_includes_proxy_tool_guidance(self) -> None:
|
||||||
|
spy_client = _SpyClient(
|
||||||
|
stream_events=[],
|
||||||
|
complete_result={"text": "done", "toolEvents": [], "sessionId": "sess-guidance"},
|
||||||
|
)
|
||||||
|
req = ChatCompletionsRequest(
|
||||||
|
model="org_auto",
|
||||||
|
messages=[{"role": "user", "content": "inspect README"}],
|
||||||
|
stream=False,
|
||||||
|
tools=[
|
||||||
|
{
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": "read_file",
|
||||||
|
"description": "Read a file",
|
||||||
|
"parameters": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {"path": {"type": "string"}},
|
||||||
|
"required": ["path"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": "bash",
|
||||||
|
"parameters": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {"command": {"type": "string"}},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
with (
|
||||||
|
patch.object(main, "pool", _FakePool(_FakeInstance(spy_client))),
|
||||||
|
patch.object(main, "chat_guard", _FakeGuard()),
|
||||||
|
patch.object(
|
||||||
|
main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})
|
||||||
|
),
|
||||||
|
patch.object(
|
||||||
|
main.stats_collector, "record_chat", AsyncMock(return_value=None)
|
||||||
|
),
|
||||||
|
):
|
||||||
|
await main.v1_chat_completions(req, _make_request("/v1/chat/completions"))
|
||||||
|
|
||||||
|
prompt = spy_client.last_complete_args[0]
|
||||||
|
self.assertIn("DIRECT tool access inside an IDE", prompt)
|
||||||
|
self.assertIn("Tool routing guide:", prompt)
|
||||||
|
self.assertIn("Read a specific local file or code path: use read_file.", prompt)
|
||||||
|
self.assertIn("Core tool syntax examples", prompt)
|
||||||
|
self.assertIn("Coding and file-work discipline:", prompt)
|
||||||
|
self.assertIn("NEVER say that tools are unavailable", prompt)
|
||||||
|
|
||||||
|
async def test_anthropic_tool_history_is_projected_into_emulation_prompt(self) -> None:
|
||||||
|
spy_client = _SpyClient(
|
||||||
|
stream_events=[],
|
||||||
|
complete_result={
|
||||||
|
"text": "done",
|
||||||
|
"toolEvents": [],
|
||||||
|
"sessionId": "sess-anthropic-history",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
req = AnthropicMessagesRequest(
|
||||||
|
model="claude-3-5-sonnet-20241022",
|
||||||
|
max_tokens=128,
|
||||||
|
messages=[
|
||||||
|
{
|
||||||
|
"role": "assistant",
|
||||||
|
"content": [
|
||||||
|
{"type": "text", "text": "I will check"},
|
||||||
|
{
|
||||||
|
"type": "tool_use",
|
||||||
|
"id": "toolu_1",
|
||||||
|
"name": "fetch_weather",
|
||||||
|
"input": {"city": "Hangzhou"},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": [
|
||||||
|
{
|
||||||
|
"type": "tool_result",
|
||||||
|
"tool_use_id": "toolu_1",
|
||||||
|
"content": '{"temperature":"22C"}',
|
||||||
|
}
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{"role": "user", "content": "continue"},
|
||||||
|
],
|
||||||
|
stream=False,
|
||||||
|
tools=[
|
||||||
|
{
|
||||||
|
"name": "fetch_weather",
|
||||||
|
"input_schema": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {"city": {"type": "string"}},
|
||||||
|
"required": ["city"],
|
||||||
|
},
|
||||||
|
}
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
with (
|
||||||
|
patch.object(main, "pool", _FakePool(_FakeInstance(spy_client))),
|
||||||
|
patch.object(main, "chat_guard", _FakeGuard()),
|
||||||
|
patch.object(
|
||||||
|
main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})
|
||||||
|
),
|
||||||
|
patch.object(
|
||||||
|
main.stats_collector, "record_chat", AsyncMock(return_value=None)
|
||||||
|
),
|
||||||
|
patch.object(main.settings, "api_keys", ["test-key"]),
|
||||||
|
):
|
||||||
|
await main.v1_messages(
|
||||||
|
req,
|
||||||
|
_make_request(
|
||||||
|
"/v1/messages",
|
||||||
|
headers={
|
||||||
|
"x-api-key": "test-key",
|
||||||
|
"anthropic-version": "2023-06-01",
|
||||||
|
},
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
prompt = spy_client.last_complete_args[0]
|
||||||
|
self.assertIn("I will check", prompt)
|
||||||
|
self.assertIn('"tool": "fetch_weather"', prompt)
|
||||||
|
self.assertIn('"city": "Hangzhou"', prompt)
|
||||||
|
self.assertIn("Tool result:", prompt)
|
||||||
|
self.assertIn('{"temperature":"22C"}', prompt)
|
||||||
|
|
||||||
async def test_anthropic_non_stream_synthesizes_tool_use_from_json_action_block(
|
async def test_anthropic_non_stream_synthesizes_tool_use_from_json_action_block(
|
||||||
self,
|
self,
|
||||||
) -> None:
|
) -> None:
|
||||||
@@ -2434,6 +2618,177 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
|
|||||||
self.assertEqual(detail["error"]["message"], "invalid upstream response")
|
self.assertEqual(detail["error"]["message"], "invalid upstream response")
|
||||||
|
|
||||||
|
|
||||||
|
class CapabilitiesEndpointTests(unittest.IsolatedAsyncioTestCase):
|
||||||
|
async def test_capabilities_payload_shape(self) -> None:
|
||||||
|
with (
|
||||||
|
patch.object(main.settings, "tool_forward_enabled", True),
|
||||||
|
patch.object(main.settings, "tool_allowlist", ["lookup"]),
|
||||||
|
patch.object(main.settings, "session_reuse_enabled", True),
|
||||||
|
patch.object(main.settings, "session_cache_max_entries", 123),
|
||||||
|
patch.object(main.settings, "session_cache_ttl_sec", 45.0),
|
||||||
|
patch.object(main.settings, "instance_count", 2),
|
||||||
|
patch.object(main.settings, "default_model", "org_auto"),
|
||||||
|
patch.object(main.settings, "default_ask_mode", "chat"),
|
||||||
|
patch.object(main.settings, "api_keys", ["test-key"]),
|
||||||
|
patch.object(main.settings, "admin_token", "adm"),
|
||||||
|
patch.object(main.settings, "metrics_public", False),
|
||||||
|
):
|
||||||
|
response = await main.capabilities()
|
||||||
|
|
||||||
|
self.assertEqual(response.status_code, 200)
|
||||||
|
payload = json.loads(response.body)
|
||||||
|
self.assertEqual(payload["service"], "lingma-openai-gateway")
|
||||||
|
self.assertIn("protocols", payload)
|
||||||
|
self.assertIn("features", payload)
|
||||||
|
self.assertTrue(payload["protocols"]["openai"]["chat_completions"])
|
||||||
|
self.assertTrue(payload["protocols"]["anthropic"]["messages"])
|
||||||
|
self.assertTrue(payload["protocols"]["openai"]["request_tools_forwarded"])
|
||||||
|
self.assertEqual(payload["features"]["tooling"]["allowlist"], ["lookup"])
|
||||||
|
self.assertEqual(payload["features"]["pool"]["configured_instance_count"], 2)
|
||||||
|
self.assertTrue(payload["features"]["auth"]["v1_requires_auth"])
|
||||||
|
|
||||||
|
async def test_v1_capabilities_auth_guard_requires_authentication(self) -> None:
|
||||||
|
with patch.object(main.settings, "api_keys", ["test-key"]):
|
||||||
|
with self.assertRaises(main.AnthropicAuthError) as ctx:
|
||||||
|
main.anthropic_auth_guard(
|
||||||
|
_make_request(
|
||||||
|
"/v1/capabilities",
|
||||||
|
headers={"anthropic-version": "2023-06-01"},
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertEqual(ctx.exception.status_code, 401)
|
||||||
|
|
||||||
|
async def test_v1_capabilities_returns_payload_with_auth(self) -> None:
|
||||||
|
with (
|
||||||
|
patch.object(main.settings, "api_keys", ["test-key"]),
|
||||||
|
patch.object(main.settings, "tool_forward_enabled", False),
|
||||||
|
):
|
||||||
|
main.anthropic_auth_guard(
|
||||||
|
_make_request(
|
||||||
|
"/v1/capabilities",
|
||||||
|
headers={"x-api-key": "test-key", "anthropic-version": "2023-06-01"},
|
||||||
|
)
|
||||||
|
)
|
||||||
|
response = await main.v1_capabilities()
|
||||||
|
|
||||||
|
self.assertEqual(response.status_code, 200)
|
||||||
|
payload = json.loads(response.body)
|
||||||
|
self.assertFalse(payload["protocols"]["openai"]["request_tools_forwarded"])
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class AdminIntrospectionEndpointTests(unittest.IsolatedAsyncioTestCase):
|
||||||
|
async def test_internal_effective_config_requires_admin_token(self) -> None:
|
||||||
|
with (
|
||||||
|
patch.object(main.settings, "api_keys", ["api-key"]),
|
||||||
|
patch.object(main.settings, "admin_token", "admin-secret"),
|
||||||
|
):
|
||||||
|
with self.assertRaises(main.HTTPException) as ctx:
|
||||||
|
main.admin_auth_guard(
|
||||||
|
_make_request(
|
||||||
|
"/internal/effective-config",
|
||||||
|
headers={"authorization": "Bearer wrong-token"},
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertEqual(ctx.exception.status_code, 401)
|
||||||
|
|
||||||
|
async def test_internal_effective_config_redacts_secrets(self) -> None:
|
||||||
|
with (
|
||||||
|
patch.object(main.settings, "api_keys", ["api-key-1", "api-key-2"]),
|
||||||
|
patch.object(main.settings, "admin_token", "admin-secret"),
|
||||||
|
patch.object(main.settings, "metrics_token", "metrics-secret"),
|
||||||
|
patch.object(main.settings, "default_model", "org_auto"),
|
||||||
|
patch.object(main.settings, "tool_forward_enabled", True),
|
||||||
|
patch.object(main.settings, "session_reuse_enabled", True),
|
||||||
|
patch.object(main.settings, "metrics_public", False),
|
||||||
|
patch.object(main.settings, "auto_login_enabled", True),
|
||||||
|
patch.object(
|
||||||
|
main.settings,
|
||||||
|
"accounts",
|
||||||
|
[
|
||||||
|
SimpleNamespace(
|
||||||
|
username="user-a",
|
||||||
|
password="pass-a",
|
||||||
|
session_bundle_b64="bundle-a",
|
||||||
|
session_bundle_file="/secrets/bundle-a.txt",
|
||||||
|
)
|
||||||
|
],
|
||||||
|
),
|
||||||
|
):
|
||||||
|
main.admin_auth_guard(
|
||||||
|
_make_request(
|
||||||
|
"/internal/effective-config",
|
||||||
|
headers={"authorization": "Bearer admin-secret"},
|
||||||
|
)
|
||||||
|
)
|
||||||
|
response = await main.internal_effective_config()
|
||||||
|
|
||||||
|
self.assertEqual(response.status_code, 200)
|
||||||
|
payload = json.loads(response.body)
|
||||||
|
settings_payload = payload["settings"]
|
||||||
|
self.assertEqual(settings_payload["api_keys"], ["***", "***"])
|
||||||
|
self.assertEqual(settings_payload["admin_token"], "***")
|
||||||
|
self.assertEqual(settings_payload["metrics_token"], "***")
|
||||||
|
self.assertEqual(settings_payload["accounts"][0]["password"], "***")
|
||||||
|
self.assertEqual(settings_payload["accounts"][0]["session_bundle_b64"], "***")
|
||||||
|
self.assertEqual(settings_payload["accounts"][0]["username"], "user-a")
|
||||||
|
self.assertEqual(
|
||||||
|
settings_payload["accounts"][0]["session_bundle_file"],
|
||||||
|
"/secrets/bundle-a.txt",
|
||||||
|
)
|
||||||
|
self.assertTrue(payload["feature_flags"]["tool_forward_enabled"])
|
||||||
|
self.assertTrue(payload["feature_flags"]["session_reuse_enabled"])
|
||||||
|
|
||||||
|
async def test_internal_debug_requests_redacts_sensitive_fields(self) -> None:
|
||||||
|
main._DEBUG_REQUEST_LOG.clear()
|
||||||
|
main._record_debug_request(
|
||||||
|
"openai",
|
||||||
|
"/v1/chat/completions",
|
||||||
|
{
|
||||||
|
"api_key": "secret-key",
|
||||||
|
"session_bundle": "bundle-value",
|
||||||
|
"image_url": "data:image/png;base64,abcd",
|
||||||
|
"tool_calls": [
|
||||||
|
{
|
||||||
|
"function": {
|
||||||
|
"arguments": "x" * 3001,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
},
|
||||||
|
_make_request("/v1/chat/completions", headers={"x-request-id": "req-123"}),
|
||||||
|
)
|
||||||
|
|
||||||
|
response = await main.internal_debug_requests(limit=10)
|
||||||
|
|
||||||
|
self.assertEqual(response.status_code, 200)
|
||||||
|
payload = json.loads(response.body)
|
||||||
|
self.assertEqual(payload["count"], 1)
|
||||||
|
item = payload["items"][0]
|
||||||
|
self.assertEqual(item["request_id"], "req-123")
|
||||||
|
self.assertEqual(item["body"]["api_key"], "***")
|
||||||
|
self.assertEqual(item["body"]["session_bundle"], "***")
|
||||||
|
self.assertEqual(item["body"]["image_url"], "[redacted-data-url]")
|
||||||
|
self.assertTrue(item["body"]["tool_calls"][0]["function"]["arguments"].endswith("... [truncated]"))
|
||||||
|
|
||||||
|
async def test_internal_debug_requests_requires_admin_token(self) -> None:
|
||||||
|
with (
|
||||||
|
patch.object(main.settings, "api_keys", ["api-key"]),
|
||||||
|
patch.object(main.settings, "admin_token", "admin-secret"),
|
||||||
|
):
|
||||||
|
with self.assertRaises(main.HTTPException) as ctx:
|
||||||
|
main.admin_auth_guard(
|
||||||
|
_make_request(
|
||||||
|
"/internal/debug/requests",
|
||||||
|
headers={"authorization": "Bearer wrong-token"},
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertEqual(ctx.exception.status_code, 401)
|
||||||
|
|
||||||
|
|
||||||
class SessionCacheToolFingerprintTests(unittest.TestCase):
|
class SessionCacheToolFingerprintTests(unittest.TestCase):
|
||||||
def test_build_key_changes_with_tool_config(self) -> None:
|
def test_build_key_changes_with_tool_config(self) -> None:
|
||||||
from app.session_cache import SessionCache
|
from app.session_cache import SessionCache
|
||||||
|
|||||||
Reference in New Issue
Block a user