feat: add emulated tool-calling bridge for Lingma

Add a proxy-side tool emulation layer so Lingma requests can surface stable OpenAI tool_calls and Anthropic tool_use blocks even when upstream tool events are missing or inconsistent.

Constraint: Keep native Lingma tool event bridging as the first path and layer emulation as a fallback

Rejected: Depend exclusively on Lingma native tool/invoke events | tool visibility remains inconsistent across models and transports

Confidence: high

Scope-risk: moderate
This commit is contained in:
mmc
2026-05-07 18:10:01 +08:00
parent 5911e4322e
commit 94a8025ae5
11 changed files with 1808 additions and 4 deletions

View File

@@ -6,6 +6,7 @@
- Anthropic`/v1/messages``/v1/messages/count_tokens`(含 stream - Anthropic`/v1/messages``/v1/messages/count_tokens`(含 stream
- 内置多实例池、会话复用、Prometheus 指标、登录态 bundle 注入 - 内置多实例池、会话复用、Prometheus 指标、登录态 bundle 注入
- 工具事件桥接Lingma 上游返回 `tool` 事件时,网关会输出为 OpenAI `tool_calls`stream/non-stream和 Anthropic `tool_use` / `tool_result`stream/non-stream请求侧 `tools` / `tool_choice` 仅在 `TOOL_FORWARD_ENABLED=true` 时透传(默认开启,可显式关闭) - 工具事件桥接Lingma 上游返回 `tool` 事件时,网关会输出为 OpenAI `tool_calls`stream/non-stream和 Anthropic `tool_use` / `tool_result`stream/non-stream请求侧 `tools` / `tool_choice` 仅在 `TOOL_FORWARD_ENABLED=true` 时透传(默认开启,可显式关闭)
- 工具模拟回退:当 Lingma 未稳定外显原生 `tool/*` 事件时,网关会把注入后的 `json action` / `#Tool Call` 等动作文本归一化为 OpenAI `tool_calls`,并支持 tool result continuation
- 多模态降级OpenAI `image_url` / `input_image``[image]``input_audio``[audio]`Anthropic `image``[image]` - 多模态降级OpenAI `image_url` / `input_image``[image]``input_audio``[audio]`Anthropic `image``[image]`
> 架构设计与二开细节请看 [`DESIGN.md`](./DESIGN.md)。 > 架构设计与二开细节请看 [`DESIGN.md`](./DESIGN.md)。
@@ -85,6 +86,9 @@ python3 -m unittest tests/test_tool_call_bridge.py
# 全量 unittest # 全量 unittest
python3 -m unittest discover -s tests -p "test_*.py" python3 -m unittest discover -s tests -p "test_*.py"
# Docker 端到端工具调用冒烟
bash scripts/smoke_tool_calls.sh
``` ```
--- ---
@@ -201,6 +205,7 @@ curl -s "http://127.0.0.1:${PORT}/healthz"
| `healthz` 正常但请求失败 | 用错端口 | 以 `.env``PORT` 为准,`docker compose ps` 再确认 | | `healthz` 正常但请求失败 | 用错端口 | 以 `.env``PORT` 为准,`docker compose ps` 再确认 |
| `git pull` 提示 not on a branch | 处于 detached HEAD | 执行 `git checkout -B main origin/main` | | `git pull` 提示 not on a branch | 处于 detached HEAD | 执行 `git checkout -B main origin/main` |
| 自动登录不稳定 | 浏览器流程波动 | 优先使用 `LINGMA_SESSION_BUNDLE(_FILE)` | | 自动登录不稳定 | 浏览器流程波动 | 优先使用 `LINGMA_SESSION_BUNDLE(_FILE)` |
| 日志出现 `extension main js path not found` / `ExtensionApi executor not inited` | Lingma 扩展运行时未完整提取，MCP/工具执行器未初始化 | 重启容器触发 bootstrap 自愈；确认 `data/bin/<version>/extension/main.js` 已存在 |
| 工具调用未触发 | 模型未选择工具或当前协议路径不支持合成回退 | OpenAI 可配合 `tool_choice` 强制并约束输出 JSON；Anthropic 当前仅 non-stream 支持合成 `tool_use` / `tool_result` 回退 |
--- ---

View File

@@ -3,6 +3,7 @@ from __future__ import annotations
import io import io
import json import json
import os import os
import shutil
import time import time
import urllib.request import urllib.request
import zipfile import zipfile
@@ -70,6 +71,15 @@ def _extract_release_tree(
dst.write(src.read()) dst.write(src.read())
def _release_dir_for_binary(lingma_bin: Path, release_root: str | None) -> Path:
return lingma_bin.parent / ((release_root or "").strip() or "2.5.20")
def _release_has_required_assets(release_dir: Path) -> bool:
extension_main = release_dir / "extension" / "main.js"
return extension_main.exists() and extension_main.is_file()
def _query_marketplace_latest_vsix( def _query_marketplace_latest_vsix(
publisher: str, extension: str publisher: str, extension: str
) -> tuple[str, str, dict]: ) -> tuple[str, str, dict]:
@@ -168,8 +178,19 @@ def bootstrap_from_vsix() -> None:
) )
resolved_url = vsix_url resolved_url = vsix_url
current_release_dir = _release_dir_for_binary(
lingma_bin, old_marker.get("release_root") if isinstance(old_marker, dict) else None
)
release_ready = _release_has_required_assets(current_release_dir)
if lingma_bin.exists() and not release_ready:
print(
"[bootstrap] existing Lingma binary found but extension assets are incomplete; "
f"refreshing install under {current_release_dir}"
)
if ( if (
lingma_bin.exists() lingma_bin.exists()
and release_ready
and not force_refresh and not force_refresh
and ( and (
(not always_refresh) (not always_refresh)
@@ -215,7 +236,8 @@ def bootstrap_from_vsix() -> None:
lingma_bytes = inner_zip.read(lingma_member) lingma_bytes = inner_zip.read(lingma_member)
release_root = _infer_release_root(lingma_member) release_root = _infer_release_root(lingma_member)
lingma_bin.parent.mkdir(parents=True, exist_ok=True) lingma_bin.parent.mkdir(parents=True, exist_ok=True)
release_dir = lingma_bin.parent / (release_root or "2.5.20") release_dir = _release_dir_for_binary(lingma_bin, release_root)
shutil.rmtree(release_dir, ignore_errors=True)
_extract_release_tree(inner_zip, release_root, release_dir) _extract_release_tree(inner_zip, release_root, release_dir)
lingma_bin.write_bytes(lingma_bytes) lingma_bin.write_bytes(lingma_bytes)
@@ -224,7 +246,9 @@ def bootstrap_from_vsix() -> None:
if extension_main.exists(): if extension_main.exists():
print(f"[bootstrap] extension ready: {extension_main}") print(f"[bootstrap] extension ready: {extension_main}")
else: else:
print(f"[bootstrap] extension missing under: {release_dir}") raise RuntimeError(
f"extension assets missing after extraction under: {release_dir}"
)
marker = { marker = {
"source": source_type, "source": source_type,

View File

@@ -196,6 +196,33 @@ def _extract_tool_calls_from_text(text: str) -> list[dict[str, Any]] | None:
return None return None
def _extract_hash_tool_call_event_from_text(
text: str,
*,
forced_tool_name: str | None = None,
) -> dict[str, Any] | None:
raw = (text or "").strip()
if not raw:
return None
match = re.search(
r"#Tool Call\s*```([A-Za-z0-9_\-.]+)\s*(\{.*?\})\s*```",
raw,
flags=re.S,
)
if not match:
return None
name = match.group(1).strip()
if forced_tool_name and name != forced_tool_name:
return None
try:
arguments = json.loads(match.group(2))
except Exception:
return None
if not isinstance(arguments, dict):
return None
return {"name": name, "input": arguments}
def _tool_code_single_arg_name( def _tool_code_single_arg_name(
tools: list[dict[str, Any]] | None, forced_tool_name: str tools: list[dict[str, Any]] | None, forced_tool_name: str
) -> str | None: ) -> str | None:
@@ -342,6 +369,45 @@ def _forced_tool_fallback_event(
) )
def _declared_tool_names(tools: list[dict[str, Any]] | None) -> list[str]:
if not isinstance(tools, list):
return []
out: list[str] = []
for tool in tools:
name = _openai_tool_name(tool) or _anthropic_tool_name(tool)
if name and name not in out:
out.append(name)
return out
def _infer_tool_event_from_declared_tools(
    text: str,
    *,
    tools: list[dict[str, Any]] | None,
) -> dict[str, Any] | None:
    """Try to read a call to any declared tool out of free-form model text.

    For every declared tool name, in declaration order, three extractors are
    tried: ``<function_calls>`` markup, ``#Tool Call`` blocks, then the
    forced-tool fallback. The first event produced is returned; ``None`` means
    nothing matched any declared tool.
    """
    for candidate_name in _declared_tool_names(tools):
        attempts = (
            lambda: _extract_function_call_event_from_text(
                text,
                forced_tool_name=candidate_name,
            ),
            lambda: _extract_hash_tool_call_event_from_text(
                text,
                forced_tool_name=candidate_name,
            ),
            lambda: _forced_tool_fallback_event(
                text,
                forced_tool_name=candidate_name,
                tools=tools,
            ),
        )
        # The lambdas are invoked immediately, so closing over the loop
        # variable is safe here.
        for attempt in attempts:
            event = attempt()
            if event is not None:
                return event
    return None
def _openai_tool_call( def _openai_tool_call(
tool: dict[str, Any], *, forced_id: str | None = None tool: dict[str, Any], *, forced_id: str | None = None
) -> dict[str, Any]: ) -> dict[str, Any]:

657
app/http/tool_emulation.py Normal file
View File

@@ -0,0 +1,657 @@
from __future__ import annotations
import json
import re
import uuid
from dataclasses import dataclass
from typing import Any
@dataclass
class EmulatedToolDef:
    """Protocol-neutral description of one tool declared in a request."""

    # Tool name, stripped of surrounding whitespace by the extractors.
    name: str
    # Free-text description; the extractors store "" when none was given.
    description: str
    # Argument schema dict; the extractors store {} when none was given.
    input_schema: dict[str, Any]
@dataclass
class EmulatedToolChoice:
    """Normalised tool-choice directive shared by the OpenAI and Anthropic paths."""

    # One of "auto", "none", "any" or "tool", as produced by the extractors.
    mode: str
    # Name of the tool to force; only meaningful when mode == "tool".
    name: str = ""
@dataclass
class EmulatedToolCall:
    """A tool invocation recovered from model text output."""

    # Call identifier handed back to the client.
    id: str
    # Name of the tool being invoked.
    name: str
    # Parsed arguments for the invocation.
    arguments: dict[str, Any]
def extract_openai_tools(raw: Any) -> list[EmulatedToolDef]:
    """Convert an OpenAI-style ``tools`` array into ``EmulatedToolDef`` items.

    Entries that are not dicts, lack a ``function`` dict, or have a blank
    function name are skipped. A non-list input yields an empty list.
    """
    if not isinstance(raw, list):
        return []

    definitions: list[EmulatedToolDef] = []
    for entry in raw:
        function = entry.get("function") if isinstance(entry, dict) else None
        if not isinstance(function, dict):
            continue
        tool_name = str(function.get("name") or "").strip()
        if not tool_name:
            continue
        parameters = function.get("parameters")
        definitions.append(
            EmulatedToolDef(
                name=tool_name,
                description=str(function.get("description") or "").strip(),
                # Shallow-copy so the stored schema is a separate dict object.
                input_schema=dict(parameters) if isinstance(parameters, dict) else {},
            )
        )
    return definitions
def extract_anthropic_tools(raw: Any) -> list[EmulatedToolDef]:
    """Convert an Anthropic-style ``tools`` array into ``EmulatedToolDef`` items.

    Entries whose ``type`` starts with ``web_search_`` are skipped, as are
    non-dict entries and entries with a blank ``name``. A non-list input
    yields an empty list.
    """
    if not isinstance(raw, list):
        return []

    definitions: list[EmulatedToolDef] = []
    for entry in raw:
        if not isinstance(entry, dict):
            continue
        if str(entry.get("type") or "").strip().startswith("web_search_"):
            continue
        tool_name = str(entry.get("name") or "").strip()
        if not tool_name:
            continue
        declared_schema = entry.get("input_schema")
        definitions.append(
            EmulatedToolDef(
                name=tool_name,
                description=str(entry.get("description") or "").strip(),
                input_schema=(
                    dict(declared_schema) if isinstance(declared_schema, dict) else {}
                ),
            )
        )
    return definitions
def extract_openai_tool_choice(raw: Any) -> EmulatedToolChoice:
    """Normalise an OpenAI ``tool_choice`` value.

    Strings map as follows: ``""``/``"auto"`` -> auto, ``"none"`` -> none,
    ``"required"``/``"any"`` -> any, anything else is taken as a tool name.
    Dicts are dispatched on ``type``; for ``function``/``tool`` the name is
    read from ``function.name`` first and the top-level ``name`` second.
    Everything else (including ``None``) resolves to auto.
    """
    if isinstance(raw, str):
        keyword = raw.strip()
        if keyword in {"", "auto"}:
            return EmulatedToolChoice(mode="auto")
        if keyword == "none":
            return EmulatedToolChoice(mode="none")
        if keyword in {"required", "any"}:
            return EmulatedToolChoice(mode="any")
        return EmulatedToolChoice(mode="tool", name=keyword)

    # Covers None as well as any unsupported type.
    if not isinstance(raw, dict):
        return EmulatedToolChoice(mode="auto")

    kind = str(raw.get("type") or "").strip()
    if kind in {"required", "any"}:
        return EmulatedToolChoice(mode="any")
    if kind == "none":
        return EmulatedToolChoice(mode="none")
    if kind in {"function", "tool"}:
        function = raw.get("function")
        nested_name = function.get("name") if isinstance(function, dict) else None
        for candidate in (nested_name, raw.get("name")):
            chosen = str(candidate or "").strip()
            if chosen:
                return EmulatedToolChoice(mode="tool", name=chosen)
    return EmulatedToolChoice(mode="auto")
def extract_anthropic_tool_choice(raw: Any) -> EmulatedToolChoice:
    """Normalise an Anthropic ``tool_choice`` value.

    ``None`` resolves to auto and non-dict values are delegated to
    ``extract_openai_tool_choice``. Dicts are dispatched on ``type``; a
    ``tool`` entry without a name falls back to auto.
    """
    if raw is None:
        return EmulatedToolChoice(mode="auto")
    if not isinstance(raw, dict):
        return extract_openai_tool_choice(raw)

    kind = str(raw.get("type") or "").strip()
    if kind == "none":
        return EmulatedToolChoice(mode="none")
    if kind in {"any", "required"}:
        return EmulatedToolChoice(mode="any")
    if kind == "tool":
        chosen = str(raw.get("name") or "").strip()
        if chosen:
            return EmulatedToolChoice(mode="tool", name=chosen)
    # "", "auto", unknown types and nameless "tool" all end up here.
    return EmulatedToolChoice(mode="auto")
def has_tool_request(tools: list[EmulatedToolDef], choice: EmulatedToolChoice) -> bool:
    """Report whether the request declares tools or a non-auto tool choice."""
    if tools:
        return True
    return choice.mode not in {"", "auto"}
def inject_tooling(system: str, tools: list[EmulatedToolDef], choice: EmulatedToolChoice) -> str:
    """Append tool-usage instructions to the system prompt.

    With no tools the stripped system prompt is returned unchanged. Otherwise
    the instructions list every tool as ``name(signature) - description`` and
    ask the model to emit ``json action`` fenced blocks; they are joined to
    the system prompt with a ``---`` separator.
    """
    base = system.strip()
    if not tools:
        return base

    def _describe(tool: EmulatedToolDef) -> str:
        entry = f"{tool.name}({_compact_schema(tool.input_schema)})"
        if tool.description:
            entry += f" - {_truncate(tool.description, 120)}"
        return entry

    catalogue = "\n".join(_describe(tool) for tool in tools)
    sections = [
        "You are an AI assistant with DIRECT tool access.",
        "When a request needs local files, terminal state, browser state, current web data, or another external result, use the proxy tools listed below.",
        "Do not claim tools are unavailable.",
        "When you need to use a tool, output exactly one or more structured action blocks in this format:",
        '```json action\n{"tool":"NAME","parameters":{"key":"value"}}\n```',
        "Available tools:",
        catalogue,
        "Rules:",
        "- Use ```json action``` blocks for tool calls.",
        "- If a tool is needed, do not explain first; emit the action block directly.",
        "- If no tool is needed, answer normally.",
        "- Never say tools are unavailable.",
        # Empty unless the choice forces a tool; empty sections are dropped below.
        _force_constraint(choice),
    ]
    tooling = "\n\n".join(section for section in sections if section)
    return f"{base}\n\n---\n\n{tooling}" if base else tooling
def action_output_prompt(tool_call_id: str | None, output: str) -> str:
    """Render a tool result as a user-turn prompt for the model.

    Returns ``""`` when *output* is empty or blank. The call id is included in
    the header only when it is non-blank.
    """
    result = (output or "").strip()
    if not result:
        return ""

    call_ref = tool_call_id.strip() if tool_call_id else ""
    header = f"Tool result for {call_ref}:" if call_ref else "Tool result:"
    guidance = (
        "Based on the tool result above, answer the user's request directly if you have enough information. "
        "Only use another tool call if a specific missing fact still requires it."
    )
    return f"{header}\n{result}\n\n{guidance}"
def parse_action_blocks(
    text: str,
    tools: list[EmulatedToolDef],
    *,
    max_scan_bytes: int = 0,
    max_tool_calls: int = 8,
) -> tuple[list[EmulatedToolCall], str]:
    """Extract tool calls from ```json / ```json action fenced blocks.

    Args:
        text: Model output to scan.
        tools: Declared tools. When any are declared, blocks naming a tool
            that is not among them are left in the text untouched, so an
            ordinary JSON snippet with a "name" key is not turned into a call.
        max_scan_bytes: When positive, only this many leading characters are
            scanned (the limit is applied with ``len(text)``, i.e. characters,
            despite the name). The unscanned tail is preserved in the returned
            text rather than discarded.
        max_tool_calls: Scanning stops once this many calls were collected.

    Returns:
        ``(calls, remaining_text)``. Blocks that produced a call, and exact
        duplicates of such blocks, are removed from ``remaining_text``; blocks
        that were rejected stay in place.
    """
    if not text or not text.strip():
        return [], ""

    if 0 < max_scan_bytes < len(text):
        scanned, tail = text[:max_scan_bytes], text[max_scan_bytes:]
    else:
        scanned, tail = text, ""

    declared = [tool for tool in tools if tool.name.strip()]
    tool_name_map = {tool.name.lower(): tool.name for tool in declared}
    tool_schema_map = {tool.name: tool.input_schema for tool in declared}

    calls: list[EmulatedToolCall] = []
    spans: list[tuple[int, int]] = []
    seen: set[str] = set()
    for match in re.finditer(
        r"```json(?:\s+action)?\s*(.*?)```", scanned, flags=re.S | re.I
    ):
        raw = (match.group(1) or "").strip()
        if not raw:
            continue
        parsed = _parse_tool_call_json(raw)
        if parsed is None:
            continue
        name, arguments = parsed
        normalized = _normalize_tool_name(name, tool_name_map)
        # Never fabricate a call to a tool the client did not declare.
        if tool_schema_map and normalized not in tool_schema_map:
            continue
        schema = tool_schema_map.get(normalized)
        if schema:
            arguments = _filter_args_by_schema(arguments, schema)
            if not _has_required_args(arguments, schema):
                continue
        key = _tool_call_key(normalized, arguments)
        # A repeated identical call is stripped from the text but not re-emitted.
        spans.append(match.span())
        if key in seen:
            continue
        seen.add(key)
        calls.append(
            EmulatedToolCall(
                id=_stable_call_id(normalized, arguments),
                name=normalized,
                arguments=arguments,
            )
        )
        if len(calls) >= max_tool_calls:
            break

    if not calls:
        return [], text.strip()

    clean = scanned
    # Remove from the end so earlier span offsets stay valid.
    for start, end in reversed(spans):
        clean = clean[:start] + clean[end:]
    return calls, (clean + tail).strip()
def looks_like_refusal(text: str) -> bool:
    """Report whether *text* contains a known "I cannot use tools" phrase.

    Matching is a case-insensitive substring test against a fixed list of
    English and Chinese phrases; empty or blank text never matches.
    """
    haystack = (text or "").strip().lower()
    if not haystack:
        return False
    refusal_phrases = (
        "tools are unavailable",
        "cannot call tools",
        "can't call tools",
        "cannot execute",
        "can't execute",
        "没有可用的工具",
        "工具不可用",
        "不能调用工具",
        "无法直接执行",
    )
    for phrase in refusal_phrases:
        if phrase in haystack:
            return True
    return False
def looks_like_missed_tool_use(text: str) -> bool:
    """Report whether *text* announces a tool use without emitting a call.

    Matching is a case-insensitive substring test against a fixed list of
    English and Chinese phrases; empty or blank text never matches.
    """
    haystack = (text or "").strip().lower()
    if not haystack:
        return False
    intent_phrases = (
        "let me use",
        "i need to use",
        "i will use",
        "i need to run",
        "i will run",
        "我需要使用",
        "让我使用",
        "执行命令",
        "读取文件",
        "查看文件",
        "查询天气",
        "#tool call",
    )
    for phrase in intent_phrases:
        if phrase in haystack:
            return True
    return False
def infer_tool_calls_from_text(
    text: str,
    tools: list[EmulatedToolDef],
) -> list[EmulatedToolCall]:
    """Infer at most one tool call from text that refuses or merely announces a tool use.

    Nothing is inferred unless the text looks like a refusal or a missed tool
    use; otherwise the result of ``infer_declared_tool_call_from_text`` is
    wrapped in a list (empty when it found nothing).
    """
    if not looks_like_refusal(text) and not looks_like_missed_tool_use(text):
        return []
    inferred = infer_declared_tool_call_from_text(text, tools)
    if inferred is None:
        return []
    return [inferred]
def force_tooling_prompt(choice: EmulatedToolChoice) -> str:
    """Build the retry instruction sent after a reply that had no action block.

    When the choice pins a specific tool, a sentence naming that tool is
    appended.
    """
    sentences = [
        "Your last response did not include any ```json action``` block. "
        "You must respond with at least one valid action block now. "
        "Select the single most appropriate available tool for the user request. "
        "Do not explain. Do not say tools are unavailable. Output the action block directly."
    ]
    forced_name = choice.name.strip() if choice.mode == "tool" else ""
    if forced_name:
        sentences.append(f' You must call "{forced_name}".')
    return "".join(sentences)
def infer_declared_tool_call_from_text(
    text: str,
    tools: list[EmulatedToolDef],
) -> EmulatedToolCall | None:
    """Find the first declared tool for which *text* contains a usable call.

    For each tool, in declaration order, four extractors are tried: fenced
    JSON, ``#Tool Call`` block, ``<function_calls>`` markup, then the
    forced-tool fallback. When the tool has a schema, the arguments are
    filtered to its properties and the tool is skipped if a required argument
    is missing.
    """
    for candidate in tools:
        wanted = candidate.name
        extractors = (
            lambda: _extract_fenced_json_tool_call_event_from_text(
                text, forced_tool_name=wanted
            ),
            lambda: _extract_hash_tool_call_event_from_text(
                text, forced_tool_name=wanted
            ),
            lambda: _extract_function_call_event_from_text(
                text, forced_tool_name=wanted
            ),
            lambda: _forced_tool_fallback_event(
                text, forced_tool_name=wanted, tools=tools
            ),
        )
        # The lambdas run immediately, so closing over the loop variable is safe.
        event = None
        for extract in extractors:
            event = extract()
            if event is not None:
                break
        if event is None:
            continue

        call_args = dict(event.get("input") or {})
        spec = candidate.input_schema
        if spec:
            call_args = _filter_args_by_schema(call_args, spec)
            if not _has_required_args(call_args, spec):
                continue
        return EmulatedToolCall(
            id=_stable_call_id(wanted, call_args),
            name=wanted,
            arguments=call_args,
        )
    return None
def openai_tool_call_from_emulated(call: EmulatedToolCall) -> dict[str, Any]:
    """Render an emulated call as an OpenAI ``tool_calls`` entry.

    The arguments are serialised to a JSON string with non-ASCII characters
    kept as-is.
    """
    serialized_arguments = json.dumps(call.arguments, ensure_ascii=False)
    function_part = {"name": call.name, "arguments": serialized_arguments}
    return {"id": call.id, "type": "function", "function": function_part}
def _extract_hash_tool_call_event_from_text(
text: str,
*,
forced_tool_name: str | None = None,
) -> dict[str, Any] | None:
raw = (text or "").strip()
match = re.search(
r"#Tool Call\s*```([A-Za-z0-9_\-.]+)\s*(\{.*?\})\s*```",
raw,
flags=re.S,
)
if not match:
return None
name = match.group(1).strip()
if forced_tool_name and name != forced_tool_name:
return None
try:
arguments = json.loads(match.group(2))
except Exception:
return None
if not isinstance(arguments, dict):
return None
return {"name": name, "input": arguments}
def _extract_fenced_json_tool_call_event_from_text(
text: str,
*,
forced_tool_name: str | None = None,
) -> dict[str, Any] | None:
raw = (text or "").strip()
match = re.search(r"```json(?:\s+action)?\s*(\{.*?\})\s*```", raw, flags=re.S | re.I)
if not match:
return None
try:
payload = json.loads(match.group(1))
except Exception:
return None
if not isinstance(payload, dict):
return None
name = str(payload.get("tool") or payload.get("name") or "").strip()
fn = payload.get("function")
if not name and isinstance(fn, dict):
name = str(fn.get("name") or "").strip()
if not name:
return None
if forced_tool_name and name != forced_tool_name:
return None
arguments = payload.get("parameters")
if arguments is None:
arguments = payload.get("arguments")
if arguments is None:
arguments = payload.get("input")
if arguments is None and isinstance(fn, dict):
arguments = fn.get("arguments")
if isinstance(arguments, str):
try:
arguments = json.loads(arguments)
except Exception:
return None
if arguments is None:
arguments = {}
if not isinstance(arguments, dict):
return None
return {"name": name, "input": arguments}
def _extract_function_call_event_from_text(
text: str,
*,
forced_tool_name: str | None = None,
) -> dict[str, Any] | None:
raw = (text or "").strip()
match = re.search(r"<function_calls>\s*(\{.*?\})\s*</function_calls>", raw, flags=re.S)
if not match:
return None
try:
payload = json.loads(match.group(1))
except Exception:
return None
if not isinstance(payload, dict):
return None
name = str(payload.get("name") or "").strip()
if not name:
return None
if forced_tool_name and name != forced_tool_name:
return None
arguments = payload.get("arguments")
if isinstance(arguments, str):
try:
arguments = json.loads(arguments)
except Exception:
return None
if arguments is None:
arguments = {}
if not isinstance(arguments, dict):
return None
return {"name": name, "input": arguments}
def _forced_tool_fallback_event(
text: str,
*,
forced_tool_name: str | None,
tools: list[EmulatedToolDef],
) -> dict[str, Any] | None:
if not forced_tool_name:
return None
parsed = _tool_code_object_from_text(
text,
forced_tool_name,
single_arg_name=_tool_code_single_arg_name(tools, forced_tool_name),
)
if parsed is None:
try:
parsed = json.loads((text or "").strip())
except Exception:
return None
if not isinstance(parsed, dict):
return None
explicit_name = parsed.get("name") or parsed.get("tool")
if explicit_name is not None and str(explicit_name) != forced_tool_name:
return None
tool_input = parsed.get("input")
if tool_input is None and "arguments" in parsed:
tool_input = parsed.get("arguments")
if isinstance(tool_input, str):
try:
tool_input = json.loads(tool_input)
except Exception:
return None
if tool_input is None:
reserved = {"name", "tool", "function", "arguments", "input", "result"}
tool_input = {k: v for k, v in parsed.items() if k not in reserved}
if not isinstance(tool_input, dict):
return None
return {"name": forced_tool_name, "input": tool_input}
def _tool_code_single_arg_name(
tools: list[EmulatedToolDef], forced_tool_name: str
) -> str | None:
for tool in tools:
if tool.name != forced_tool_name:
continue
properties = tool.input_schema.get("properties")
if not isinstance(properties, dict) or len(properties) != 1:
return None
only_name = next(iter(properties.keys()), None)
return only_name if isinstance(only_name, str) and only_name.strip() else None
return None
def _tool_code_object_from_text(
text: str,
forced_tool_name: str,
*,
single_arg_name: str | None = None,
) -> dict[str, Any] | None:
raw = (text or "").strip()
if not raw.startswith("```") or not raw.endswith("```"):
return None
lines = raw.splitlines()
if len(lines) < 2:
return None
fence = lines[0].strip().lower()
language = fence[3:].strip()
if language and language not in {"tool_code", "python", "py"}:
return None
body = "\n".join(lines[1:-1]).strip()
call_match = re.fullmatch(rf"{re.escape(forced_tool_name)}\((.*)\)", body, flags=re.S)
if not call_match:
return None
arguments_text = call_match.group(1).strip()
if not arguments_text:
return {"arguments": {}}
if single_arg_name and not re.search(r"\w+\s*=", arguments_text):
try:
value = json.loads(arguments_text)
except Exception:
value = arguments_text.strip('"\'')
return {"arguments": {single_arg_name: value}}
arguments: dict[str, Any] = {}
for part in [p.strip() for p in arguments_text.split(",") if p.strip()]:
if "=" not in part:
return None
key, value_text = part.split("=", 1)
key = key.strip()
value_text = value_text.strip()
try:
value = json.loads(value_text)
except Exception:
value = value_text.strip('"\'')
arguments[key] = value
return {"arguments": arguments}
def _parse_tool_call_json(raw: str) -> tuple[str, dict[str, Any]] | None:
try:
obj = json.loads(_normalize_json(raw))
except Exception:
return None
if not isinstance(obj, dict):
return None
name = str(obj.get("tool") or obj.get("name") or "").strip()
fn = obj.get("function")
if not name and isinstance(fn, dict):
name = str(fn.get("name") or "").strip()
if not name:
return None
arguments = obj.get("parameters")
if arguments is None:
arguments = obj.get("arguments")
if arguments is None:
arguments = obj.get("input")
if arguments is None and isinstance(fn, dict):
arguments = fn.get("arguments")
if isinstance(arguments, str):
try:
arguments = json.loads(arguments)
except Exception:
arguments = {}
if arguments is None:
arguments = {k: v for k, v in obj.items() if k not in {"tool", "name"}}
if not isinstance(arguments, dict):
return None
return name, arguments
def _normalize_tool_name(raw: str, available: dict[str, str]) -> str:
name = raw.strip()
if not name:
return ""
exact = available.get(name.lower())
if exact:
return exact
key = name.lower().replace("-", "_").replace(" ", "_")
aliases = {
"bash": "terminal",
"shell": "terminal",
"read": "read_file",
"grep": "search_files",
"glob": "search_files",
"edit": "patch",
"write": "write_file",
}
mapped = aliases.get(key)
if mapped and mapped in available:
return available[mapped]
return name
def _filter_args_by_schema(args: dict[str, Any], schema: dict[str, Any]) -> dict[str, Any]:
properties = schema.get("properties")
if not isinstance(properties, dict) or not properties:
return args
return {k: v for k, v in args.items() if k in properties}
def _has_required_args(args: dict[str, Any], schema: dict[str, Any]) -> bool:
required = schema.get("required")
if not isinstance(required, list):
return True
for key in required:
if not isinstance(key, str):
continue
if key not in args:
return False
value = args.get(key)
if isinstance(value, str) and not value.strip():
return False
return True
def _compact_schema(schema: dict[str, Any]) -> str:
properties = schema.get("properties")
if not isinstance(properties, dict) or not properties:
return ""
required = {item for item in schema.get("required", []) if isinstance(item, str)}
parts: list[str] = []
for key in sorted(properties.keys()):
parts.append(key if key in required else f"{key}?")
return ", ".join(parts)
def _truncate(text: str, max_len: int) -> str:
text = text.strip()
if len(text) <= max_len:
return text
return text[:max_len] + "..."
def _force_constraint(choice: EmulatedToolChoice) -> str:
if choice.mode == "any":
return "- You must output at least one ```json action``` block in this reply."
if choice.mode == "tool" and choice.name.strip():
return f'- You must call "{choice.name.strip()}" in this reply.'
return ""
def _normalize_json(text: str) -> str:
return (
text.strip()
.replace("", '"')
.replace("", '"')
.replace(",\n}", "\n}")
.replace(",\n]", "\n]")
)
def _tool_call_key(name: str, arguments: dict[str, Any]) -> str:
return f"{name.lower()}\0{json.dumps(arguments, ensure_ascii=False, sort_keys=True)}"
def _stable_call_id(name: str, arguments: dict[str, Any]) -> str:
    """Derive a deterministic ``call_``-prefixed id from the tool name and arguments.

    The id is the first 16 hex digits of a UUIDv5 over the call's
    deduplication key, so identical calls always get the same id.
    """
    digest = uuid.uuid5(uuid.NAMESPACE_OID, _tool_call_key(name, arguments)).hex
    return f"call_{digest[:16]}"

View File

@@ -419,6 +419,17 @@ class LspWsRpcClient:
method = msg.get("method") method = msg.get("method")
params = msg.get("params") or {} params = msg.get("params") or {}
if method and (
method.startswith("tool/")
or method.startswith("mcp/")
or method in {"chat/answer", "chat/finish"}
):
logger.info(
"lingma server message method=%s params=%s",
method,
params,
)
if method == "chat/answer": if method == "chat/answer":
req_id = params.get("requestId") req_id = params.get("requestId")
stream = self._chat_streams.get(req_id) stream = self._chat_streams.get(req_id)

View File

@@ -36,6 +36,20 @@ from .http.execution_core import (
release_execution, release_execution,
start_execution, start_execution,
) )
from .http.tool_emulation import (
action_output_prompt,
extract_anthropic_tool_choice as _em_extract_anthropic_tool_choice,
extract_anthropic_tools as _em_extract_anthropic_tools,
extract_openai_tool_choice as _em_extract_openai_tool_choice,
extract_openai_tools as _em_extract_openai_tools,
force_tooling_prompt,
has_tool_request as _em_has_tool_request,
infer_declared_tool_call_from_text,
infer_tool_calls_from_text,
inject_tooling,
openai_tool_call_from_emulated,
parse_action_blocks,
)
from .http.openai_responses import handle_responses from .http.openai_responses import handle_responses
from .http.tool_bridge import ( from .http.tool_bridge import (
_allowed_stream_tool_event, _allowed_stream_tool_event,
@@ -44,8 +58,10 @@ from .http.tool_bridge import (
_anthropic_tool_result_block, _anthropic_tool_result_block,
_anthropic_tool_use_block, _anthropic_tool_use_block,
_extract_function_call_event_from_text, _extract_function_call_event_from_text,
_extract_hash_tool_call_event_from_text,
_extract_tool_calls_from_text, _extract_tool_calls_from_text,
_forced_tool_fallback_event, _forced_tool_fallback_event,
_infer_tool_event_from_declared_tools,
_json_string, _json_string,
_openai_forced_tool_name, _openai_forced_tool_name,
_openai_tool_call, _openai_tool_call,
@@ -417,6 +433,82 @@ def _messages_to_prompt(messages: list[dict]) -> str:
return "\n".join(parts).strip() return "\n".join(parts).strip()
def _messages_to_emulation_prompt(
messages: list[dict[str, Any]],
*,
system_text: str,
tools: list[dict[str, Any]] | None,
tool_choice: Any,
) -> str:
filtered: list[tuple[str, str]] = []
for message in messages:
role = str(message.get("role") or "").strip().lower()
if role in {"system", "developer"}:
continue
text = flatten_content(message.get("content"))
if role == "tool":
text = action_output_prompt(message.get("tool_call_id"), text)
role = "user"
if not text:
continue
if role not in {"user", "assistant"}:
continue
filtered.append((role, text))
if not filtered:
return system_text.strip()
em_tools = _em_extract_openai_tools(tools)
em_choice = _em_extract_openai_tool_choice(tool_choice)
injected_system = inject_tooling(system_text, em_tools, em_choice)
parts: list[str] = []
for role, text in filtered:
label = "User" if role == "user" else "Assistant"
parts.append(f"{label}: {text}")
if injected_system:
parts.append(injected_system)
parts.append("Assistant:")
return "\n\n".join(parts).strip()
def _anthropic_messages_to_emulation_prompt(
messages: list[dict[str, Any]],
*,
system_text: str,
tools: list[dict[str, Any]] | None,
tool_choice: Any,
) -> str:
filtered: list[tuple[str, str]] = []
for message in messages:
role = str(message.get("role") or "").strip().lower()
text = str(message.get("content") or "").strip()
if role == "tool":
text = action_output_prompt(message.get("tool_call_id"), text)
role = "user"
if not text:
continue
if role not in {"user", "assistant"}:
continue
filtered.append((role, text))
if not filtered:
return system_text.strip()
em_tools = _em_extract_anthropic_tools(tools)
em_choice = _em_extract_anthropic_tool_choice(tool_choice)
injected_system = inject_tooling(system_text, em_tools, em_choice)
parts: list[str] = []
for role, text in filtered:
label = "User" if role == "user" else "Assistant"
parts.append(f"{label}: {text}")
if injected_system:
parts.append(injected_system)
parts.append("Assistant:")
return "\n\n".join(parts).strip()
def _include_usage(stream_options: dict | None) -> bool: def _include_usage(stream_options: dict | None) -> bool:
if not isinstance(stream_options, dict): if not isinstance(stream_options, dict):
return False return False
@@ -525,6 +617,20 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
is_reply = execution.is_reply is_reply = execution.is_reply
include_usage = _include_usage(req.stream_options) include_usage = _include_usage(req.stream_options)
em_tools = _em_extract_openai_tools(req.tools)
em_choice = _em_extract_openai_tool_choice(req.tool_choice)
if _em_has_tool_request(em_tools, em_choice):
system_parts = [
flatten_content(m.content)
for m in req.messages
if m.role in {"system", "developer"} and flatten_content(m.content)
]
prompt = _messages_to_emulation_prompt(
messages_dump,
system_text="\n\n".join(system_parts),
tools=req.tools,
tool_choice=req.tool_choice,
)
try: try:
started = await start_execution( started = await start_execution(
@@ -709,6 +815,11 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
merged_text, merged_text,
forced_tool_name=forced_tool_name, forced_tool_name=forced_tool_name,
) )
if inferred is None:
inferred = _extract_hash_tool_call_event_from_text(
merged_text,
forced_tool_name=forced_tool_name,
)
if inferred is None: if inferred is None:
inferred = _forced_tool_fallback_event( inferred = _forced_tool_fallback_event(
merged_text, merged_text,
@@ -747,6 +858,72 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
if buffered_text_parts and forced_tool_name and saw_tool_call: if buffered_text_parts and forced_tool_name and saw_tool_call:
buffered_text_parts.clear() buffered_text_parts.clear()
if buffered_text_parts and req.tools and not saw_tool_call:
merged_text = "".join(buffered_text_parts)
inferred = _infer_tool_event_from_declared_tools(
merged_text,
tools=req.tools,
)
if inferred is not None:
tool_id = "call_inferred_0"
tool_call_indexes[tool_id] = 0
saw_tool_call = True
payload = {
"id": completion_id,
"object": "chat.completion.chunk",
"created": created,
"model": model,
"choices": [
{
"index": 0,
"delta": {
"tool_calls": [
{
"index": 0,
**_openai_tool_call(
inferred, forced_id=tool_id
),
}
]
},
"finish_reason": None,
}
],
}
buffered_text_parts.clear()
yield f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"
if buffered_text_parts and req.tools and not saw_tool_call:
merged_text = "".join(buffered_text_parts)
parsed_calls, remaining = parse_action_blocks(merged_text, em_tools)
if parsed_calls:
saw_tool_call = True
for i, call in enumerate(parsed_calls):
tool_id = call.id or f"call_inferred_{i}"
tool_call_indexes[tool_id] = i
payload = {
"id": completion_id,
"object": "chat.completion.chunk",
"created": created,
"model": model,
"choices": [
{
"index": 0,
"delta": {
"tool_calls": [
{
"index": i,
**openai_tool_call_from_emulated(call),
}
]
},
"finish_reason": None,
}
],
}
yield f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"
buffered_text_parts = [remaining] if remaining else []
done_payload = { done_payload = {
"id": completion_id, "id": completion_id,
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@@ -866,6 +1043,11 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
message_content, message_content,
forced_tool_name=forced_tool_name, forced_tool_name=forced_tool_name,
) )
if inferred is None:
inferred = _extract_hash_tool_call_event_from_text(
message_content,
forced_tool_name=forced_tool_name,
)
if inferred is None: if inferred is None:
inferred = _forced_tool_fallback_event( inferred = _forced_tool_fallback_event(
message_content, message_content,
@@ -878,6 +1060,59 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
) )
saw_tool_call = True saw_tool_call = True
message_content = "" message_content = ""
if not saw_tool_call and req.tools:
inferred = _infer_tool_event_from_declared_tools(
message_content,
tools=req.tools,
)
if inferred is not None:
tool_calls.append(
_openai_tool_call(inferred, forced_id="call_inferred_0")
)
saw_tool_call = True
message_content = ""
if not saw_tool_call and em_tools:
parsed_calls, remaining = parse_action_blocks(message_content, em_tools)
if parsed_calls:
for call in parsed_calls:
tool_calls.append(openai_tool_call_from_emulated(call))
saw_tool_call = True
message_content = remaining
if not saw_tool_call and em_tools:
inferred_call = infer_declared_tool_call_from_text(message_content, em_tools)
if inferred_call is None:
inferred_calls = infer_tool_calls_from_text(message_content, em_tools)
inferred_call = inferred_calls[0] if inferred_calls else None
if inferred_call is not None:
tool_calls.append(openai_tool_call_from_emulated(inferred_call))
saw_tool_call = True
message_content = ""
if not saw_tool_call and em_tools:
retry_prompt = f"{prompt}\n\n{force_tooling_prompt(em_choice)}"
retry_result = await inst.client.chat_complete(
retry_prompt,
model,
ask_mode,
session_id=None,
is_reply=False,
tool_config=tool_config,
)
retry_text = retry_result.get("text") or ""
parsed_calls, remaining = parse_action_blocks(retry_text, em_tools)
if parsed_calls:
for call in parsed_calls:
tool_calls.append(openai_tool_call_from_emulated(call))
saw_tool_call = True
message_content = remaining
else:
inferred_call = infer_declared_tool_call_from_text(retry_text, em_tools)
if inferred_call is None:
inferred_calls = infer_tool_calls_from_text(retry_text, em_tools)
inferred_call = inferred_calls[0] if inferred_calls else None
if inferred_call is not None:
tool_calls.append(openai_tool_call_from_emulated(inferred_call))
saw_tool_call = True
message_content = ""
response = ChatCompletionResponse( response = ChatCompletionResponse(
id=f"chatcmpl-{uuid.uuid4().hex}", id=f"chatcmpl-{uuid.uuid4().hex}",
created=int(time.time()), created=int(time.time()),
@@ -1049,6 +1284,16 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
model = execution.model model = execution.model
prompt = execution.prompt prompt = execution.prompt
is_reply = execution.is_reply is_reply = execution.is_reply
em_anthropic_tools = _em_extract_anthropic_tools(req.tools)
em_anthropic_choice = _em_extract_anthropic_tool_choice(req.tool_choice)
if _em_has_tool_request(em_anthropic_tools, em_anthropic_choice):
system_text = flatten_anthropic_content(req.system) if req.system else ""
prompt = _anthropic_messages_to_emulation_prompt(
messages_dump,
system_text=system_text,
tools=req.tools,
tool_choice=req.tool_choice,
)
try: try:
started = await start_execution( started = await start_execution(
@@ -1090,12 +1335,14 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
stream_meta: dict = {} stream_meta: dict = {}
max_tokens = req.max_tokens max_tokens = req.max_tokens
forced_tool_name = _anthropic_forced_tool_name(req.tool_choice) forced_tool_name = _anthropic_forced_tool_name(req.tool_choice)
aggregate_emulated_tools = bool(em_anthropic_tools)
async def event_stream(_ticket=ticket, _inst=inst, _meta=stream_meta): async def event_stream(_ticket=ticket, _inst=inst, _meta=stream_meta):
success = False success = False
block_index = 0 block_index = 0
text_block_open = False text_block_open = False
saw_pending_tool_use = False saw_pending_tool_use = False
buffered_text_parts: list[str] = []
try: try:
# 1) message_start — Anthropic SDKs read this first to get # 1) message_start — Anthropic SDKs read this first to get
# the message envelope (id/model/initial usage). # the message envelope (id/model/initial usage).
@@ -1196,7 +1443,78 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
text = _stream_text(chunk) text = _stream_text(chunk)
if not text: if not text:
continue continue
completion_tokens_holder["n"] += estimate_tokens(text) if aggregate_emulated_tools:
buffered_text_parts.append(text)
completion_tokens_holder["n"] += estimate_tokens(text)
continue
buffered_text_parts.append(text)
merged_text = "".join(buffered_text_parts)
parsed_calls, remaining = parse_action_blocks(
merged_text, em_anthropic_tools
)
if not parsed_calls:
inferred = infer_declared_tool_call_from_text(
merged_text,
em_anthropic_tools,
)
if inferred is None:
inferred_calls = infer_tool_calls_from_text(
merged_text,
em_anthropic_tools,
)
inferred = inferred_calls[0] if inferred_calls else None
if inferred is not None:
parsed_calls = [inferred]
remaining = ""
if parsed_calls:
if text_block_open:
yield _sse(
"content_block_stop",
{"type": "content_block_stop", "index": block_index},
)
block_index += 1
text_block_open = False
saw_pending_tool_use = True
for call in parsed_calls:
yield _sse(
"content_block_start",
{
"type": "content_block_start",
"index": block_index,
"content_block": {
"type": "tool_use",
"id": call.id,
"name": call.name,
"input": {},
},
},
)
yield _sse(
"content_block_delta",
{
"type": "content_block_delta",
"index": block_index,
"delta": {
"type": "input_json_delta",
"partial_json": json.dumps(call.arguments, ensure_ascii=False),
},
},
)
yield _sse(
"content_block_stop",
{"type": "content_block_stop", "index": block_index},
)
block_index += 1
buffered_text_parts = [remaining] if remaining else []
if not buffered_text_parts:
continue
text_to_emit = "".join(buffered_text_parts)
buffered_text_parts.clear()
completion_tokens_holder["n"] += estimate_tokens(text_to_emit)
if not text_block_open: if not text_block_open:
yield _sse( yield _sse(
"content_block_start", "content_block_start",
@@ -1213,10 +1531,106 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
{ {
"type": "content_block_delta", "type": "content_block_delta",
"index": block_index, "index": block_index,
"delta": {"type": "text_delta", "text": text}, "delta": {"type": "text_delta", "text": text_to_emit},
}, },
) )
if aggregate_emulated_tools:
merged_text = "".join(buffered_text_parts)
parsed_calls, remaining = parse_action_blocks(
merged_text, em_anthropic_tools
)
if not parsed_calls:
inferred = infer_declared_tool_call_from_text(
merged_text,
em_anthropic_tools,
)
if inferred is None:
inferred_calls = infer_tool_calls_from_text(
merged_text,
em_anthropic_tools,
)
inferred = inferred_calls[0] if inferred_calls else None
if inferred is not None:
parsed_calls = [inferred]
remaining = ""
if parsed_calls:
if remaining.strip():
yield _sse(
"content_block_start",
{
"type": "content_block_start",
"index": block_index,
"content_block": {"type": "text", "text": ""},
},
)
yield _sse(
"content_block_delta",
{
"type": "content_block_delta",
"index": block_index,
"delta": {"type": "text_delta", "text": remaining},
},
)
yield _sse(
"content_block_stop",
{"type": "content_block_stop", "index": block_index},
)
block_index += 1
for call in parsed_calls:
saw_pending_tool_use = True
yield _sse(
"content_block_start",
{
"type": "content_block_start",
"index": block_index,
"content_block": {
"type": "tool_use",
"id": call.id,
"name": call.name,
"input": {},
},
},
)
yield _sse(
"content_block_delta",
{
"type": "content_block_delta",
"index": block_index,
"delta": {
"type": "input_json_delta",
"partial_json": json.dumps(call.arguments, ensure_ascii=False),
},
},
)
yield _sse(
"content_block_stop",
{"type": "content_block_stop", "index": block_index},
)
block_index += 1
elif merged_text.strip():
yield _sse(
"content_block_start",
{
"type": "content_block_start",
"index": block_index,
"content_block": {"type": "text", "text": ""},
},
)
yield _sse(
"content_block_delta",
{
"type": "content_block_delta",
"index": block_index,
"delta": {"type": "text_delta", "text": merged_text},
},
)
yield _sse(
"content_block_stop",
{"type": "content_block_stop", "index": block_index},
)
if text_block_open: if text_block_open:
yield _sse( yield _sse(
"content_block_stop", "content_block_stop",
@@ -1323,6 +1737,135 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
else: else:
saw_pending_tool_use = True saw_pending_tool_use = True
if not saw_tool_event and em_anthropic_tools:
parsed_calls, remaining = parse_action_blocks(text, em_anthropic_tools)
if parsed_calls:
content_blocks = []
if remaining:
content_blocks.append({"type": "text", "text": remaining})
for call in parsed_calls:
content_blocks.append(
{
"type": "tool_use",
"id": call.id,
"name": call.name,
"input": call.arguments,
}
)
saw_tool_event = True
saw_pending_tool_use = True
text = remaining
if not saw_tool_event and em_anthropic_tools:
inferred_call = infer_declared_tool_call_from_text(text, em_anthropic_tools)
if inferred_call is None:
inferred_calls = infer_tool_calls_from_text(text, em_anthropic_tools)
inferred_call = inferred_calls[0] if inferred_calls else None
if inferred_call is not None:
content_blocks = [
{
"type": "tool_use",
"id": inferred_call.id,
"name": inferred_call.name,
"input": inferred_call.arguments,
}
]
saw_tool_event = True
saw_pending_tool_use = True
text = ""
if not saw_tool_event and em_anthropic_tools:
retry_prompt = f"{prompt}\n\n{force_tooling_prompt(em_anthropic_choice)}"
retry_result = await inst.client.chat_complete(
retry_prompt,
model,
ask_mode,
session_id=None,
is_reply=False,
tool_config=tool_config,
)
retry_text = retry_result.get("text") or ""
parsed_calls, remaining = parse_action_blocks(retry_text, em_anthropic_tools)
if parsed_calls:
content_blocks = []
if remaining:
content_blocks.append({"type": "text", "text": remaining})
for call in parsed_calls:
content_blocks.append(
{
"type": "tool_use",
"id": call.id,
"name": call.name,
"input": call.arguments,
}
)
saw_tool_event = True
saw_pending_tool_use = True
text = remaining
else:
inferred_call = infer_declared_tool_call_from_text(retry_text, em_anthropic_tools)
if inferred_call is None:
inferred_calls = infer_tool_calls_from_text(retry_text, em_anthropic_tools)
inferred_call = inferred_calls[0] if inferred_calls else None
if inferred_call is not None:
content_blocks = [
{
"type": "tool_use",
"id": inferred_call.id,
"name": inferred_call.name,
"input": inferred_call.arguments,
}
]
saw_tool_event = True
saw_pending_tool_use = True
text = ""
if not saw_tool_event and em_anthropic_tools and text.strip():
retry_prompt = f"{prompt}\n\n{force_tooling_prompt(em_anthropic_choice)}"
retry_result = await inst.client.chat_complete(
retry_prompt,
model,
ask_mode,
session_id=None,
is_reply=False,
tool_config=tool_config,
)
retry_text = retry_result.get("text") or ""
parsed_calls, remaining = parse_action_blocks(retry_text, em_anthropic_tools)
if parsed_calls:
content_blocks = []
if remaining:
content_blocks.append({"type": "text", "text": remaining})
for call in parsed_calls:
content_blocks.append(
{
"type": "tool_use",
"id": call.id,
"name": call.name,
"input": call.arguments,
}
)
saw_tool_event = True
saw_pending_tool_use = True
text = remaining
else:
inferred_call = infer_declared_tool_call_from_text(retry_text, em_anthropic_tools)
if inferred_call is None:
inferred_calls = infer_tool_calls_from_text(retry_text, em_anthropic_tools)
inferred_call = inferred_calls[0] if inferred_calls else None
if inferred_call is not None:
content_blocks = [
{
"type": "tool_use",
"id": inferred_call.id,
"name": inferred_call.name,
"input": inferred_call.arguments,
}
]
saw_tool_event = True
saw_pending_tool_use = True
text = ""
if not saw_tool_event and forced_tool_name: if not saw_tool_event and forced_tool_name:
inferred = _extract_function_call_event_from_text( inferred = _extract_function_call_event_from_text(
text, text,

View File

@@ -3,3 +3,4 @@ uvicorn[standard]==0.30.6
websockets==13.1 websockets==13.1
pydantic==2.9.2 pydantic==2.9.2
playwright==1.52.0 playwright==1.52.0
mcp==1.12.4

117
scripts/smoke_tool_calls.sh Normal file
View File

@@ -0,0 +1,117 @@
#!/usr/bin/env bash
# End-to-end smoke test for the gateway's tool-calling endpoints.
#
# Reads PORT and API_KEYS from the .env file in the repository root (the
# parent directory of this script's directory), then issues five requests
# against the locally running gateway: the model listing plus OpenAI and
# Anthropic tool calls in non-stream and stream mode.
#
# curl runs with -f, so an HTTP error status makes the pipeline fail and,
# because of `set -euo pipefail`, aborts the script.
set -euo pipefail

ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
ENV_FILE="$ROOT_DIR/.env"

if [[ ! -f "$ENV_FILE" ]]; then
  printf 'missing .env: %s\n' "$ENV_FILE" >&2
  exit 1
fi

# The .env path is handed to Python through argv so the snippet reads the
# same file that was validated above instead of a fixed absolute path.
PORT="$(python3 - "$ENV_FILE" <<'PY'
import sys
from pathlib import Path

env = Path(sys.argv[1])
vals = {}
for line in env.read_text().splitlines():
    line = line.strip()
    if not line or line.startswith('#') or '=' not in line:
        continue
    k, v = line.split('=', 1)
    vals[k.strip()] = v.strip()
print(vals.get('PORT', '13013'))
PY
)"

# Only the first entry of the comma-separated API_KEYS list is used.
API_KEY="$(python3 - "$ENV_FILE" <<'PY'
import sys
from pathlib import Path

env = Path(sys.argv[1])
vals = {}
for line in env.read_text().splitlines():
    line = line.strip()
    if not line or line.startswith('#') or '=' not in line:
        continue
    k, v = line.split('=', 1)
    vals[k.strip()] = v.strip()
keys = vals.get('API_KEYS', '')
print(keys.split(',')[0].strip())
PY
)"

BASE_URL="http://127.0.0.1:${PORT}"

printf '\n[1/5] /v1/models\n'
curl -fsS "$BASE_URL/v1/models" \
  -H "Authorization: Bearer ${API_KEY}" | python3 -m json.tool

printf '\n[2/5] OpenAI non-stream tool call\n'
curl -fsS "$BASE_URL/v1/chat/completions" \
  -H "Authorization: Bearer ${API_KEY}" \
  -H 'Content-Type: application/json' \
  -d '{
    "model": "org_auto",
    "stream": false,
    "messages": [
      {"role": "system", "content": "Use tools when available."},
      {"role": "user", "content": "Use fetch_weather for Hangzhou and return the tool call."}
    ],
    "tools": [
      {"type": "function", "function": {"name": "fetch_weather", "description": "Get weather for a city", "parameters": {"type": "object", "properties": {"city": {"type": "string"}}, "required": ["city"]}}}
    ],
    "tool_choice": {"type": "function", "function": {"name": "fetch_weather"}}
  }' | python3 -m json.tool

printf '\n[3/5] Anthropic non-stream tool use\n'
curl -fsS "$BASE_URL/v1/messages" \
  -H "x-api-key: ${API_KEY}" \
  -H 'anthropic-version: 2023-06-01' \
  -H 'Content-Type: application/json' \
  -d '{
    "model": "claude-3-5-sonnet-20241022",
    "max_tokens": 256,
    "stream": false,
    "messages": [
      {"role": "user", "content": "Use fetch_weather for Hangzhou and return the tool call."}
    ],
    "tools": [
      {"name": "fetch_weather", "description": "Get weather for a city", "input_schema": {"type": "object", "properties": {"city": {"type": "string"}}, "required": ["city"]}}
    ],
    "tool_choice": {"type": "tool", "name": "fetch_weather"}
  }' | python3 -m json.tool

# Streaming responses are SSE, not a single JSON document, so they are
# printed raw (-N disables curl's output buffering).
printf '\n[4/5] OpenAI stream tool call\n'
curl -fsS -N "$BASE_URL/v1/chat/completions" \
  -H "Authorization: Bearer ${API_KEY}" \
  -H 'Content-Type: application/json' \
  -d '{
    "model": "org_auto",
    "stream": true,
    "messages": [
      {"role": "system", "content": "Use tools when available."},
      {"role": "user", "content": "Use fetch_weather for Hangzhou and return the tool call."}
    ],
    "tools": [
      {"type": "function", "function": {"name": "fetch_weather", "description": "Get weather for a city", "parameters": {"type": "object", "properties": {"city": {"type": "string"}}, "required": ["city"]}}}
    ],
    "tool_choice": {"type": "function", "function": {"name": "fetch_weather"}}
  }'

printf '\n[5/5] Anthropic stream tool use\n'
curl -fsS -N "$BASE_URL/v1/messages" \
  -H "x-api-key: ${API_KEY}" \
  -H 'anthropic-version: 2023-06-01' \
  -H 'Content-Type: application/json' \
  -d '{
    "model": "claude-3-5-sonnet-20241022",
    "max_tokens": 256,
    "stream": true,
    "messages": [
      {"role": "user", "content": "Use fetch_weather for Hangzhou and return the tool call."}
    ],
    "tools": [
      {"name": "fetch_weather", "description": "Get weather for a city", "input_schema": {"type": "object", "properties": {"city": {"type": "string"}}, "required": ["city"]}}
    ],
    "tool_choice": {"type": "tool", "name": "fetch_weather"}
  }'

printf '\nsmoke tool-call checks completed\n'

View File

@@ -42,6 +42,7 @@
1. 定点执行新增测试文件。 1. 定点执行新增测试文件。
2. 全量执行 `tests/``test_*.py` 2. 全量执行 `tests/``test_*.py`
3. 汇总通过率与失败项(若失败,给出定位与修复建议)。 3. 汇总通过率与失败项(若失败,给出定位与修复建议)。
4. Docker 运行态执行 `bash scripts/smoke_tool_calls.sh`,验证 OpenAI / Anthropic 的 stream / non-stream 工具调用。
## 6. 执行命令 ## 6. 执行命令
```bash ```bash
@@ -50,4 +51,5 @@ python3 -m unittest tests/test_session_cache_tooling.py
python3 -m unittest tests/test_schema_normalization.py python3 -m unittest tests/test_schema_normalization.py
python3 -m unittest tests/test_tool_call_bridge.py python3 -m unittest tests/test_tool_call_bridge.py
python3 -m unittest discover -s tests -p "test_*.py" python3 -m unittest discover -s tests -p "test_*.py"
bash scripts/smoke_tool_calls.sh
``` ```

View File

@@ -3,10 +3,12 @@ from __future__ import annotations
import json import json
import os import os
import sys import sys
import tempfile
import types import types
import unittest import unittest
from types import SimpleNamespace from types import SimpleNamespace
from unittest.mock import patch from unittest.mock import patch
import zipfile
# app.lingma_pool imports auto_login; tests here don't execute Playwright paths. # app.lingma_pool imports auto_login; tests here don't execute Playwright paths.
# Stub module import so test environments without playwright can import pool code. # Stub module import so test environments without playwright can import pool code.
@@ -28,6 +30,7 @@ sys.modules.setdefault("playwright", _playwright)
sys.modules.setdefault("playwright.async_api", _playwright_async) sys.modules.setdefault("playwright.async_api", _playwright_async)
from app.config import _parse_accounts, load_settings from app.config import _parse_accounts, load_settings
from app.bootstrap_lingma import bootstrap_from_vsix
from app.lingma_pool import LingmaPool from app.lingma_pool import LingmaPool
from app.stats import StatsCollector, estimate_tokens from app.stats import StatsCollector, estimate_tokens
@@ -212,5 +215,57 @@ class ConfigParsingTests(unittest.TestCase):
self.assertEqual(settings.tool_allowlist, []) self.assertEqual(settings.tool_allowlist, [])
class BootstrapLingmaTests(unittest.TestCase):
    """Exercises ``bootstrap_from_vsix`` against a synthetic VSIX archive."""

    def _make_test_vsix(self, root: str) -> str:
        """Create a minimal VSIX inside *root* and return its path.

        The VSIX holds a single entry, ``extension/dist/bin/lingma-2.5.20.zip``,
        which is itself a zip containing a ``Lingma`` binary and an
        ``extension/main.js`` under the ``2.5.20`` release root. The inner
        archive is also left on disk as ``nested.zip``.
        """
        inner_path = os.path.join(root, "nested.zip")
        with zipfile.ZipFile(inner_path, "w") as inner:
            inner.writestr("2.5.20/x86_64_linux/Lingma", b"new-binary")
            inner.writestr("2.5.20/extension/main.js", b"console.log('ok')")

        outer_path = os.path.join(root, "test.vsix")
        with zipfile.ZipFile(outer_path, "w") as outer, open(inner_path, "rb") as handle:
            outer.writestr("extension/dist/bin/lingma-2.5.20.zip", handle.read())
        return outer_path

    def test_bootstrap_refreshes_when_extension_assets_missing(self) -> None:
        """Bootstrap must re-extract when the release directory has no assets.

        The marker file already records version 2.5.20 and the release
        directory exists but is empty; after bootstrapping, the binary must
        hold the VSIX payload and ``extension/main.js`` must be present.
        """
        with tempfile.TemporaryDirectory() as workdir:
            bin_dir = os.path.join(workdir, "data", "bin")
            release_dir = os.path.join(bin_dir, "2.5.20")
            os.makedirs(release_dir, exist_ok=True)

            # Pre-existing (stale) binary that the bootstrap should overwrite.
            binary_path = os.path.join(bin_dir, "Lingma")
            with open(binary_path, "wb") as out:
                out.write(b"old-binary")

            marker_path = os.path.join(bin_dir, ".lingma-bootstrap.json")
            with open(marker_path, "w", encoding="utf-8") as out:
                json.dump({"version": "2.5.20", "release_root": "2.5.20"}, out)

            archive = self._make_test_vsix(workdir)
            overrides = {
                "LINGMA_BIN": binary_path,
                "LINGMA_SOURCE_TYPE": "vsix",
                "LINGMA_VSIX_URL": f"file://{archive}",
                "LINGMA_BOOTSTRAP_ALWAYS": "false",
                "LINGMA_FORCE_REFRESH": "false",
            }
            with patch.dict(os.environ, overrides, clear=False):
                bootstrap_from_vsix()

            with open(binary_path, "rb") as src:
                self.assertEqual(src.read(), b"new-binary")
            main_js = os.path.join(release_dir, "extension", "main.js")
            self.assertTrue(os.path.exists(main_js))
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()

View File

@@ -388,6 +388,169 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
{"query": "gateway"}, {"query": "gateway"},
) )
async def test_openai_non_stream_synthesizes_tool_call_from_hash_tool_call_block(
    self,
) -> None:
    """Non-stream ``#Tool Call`` text with a forced tool yields an OpenAI tool call.

    The fake upstream returns no tool events, only text holding a
    ``#Tool Call`` fenced block; ``fetch_weather`` is declared and forced
    through ``tool_choice``.
    """
    weather_tool = {
        "type": "function",
        "function": {
            "name": "fetch_weather",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }
    upstream = _FakeClient(
        stream_events=[],
        complete_result={
            "text": '#Tool Call\n```fetch_weather\n{"city": "Hangzhou"}\n```\n',
            "toolEvents": [],
            "sessionId": "sess-fallback-hash-tool-call-openai",
        },
    )
    chat_request = ChatCompletionsRequest(
        model="org_auto",
        messages=[{"role": "user", "content": "hi"}],
        stream=False,
        tools=[weather_tool],
        tool_choice={"type": "function", "function": {"name": "fetch_weather"}},
    )

    with (
        patch.object(main, "pool", _FakePool(_FakeInstance(upstream))),
        patch.object(main, "chat_guard", _FakeGuard()),
        patch.object(
            main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})
        ),
        patch.object(
            main.stats_collector, "record_chat", AsyncMock(return_value=None)
        ),
    ):
        response = await main.v1_chat_completions(
            chat_request, _make_request("/v1/chat/completions")
        )

    choice = json.loads(response.body)["choices"][0]
    reply = choice["message"]
    self.assertEqual(choice["finish_reason"], "tool_calls")
    self.assertEqual(reply["content"], "")
    function_call = reply["tool_calls"][0]["function"]
    self.assertEqual(function_call["name"], "fetch_weather")
    self.assertEqual(json.loads(function_call["arguments"]), {"city": "Hangzhou"})
async def test_openai_non_stream_synthesizes_tool_call_from_hash_tool_call_block_without_tool_choice(
    self,
) -> None:
    """Non-stream ``#Tool Call`` text yields a tool call without ``tool_choice``.

    Same upstream text as the forced-tool variant, but the request only
    declares ``fetch_weather`` in ``tools`` and sets no ``tool_choice``.
    """
    weather_tool = {
        "type": "function",
        "function": {
            "name": "fetch_weather",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }
    upstream = _FakeClient(
        stream_events=[],
        complete_result={
            "text": '#Tool Call\n```fetch_weather\n{"city": "Hangzhou"}\n```\n',
            "toolEvents": [],
            "sessionId": "sess-fallback-hash-tool-call-openai-no-choice",
        },
    )
    chat_request = ChatCompletionsRequest(
        model="org_auto",
        messages=[{"role": "user", "content": "hi"}],
        stream=False,
        tools=[weather_tool],
    )

    with (
        patch.object(main, "pool", _FakePool(_FakeInstance(upstream))),
        patch.object(main, "chat_guard", _FakeGuard()),
        patch.object(
            main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})
        ),
        patch.object(
            main.stats_collector, "record_chat", AsyncMock(return_value=None)
        ),
    ):
        response = await main.v1_chat_completions(
            chat_request, _make_request("/v1/chat/completions")
        )

    choice = json.loads(response.body)["choices"][0]
    reply = choice["message"]
    self.assertEqual(choice["finish_reason"], "tool_calls")
    self.assertEqual(reply["content"], "")
    function_call = reply["tool_calls"][0]["function"]
    self.assertEqual(function_call["name"], "fetch_weather")
    self.assertEqual(json.loads(function_call["arguments"]), {"city": "Hangzhou"})
async def test_openai_non_stream_synthesizes_tool_call_from_json_action_block(
    self,
) -> None:
    """A ``json action`` fenced block in non-stream text yields an OpenAI tool call.

    The fake upstream returns only text containing a ``json action`` block
    naming ``fetch_weather``; the request declares that tool and sets no
    ``tool_choice``.
    """
    weather_tool = {
        "type": "function",
        "function": {
            "name": "fetch_weather",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }
    upstream = _FakeClient(
        stream_events=[],
        complete_result={
            "text": '```json action\n{"tool":"fetch_weather","parameters":{"city":"Hangzhou"}}\n```',
            "toolEvents": [],
            "sessionId": "sess-action-block-openai",
        },
    )
    chat_request = ChatCompletionsRequest(
        model="org_auto",
        messages=[{"role": "user", "content": "hi"}],
        stream=False,
        tools=[weather_tool],
    )

    with (
        patch.object(main, "pool", _FakePool(_FakeInstance(upstream))),
        patch.object(main, "chat_guard", _FakeGuard()),
        patch.object(
            main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})
        ),
        patch.object(
            main.stats_collector, "record_chat", AsyncMock(return_value=None)
        ),
    ):
        response = await main.v1_chat_completions(
            chat_request, _make_request("/v1/chat/completions")
        )

    choice = json.loads(response.body)["choices"][0]
    reply = choice["message"]
    self.assertEqual(choice["finish_reason"], "tool_calls")
    self.assertEqual(reply["content"], "")
    function_call = reply["tool_calls"][0]["function"]
    self.assertEqual(function_call["name"], "fetch_weather")
    self.assertEqual(json.loads(function_call["arguments"]), {"city": "Hangzhou"})
async def test_openai_stream_synthesizes_tool_call_from_tool_code( async def test_openai_stream_synthesizes_tool_call_from_tool_code(
self, self,
) -> None: ) -> None:
@@ -439,6 +602,55 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
self.assertIn('"finish_reason": "tool_calls"', body) self.assertIn('"finish_reason": "tool_calls"', body)
self.assertIn("data: [DONE]", body) self.assertIn("data: [DONE]", body)
async def test_openai_stream_synthesizes_tool_call_from_hash_tool_call_block_without_tool_choice(
    self,
) -> None:
    """Streamed ``#Tool Call`` text split across chunks yields a tool call.

    The fake upstream emits the fenced block in three text events; the
    request declares ``fetch_weather`` and sets no ``tool_choice``. The
    collected SSE body must mention ``tool_calls``, the tool name and a
    ``tool_calls`` finish reason.
    """
    weather_tool = {
        "type": "function",
        "function": {
            "name": "fetch_weather",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }
    text_chunks = [
        {"type": "text", "text": "#Tool Call\n```fetch_weather\n"},
        {"type": "text", "text": '{"city": "Hangzhou"}\n'},
        {"type": "text", "text": "```\n"},
    ]
    upstream = _FakeClient(stream_events=text_chunks, complete_result={})
    chat_request = ChatCompletionsRequest(
        model="org_auto",
        messages=[{"role": "user", "content": "hi"}],
        stream=True,
        tools=[weather_tool],
    )

    with (
        patch.object(main, "pool", _FakePool(_FakeInstance(upstream))),
        patch.object(main, "chat_guard", _FakeGuard()),
        patch.object(
            main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})
        ),
        patch.object(
            main.stats_collector, "record_chat", AsyncMock(return_value=None)
        ),
    ):
        response = await main.v1_chat_completions(
            chat_request, _make_request("/v1/chat/completions")
        )
        sse_body = await _collect_stream(response)

    self.assertIn('"tool_calls"', sse_body)
    self.assertIn('"fetch_weather"', sse_body)
    self.assertIn('"finish_reason": "tool_calls"', sse_body)
async def test_openai_non_stream_synthesizes_tool_call_from_json_array(self) -> None: async def test_openai_non_stream_synthesizes_tool_call_from_json_array(self) -> None:
fake_client = _FakeClient( fake_client = _FakeClient(
stream_events=[], stream_events=[],
@@ -1918,6 +2130,117 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
self.assertEqual(messages_dump[3]["role"], "user") self.assertEqual(messages_dump[3]["role"], "user")
self.assertEqual(messages_dump[3]["content"], "follow up") self.assertEqual(messages_dump[3]["content"], "follow up")
async def test_openai_tool_result_is_emulated_into_followup_prompt(self) -> None:
    """A prior tool call and its result are rendered into the upstream prompt.

    The conversation holds an assistant ``tool_calls`` turn, the matching
    ``tool`` result and a user follow-up. The first positional argument
    captured by the spy client must contain the tool-result marker, the
    raw result JSON and an ``Assistant:`` marker.
    """
    weather_tool = {
        "type": "function",
        "function": {
            "name": "fetch_weather",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }
    conversation = [
        {
            "role": "assistant",
            "content": None,
            "tool_calls": [
                {
                    "id": "call_1",
                    "type": "function",
                    "function": {
                        "name": "fetch_weather",
                        "arguments": '{"city":"Hangzhou"}',
                    },
                }
            ],
        },
        {"role": "tool", "tool_call_id": "call_1", "content": '{"temperature":"22C"}'},
        {"role": "user", "content": "continue"},
    ]
    recorder = _SpyClient(
        stream_events=[],
        complete_result={
            "text": "done",
            "toolEvents": [],
            "sessionId": "sess-emulated-tool-result",
        },
    )
    chat_request = ChatCompletionsRequest(
        model="org_auto",
        messages=conversation,
        stream=False,
        tools=[weather_tool],
    )

    with (
        patch.object(main, "pool", _FakePool(_FakeInstance(recorder))),
        patch.object(main, "chat_guard", _FakeGuard()),
        patch.object(
            main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})
        ),
        patch.object(
            main.stats_collector, "record_chat", AsyncMock(return_value=None)
        ),
    ):
        await main.v1_chat_completions(
            chat_request, _make_request("/v1/chat/completions")
        )

    sent_prompt = recorder.last_complete_args[0]
    for expected in (
        "Tool result for call_1:",
        '{"temperature":"22C"}',
        "Assistant:",
    ):
        self.assertIn(expected, sent_prompt)
async def test_anthropic_non_stream_synthesizes_tool_use_from_json_action_block(
    self,
) -> None:
    """A ``json action`` block in non-stream text yields an Anthropic ``tool_use``.

    The fake upstream returns only text containing a ``json action`` block
    for ``fetch_weather``; the request declares that tool and forces it via
    ``tool_choice``. The response must stop with ``tool_use`` and carry the
    parsed input.
    """
    weather_tool = {
        "name": "fetch_weather",
        "description": "Get weather for a city",
        "input_schema": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    }
    upstream = _FakeClient(
        stream_events=[],
        complete_result={
            "text": '```json action\n{"tool":"fetch_weather","parameters":{"city":"Hangzhou"}}\n```',
            "toolEvents": [],
            "sessionId": "sess-anthropic-action-block",
        },
    )
    messages_request = AnthropicMessagesRequest(
        model="claude-3-5-sonnet-20241022",
        max_tokens=64,
        messages=[{"role": "user", "content": "weather"}],
        stream=False,
        tools=[weather_tool],
        tool_choice={"type": "tool", "name": "fetch_weather"},
    )
    auth_headers = {
        "x-api-key": "test-key",
        "anthropic-version": "2023-06-01",
    }

    with (
        patch.object(main, "pool", _FakePool(_FakeInstance(upstream))),
        patch.object(main, "chat_guard", _FakeGuard()),
        patch.object(
            main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})
        ),
        patch.object(
            main.stats_collector, "record_chat", AsyncMock(return_value=None)
        ),
        patch.object(main.settings, "api_keys", ["test-key"]),
    ):
        response = await main.v1_messages(
            messages_request,
            _make_request("/v1/messages", headers=auth_headers),
        )

    result = json.loads(response.body)
    tool_uses = [block for block in result["content"] if block["type"] == "tool_use"]
    self.assertEqual(result["stop_reason"], "tool_use")
    first_use = tool_uses[0]
    self.assertEqual(first_use["name"], "fetch_weather")
    self.assertEqual(first_use["input"], {"city": "Hangzhou"})
async def test_responses_stream_bridges_text_tool_and_completed_events( async def test_responses_stream_bridges_text_tool_and_completed_events(
self, self,
) -> None: ) -> None: