diff --git a/README.md b/README.md index 9ab3234..522ae92 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,7 @@ - Anthropic:`/v1/messages`、`/v1/messages/count_tokens`(含 stream) - 内置:多实例池、会话复用、Prometheus 指标、登录态 bundle 注入 - 工具事件桥接:Lingma 上游返回 `tool` 事件时,网关会输出为 OpenAI `tool_calls`(stream/non-stream)和 Anthropic `tool_use` / `tool_result`(stream/non-stream);请求侧 `tools` / `tool_choice` 仅在 `TOOL_FORWARD_ENABLED=true` 时透传(默认开启,可显式关闭) +- 工具模拟回退:当 Lingma 未稳定外显原生 `tool/*` 事件时,网关会把注入后的 `json action` / `#Tool Call` 等动作文本归一化为 OpenAI `tool_calls`,并支持 tool result continuation - 多模态降级:OpenAI `image_url` / `input_image` 转 `[image]`,`input_audio` 转 `[audio]`;Anthropic `image` 转 `[image]` > 架构设计与二开细节请看 [`DESIGN.md`](./DESIGN.md)。 @@ -85,6 +86,9 @@ python3 -m unittest tests/test_tool_call_bridge.py # 全量 unittest python3 -m unittest discover -s tests -p "test_*.py" + +# Docker 端到端工具调用冒烟 +bash scripts/smoke_tool_calls.sh ``` --- @@ -201,6 +205,7 @@ curl -s "http://127.0.0.1:${PORT}/healthz" | `healthz` 正常但请求失败 | 用错端口 | 以 `.env` 的 `PORT` 为准,`docker compose ps` 再确认 | | `git pull` 提示 not on a branch | 处于 detached HEAD | 执行 `git checkout -B main origin/main` | | 自动登录不稳定 | 浏览器流程波动 | 优先使用 `LINGMA_SESSION_BUNDLE(_FILE)` | +| 日志出现 `extension main js path not found` / `ExtensionApi executor not inited` | Lingma 扩展运行时未完整提取,MCP/工具执行器未初始化 | 重启容器触发 bootstrap 自愈;确认 `data/bin//extension/main.js` 已存在 | | 工具调用未触发 | 模型未选择工具或当前协议路径不支持合成回退 | OpenAI 可配合 `tool_choice` 强制并约束输出 JSON;Anthropic 当前仅 non-stream 支持合成 `tool_use` / `tool_result` 回退 | --- diff --git a/app/bootstrap_lingma.py b/app/bootstrap_lingma.py index 61f651c..82a00d7 100644 --- a/app/bootstrap_lingma.py +++ b/app/bootstrap_lingma.py @@ -3,6 +3,7 @@ from __future__ import annotations import io import json import os +import shutil import time import urllib.request import zipfile @@ -70,6 +71,15 @@ def _extract_release_tree( dst.write(src.read()) +def _release_dir_for_binary(lingma_bin: Path, release_root: str | None) -> Path: + return lingma_bin.parent / ((release_root or "").strip() or "2.5.20") + + +def _release_has_required_assets(release_dir: Path) -> bool: + extension_main = release_dir / "extension" / "main.js" + return extension_main.exists() and extension_main.is_file() + + def _query_marketplace_latest_vsix( publisher: str, extension: str ) -> tuple[str, str, dict]: @@ -168,8 +178,19 @@ def bootstrap_from_vsix() -> None: ) resolved_url = vsix_url + current_release_dir = _release_dir_for_binary( + lingma_bin, old_marker.get("release_root") if isinstance(old_marker, dict) else None + ) + release_ready = _release_has_required_assets(current_release_dir) + if lingma_bin.exists() and not release_ready: + print( + "[bootstrap] existing Lingma binary found but extension assets are incomplete; " + f"refreshing install under {current_release_dir}" + ) + if ( lingma_bin.exists() + and release_ready and not force_refresh and ( (not always_refresh) @@ -215,7 +236,8 @@ def bootstrap_from_vsix() -> None: lingma_bytes = inner_zip.read(lingma_member) release_root = _infer_release_root(lingma_member) lingma_bin.parent.mkdir(parents=True, exist_ok=True) - release_dir = lingma_bin.parent / (release_root or "2.5.20") + release_dir = _release_dir_for_binary(lingma_bin, release_root) + shutil.rmtree(release_dir, ignore_errors=True) _extract_release_tree(inner_zip, release_root, release_dir) lingma_bin.write_bytes(lingma_bytes) @@ -224,7 +246,9 @@ def bootstrap_from_vsix() -> None: if extension_main.exists(): print(f"[bootstrap] extension ready: {extension_main}") else: - print(f"[bootstrap] extension missing under: {release_dir}") + raise RuntimeError( + f"extension assets missing after extraction under: {release_dir}" + ) marker = { "source": source_type, diff --git a/app/http/tool_bridge.py b/app/http/tool_bridge.py index 6179281..0b063a4 100644 --- a/app/http/tool_bridge.py +++ b/app/http/tool_bridge.py @@ -196,6 +196,33 @@ def _extract_tool_calls_from_text(text: str) -> list[dict[str, Any]] | None: return None +def _extract_hash_tool_call_event_from_text( + text: str, + *, + forced_tool_name: str | None = None, +) -> dict[str, Any] | None: + raw = (text or "").strip() + if not raw: + return None + match = re.search( + r"#Tool Call\s*```([A-Za-z0-9_\-.]+)\s*(\{.*?\})\s*```", + raw, + flags=re.S, + ) + if not match: + return None + name = match.group(1).strip() + if forced_tool_name and name != forced_tool_name: + return None + try: + arguments = json.loads(match.group(2)) + except Exception: + return None + if not isinstance(arguments, dict): + return None + return {"name": name, "input": arguments} + + def _tool_code_single_arg_name( tools: list[dict[str, Any]] | None, forced_tool_name: str ) -> str | None: @@ -342,6 +369,45 @@ def _forced_tool_fallback_event( ) +def _declared_tool_names(tools: list[dict[str, Any]] | None) -> list[str]: + if not isinstance(tools, list): + return [] + out: list[str] = [] + for tool in tools: + name = _openai_tool_name(tool) or _anthropic_tool_name(tool) + if name and name not in out: + out.append(name) + return out + + +def _infer_tool_event_from_declared_tools( + text: str, + *, + tools: list[dict[str, Any]] | None, +) -> dict[str, Any] | None: + for tool_name in _declared_tool_names(tools): + inferred = _extract_function_call_event_from_text( + text, + forced_tool_name=tool_name, + ) + if inferred is not None: + return inferred + inferred = _extract_hash_tool_call_event_from_text( + text, + forced_tool_name=tool_name, + ) + if inferred is not None: + return inferred + inferred = _forced_tool_fallback_event( + text, + forced_tool_name=tool_name, + tools=tools, + ) + if inferred is not None: + return inferred + return None + + def _openai_tool_call( tool: dict[str, Any], *, forced_id: str | None = None ) -> dict[str, Any]: diff --git a/app/http/tool_emulation.py b/app/http/tool_emulation.py new file mode 100644 index 0000000..e9e962d --- /dev/null +++ b/app/http/tool_emulation.py @@ -0,0 +1,657 @@ +from __future__ import annotations + +import json +import re +import uuid +from dataclasses import dataclass +from typing import Any + + +@dataclass +class EmulatedToolDef: + name: str + description: str + input_schema: dict[str, Any] + + +@dataclass +class EmulatedToolChoice: + mode: str + name: str = "" + + +@dataclass +class EmulatedToolCall: + id: str + name: str + arguments: dict[str, Any] + + +def extract_openai_tools(raw: Any) -> list[EmulatedToolDef]: + if not isinstance(raw, list): + return [] + out: list[EmulatedToolDef] = [] + for item in raw: + if not isinstance(item, dict): + continue + fn = item.get("function") + if not isinstance(fn, dict): + continue + name = str(fn.get("name") or "").strip() + if not name: + continue + schema = fn.get("parameters") if isinstance(fn.get("parameters"), dict) else {} + out.append( + EmulatedToolDef( + name=name, + description=str(fn.get("description") or "").strip(), + input_schema=dict(schema), + ) + ) + return out + + +def extract_anthropic_tools(raw: Any) -> list[EmulatedToolDef]: + if not isinstance(raw, list): + return [] + out: list[EmulatedToolDef] = [] + for item in raw: + if not isinstance(item, dict): + continue + tool_type = str(item.get("type") or "").strip() + if tool_type.startswith("web_search_"): + continue + name = str(item.get("name") or "").strip() + if not name: + continue + schema = item.get("input_schema") if isinstance(item.get("input_schema"), dict) else {} + out.append( + EmulatedToolDef( + name=name, + description=str(item.get("description") or "").strip(), + input_schema=dict(schema), + ) + ) + return out + + +def extract_openai_tool_choice(raw: Any) -> EmulatedToolChoice: + if raw is None: + return EmulatedToolChoice(mode="auto") + if isinstance(raw, str): + value = raw.strip() + if value in {"", "auto"}: + return EmulatedToolChoice(mode="auto") + if value == "none": + return EmulatedToolChoice(mode="none") + if value in {"required", "any"}: + return EmulatedToolChoice(mode="any") + return EmulatedToolChoice(mode="tool", name=value) + if not isinstance(raw, dict): + return EmulatedToolChoice(mode="auto") + type_name = str(raw.get("type") or "").strip() + if type_name in {"required", "any"}: + return EmulatedToolChoice(mode="any") + if type_name in {"none"}: + return EmulatedToolChoice(mode="none") + if type_name in {"function", "tool"}: + fn = raw.get("function") + if isinstance(fn, dict): + name = str(fn.get("name") or "").strip() + if name: + return EmulatedToolChoice(mode="tool", name=name) + name = str(raw.get("name") or "").strip() + if name: + return EmulatedToolChoice(mode="tool", name=name) + return EmulatedToolChoice(mode="auto") + + +def extract_anthropic_tool_choice(raw: Any) -> EmulatedToolChoice: + if raw is None: + return EmulatedToolChoice(mode="auto") + if not isinstance(raw, dict): + return extract_openai_tool_choice(raw) + type_name = str(raw.get("type") or "").strip() + if type_name in {"", "auto"}: + return EmulatedToolChoice(mode="auto") + if type_name == "none": + return EmulatedToolChoice(mode="none") + if type_name in {"any", "required"}: + return EmulatedToolChoice(mode="any") + if type_name == "tool": + name = str(raw.get("name") or "").strip() + if name: + return EmulatedToolChoice(mode="tool", name=name) + return EmulatedToolChoice(mode="auto") + + +def has_tool_request(tools: list[EmulatedToolDef], choice: EmulatedToolChoice) -> bool: + return bool(tools) or choice.mode not in {"", "auto"} + + +def inject_tooling(system: str, tools: list[EmulatedToolDef], choice: EmulatedToolChoice) -> str: + system = system.strip() + if not tools: + return system + + tool_lines: list[str] = [] + for tool in tools: + signature = _compact_schema(tool.input_schema) + line = f"{tool.name}({signature})" + if tool.description: + line += f" - {_truncate(tool.description, 120)}" + tool_lines.append(line) + + parts = [ + "You are an AI assistant with DIRECT tool access.", + "When a request needs local files, terminal state, browser state, current web data, or another external result, use the proxy tools listed below.", + "Do not claim tools are unavailable.", + "When you need to use a tool, output exactly one or more structured action blocks in this format:", + '```json action\n{"tool":"NAME","parameters":{"key":"value"}}\n```', + "Available tools:", + "\n".join(tool_lines), + "Rules:", + "- Use ```json action``` blocks for tool calls.", + "- If a tool is needed, do not explain first; emit the action block directly.", + "- If no tool is needed, answer normally.", + "- Never say tools are unavailable.", + _force_constraint(choice), + ] + tooling = "\n\n".join(part for part in parts if part) + if not system: + return tooling + return f"{system}\n\n---\n\n{tooling}" + + +def action_output_prompt(tool_call_id: str | None, output: str) -> str: + output = (output or "").strip() + if not output: + return "" + suffix = ( + "Based on the tool result above, answer the user's request directly if you have enough information. " + "Only use another tool call if a specific missing fact still requires it." + ) + if tool_call_id and tool_call_id.strip(): + return f"Tool result for {tool_call_id.strip()}:\n{output}\n\n{suffix}" + return f"Tool result:\n{output}\n\n{suffix}" + + +def parse_action_blocks( + text: str, + tools: list[EmulatedToolDef], + *, + max_scan_bytes: int = 0, + max_tool_calls: int = 8, +) -> tuple[list[EmulatedToolCall], str]: + if not text or not text.strip(): + return [], "" + if max_scan_bytes > 0 and len(text) > max_scan_bytes: + text = text[:max_scan_bytes] + + tool_name_map = {tool.name.lower(): tool.name for tool in tools if tool.name.strip()} + tool_schema_map = {tool.name: tool.input_schema for tool in tools if tool.name.strip()} + + calls: list[EmulatedToolCall] = [] + spans: list[tuple[int, int]] = [] + seen: set[str] = set() + + for match in re.finditer(r"```json(?:\s+action)?\s*(.*?)```", text, flags=re.S | re.I): + raw = (match.group(1) or "").strip() + if not raw: + continue + parsed = _parse_tool_call_json(raw) + if parsed is None: + continue + name, arguments = parsed + normalized = _normalize_tool_name(name, tool_name_map) + schema = tool_schema_map.get(normalized) + if schema: + arguments = _filter_args_by_schema(arguments, schema) + if not _has_required_args(arguments, schema): + continue + key = _tool_call_key(normalized, arguments) + if key in seen: + spans.append(match.span()) + continue + seen.add(key) + calls.append( + EmulatedToolCall( + id=_stable_call_id(normalized, arguments), + name=normalized, + arguments=arguments, + ) + ) + spans.append(match.span()) + if len(calls) >= max_tool_calls: + break + + if not calls: + return [], text.strip() + + clean = text + for start, end in reversed(spans): + clean = clean[:start] + clean[end:] + return calls, clean.strip() + + +def looks_like_refusal(text: str) -> bool: + lowered = (text or "").strip().lower() + if not lowered: + return False + needles = [ + "tools are unavailable", + "cannot call tools", + "can't call tools", + "cannot execute", + "can't execute", + "没有可用的工具", + "工具不可用", + "不能调用工具", + "无法直接执行", + ] + return any(needle in lowered for needle in needles) + + +def looks_like_missed_tool_use(text: str) -> bool: + lowered = (text or "").strip().lower() + if not lowered: + return False + needles = [ + "let me use", + "i need to use", + "i will use", + "i need to run", + "i will run", + "我需要使用", + "让我使用", + "执行命令", + "读取文件", + "查看文件", + "查询天气", + "#tool call", + ] + return any(needle in lowered for needle in needles) + + +def infer_tool_calls_from_text( + text: str, + tools: list[EmulatedToolDef], +) -> list[EmulatedToolCall]: + if not (looks_like_refusal(text) or looks_like_missed_tool_use(text)): + return [] + direct = infer_declared_tool_call_from_text(text, tools) + return [direct] if direct is not None else [] + + +def force_tooling_prompt(choice: EmulatedToolChoice) -> str: + prompt = ( + "Your last response did not include any ```json action``` block. " + "You must respond with at least one valid action block now. " + "Select the single most appropriate available tool for the user request. " + "Do not explain. Do not say tools are unavailable. Output the action block directly." + ) + if choice.mode == "tool" and choice.name.strip(): + prompt += f' You must call "{choice.name.strip()}".' + return prompt + + +def infer_declared_tool_call_from_text( + text: str, + tools: list[EmulatedToolDef], +) -> EmulatedToolCall | None: + for tool in tools: + event = _extract_fenced_json_tool_call_event_from_text( + text, forced_tool_name=tool.name + ) + if event is None: + event = _extract_hash_tool_call_event_from_text(text, forced_tool_name=tool.name) + if event is None: + event = _extract_function_call_event_from_text(text, forced_tool_name=tool.name) + if event is None: + event = _forced_tool_fallback_event(text, forced_tool_name=tool.name, tools=tools) + if event is None: + continue + schema = tool.input_schema + arguments = dict(event.get("input") or {}) + if schema: + arguments = _filter_args_by_schema(arguments, schema) + if not _has_required_args(arguments, schema): + continue + return EmulatedToolCall( + id=_stable_call_id(tool.name, arguments), + name=tool.name, + arguments=arguments, + ) + return None + + +def openai_tool_call_from_emulated(call: EmulatedToolCall) -> dict[str, Any]: + return { + "id": call.id, + "type": "function", + "function": { + "name": call.name, + "arguments": json.dumps(call.arguments, ensure_ascii=False), + }, + } + + +def _extract_hash_tool_call_event_from_text( + text: str, + *, + forced_tool_name: str | None = None, +) -> dict[str, Any] | None: + raw = (text or "").strip() + match = re.search( + r"#Tool Call\s*```([A-Za-z0-9_\-.]+)\s*(\{.*?\})\s*```", + raw, + flags=re.S, + ) + if not match: + return None + name = match.group(1).strip() + if forced_tool_name and name != forced_tool_name: + return None + try: + arguments = json.loads(match.group(2)) + except Exception: + return None + if not isinstance(arguments, dict): + return None + return {"name": name, "input": arguments} + + +def _extract_fenced_json_tool_call_event_from_text( + text: str, + *, + forced_tool_name: str | None = None, +) -> dict[str, Any] | None: + raw = (text or "").strip() + match = re.search(r"```json(?:\s+action)?\s*(\{.*?\})\s*```", raw, flags=re.S | re.I) + if not match: + return None + try: + payload = json.loads(match.group(1)) + except Exception: + return None + if not isinstance(payload, dict): + return None + + name = str(payload.get("tool") or payload.get("name") or "").strip() + fn = payload.get("function") + if not name and isinstance(fn, dict): + name = str(fn.get("name") or "").strip() + if not name: + return None + if forced_tool_name and name != forced_tool_name: + return None + + arguments = payload.get("parameters") + if arguments is None: + arguments = payload.get("arguments") + if arguments is None: + arguments = payload.get("input") + if arguments is None and isinstance(fn, dict): + arguments = fn.get("arguments") + if isinstance(arguments, str): + try: + arguments = json.loads(arguments) + except Exception: + return None + if arguments is None: + arguments = {} + if not isinstance(arguments, dict): + return None + return {"name": name, "input": arguments} + + +def _extract_function_call_event_from_text( + text: str, + *, + forced_tool_name: str | None = None, +) -> dict[str, Any] | None: + raw = (text or "").strip() + match = re.search(r"\s*(\{.*?\})\s*", raw, flags=re.S) + if not match: + return None + try: + payload = json.loads(match.group(1)) + except Exception: + return None + if not isinstance(payload, dict): + return None + name = str(payload.get("name") or "").strip() + if not name: + return None + if forced_tool_name and name != forced_tool_name: + return None + arguments = payload.get("arguments") + if isinstance(arguments, str): + try: + arguments = json.loads(arguments) + except Exception: + return None + if arguments is None: + arguments = {} + if not isinstance(arguments, dict): + return None + return {"name": name, "input": arguments} + + +def _forced_tool_fallback_event( + text: str, + *, + forced_tool_name: str | None, + tools: list[EmulatedToolDef], +) -> dict[str, Any] | None: + if not forced_tool_name: + return None + parsed = _tool_code_object_from_text( + text, + forced_tool_name, + single_arg_name=_tool_code_single_arg_name(tools, forced_tool_name), + ) + if parsed is None: + try: + parsed = json.loads((text or "").strip()) + except Exception: + return None + if not isinstance(parsed, dict): + return None + explicit_name = parsed.get("name") or parsed.get("tool") + if explicit_name is not None and str(explicit_name) != forced_tool_name: + return None + tool_input = parsed.get("input") + if tool_input is None and "arguments" in parsed: + tool_input = parsed.get("arguments") + if isinstance(tool_input, str): + try: + tool_input = json.loads(tool_input) + except Exception: + return None + if tool_input is None: + reserved = {"name", "tool", "function", "arguments", "input", "result"} + tool_input = {k: v for k, v in parsed.items() if k not in reserved} + if not isinstance(tool_input, dict): + return None + return {"name": forced_tool_name, "input": tool_input} + + +def _tool_code_single_arg_name( + tools: list[EmulatedToolDef], forced_tool_name: str +) -> str | None: + for tool in tools: + if tool.name != forced_tool_name: + continue + properties = tool.input_schema.get("properties") + if not isinstance(properties, dict) or len(properties) != 1: + return None + only_name = next(iter(properties.keys()), None) + return only_name if isinstance(only_name, str) and only_name.strip() else None + return None + + +def _tool_code_object_from_text( + text: str, + forced_tool_name: str, + *, + single_arg_name: str | None = None, +) -> dict[str, Any] | None: + raw = (text or "").strip() + if not raw.startswith("```") or not raw.endswith("```"): + return None + lines = raw.splitlines() + if len(lines) < 2: + return None + fence = lines[0].strip().lower() + language = fence[3:].strip() + if language and language not in {"tool_code", "python", "py"}: + return None + body = "\n".join(lines[1:-1]).strip() + call_match = re.fullmatch(rf"{re.escape(forced_tool_name)}\((.*)\)", body, flags=re.S) + if not call_match: + return None + arguments_text = call_match.group(1).strip() + if not arguments_text: + return {"arguments": {}} + if single_arg_name and not re.search(r"\w+\s*=", arguments_text): + try: + value = json.loads(arguments_text) + except Exception: + value = arguments_text.strip('"\'') + return {"arguments": {single_arg_name: value}} + arguments: dict[str, Any] = {} + for part in [p.strip() for p in arguments_text.split(",") if p.strip()]: + if "=" not in part: + return None + key, value_text = part.split("=", 1) + key = key.strip() + value_text = value_text.strip() + try: + value = json.loads(value_text) + except Exception: + value = value_text.strip('"\'') + arguments[key] = value + return {"arguments": arguments} + + +def _parse_tool_call_json(raw: str) -> tuple[str, dict[str, Any]] | None: + try: + obj = json.loads(_normalize_json(raw)) + except Exception: + return None + if not isinstance(obj, dict): + return None + name = str(obj.get("tool") or obj.get("name") or "").strip() + fn = obj.get("function") + if not name and isinstance(fn, dict): + name = str(fn.get("name") or "").strip() + if not name: + return None + arguments = obj.get("parameters") + if arguments is None: + arguments = obj.get("arguments") + if arguments is None: + arguments = obj.get("input") + if arguments is None and isinstance(fn, dict): + arguments = fn.get("arguments") + if isinstance(arguments, str): + try: + arguments = json.loads(arguments) + except Exception: + arguments = {} + if arguments is None: + arguments = {k: v for k, v in obj.items() if k not in {"tool", "name"}} + if not isinstance(arguments, dict): + return None + return name, arguments + + +def _normalize_tool_name(raw: str, available: dict[str, str]) -> str: + name = raw.strip() + if not name: + return "" + exact = available.get(name.lower()) + if exact: + return exact + key = name.lower().replace("-", "_").replace(" ", "_") + aliases = { + "bash": "terminal", + "shell": "terminal", + "read": "read_file", + "grep": "search_files", + "glob": "search_files", + "edit": "patch", + "write": "write_file", + } + mapped = aliases.get(key) + if mapped and mapped in available: + return available[mapped] + return name + + +def _filter_args_by_schema(args: dict[str, Any], schema: dict[str, Any]) -> dict[str, Any]: + properties = schema.get("properties") + if not isinstance(properties, dict) or not properties: + return args + return {k: v for k, v in args.items() if k in properties} + + +def _has_required_args(args: dict[str, Any], schema: dict[str, Any]) -> bool: + required = schema.get("required") + if not isinstance(required, list): + return True + for key in required: + if not isinstance(key, str): + continue + if key not in args: + return False + value = args.get(key) + if isinstance(value, str) and not value.strip(): + return False + return True + + +def _compact_schema(schema: dict[str, Any]) -> str: + properties = schema.get("properties") + if not isinstance(properties, dict) or not properties: + return "" + required = {item for item in schema.get("required", []) if isinstance(item, str)} + parts: list[str] = [] + for key in sorted(properties.keys()): + parts.append(key if key in required else f"{key}?") + return ", ".join(parts) + + +def _truncate(text: str, max_len: int) -> str: + text = text.strip() + if len(text) <= max_len: + return text + return text[:max_len] + "..." + + +def _force_constraint(choice: EmulatedToolChoice) -> str: + if choice.mode == "any": + return "- You must output at least one ```json action``` block in this reply." + if choice.mode == "tool" and choice.name.strip(): + return f'- You must call "{choice.name.strip()}" in this reply.' + return "" + + +def _normalize_json(text: str) -> str: + return ( + text.strip() + .replace("“", '"') + .replace("”", '"') + .replace(",\n}", "\n}") + .replace(",\n]", "\n]") + ) + + +def _tool_call_key(name: str, arguments: dict[str, Any]) -> str: + return f"{name.lower()}\0{json.dumps(arguments, ensure_ascii=False, sort_keys=True)}" + + +def _stable_call_id(name: str, arguments: dict[str, Any]) -> str: + key = _tool_call_key(name, arguments) + return "call_" + uuid.uuid5(uuid.NAMESPACE_OID, key).hex[:16] diff --git a/app/lingma_client.py b/app/lingma_client.py index e50631d..1e747ff 100644 --- a/app/lingma_client.py +++ b/app/lingma_client.py @@ -419,6 +419,17 @@ class LspWsRpcClient: method = msg.get("method") params = msg.get("params") or {} + if method and ( + method.startswith("tool/") + or method.startswith("mcp/") + or method in {"chat/answer", "chat/finish"} + ): + logger.info( + "lingma server message method=%s params=%s", + method, + params, + ) + if method == "chat/answer": req_id = params.get("requestId") stream = self._chat_streams.get(req_id) diff --git a/app/main.py b/app/main.py index 75fa904..9476543 100644 --- a/app/main.py +++ b/app/main.py @@ -36,6 +36,20 @@ from .http.execution_core import ( release_execution, start_execution, ) +from .http.tool_emulation import ( + action_output_prompt, + extract_anthropic_tool_choice as _em_extract_anthropic_tool_choice, + extract_anthropic_tools as _em_extract_anthropic_tools, + extract_openai_tool_choice as _em_extract_openai_tool_choice, + extract_openai_tools as _em_extract_openai_tools, + force_tooling_prompt, + has_tool_request as _em_has_tool_request, + infer_declared_tool_call_from_text, + infer_tool_calls_from_text, + inject_tooling, + openai_tool_call_from_emulated, + parse_action_blocks, +) from .http.openai_responses import handle_responses from .http.tool_bridge import ( _allowed_stream_tool_event, @@ -44,8 +58,10 @@ from .http.tool_bridge import ( _anthropic_tool_result_block, _anthropic_tool_use_block, _extract_function_call_event_from_text, + _extract_hash_tool_call_event_from_text, _extract_tool_calls_from_text, _forced_tool_fallback_event, + _infer_tool_event_from_declared_tools, _json_string, _openai_forced_tool_name, _openai_tool_call, @@ -417,6 +433,82 @@ def _messages_to_prompt(messages: list[dict]) -> str: return "\n".join(parts).strip() +def _messages_to_emulation_prompt( + messages: list[dict[str, Any]], + *, + system_text: str, + tools: list[dict[str, Any]] | None, + tool_choice: Any, +) -> str: + filtered: list[tuple[str, str]] = [] + for message in messages: + role = str(message.get("role") or "").strip().lower() + if role in {"system", "developer"}: + continue + text = flatten_content(message.get("content")) + if role == "tool": + text = action_output_prompt(message.get("tool_call_id"), text) + role = "user" + if not text: + continue + if role not in {"user", "assistant"}: + continue + filtered.append((role, text)) + + if not filtered: + return system_text.strip() + + em_tools = _em_extract_openai_tools(tools) + em_choice = _em_extract_openai_tool_choice(tool_choice) + injected_system = inject_tooling(system_text, em_tools, em_choice) + + parts: list[str] = [] + for role, text in filtered: + label = "User" if role == "user" else "Assistant" + parts.append(f"{label}: {text}") + if injected_system: + parts.append(injected_system) + parts.append("Assistant:") + return "\n\n".join(parts).strip() + + +def _anthropic_messages_to_emulation_prompt( + messages: list[dict[str, Any]], + *, + system_text: str, + tools: list[dict[str, Any]] | None, + tool_choice: Any, +) -> str: + filtered: list[tuple[str, str]] = [] + for message in messages: + role = str(message.get("role") or "").strip().lower() + text = str(message.get("content") or "").strip() + if role == "tool": + text = action_output_prompt(message.get("tool_call_id"), text) + role = "user" + if not text: + continue + if role not in {"user", "assistant"}: + continue + filtered.append((role, text)) + + if not filtered: + return system_text.strip() + + em_tools = _em_extract_anthropic_tools(tools) + em_choice = _em_extract_anthropic_tool_choice(tool_choice) + injected_system = inject_tooling(system_text, em_tools, em_choice) + + parts: list[str] = [] + for role, text in filtered: + label = "User" if role == "user" else "Assistant" + parts.append(f"{label}: {text}") + if injected_system: + parts.append(injected_system) + parts.append("Assistant:") + return "\n\n".join(parts).strip() + + def _include_usage(stream_options: dict | None) -> bool: if not isinstance(stream_options, dict): return False @@ -525,6 +617,20 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request): is_reply = execution.is_reply include_usage = _include_usage(req.stream_options) + em_tools = _em_extract_openai_tools(req.tools) + em_choice = _em_extract_openai_tool_choice(req.tool_choice) + if _em_has_tool_request(em_tools, em_choice): + system_parts = [ + flatten_content(m.content) + for m in req.messages + if m.role in {"system", "developer"} and flatten_content(m.content) + ] + prompt = _messages_to_emulation_prompt( + messages_dump, + system_text="\n\n".join(system_parts), + tools=req.tools, + tool_choice=req.tool_choice, + ) try: started = await start_execution( @@ -709,6 +815,11 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request): merged_text, forced_tool_name=forced_tool_name, ) + if inferred is None: + inferred = _extract_hash_tool_call_event_from_text( + merged_text, + forced_tool_name=forced_tool_name, + ) if inferred is None: inferred = _forced_tool_fallback_event( merged_text, @@ -747,6 +858,72 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request): if buffered_text_parts and forced_tool_name and saw_tool_call: buffered_text_parts.clear() + if buffered_text_parts and req.tools and not saw_tool_call: + merged_text = "".join(buffered_text_parts) + inferred = _infer_tool_event_from_declared_tools( + merged_text, + tools=req.tools, + ) + if inferred is not None: + tool_id = "call_inferred_0" + tool_call_indexes[tool_id] = 0 + saw_tool_call = True + payload = { + "id": completion_id, + "object": "chat.completion.chunk", + "created": created, + "model": model, + "choices": [ + { + "index": 0, + "delta": { + "tool_calls": [ + { + "index": 0, + **_openai_tool_call( + inferred, forced_id=tool_id + ), + } + ] + }, + "finish_reason": None, + } + ], + } + buffered_text_parts.clear() + yield f"data: {json.dumps(payload, ensure_ascii=False)}\n\n" + + if buffered_text_parts and req.tools and not saw_tool_call: + merged_text = "".join(buffered_text_parts) + parsed_calls, remaining = parse_action_blocks(merged_text, em_tools) + if parsed_calls: + saw_tool_call = True + for i, call in enumerate(parsed_calls): + tool_id = call.id or f"call_inferred_{i}" + tool_call_indexes[tool_id] = i + payload = { + "id": completion_id, + "object": "chat.completion.chunk", + "created": created, + "model": model, + "choices": [ + { + "index": 0, + "delta": { + "tool_calls": [ + { + "index": i, + **openai_tool_call_from_emulated(call), + } + ] + }, + "finish_reason": None, + } + ], + } + yield f"data: {json.dumps(payload, ensure_ascii=False)}\n\n" + buffered_text_parts = [remaining] if remaining else [] + done_payload = { "id": completion_id, "object": "chat.completion.chunk", @@ -866,6 +1043,11 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request): message_content, forced_tool_name=forced_tool_name, ) + if inferred is None: + inferred = _extract_hash_tool_call_event_from_text( + message_content, + forced_tool_name=forced_tool_name, + ) if inferred is None: inferred = _forced_tool_fallback_event( message_content, @@ -878,6 +1060,59 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request): ) saw_tool_call = True message_content = "" + if not saw_tool_call and req.tools: + inferred = _infer_tool_event_from_declared_tools( + message_content, + tools=req.tools, + ) + if inferred is not None: + tool_calls.append( + _openai_tool_call(inferred, forced_id="call_inferred_0") + ) + saw_tool_call = True + message_content = "" + if not saw_tool_call and em_tools: + parsed_calls, remaining = parse_action_blocks(message_content, em_tools) + if parsed_calls: + for call in parsed_calls: + tool_calls.append(openai_tool_call_from_emulated(call)) + saw_tool_call = True + message_content = remaining + if not saw_tool_call and em_tools: + inferred_call = infer_declared_tool_call_from_text(message_content, em_tools) + if inferred_call is None: + inferred_calls = infer_tool_calls_from_text(message_content, em_tools) + inferred_call = inferred_calls[0] if inferred_calls else None + if inferred_call is not None: + tool_calls.append(openai_tool_call_from_emulated(inferred_call)) + saw_tool_call = True + message_content = "" + if not saw_tool_call and em_tools: + retry_prompt = f"{prompt}\n\n{force_tooling_prompt(em_choice)}" + retry_result = await inst.client.chat_complete( + retry_prompt, + model, + ask_mode, + session_id=None, + is_reply=False, + tool_config=tool_config, + ) + retry_text = retry_result.get("text") or "" + parsed_calls, remaining = parse_action_blocks(retry_text, em_tools) + if parsed_calls: + for call in parsed_calls: + tool_calls.append(openai_tool_call_from_emulated(call)) + saw_tool_call = True + message_content = remaining + else: + inferred_call = infer_declared_tool_call_from_text(retry_text, em_tools) + if inferred_call is None: + inferred_calls = infer_tool_calls_from_text(retry_text, em_tools) + inferred_call = inferred_calls[0] if inferred_calls else None + if inferred_call is not None: + tool_calls.append(openai_tool_call_from_emulated(inferred_call)) + saw_tool_call = True + message_content = "" response = ChatCompletionResponse( id=f"chatcmpl-{uuid.uuid4().hex}", created=int(time.time()), @@ -1049,6 +1284,16 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request): model = execution.model prompt = execution.prompt is_reply = execution.is_reply + em_anthropic_tools = _em_extract_anthropic_tools(req.tools) + em_anthropic_choice = _em_extract_anthropic_tool_choice(req.tool_choice) + if _em_has_tool_request(em_anthropic_tools, em_anthropic_choice): + system_text = flatten_anthropic_content(req.system) if req.system else "" + prompt = _anthropic_messages_to_emulation_prompt( + messages_dump, + system_text=system_text, + tools=req.tools, + tool_choice=req.tool_choice, + ) try: started = await start_execution( @@ -1090,12 +1335,14 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request): stream_meta: dict = {} max_tokens = req.max_tokens forced_tool_name = _anthropic_forced_tool_name(req.tool_choice) + aggregate_emulated_tools = bool(em_anthropic_tools) async def event_stream(_ticket=ticket, _inst=inst, _meta=stream_meta): success = False block_index = 0 text_block_open = False saw_pending_tool_use = False + buffered_text_parts: list[str] = [] try: # 1) message_start — Anthropic SDKs read this first to get # the message envelope (id/model/initial usage). @@ -1196,7 +1443,78 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request): text = _stream_text(chunk) if not text: continue - completion_tokens_holder["n"] += estimate_tokens(text) + if aggregate_emulated_tools: + buffered_text_parts.append(text) + completion_tokens_holder["n"] += estimate_tokens(text) + continue + + buffered_text_parts.append(text) + merged_text = "".join(buffered_text_parts) + + parsed_calls, remaining = parse_action_blocks( + merged_text, em_anthropic_tools + ) + if not parsed_calls: + inferred = infer_declared_tool_call_from_text( + merged_text, + em_anthropic_tools, + ) + if inferred is None: + inferred_calls = infer_tool_calls_from_text( + merged_text, + em_anthropic_tools, + ) + inferred = inferred_calls[0] if inferred_calls else None + if inferred is not None: + parsed_calls = [inferred] + remaining = "" + + if parsed_calls: + if text_block_open: + yield _sse( + "content_block_stop", + {"type": "content_block_stop", "index": block_index}, + ) + block_index += 1 + text_block_open = False + saw_pending_tool_use = True + for call in parsed_calls: + yield _sse( + "content_block_start", + { + "type": "content_block_start", + "index": block_index, + "content_block": { + "type": "tool_use", + "id": call.id, + "name": call.name, + "input": {}, + }, + }, + ) + yield _sse( + "content_block_delta", + { + "type": "content_block_delta", + "index": block_index, + "delta": { + "type": "input_json_delta", + "partial_json": json.dumps(call.arguments, ensure_ascii=False), + }, + }, + ) + yield _sse( + "content_block_stop", + {"type": "content_block_stop", "index": block_index}, + ) + block_index += 1 + buffered_text_parts = [remaining] if remaining else [] + if not buffered_text_parts: + continue + + text_to_emit = "".join(buffered_text_parts) + buffered_text_parts.clear() + completion_tokens_holder["n"] += estimate_tokens(text_to_emit) if not text_block_open: yield _sse( "content_block_start", @@ -1213,10 +1531,106 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request): { "type": "content_block_delta", "index": block_index, - "delta": {"type": "text_delta", "text": text}, + "delta": {"type": "text_delta", "text": text_to_emit}, }, ) + if aggregate_emulated_tools: + merged_text = "".join(buffered_text_parts) + parsed_calls, remaining = parse_action_blocks( + merged_text, em_anthropic_tools + ) + if not parsed_calls: + inferred = infer_declared_tool_call_from_text( + merged_text, + em_anthropic_tools, + ) + if inferred is None: + inferred_calls = infer_tool_calls_from_text( + merged_text, + em_anthropic_tools, + ) + inferred = inferred_calls[0] if inferred_calls else None + if inferred is not None: + parsed_calls = [inferred] + remaining = "" + + if parsed_calls: + if remaining.strip(): + yield _sse( + "content_block_start", + { + "type": "content_block_start", + "index": block_index, + "content_block": {"type": "text", "text": ""}, + }, + ) + yield _sse( + "content_block_delta", + { + "type": "content_block_delta", + "index": block_index, + "delta": {"type": "text_delta", "text": remaining}, + }, + ) + yield _sse( + "content_block_stop", + {"type": "content_block_stop", "index": block_index}, + ) + block_index += 1 + for call in parsed_calls: + saw_pending_tool_use = True + yield _sse( + "content_block_start", + { + "type": "content_block_start", + "index": block_index, + "content_block": { + "type": "tool_use", + "id": call.id, + "name": call.name, + "input": {}, + }, + }, + ) + yield _sse( + "content_block_delta", + { + "type": "content_block_delta", + "index": block_index, + "delta": { + "type": "input_json_delta", + "partial_json": json.dumps(call.arguments, ensure_ascii=False), + }, + }, + ) + yield _sse( + "content_block_stop", + {"type": "content_block_stop", "index": block_index}, + ) + block_index += 1 + elif merged_text.strip(): + yield _sse( + "content_block_start", + { + "type": "content_block_start", + "index": block_index, + "content_block": {"type": "text", "text": ""}, + }, + ) + yield _sse( + "content_block_delta", + { + "type": "content_block_delta", + "index": block_index, + "delta": {"type": "text_delta", "text": merged_text}, + }, + ) + yield _sse( + "content_block_stop", + {"type": "content_block_stop", "index": block_index}, + ) + if text_block_open: yield _sse( "content_block_stop", @@ -1323,6 +1737,135 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request): else: saw_pending_tool_use = True + if not saw_tool_event and em_anthropic_tools: + parsed_calls, remaining = parse_action_blocks(text, em_anthropic_tools) + if parsed_calls: + content_blocks = [] + if remaining: + content_blocks.append({"type": "text", "text": remaining}) + for call in parsed_calls: + content_blocks.append( + { + "type": "tool_use", + "id": call.id, + "name": call.name, + "input": call.arguments, + } + ) + saw_tool_event = True + saw_pending_tool_use = True + text = remaining + + if not saw_tool_event and em_anthropic_tools: + inferred_call = infer_declared_tool_call_from_text(text, em_anthropic_tools) + if inferred_call is None: + inferred_calls = infer_tool_calls_from_text(text, em_anthropic_tools) + inferred_call = inferred_calls[0] if inferred_calls else None + if inferred_call is not None: + content_blocks = [ + { + "type": "tool_use", + "id": inferred_call.id, + "name": inferred_call.name, + "input": inferred_call.arguments, + } + ] + saw_tool_event = True + saw_pending_tool_use = True + text = "" + + if not saw_tool_event and em_anthropic_tools: + retry_prompt = f"{prompt}\n\n{force_tooling_prompt(em_anthropic_choice)}" + retry_result = await inst.client.chat_complete( + retry_prompt, + model, + ask_mode, + session_id=None, + is_reply=False, + tool_config=tool_config, + ) + retry_text = retry_result.get("text") or "" + parsed_calls, remaining = parse_action_blocks(retry_text, em_anthropic_tools) + if parsed_calls: + content_blocks = [] + if remaining: + content_blocks.append({"type": "text", "text": remaining}) + for call in parsed_calls: + content_blocks.append( + { + "type": "tool_use", + "id": call.id, + "name": call.name, + "input": call.arguments, + } + ) + saw_tool_event = True + saw_pending_tool_use = True + text = remaining + else: + inferred_call = infer_declared_tool_call_from_text(retry_text, em_anthropic_tools) + if inferred_call is None: + inferred_calls = infer_tool_calls_from_text(retry_text, em_anthropic_tools) + inferred_call = inferred_calls[0] if inferred_calls else None + if inferred_call is not None: + content_blocks = [ + { + "type": "tool_use", + "id": inferred_call.id, + "name": inferred_call.name, + "input": inferred_call.arguments, + } + ] + saw_tool_event = True + saw_pending_tool_use = True + text = "" + + if not saw_tool_event and em_anthropic_tools and text.strip(): + retry_prompt = f"{prompt}\n\n{force_tooling_prompt(em_anthropic_choice)}" + retry_result = await inst.client.chat_complete( + retry_prompt, + model, + ask_mode, + session_id=None, + is_reply=False, + tool_config=tool_config, + ) + retry_text = retry_result.get("text") or "" + parsed_calls, remaining = parse_action_blocks(retry_text, em_anthropic_tools) + if parsed_calls: + content_blocks = [] + if remaining: + content_blocks.append({"type": "text", "text": remaining}) + for call in parsed_calls: + content_blocks.append( + { + "type": "tool_use", + "id": call.id, + "name": call.name, + "input": call.arguments, + } + ) + saw_tool_event = True + saw_pending_tool_use = True + text = remaining + else: + inferred_call = infer_declared_tool_call_from_text(retry_text, em_anthropic_tools) + if inferred_call is None: + inferred_calls = infer_tool_calls_from_text(retry_text, em_anthropic_tools) + inferred_call = inferred_calls[0] if inferred_calls else None + if inferred_call is not None: + content_blocks = [ + { + "type": "tool_use", + "id": inferred_call.id, + "name": inferred_call.name, + "input": inferred_call.arguments, + } + ] + saw_tool_event = True + saw_pending_tool_use = True + text = "" + if not saw_tool_event and forced_tool_name: inferred = _extract_function_call_event_from_text( text, diff --git a/requirements.txt b/requirements.txt index c141b14..e2c9c76 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ uvicorn[standard]==0.30.6 websockets==13.1 pydantic==2.9.2 playwright==1.52.0 +mcp==1.12.4 diff --git a/scripts/smoke_tool_calls.sh b/scripts/smoke_tool_calls.sh new file mode 100644 index 0000000..8d6e498 --- /dev/null +++ b/scripts/smoke_tool_calls.sh @@ -0,0 +1,117 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)" +ENV_FILE="$ROOT_DIR/.env" + +if [[ ! -f "$ENV_FILE" ]]; then + printf 'missing .env: %s\n' "$ENV_FILE" >&2 + exit 1 +fi + +PORT="$(python3 - <<'PY' +from pathlib import Path +env = Path("/root/lingma-openai-gateway/.env") +vals = {} +for line in env.read_text().splitlines(): + line = line.strip() + if not line or line.startswith('#') or '=' not in line: + continue + k, v = line.split('=', 1) + vals[k.strip()] = v.strip() +print(vals.get('PORT', '13013')) +PY +)" + +API_KEY="$(python3 - <<'PY' +from pathlib import Path +env = Path("/root/lingma-openai-gateway/.env") +vals = {} +for line in env.read_text().splitlines(): + line = line.strip() + if not line or line.startswith('#') or '=' not in line: + continue + k, v = line.split('=', 1) + vals[k.strip()] = v.strip() +keys = vals.get('API_KEYS', '') +print(keys.split(',')[0].strip()) +PY +)" + +BASE_URL="http://127.0.0.1:${PORT}" + +printf '\n[1/5] /v1/models\n' +curl -fsS "$BASE_URL/v1/models" \ + -H "Authorization: Bearer ${API_KEY}" | python3 -m json.tool + +printf '\n[2/5] OpenAI non-stream tool call\n' +curl -fsS "$BASE_URL/v1/chat/completions" \ + -H "Authorization: Bearer ${API_KEY}" \ + -H 'Content-Type: application/json' \ + -d '{ + "model": "org_auto", + "stream": false, + "messages": [ + {"role": "system", "content": "Use tools when available."}, + {"role": "user", "content": "Use fetch_weather for Hangzhou and return the tool call."} + ], + "tools": [ + {"type": "function", "function": {"name": "fetch_weather", "description": "Get weather for a city", "parameters": {"type": "object", "properties": {"city": {"type": "string"}}, "required": ["city"]}}} + ], + "tool_choice": {"type": "function", "function": {"name": "fetch_weather"}} + }' | python3 -m json.tool + +printf '\n[3/5] Anthropic non-stream tool use\n' +curl -fsS "$BASE_URL/v1/messages" \ + -H "x-api-key: ${API_KEY}" \ + -H 'anthropic-version: 2023-06-01' \ + -H 'Content-Type: application/json' \ + -d '{ + "model": "claude-3-5-sonnet-20241022", + "max_tokens": 256, + "stream": false, + "messages": [ + {"role": "user", "content": "Use fetch_weather for Hangzhou and return the tool call."} + ], + "tools": [ + {"name": "fetch_weather", "description": "Get weather for a city", "input_schema": {"type": "object", "properties": {"city": {"type": "string"}}, "required": ["city"]}} + ], + "tool_choice": {"type": "tool", "name": "fetch_weather"} + }' | python3 -m json.tool + +printf '\n[4/5] OpenAI stream tool call\n' +curl -fsS -N "$BASE_URL/v1/chat/completions" \ + -H "Authorization: Bearer ${API_KEY}" \ + -H 'Content-Type: application/json' \ + -d '{ + "model": "org_auto", + "stream": true, + "messages": [ + {"role": "system", "content": "Use tools when available."}, + {"role": "user", "content": "Use fetch_weather for Hangzhou and return the tool call."} + ], + "tools": [ + {"type": "function", "function": {"name": "fetch_weather", "description": "Get weather for a city", "parameters": {"type": "object", "properties": {"city": {"type": "string"}}, "required": ["city"]}}} + ], + "tool_choice": {"type": "function", "function": {"name": "fetch_weather"}} + }' + +printf '\n[5/5] Anthropic stream tool use\n' +curl -fsS -N "$BASE_URL/v1/messages" \ + -H "x-api-key: ${API_KEY}" \ + -H 'anthropic-version: 2023-06-01' \ + -H 'Content-Type: application/json' \ + -d '{ + "model": "claude-3-5-sonnet-20241022", + "max_tokens": 256, + "stream": true, + "messages": [ + {"role": "user", "content": "Use fetch_weather for Hangzhou and return the tool call."} + ], + "tools": [ + {"name": "fetch_weather", "description": "Get weather for a city", "input_schema": {"type": "object", "properties": {"city": {"type": "string"}}, "required": ["city"]}} + ], + "tool_choice": {"type": "tool", "name": "fetch_weather"} + }' + +printf '\nsmoke tool-call checks completed\n' diff --git a/tests/TEST_PLAN.md b/tests/TEST_PLAN.md index 2a5d8a4..e7d790e 100644 --- a/tests/TEST_PLAN.md +++ b/tests/TEST_PLAN.md @@ -42,6 +42,7 @@ 1. 定点执行新增测试文件。 2. 全量执行 `tests/` 下 `test_*.py`。 3. 汇总通过率与失败项(若失败,给出定位与修复建议)。 +4. Docker 运行态执行 `bash scripts/smoke_tool_calls.sh`,验证 OpenAI / Anthropic 的 stream / non-stream 工具调用。 ## 6. 执行命令 ```bash @@ -50,4 +51,5 @@ python3 -m unittest tests/test_session_cache_tooling.py python3 -m unittest tests/test_schema_normalization.py python3 -m unittest tests/test_tool_call_bridge.py python3 -m unittest discover -s tests -p "test_*.py" +bash scripts/smoke_tool_calls.sh ``` diff --git a/tests/test_pool_stats_config.py b/tests/test_pool_stats_config.py index 2544bf6..4f7ad48 100644 --- a/tests/test_pool_stats_config.py +++ b/tests/test_pool_stats_config.py @@ -3,10 +3,12 @@ from __future__ import annotations import json import os import sys +import tempfile import types import unittest from types import SimpleNamespace from unittest.mock import patch +import zipfile # app.lingma_pool imports auto_login; tests here don't execute Playwright paths. # Stub module import so test environments without playwright can import pool code. @@ -28,6 +30,7 @@ sys.modules.setdefault("playwright", _playwright) sys.modules.setdefault("playwright.async_api", _playwright_async) from app.config import _parse_accounts, load_settings +from app.bootstrap_lingma import bootstrap_from_vsix from app.lingma_pool import LingmaPool from app.stats import StatsCollector, estimate_tokens @@ -212,5 +215,57 @@ class ConfigParsingTests(unittest.TestCase): self.assertEqual(settings.tool_allowlist, []) +class BootstrapLingmaTests(unittest.TestCase): + def _make_test_vsix(self, root: str) -> str: + nested_zip_path = os.path.join(root, "nested.zip") + with zipfile.ZipFile(nested_zip_path, "w") as nested: + nested.writestr("2.5.20/x86_64_linux/Lingma", b"new-binary") + nested.writestr("2.5.20/extension/main.js", b"console.log('ok')") + + vsix_path = os.path.join(root, "test.vsix") + with zipfile.ZipFile(vsix_path, "w") as vsix: + with open(nested_zip_path, "rb") as nested_file: + vsix.writestr( + "extension/dist/bin/lingma-2.5.20.zip", + nested_file.read(), + ) + return vsix_path + + def test_bootstrap_refreshes_when_extension_assets_missing(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + bin_dir = os.path.join(tmpdir, "data", "bin") + release_dir = os.path.join(bin_dir, "2.5.20") + os.makedirs(release_dir, exist_ok=True) + + lingma_bin = os.path.join(bin_dir, "Lingma") + with open(lingma_bin, "wb") as f: + f.write(b"old-binary") + + marker = { + "version": "2.5.20", + "release_root": "2.5.20", + } + with open(os.path.join(bin_dir, ".lingma-bootstrap.json"), "w", encoding="utf-8") as f: + json.dump(marker, f) + + vsix_path = self._make_test_vsix(tmpdir) + + env = { + "LINGMA_BIN": lingma_bin, + "LINGMA_SOURCE_TYPE": "vsix", + "LINGMA_VSIX_URL": f"file://{vsix_path}", + "LINGMA_BOOTSTRAP_ALWAYS": "false", + "LINGMA_FORCE_REFRESH": "false", + } + with patch.dict(os.environ, env, clear=False): + bootstrap_from_vsix() + + with open(lingma_bin, "rb") as f: + self.assertEqual(f.read(), b"new-binary") + self.assertTrue( + os.path.exists(os.path.join(release_dir, "extension", "main.js")) + ) + + if __name__ == "__main__": unittest.main() diff --git a/tests/test_tool_call_bridge.py b/tests/test_tool_call_bridge.py index 84c6421..a1c2a94 100644 --- a/tests/test_tool_call_bridge.py +++ b/tests/test_tool_call_bridge.py @@ -388,6 +388,169 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase): {"query": "gateway"}, ) + async def test_openai_non_stream_synthesizes_tool_call_from_hash_tool_call_block( + self, + ) -> None: + fake_client = _FakeClient( + stream_events=[], + complete_result={ + "text": '#Tool Call\n```fetch_weather\n{"city": "Hangzhou"}\n```\n', + "toolEvents": [], + "sessionId": "sess-fallback-hash-tool-call-openai", + }, + ) + req = ChatCompletionsRequest( + model="org_auto", + messages=[{"role": "user", "content": "hi"}], + stream=False, + tools=[ + { + "type": "function", + "function": { + "name": "fetch_weather", + "parameters": { + "type": "object", + "properties": {"city": {"type": "string"}}, + "required": ["city"], + }, + }, + } + ], + tool_choice={"type": "function", "function": {"name": "fetch_weather"}}, + ) + + with ( + patch.object(main, "pool", _FakePool(_FakeInstance(fake_client))), + patch.object(main, "chat_guard", _FakeGuard()), + patch.object( + main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"}) + ), + patch.object( + main.stats_collector, "record_chat", AsyncMock(return_value=None) + ), + ): + response = await main.v1_chat_completions( + req, _make_request("/v1/chat/completions") + ) + + payload = json.loads(response.body) + message = payload["choices"][0]["message"] + self.assertEqual(payload["choices"][0]["finish_reason"], "tool_calls") + self.assertEqual(message["content"], "") + self.assertEqual(message["tool_calls"][0]["function"]["name"], "fetch_weather") + self.assertEqual( + json.loads(message["tool_calls"][0]["function"]["arguments"]), + {"city": "Hangzhou"}, + ) + + async def test_openai_non_stream_synthesizes_tool_call_from_hash_tool_call_block_without_tool_choice( + self, + ) -> None: + fake_client = _FakeClient( + stream_events=[], + complete_result={ + "text": '#Tool Call\n```fetch_weather\n{"city": "Hangzhou"}\n```\n', + "toolEvents": [], + "sessionId": "sess-fallback-hash-tool-call-openai-no-choice", + }, + ) + req = ChatCompletionsRequest( + model="org_auto", + messages=[{"role": "user", "content": "hi"}], + stream=False, + tools=[ + { + "type": "function", + "function": { + "name": "fetch_weather", + "parameters": { + "type": "object", + "properties": {"city": {"type": "string"}}, + "required": ["city"], + }, + }, + } + ], + ) + + with ( + patch.object(main, "pool", _FakePool(_FakeInstance(fake_client))), + patch.object(main, "chat_guard", _FakeGuard()), + patch.object( + main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"}) + ), + patch.object( + main.stats_collector, "record_chat", AsyncMock(return_value=None) + ), + ): + response = await main.v1_chat_completions( + req, _make_request("/v1/chat/completions") + ) + + payload = json.loads(response.body) + message = payload["choices"][0]["message"] + self.assertEqual(payload["choices"][0]["finish_reason"], "tool_calls") + self.assertEqual(message["content"], "") + self.assertEqual(message["tool_calls"][0]["function"]["name"], "fetch_weather") + self.assertEqual( + json.loads(message["tool_calls"][0]["function"]["arguments"]), + {"city": "Hangzhou"}, + ) + + async def test_openai_non_stream_synthesizes_tool_call_from_json_action_block( + self, + ) -> None: + fake_client = _FakeClient( + stream_events=[], + complete_result={ + "text": '```json action\n{"tool":"fetch_weather","parameters":{"city":"Hangzhou"}}\n```', + "toolEvents": [], + "sessionId": "sess-action-block-openai", + }, + ) + req = ChatCompletionsRequest( + model="org_auto", + messages=[{"role": "user", "content": "hi"}], + stream=False, + tools=[ + { + "type": "function", + "function": { + "name": "fetch_weather", + "parameters": { + "type": "object", + "properties": {"city": {"type": "string"}}, + "required": ["city"], + }, + }, + } + ], + ) + + with ( + patch.object(main, "pool", _FakePool(_FakeInstance(fake_client))), + patch.object(main, "chat_guard", _FakeGuard()), + patch.object( + main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"}) + ), + patch.object( + main.stats_collector, "record_chat", AsyncMock(return_value=None) + ), + ): + response = await main.v1_chat_completions( + req, _make_request("/v1/chat/completions") + ) + + payload = json.loads(response.body) + message = payload["choices"][0]["message"] + self.assertEqual(payload["choices"][0]["finish_reason"], "tool_calls") + self.assertEqual(message["content"], "") + self.assertEqual(message["tool_calls"][0]["function"]["name"], "fetch_weather") + self.assertEqual( + json.loads(message["tool_calls"][0]["function"]["arguments"]), + {"city": "Hangzhou"}, + ) + async def test_openai_stream_synthesizes_tool_call_from_tool_code( self, ) -> None: @@ -439,6 +602,55 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase): self.assertIn('"finish_reason": "tool_calls"', body) self.assertIn("data: [DONE]", body) + async def test_openai_stream_synthesizes_tool_call_from_hash_tool_call_block_without_tool_choice( + self, + ) -> None: + fake_client = _FakeClient( + stream_events=[ + {"type": "text", "text": "#Tool Call\n```fetch_weather\n"}, + {"type": "text", "text": '{"city": "Hangzhou"}\n'}, + {"type": "text", "text": "```\n"}, + ], + complete_result={}, + ) + req = ChatCompletionsRequest( + model="org_auto", + messages=[{"role": "user", "content": "hi"}], + stream=True, + tools=[ + { + "type": "function", + "function": { + "name": "fetch_weather", + "parameters": { + "type": "object", + "properties": {"city": {"type": "string"}}, + "required": ["city"], + }, + }, + } + ], + ) + + with ( + patch.object(main, "pool", _FakePool(_FakeInstance(fake_client))), + patch.object(main, "chat_guard", _FakeGuard()), + patch.object( + main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"}) + ), + patch.object( + main.stats_collector, "record_chat", AsyncMock(return_value=None) + ), + ): + response = await main.v1_chat_completions( + req, _make_request("/v1/chat/completions") + ) + body = await _collect_stream(response) + + self.assertIn('"tool_calls"', body) + self.assertIn('"fetch_weather"', body) + self.assertIn('"finish_reason": "tool_calls"', body) + async def test_openai_non_stream_synthesizes_tool_call_from_json_array(self) -> None: fake_client = _FakeClient( stream_events=[], @@ -1918,6 +2130,117 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase): self.assertEqual(messages_dump[3]["role"], "user") self.assertEqual(messages_dump[3]["content"], "follow up") + async def test_openai_tool_result_is_emulated_into_followup_prompt(self) -> None: + spy_client = _SpyClient( + stream_events=[], + complete_result={ + "text": "done", + "toolEvents": [], + "sessionId": "sess-emulated-tool-result", + }, + ) + req = ChatCompletionsRequest( + model="org_auto", + messages=[ + {"role": "assistant", "content": None, "tool_calls": [{ + "id": "call_1", + "type": "function", + "function": {"name": "fetch_weather", "arguments": '{"city":"Hangzhou"}'}, + }]}, + {"role": "tool", "tool_call_id": "call_1", "content": '{"temperature":"22C"}'}, + {"role": "user", "content": "continue"}, + ], + stream=False, + tools=[ + { + "type": "function", + "function": { + "name": "fetch_weather", + "parameters": { + "type": "object", + "properties": {"city": {"type": "string"}}, + "required": ["city"], + }, + }, + } + ], + ) + + with ( + patch.object(main, "pool", _FakePool(_FakeInstance(spy_client))), + patch.object(main, "chat_guard", _FakeGuard()), + patch.object( + main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"}) + ), + patch.object( + main.stats_collector, "record_chat", AsyncMock(return_value=None) + ), + ): + await main.v1_chat_completions(req, _make_request("/v1/chat/completions")) + + prompt = spy_client.last_complete_args[0] + self.assertIn("Tool result for call_1:", prompt) + self.assertIn('{"temperature":"22C"}', prompt) + self.assertIn("Assistant:", prompt) + + async def test_anthropic_non_stream_synthesizes_tool_use_from_json_action_block( + self, + ) -> None: + fake_client = _FakeClient( + stream_events=[], + complete_result={ + "text": '```json action\n{"tool":"fetch_weather","parameters":{"city":"Hangzhou"}}\n```', + "toolEvents": [], + "sessionId": "sess-anthropic-action-block", + }, + ) + req = AnthropicMessagesRequest( + model="claude-3-5-sonnet-20241022", + max_tokens=64, + messages=[{"role": "user", "content": "weather"}], + stream=False, + tools=[ + { + "name": "fetch_weather", + "description": "Get weather for a city", + "input_schema": { + "type": "object", + "properties": {"city": {"type": "string"}}, + "required": ["city"], + }, + } + ], + tool_choice={"type": "tool", "name": "fetch_weather"}, + ) + + with ( + patch.object(main, "pool", _FakePool(_FakeInstance(fake_client))), + patch.object(main, "chat_guard", _FakeGuard()), + patch.object( + main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"}) + ), + patch.object( + main.stats_collector, "record_chat", AsyncMock(return_value=None) + ), + patch.object(main.settings, "api_keys", ["test-key"]), + ): + response = await main.v1_messages( + req, + _make_request( + "/v1/messages", + headers={ + "x-api-key": "test-key", + "anthropic-version": "2023-06-01", + }, + ), + ) + + payload = json.loads(response.body) + tool_blocks = [item for item in payload["content"] if item["type"] == "tool_use"] + self.assertEqual(payload["stop_reason"], "tool_use") + self.assertEqual(tool_blocks[0]["name"], "fetch_weather") + self.assertEqual(tool_blocks[0]["input"], {"city": "Hangzhou"}) + async def test_responses_stream_bridges_text_tool_and_completed_events( self, ) -> None: