feat: add emulated tool-calling bridge for Lingma

Add a proxy-side tool emulation layer so Lingma requests can surface stable OpenAI tool_calls and Anthropic tool_use blocks even when upstream tool events are missing or inconsistent.

Constraint: Keep native Lingma tool event bridging as the first path and layer emulation as a fallback

Rejected: Depend exclusively on Lingma native tool/invoke events | tool visibility remains inconsistent across models and transports

Confidence: high

Scope-risk: moderate
This commit is contained in:
mmc
2026-05-07 18:10:01 +08:00
parent 5911e4322e
commit 94a8025ae5
11 changed files with 1808 additions and 4 deletions

View File

@@ -3,6 +3,7 @@ from __future__ import annotations
import io
import json
import os
import shutil
import time
import urllib.request
import zipfile
@@ -70,6 +71,15 @@ def _extract_release_tree(
dst.write(src.read())
def _release_dir_for_binary(lingma_bin: Path, release_root: str | None) -> Path:
return lingma_bin.parent / ((release_root or "").strip() or "2.5.20")
def _release_has_required_assets(release_dir: Path) -> bool:
extension_main = release_dir / "extension" / "main.js"
return extension_main.exists() and extension_main.is_file()
def _query_marketplace_latest_vsix(
publisher: str, extension: str
) -> tuple[str, str, dict]:
@@ -168,8 +178,19 @@ def bootstrap_from_vsix() -> None:
)
resolved_url = vsix_url
current_release_dir = _release_dir_for_binary(
lingma_bin, old_marker.get("release_root") if isinstance(old_marker, dict) else None
)
release_ready = _release_has_required_assets(current_release_dir)
if lingma_bin.exists() and not release_ready:
print(
"[bootstrap] existing Lingma binary found but extension assets are incomplete; "
f"refreshing install under {current_release_dir}"
)
if (
lingma_bin.exists()
and release_ready
and not force_refresh
and (
(not always_refresh)
@@ -215,7 +236,8 @@ def bootstrap_from_vsix() -> None:
lingma_bytes = inner_zip.read(lingma_member)
release_root = _infer_release_root(lingma_member)
lingma_bin.parent.mkdir(parents=True, exist_ok=True)
release_dir = lingma_bin.parent / (release_root or "2.5.20")
release_dir = _release_dir_for_binary(lingma_bin, release_root)
shutil.rmtree(release_dir, ignore_errors=True)
_extract_release_tree(inner_zip, release_root, release_dir)
lingma_bin.write_bytes(lingma_bytes)
@@ -224,7 +246,9 @@ def bootstrap_from_vsix() -> None:
if extension_main.exists():
print(f"[bootstrap] extension ready: {extension_main}")
else:
print(f"[bootstrap] extension missing under: {release_dir}")
raise RuntimeError(
f"extension assets missing after extraction under: {release_dir}"
)
marker = {
"source": source_type,

View File

@@ -196,6 +196,33 @@ def _extract_tool_calls_from_text(text: str) -> list[dict[str, Any]] | None:
return None
def _extract_hash_tool_call_event_from_text(
text: str,
*,
forced_tool_name: str | None = None,
) -> dict[str, Any] | None:
raw = (text or "").strip()
if not raw:
return None
match = re.search(
r"#Tool Call\s*```([A-Za-z0-9_\-.]+)\s*(\{.*?\})\s*```",
raw,
flags=re.S,
)
if not match:
return None
name = match.group(1).strip()
if forced_tool_name and name != forced_tool_name:
return None
try:
arguments = json.loads(match.group(2))
except Exception:
return None
if not isinstance(arguments, dict):
return None
return {"name": name, "input": arguments}
def _tool_code_single_arg_name(
tools: list[dict[str, Any]] | None, forced_tool_name: str
) -> str | None:
@@ -342,6 +369,45 @@ def _forced_tool_fallback_event(
)
def _declared_tool_names(tools: list[dict[str, Any]] | None) -> list[str]:
if not isinstance(tools, list):
return []
out: list[str] = []
for tool in tools:
name = _openai_tool_name(tool) or _anthropic_tool_name(tool)
if name and name not in out:
out.append(name)
return out
def _infer_tool_event_from_declared_tools(
    text: str,
    *,
    tools: list[dict[str, Any]] | None,
) -> dict[str, Any] | None:
    """Try to read a tool call for any declared tool out of plain model text.

    For each declared tool name, in declaration order, the extractors are
    tried in this order: ``<function_calls>`` payload, ``#Tool Call`` block,
    forced-tool fallback. The first event found is returned.
    """
    for tool_name in _declared_tool_names(tools):
        event = _extract_function_call_event_from_text(
            text,
            forced_tool_name=tool_name,
        )
        if event is None:
            event = _extract_hash_tool_call_event_from_text(
                text,
                forced_tool_name=tool_name,
            )
        if event is None:
            event = _forced_tool_fallback_event(
                text,
                forced_tool_name=tool_name,
                tools=tools,
            )
        if event is not None:
            return event
    return None
def _openai_tool_call(
tool: dict[str, Any], *, forced_id: str | None = None
) -> dict[str, Any]:

657
app/http/tool_emulation.py Normal file
View File

@@ -0,0 +1,657 @@
from __future__ import annotations
import json
import re
import uuid
from dataclasses import dataclass
from typing import Any
@dataclass
class EmulatedToolDef:
    """Tool declaration normalized from an OpenAI- or Anthropic-style request."""

    # Tool name as declared by the client (stripped, never empty when built
    # by the extract_* helpers in this module).
    name: str
    # Free-text description; empty string when the client supplied none.
    description: str
    # Schema dict for the tool arguments; empty dict when none was supplied.
    input_schema: dict[str, Any]
@dataclass
class EmulatedToolChoice:
    """Normalized tool-choice directive shared by both request dialects."""

    # One of "auto", "none", "any" or "tool" when produced by the
    # extract_*_tool_choice helpers in this module.
    mode: str
    # Name of the forced tool; only populated for mode == "tool".
    name: str = ""
@dataclass
class EmulatedToolCall:
    """A tool invocation recovered from model text output."""

    # Call identifier handed back to the client.
    id: str
    # Name of the tool being invoked.
    name: str
    # Parsed argument object for the invocation.
    arguments: dict[str, Any]
def extract_openai_tools(raw: Any) -> list[EmulatedToolDef]:
    """Convert an OpenAI ``tools`` array into :class:`EmulatedToolDef` items.

    Entries that are not dicts, lack a ``function`` dict, or have a blank
    function name are skipped. A non-list input yields an empty list.
    """
    if not isinstance(raw, list):
        return []
    definitions: list[EmulatedToolDef] = []
    for entry in raw:
        function = entry.get("function") if isinstance(entry, dict) else None
        if not isinstance(function, dict):
            continue
        tool_name = str(function.get("name") or "").strip()
        if not tool_name:
            continue
        parameters = function.get("parameters")
        definitions.append(
            EmulatedToolDef(
                name=tool_name,
                description=str(function.get("description") or "").strip(),
                # Shallow copy so the request payload is not shared.
                input_schema=dict(parameters) if isinstance(parameters, dict) else {},
            )
        )
    return definitions
def extract_anthropic_tools(raw: Any) -> list[EmulatedToolDef]:
    """Convert an Anthropic ``tools`` array into :class:`EmulatedToolDef` items.

    Non-dict entries, entries whose ``type`` starts with ``web_search_`` and
    entries with a blank name are skipped. A non-list input yields an empty
    list.
    """
    if not isinstance(raw, list):
        return []
    definitions: list[EmulatedToolDef] = []
    for entry in raw:
        if not isinstance(entry, dict):
            continue
        if str(entry.get("type") or "").strip().startswith("web_search_"):
            continue
        tool_name = str(entry.get("name") or "").strip()
        if not tool_name:
            continue
        schema = entry.get("input_schema")
        definitions.append(
            EmulatedToolDef(
                name=tool_name,
                description=str(entry.get("description") or "").strip(),
                # Shallow copy so the request payload is not shared.
                input_schema=dict(schema) if isinstance(schema, dict) else {},
            )
        )
    return definitions
def extract_openai_tool_choice(raw: Any) -> EmulatedToolChoice:
    """Normalize an OpenAI ``tool_choice`` value.

    Strings map as: blank/``auto`` -> auto, ``none`` -> none,
    ``required``/``any`` -> any, anything else -> that tool by name.
    Dicts are read by ``type``; a ``function``/``tool`` dict takes its name
    from ``function.name`` first and the top-level ``name`` second.
    Everything unrecognized falls back to auto.
    """
    if isinstance(raw, str):
        keyword = raw.strip()
        if keyword in ("", "auto"):
            return EmulatedToolChoice(mode="auto")
        if keyword == "none":
            return EmulatedToolChoice(mode="none")
        if keyword in ("required", "any"):
            return EmulatedToolChoice(mode="any")
        return EmulatedToolChoice(mode="tool", name=keyword)
    if not isinstance(raw, dict):
        # Covers None as well as any unsupported type.
        return EmulatedToolChoice(mode="auto")

    kind = str(raw.get("type") or "").strip()
    if kind in ("required", "any"):
        return EmulatedToolChoice(mode="any")
    if kind == "none":
        return EmulatedToolChoice(mode="none")
    if kind in ("function", "tool"):
        function = raw.get("function")
        nested = str(function.get("name") or "").strip() if isinstance(function, dict) else ""
        chosen = nested or str(raw.get("name") or "").strip()
        if chosen:
            return EmulatedToolChoice(mode="tool", name=chosen)
    return EmulatedToolChoice(mode="auto")
def extract_anthropic_tool_choice(raw: Any) -> EmulatedToolChoice:
    """Normalize an Anthropic ``tool_choice`` value.

    ``None`` means auto; any non-dict value is delegated to
    :func:`extract_openai_tool_choice`. Dicts are read by ``type``; a
    ``tool`` dict without a name falls back to auto.
    """
    if raw is None:
        return EmulatedToolChoice(mode="auto")
    if not isinstance(raw, dict):
        return extract_openai_tool_choice(raw)

    kind = str(raw.get("type") or "").strip()
    if kind == "none":
        return EmulatedToolChoice(mode="none")
    if kind in ("any", "required"):
        return EmulatedToolChoice(mode="any")
    if kind == "tool":
        chosen = str(raw.get("name") or "").strip()
        if chosen:
            return EmulatedToolChoice(mode="tool", name=chosen)
    # Blank, "auto", unnamed "tool" and unknown types all mean auto.
    return EmulatedToolChoice(mode="auto")
def has_tool_request(tools: list[EmulatedToolDef], choice: EmulatedToolChoice) -> bool:
    """Return True when tools are declared or the choice mode is not blank/auto."""
    if tools:
        return True
    return choice.mode not in {"", "auto"}
def inject_tooling(system: str, tools: list[EmulatedToolDef], choice: EmulatedToolChoice) -> str:
    """Append the emulated tool-calling instructions to a system prompt.

    Args:
        system: Caller-supplied system prompt; surrounding whitespace is stripped.
        tools: Tools to advertise to the model.
        choice: Normalized tool choice. ``"any"`` and ``"tool"`` add a forcing
            rule; ``"none"`` suppresses the tooling section entirely.

    Returns:
        The stripped ``system`` alone when there is nothing to advertise;
        otherwise ``system``, a ``---`` separator and the tooling instructions
        (or only the instructions when ``system`` is empty).
    """
    system = system.strip()
    # tool_choice "none" forbids tool use, so advertising tools and telling the
    # model to emit action blocks would contradict the request.
    if not tools or choice.mode == "none":
        return system

    tool_lines: list[str] = []
    for tool in tools:
        line = f"{tool.name}({_compact_schema(tool.input_schema)})"
        if tool.description:
            line += f" - {_truncate(tool.description, 120)}"
        tool_lines.append(line)

    parts = [
        "You are an AI assistant with DIRECT tool access.",
        "When a request needs local files, terminal state, browser state, current web data, or another external result, use the proxy tools listed below.",
        "Do not claim tools are unavailable.",
        "When you need to use a tool, output exactly one or more structured action blocks in this format:",
        '```json action\n{"tool":"NAME","parameters":{"key":"value"}}\n```',
        "Available tools:",
        "\n".join(tool_lines),
        "Rules:",
        "- Use ```json action``` blocks for tool calls.",
        "- If a tool is needed, do not explain first; emit the action block directly.",
        "- If no tool is needed, answer normally.",
        "- Never say tools are unavailable.",
        _force_constraint(choice),
    ]
    # Empty entries (e.g. no forcing rule) are dropped before joining.
    tooling = "\n\n".join(part for part in parts if part)
    if not system:
        return tooling
    return f"{system}\n\n---\n\n{tooling}"
def action_output_prompt(tool_call_id: str | None, output: str) -> str:
    """Render a tool result as a user-style prompt segment.

    Returns an empty string for blank output. The call id is included in the
    header only when it is non-blank.
    """
    body = (output or "").strip()
    if not body:
        return ""
    call_id = tool_call_id.strip() if tool_call_id else ""
    header = f"Tool result for {call_id}:" if call_id else "Tool result:"
    guidance = (
        "Based on the tool result above, answer the user's request directly if you have enough information. "
        "Only use another tool call if a specific missing fact still requires it."
    )
    return f"{header}\n{body}\n\n{guidance}"
def parse_action_blocks(
    text: str,
    tools: list[EmulatedToolDef],
    *,
    max_scan_bytes: int = 0,
    max_tool_calls: int = 8,
) -> tuple[list[EmulatedToolCall], str]:
    """Extract emulated tool calls from fenced ```` ```json ```` blocks.

    Args:
        text: Raw model output.
        tools: Declared tools. When non-empty, blocks naming a tool that is
            not declared (after alias normalization) are left in the text
            instead of being surfaced as calls the client cannot execute.
        max_scan_bytes: When positive, only this many leading characters of
            ``text`` are considered.
        max_tool_calls: Scanning stops once this many calls are collected.

    Returns:
        ``(calls, remaining_text)``. Accepted blocks and exact duplicates are
        removed from the remaining text; the text is stripped.
    """
    if not text or not text.strip():
        return [], ""
    if max_scan_bytes > 0 and len(text) > max_scan_bytes:
        text = text[:max_scan_bytes]

    tool_name_map = {tool.name.lower(): tool.name for tool in tools if tool.name.strip()}
    tool_schema_map = {tool.name: tool.input_schema for tool in tools if tool.name.strip()}

    calls: list[EmulatedToolCall] = []
    spans: list[tuple[int, int]] = []
    seen: set[str] = set()
    for match in re.finditer(r"```json(?:\s+action)?\s*(.*?)```", text, flags=re.S | re.I):
        raw = (match.group(1) or "").strip()
        if not raw:
            continue
        parsed = _parse_tool_call_json(raw)
        if parsed is None:
            continue
        name, arguments = parsed
        normalized = _normalize_tool_name(name, tool_name_map)
        # An ordinary JSON snippet with a "name" key (e.g. a config sample)
        # must not turn into a call to a tool the client never declared.
        if tool_schema_map and normalized not in tool_schema_map:
            continue
        schema = tool_schema_map.get(normalized)
        if schema:
            arguments = _filter_args_by_schema(arguments, schema)
            if not _has_required_args(arguments, schema):
                continue
        key = _tool_call_key(normalized, arguments)
        if key in seen:
            # Duplicate call: drop the block from the text, emit nothing.
            spans.append(match.span())
            continue
        seen.add(key)
        calls.append(
            EmulatedToolCall(
                id=_stable_call_id(normalized, arguments),
                name=normalized,
                arguments=arguments,
            )
        )
        spans.append(match.span())
        if len(calls) >= max_tool_calls:
            break

    if not calls:
        return [], text.strip()

    clean = text
    # Cut from the end so earlier offsets stay valid.
    for start, end in reversed(spans):
        clean = clean[:start] + clean[end:]
    return calls, clean.strip()
def looks_like_refusal(text: str) -> bool:
    """Return True when *text* contains a known "I cannot use tools" phrase.

    Matching is a case-insensitive substring test against a fixed English and
    Chinese phrase list.
    """
    haystack = (text or "").strip().lower()
    if not haystack:
        return False
    phrases = (
        "tools are unavailable",
        "cannot call tools",
        "can't call tools",
        "cannot execute",
        "can't execute",
        "没有可用的工具",
        "工具不可用",
        "不能调用工具",
        "无法直接执行",
    )
    for phrase in phrases:
        if phrase in haystack:
            return True
    return False
def looks_like_missed_tool_use(text: str) -> bool:
    """Return True when *text* announces a tool use without a structured call.

    Matching is a case-insensitive substring test against a fixed English and
    Chinese phrase list (including the ``#tool call`` marker).
    """
    haystack = (text or "").strip().lower()
    if not haystack:
        return False
    phrases = (
        "let me use",
        "i need to use",
        "i will use",
        "i need to run",
        "i will run",
        "我需要使用",
        "让我使用",
        "执行命令",
        "读取文件",
        "查看文件",
        "查询天气",
        "#tool call",
    )
    for phrase in phrases:
        if phrase in haystack:
            return True
    return False
def infer_tool_calls_from_text(
    text: str,
    tools: list[EmulatedToolDef],
) -> list[EmulatedToolCall]:
    """Infer at most one tool call from text that reads like a refusal or a
    missed tool use; return an empty list otherwise."""
    suspicious = looks_like_refusal(text) or looks_like_missed_tool_use(text)
    if not suspicious:
        return []
    inferred = infer_declared_tool_call_from_text(text, tools)
    if inferred is None:
        return []
    return [inferred]
def force_tooling_prompt(choice: EmulatedToolChoice) -> str:
    """Build the retry instruction sent when a reply contained no action block.

    When *choice* forces a named tool, a sentence requiring that tool is
    appended.
    """
    sentences = [
        "Your last response did not include any ```json action``` block. "
        "You must respond with at least one valid action block now. "
        "Select the single most appropriate available tool for the user request. "
        "Do not explain. Do not say tools are unavailable. Output the action block directly."
    ]
    forced = choice.name.strip() if choice.mode == "tool" else ""
    if forced:
        sentences.append(f' You must call "{forced}".')
    return "".join(sentences)
def infer_declared_tool_call_from_text(
    text: str,
    tools: list[EmulatedToolDef],
) -> EmulatedToolCall | None:
    """Find the first declared tool for which *text* yields a usable call.

    For every tool, in order, the extractors run as: fenced JSON block,
    ``#Tool Call`` block, ``<function_calls>`` payload, forced-tool fallback.
    When the tool has a schema, arguments are filtered by it and the tool is
    skipped if required arguments are missing.
    """
    extractors = (
        lambda name: _extract_fenced_json_tool_call_event_from_text(text, forced_tool_name=name),
        lambda name: _extract_hash_tool_call_event_from_text(text, forced_tool_name=name),
        lambda name: _extract_function_call_event_from_text(text, forced_tool_name=name),
        lambda name: _forced_tool_fallback_event(text, forced_tool_name=name, tools=tools),
    )
    for tool in tools:
        event = None
        for extract in extractors:
            event = extract(tool.name)
            if event is not None:
                break
        if event is None:
            continue
        call_args = dict(event.get("input") or {})
        if tool.input_schema:
            call_args = _filter_args_by_schema(call_args, tool.input_schema)
            if not _has_required_args(call_args, tool.input_schema):
                continue
        return EmulatedToolCall(
            id=_stable_call_id(tool.name, call_args),
            name=tool.name,
            arguments=call_args,
        )
    return None
def openai_tool_call_from_emulated(call: EmulatedToolCall) -> dict[str, Any]:
    """Render an emulated call as an OpenAI ``tool_calls`` entry.

    The arguments are serialized to a JSON string with non-ASCII characters
    kept as-is.
    """
    serialized = json.dumps(call.arguments, ensure_ascii=False)
    function_part = {"name": call.name, "arguments": serialized}
    return {"id": call.id, "type": "function", "function": function_part}
def _extract_hash_tool_call_event_from_text(
text: str,
*,
forced_tool_name: str | None = None,
) -> dict[str, Any] | None:
raw = (text or "").strip()
match = re.search(
r"#Tool Call\s*```([A-Za-z0-9_\-.]+)\s*(\{.*?\})\s*```",
raw,
flags=re.S,
)
if not match:
return None
name = match.group(1).strip()
if forced_tool_name and name != forced_tool_name:
return None
try:
arguments = json.loads(match.group(2))
except Exception:
return None
if not isinstance(arguments, dict):
return None
return {"name": name, "input": arguments}
def _extract_fenced_json_tool_call_event_from_text(
text: str,
*,
forced_tool_name: str | None = None,
) -> dict[str, Any] | None:
raw = (text or "").strip()
match = re.search(r"```json(?:\s+action)?\s*(\{.*?\})\s*```", raw, flags=re.S | re.I)
if not match:
return None
try:
payload = json.loads(match.group(1))
except Exception:
return None
if not isinstance(payload, dict):
return None
name = str(payload.get("tool") or payload.get("name") or "").strip()
fn = payload.get("function")
if not name and isinstance(fn, dict):
name = str(fn.get("name") or "").strip()
if not name:
return None
if forced_tool_name and name != forced_tool_name:
return None
arguments = payload.get("parameters")
if arguments is None:
arguments = payload.get("arguments")
if arguments is None:
arguments = payload.get("input")
if arguments is None and isinstance(fn, dict):
arguments = fn.get("arguments")
if isinstance(arguments, str):
try:
arguments = json.loads(arguments)
except Exception:
return None
if arguments is None:
arguments = {}
if not isinstance(arguments, dict):
return None
return {"name": name, "input": arguments}
def _extract_function_call_event_from_text(
text: str,
*,
forced_tool_name: str | None = None,
) -> dict[str, Any] | None:
raw = (text or "").strip()
match = re.search(r"<function_calls>\s*(\{.*?\})\s*</function_calls>", raw, flags=re.S)
if not match:
return None
try:
payload = json.loads(match.group(1))
except Exception:
return None
if not isinstance(payload, dict):
return None
name = str(payload.get("name") or "").strip()
if not name:
return None
if forced_tool_name and name != forced_tool_name:
return None
arguments = payload.get("arguments")
if isinstance(arguments, str):
try:
arguments = json.loads(arguments)
except Exception:
return None
if arguments is None:
arguments = {}
if not isinstance(arguments, dict):
return None
return {"name": name, "input": arguments}
def _forced_tool_fallback_event(
text: str,
*,
forced_tool_name: str | None,
tools: list[EmulatedToolDef],
) -> dict[str, Any] | None:
if not forced_tool_name:
return None
parsed = _tool_code_object_from_text(
text,
forced_tool_name,
single_arg_name=_tool_code_single_arg_name(tools, forced_tool_name),
)
if parsed is None:
try:
parsed = json.loads((text or "").strip())
except Exception:
return None
if not isinstance(parsed, dict):
return None
explicit_name = parsed.get("name") or parsed.get("tool")
if explicit_name is not None and str(explicit_name) != forced_tool_name:
return None
tool_input = parsed.get("input")
if tool_input is None and "arguments" in parsed:
tool_input = parsed.get("arguments")
if isinstance(tool_input, str):
try:
tool_input = json.loads(tool_input)
except Exception:
return None
if tool_input is None:
reserved = {"name", "tool", "function", "arguments", "input", "result"}
tool_input = {k: v for k, v in parsed.items() if k not in reserved}
if not isinstance(tool_input, dict):
return None
return {"name": forced_tool_name, "input": tool_input}
def _tool_code_single_arg_name(
tools: list[EmulatedToolDef], forced_tool_name: str
) -> str | None:
for tool in tools:
if tool.name != forced_tool_name:
continue
properties = tool.input_schema.get("properties")
if not isinstance(properties, dict) or len(properties) != 1:
return None
only_name = next(iter(properties.keys()), None)
return only_name if isinstance(only_name, str) and only_name.strip() else None
return None
def _tool_code_object_from_text(
text: str,
forced_tool_name: str,
*,
single_arg_name: str | None = None,
) -> dict[str, Any] | None:
raw = (text or "").strip()
if not raw.startswith("```") or not raw.endswith("```"):
return None
lines = raw.splitlines()
if len(lines) < 2:
return None
fence = lines[0].strip().lower()
language = fence[3:].strip()
if language and language not in {"tool_code", "python", "py"}:
return None
body = "\n".join(lines[1:-1]).strip()
call_match = re.fullmatch(rf"{re.escape(forced_tool_name)}\((.*)\)", body, flags=re.S)
if not call_match:
return None
arguments_text = call_match.group(1).strip()
if not arguments_text:
return {"arguments": {}}
if single_arg_name and not re.search(r"\w+\s*=", arguments_text):
try:
value = json.loads(arguments_text)
except Exception:
value = arguments_text.strip('"\'')
return {"arguments": {single_arg_name: value}}
arguments: dict[str, Any] = {}
for part in [p.strip() for p in arguments_text.split(",") if p.strip()]:
if "=" not in part:
return None
key, value_text = part.split("=", 1)
key = key.strip()
value_text = value_text.strip()
try:
value = json.loads(value_text)
except Exception:
value = value_text.strip('"\'')
arguments[key] = value
return {"arguments": arguments}
def _parse_tool_call_json(raw: str) -> tuple[str, dict[str, Any]] | None:
    """Decode one action-block payload into ``(tool_name, arguments)``.

    The name comes from ``tool``, ``name`` or ``function.name``. Arguments
    come from ``parameters``, ``arguments``, ``input`` or
    ``function.arguments``; a string value is decoded as JSON and becomes
    ``{}`` if that fails. With no argument key at all, every key other than
    ``tool``/``name`` is treated as an argument. Returns ``None`` when the
    payload is not a JSON object, has no name, or the arguments are not an
    object.
    """
    try:
        payload = json.loads(_normalize_json(raw))
    except Exception:
        return None
    if not isinstance(payload, dict):
        return None

    function = payload.get("function")
    nested = function if isinstance(function, dict) else None

    tool_name = str(payload.get("tool") or payload.get("name") or "").strip()
    if not tool_name and nested is not None:
        tool_name = str(nested.get("name") or "").strip()
    if not tool_name:
        return None

    call_args = None
    for key in ("parameters", "arguments", "input"):
        call_args = payload.get(key)
        if call_args is not None:
            break
    if call_args is None and nested is not None:
        call_args = nested.get("arguments")
    if isinstance(call_args, str):
        try:
            call_args = json.loads(call_args)
        except Exception:
            call_args = {}
    if call_args is None:
        # Flat style: the arguments sit next to the tool name.
        call_args = {k: v for k, v in payload.items() if k not in {"tool", "name"}}
    if not isinstance(call_args, dict):
        return None
    return tool_name, call_args
def _normalize_tool_name(raw: str, available: dict[str, str]) -> str:
name = raw.strip()
if not name:
return ""
exact = available.get(name.lower())
if exact:
return exact
key = name.lower().replace("-", "_").replace(" ", "_")
aliases = {
"bash": "terminal",
"shell": "terminal",
"read": "read_file",
"grep": "search_files",
"glob": "search_files",
"edit": "patch",
"write": "write_file",
}
mapped = aliases.get(key)
if mapped and mapped in available:
return available[mapped]
return name
def _filter_args_by_schema(args: dict[str, Any], schema: dict[str, Any]) -> dict[str, Any]:
properties = schema.get("properties")
if not isinstance(properties, dict) or not properties:
return args
return {k: v for k, v in args.items() if k in properties}
def _has_required_args(args: dict[str, Any], schema: dict[str, Any]) -> bool:
required = schema.get("required")
if not isinstance(required, list):
return True
for key in required:
if not isinstance(key, str):
continue
if key not in args:
return False
value = args.get(key)
if isinstance(value, str) and not value.strip():
return False
return True
def _compact_schema(schema: dict[str, Any]) -> str:
properties = schema.get("properties")
if not isinstance(properties, dict) or not properties:
return ""
required = {item for item in schema.get("required", []) if isinstance(item, str)}
parts: list[str] = []
for key in sorted(properties.keys()):
parts.append(key if key in required else f"{key}?")
return ", ".join(parts)
def _truncate(text: str, max_len: int) -> str:
text = text.strip()
if len(text) <= max_len:
return text
return text[:max_len] + "..."
def _force_constraint(choice: EmulatedToolChoice) -> str:
if choice.mode == "any":
return "- You must output at least one ```json action``` block in this reply."
if choice.mode == "tool" and choice.name.strip():
return f'- You must call "{choice.name.strip()}" in this reply.'
return ""
def _normalize_json(text: str) -> str:
return (
text.strip()
.replace("", '"')
.replace("", '"')
.replace(",\n}", "\n}")
.replace(",\n]", "\n]")
)
def _tool_call_key(name: str, arguments: dict[str, Any]) -> str:
return f"{name.lower()}\0{json.dumps(arguments, ensure_ascii=False, sort_keys=True)}"
def _stable_call_id(name: str, arguments: dict[str, Any]) -> str:
key = _tool_call_key(name, arguments)
return "call_" + uuid.uuid5(uuid.NAMESPACE_OID, key).hex[:16]

View File

@@ -419,6 +419,17 @@ class LspWsRpcClient:
method = msg.get("method")
params = msg.get("params") or {}
if method and (
method.startswith("tool/")
or method.startswith("mcp/")
or method in {"chat/answer", "chat/finish"}
):
logger.info(
"lingma server message method=%s params=%s",
method,
params,
)
if method == "chat/answer":
req_id = params.get("requestId")
stream = self._chat_streams.get(req_id)

View File

@@ -36,6 +36,20 @@ from .http.execution_core import (
release_execution,
start_execution,
)
from .http.tool_emulation import (
action_output_prompt,
extract_anthropic_tool_choice as _em_extract_anthropic_tool_choice,
extract_anthropic_tools as _em_extract_anthropic_tools,
extract_openai_tool_choice as _em_extract_openai_tool_choice,
extract_openai_tools as _em_extract_openai_tools,
force_tooling_prompt,
has_tool_request as _em_has_tool_request,
infer_declared_tool_call_from_text,
infer_tool_calls_from_text,
inject_tooling,
openai_tool_call_from_emulated,
parse_action_blocks,
)
from .http.openai_responses import handle_responses
from .http.tool_bridge import (
_allowed_stream_tool_event,
@@ -44,8 +58,10 @@ from .http.tool_bridge import (
_anthropic_tool_result_block,
_anthropic_tool_use_block,
_extract_function_call_event_from_text,
_extract_hash_tool_call_event_from_text,
_extract_tool_calls_from_text,
_forced_tool_fallback_event,
_infer_tool_event_from_declared_tools,
_json_string,
_openai_forced_tool_name,
_openai_tool_call,
@@ -417,6 +433,82 @@ def _messages_to_prompt(messages: list[dict]) -> str:
return "\n".join(parts).strip()
def _messages_to_emulation_prompt(
messages: list[dict[str, Any]],
*,
system_text: str,
tools: list[dict[str, Any]] | None,
tool_choice: Any,
) -> str:
filtered: list[tuple[str, str]] = []
for message in messages:
role = str(message.get("role") or "").strip().lower()
if role in {"system", "developer"}:
continue
text = flatten_content(message.get("content"))
if role == "tool":
text = action_output_prompt(message.get("tool_call_id"), text)
role = "user"
if not text:
continue
if role not in {"user", "assistant"}:
continue
filtered.append((role, text))
if not filtered:
return system_text.strip()
em_tools = _em_extract_openai_tools(tools)
em_choice = _em_extract_openai_tool_choice(tool_choice)
injected_system = inject_tooling(system_text, em_tools, em_choice)
parts: list[str] = []
for role, text in filtered:
label = "User" if role == "user" else "Assistant"
parts.append(f"{label}: {text}")
if injected_system:
parts.append(injected_system)
parts.append("Assistant:")
return "\n\n".join(parts).strip()
def _anthropic_messages_to_emulation_prompt(
messages: list[dict[str, Any]],
*,
system_text: str,
tools: list[dict[str, Any]] | None,
tool_choice: Any,
) -> str:
filtered: list[tuple[str, str]] = []
for message in messages:
role = str(message.get("role") or "").strip().lower()
text = str(message.get("content") or "").strip()
if role == "tool":
text = action_output_prompt(message.get("tool_call_id"), text)
role = "user"
if not text:
continue
if role not in {"user", "assistant"}:
continue
filtered.append((role, text))
if not filtered:
return system_text.strip()
em_tools = _em_extract_anthropic_tools(tools)
em_choice = _em_extract_anthropic_tool_choice(tool_choice)
injected_system = inject_tooling(system_text, em_tools, em_choice)
parts: list[str] = []
for role, text in filtered:
label = "User" if role == "user" else "Assistant"
parts.append(f"{label}: {text}")
if injected_system:
parts.append(injected_system)
parts.append("Assistant:")
return "\n\n".join(parts).strip()
def _include_usage(stream_options: dict | None) -> bool:
if not isinstance(stream_options, dict):
return False
@@ -525,6 +617,20 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
is_reply = execution.is_reply
include_usage = _include_usage(req.stream_options)
em_tools = _em_extract_openai_tools(req.tools)
em_choice = _em_extract_openai_tool_choice(req.tool_choice)
if _em_has_tool_request(em_tools, em_choice):
system_parts = [
flatten_content(m.content)
for m in req.messages
if m.role in {"system", "developer"} and flatten_content(m.content)
]
prompt = _messages_to_emulation_prompt(
messages_dump,
system_text="\n\n".join(system_parts),
tools=req.tools,
tool_choice=req.tool_choice,
)
try:
started = await start_execution(
@@ -709,6 +815,11 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
merged_text,
forced_tool_name=forced_tool_name,
)
if inferred is None:
inferred = _extract_hash_tool_call_event_from_text(
merged_text,
forced_tool_name=forced_tool_name,
)
if inferred is None:
inferred = _forced_tool_fallback_event(
merged_text,
@@ -747,6 +858,72 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
if buffered_text_parts and forced_tool_name and saw_tool_call:
buffered_text_parts.clear()
if buffered_text_parts and req.tools and not saw_tool_call:
merged_text = "".join(buffered_text_parts)
inferred = _infer_tool_event_from_declared_tools(
merged_text,
tools=req.tools,
)
if inferred is not None:
tool_id = "call_inferred_0"
tool_call_indexes[tool_id] = 0
saw_tool_call = True
payload = {
"id": completion_id,
"object": "chat.completion.chunk",
"created": created,
"model": model,
"choices": [
{
"index": 0,
"delta": {
"tool_calls": [
{
"index": 0,
**_openai_tool_call(
inferred, forced_id=tool_id
),
}
]
},
"finish_reason": None,
}
],
}
buffered_text_parts.clear()
yield f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"
if buffered_text_parts and req.tools and not saw_tool_call:
merged_text = "".join(buffered_text_parts)
parsed_calls, remaining = parse_action_blocks(merged_text, em_tools)
if parsed_calls:
saw_tool_call = True
for i, call in enumerate(parsed_calls):
tool_id = call.id or f"call_inferred_{i}"
tool_call_indexes[tool_id] = i
payload = {
"id": completion_id,
"object": "chat.completion.chunk",
"created": created,
"model": model,
"choices": [
{
"index": 0,
"delta": {
"tool_calls": [
{
"index": i,
**openai_tool_call_from_emulated(call),
}
]
},
"finish_reason": None,
}
],
}
yield f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"
buffered_text_parts = [remaining] if remaining else []
done_payload = {
"id": completion_id,
"object": "chat.completion.chunk",
@@ -866,6 +1043,11 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
message_content,
forced_tool_name=forced_tool_name,
)
if inferred is None:
inferred = _extract_hash_tool_call_event_from_text(
message_content,
forced_tool_name=forced_tool_name,
)
if inferred is None:
inferred = _forced_tool_fallback_event(
message_content,
@@ -878,6 +1060,59 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
)
saw_tool_call = True
message_content = ""
if not saw_tool_call and req.tools:
inferred = _infer_tool_event_from_declared_tools(
message_content,
tools=req.tools,
)
if inferred is not None:
tool_calls.append(
_openai_tool_call(inferred, forced_id="call_inferred_0")
)
saw_tool_call = True
message_content = ""
if not saw_tool_call and em_tools:
parsed_calls, remaining = parse_action_blocks(message_content, em_tools)
if parsed_calls:
for call in parsed_calls:
tool_calls.append(openai_tool_call_from_emulated(call))
saw_tool_call = True
message_content = remaining
if not saw_tool_call and em_tools:
inferred_call = infer_declared_tool_call_from_text(message_content, em_tools)
if inferred_call is None:
inferred_calls = infer_tool_calls_from_text(message_content, em_tools)
inferred_call = inferred_calls[0] if inferred_calls else None
if inferred_call is not None:
tool_calls.append(openai_tool_call_from_emulated(inferred_call))
saw_tool_call = True
message_content = ""
if not saw_tool_call and em_tools:
retry_prompt = f"{prompt}\n\n{force_tooling_prompt(em_choice)}"
retry_result = await inst.client.chat_complete(
retry_prompt,
model,
ask_mode,
session_id=None,
is_reply=False,
tool_config=tool_config,
)
retry_text = retry_result.get("text") or ""
parsed_calls, remaining = parse_action_blocks(retry_text, em_tools)
if parsed_calls:
for call in parsed_calls:
tool_calls.append(openai_tool_call_from_emulated(call))
saw_tool_call = True
message_content = remaining
else:
inferred_call = infer_declared_tool_call_from_text(retry_text, em_tools)
if inferred_call is None:
inferred_calls = infer_tool_calls_from_text(retry_text, em_tools)
inferred_call = inferred_calls[0] if inferred_calls else None
if inferred_call is not None:
tool_calls.append(openai_tool_call_from_emulated(inferred_call))
saw_tool_call = True
message_content = ""
response = ChatCompletionResponse(
id=f"chatcmpl-{uuid.uuid4().hex}",
created=int(time.time()),
@@ -1049,6 +1284,16 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
model = execution.model
prompt = execution.prompt
is_reply = execution.is_reply
em_anthropic_tools = _em_extract_anthropic_tools(req.tools)
em_anthropic_choice = _em_extract_anthropic_tool_choice(req.tool_choice)
if _em_has_tool_request(em_anthropic_tools, em_anthropic_choice):
system_text = flatten_anthropic_content(req.system) if req.system else ""
prompt = _anthropic_messages_to_emulation_prompt(
messages_dump,
system_text=system_text,
tools=req.tools,
tool_choice=req.tool_choice,
)
try:
started = await start_execution(
@@ -1090,12 +1335,14 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
stream_meta: dict = {}
max_tokens = req.max_tokens
forced_tool_name = _anthropic_forced_tool_name(req.tool_choice)
aggregate_emulated_tools = bool(em_anthropic_tools)
async def event_stream(_ticket=ticket, _inst=inst, _meta=stream_meta):
success = False
block_index = 0
text_block_open = False
saw_pending_tool_use = False
buffered_text_parts: list[str] = []
try:
# 1) message_start — Anthropic SDKs read this first to get
# the message envelope (id/model/initial usage).
@@ -1196,7 +1443,78 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
text = _stream_text(chunk)
if not text:
continue
completion_tokens_holder["n"] += estimate_tokens(text)
if aggregate_emulated_tools:
buffered_text_parts.append(text)
completion_tokens_holder["n"] += estimate_tokens(text)
continue
buffered_text_parts.append(text)
merged_text = "".join(buffered_text_parts)
parsed_calls, remaining = parse_action_blocks(
merged_text, em_anthropic_tools
)
if not parsed_calls:
inferred = infer_declared_tool_call_from_text(
merged_text,
em_anthropic_tools,
)
if inferred is None:
inferred_calls = infer_tool_calls_from_text(
merged_text,
em_anthropic_tools,
)
inferred = inferred_calls[0] if inferred_calls else None
if inferred is not None:
parsed_calls = [inferred]
remaining = ""
if parsed_calls:
if text_block_open:
yield _sse(
"content_block_stop",
{"type": "content_block_stop", "index": block_index},
)
block_index += 1
text_block_open = False
saw_pending_tool_use = True
for call in parsed_calls:
yield _sse(
"content_block_start",
{
"type": "content_block_start",
"index": block_index,
"content_block": {
"type": "tool_use",
"id": call.id,
"name": call.name,
"input": {},
},
},
)
yield _sse(
"content_block_delta",
{
"type": "content_block_delta",
"index": block_index,
"delta": {
"type": "input_json_delta",
"partial_json": json.dumps(call.arguments, ensure_ascii=False),
},
},
)
yield _sse(
"content_block_stop",
{"type": "content_block_stop", "index": block_index},
)
block_index += 1
buffered_text_parts = [remaining] if remaining else []
if not buffered_text_parts:
continue
text_to_emit = "".join(buffered_text_parts)
buffered_text_parts.clear()
completion_tokens_holder["n"] += estimate_tokens(text_to_emit)
if not text_block_open:
yield _sse(
"content_block_start",
@@ -1213,10 +1531,106 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
{
"type": "content_block_delta",
"index": block_index,
"delta": {"type": "text_delta", "text": text},
"delta": {"type": "text_delta", "text": text_to_emit},
},
)
if aggregate_emulated_tools:
merged_text = "".join(buffered_text_parts)
parsed_calls, remaining = parse_action_blocks(
merged_text, em_anthropic_tools
)
if not parsed_calls:
inferred = infer_declared_tool_call_from_text(
merged_text,
em_anthropic_tools,
)
if inferred is None:
inferred_calls = infer_tool_calls_from_text(
merged_text,
em_anthropic_tools,
)
inferred = inferred_calls[0] if inferred_calls else None
if inferred is not None:
parsed_calls = [inferred]
remaining = ""
if parsed_calls:
if remaining.strip():
yield _sse(
"content_block_start",
{
"type": "content_block_start",
"index": block_index,
"content_block": {"type": "text", "text": ""},
},
)
yield _sse(
"content_block_delta",
{
"type": "content_block_delta",
"index": block_index,
"delta": {"type": "text_delta", "text": remaining},
},
)
yield _sse(
"content_block_stop",
{"type": "content_block_stop", "index": block_index},
)
block_index += 1
for call in parsed_calls:
saw_pending_tool_use = True
yield _sse(
"content_block_start",
{
"type": "content_block_start",
"index": block_index,
"content_block": {
"type": "tool_use",
"id": call.id,
"name": call.name,
"input": {},
},
},
)
yield _sse(
"content_block_delta",
{
"type": "content_block_delta",
"index": block_index,
"delta": {
"type": "input_json_delta",
"partial_json": json.dumps(call.arguments, ensure_ascii=False),
},
},
)
yield _sse(
"content_block_stop",
{"type": "content_block_stop", "index": block_index},
)
block_index += 1
elif merged_text.strip():
yield _sse(
"content_block_start",
{
"type": "content_block_start",
"index": block_index,
"content_block": {"type": "text", "text": ""},
},
)
yield _sse(
"content_block_delta",
{
"type": "content_block_delta",
"index": block_index,
"delta": {"type": "text_delta", "text": merged_text},
},
)
yield _sse(
"content_block_stop",
{"type": "content_block_stop", "index": block_index},
)
if text_block_open:
yield _sse(
"content_block_stop",
@@ -1323,6 +1737,135 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
else:
saw_pending_tool_use = True
if not saw_tool_event and em_anthropic_tools:
parsed_calls, remaining = parse_action_blocks(text, em_anthropic_tools)
if parsed_calls:
content_blocks = []
if remaining:
content_blocks.append({"type": "text", "text": remaining})
for call in parsed_calls:
content_blocks.append(
{
"type": "tool_use",
"id": call.id,
"name": call.name,
"input": call.arguments,
}
)
saw_tool_event = True
saw_pending_tool_use = True
text = remaining
if not saw_tool_event and em_anthropic_tools:
inferred_call = infer_declared_tool_call_from_text(text, em_anthropic_tools)
if inferred_call is None:
inferred_calls = infer_tool_calls_from_text(text, em_anthropic_tools)
inferred_call = inferred_calls[0] if inferred_calls else None
if inferred_call is not None:
content_blocks = [
{
"type": "tool_use",
"id": inferred_call.id,
"name": inferred_call.name,
"input": inferred_call.arguments,
}
]
saw_tool_event = True
saw_pending_tool_use = True
text = ""
if not saw_tool_event and em_anthropic_tools:
retry_prompt = f"{prompt}\n\n{force_tooling_prompt(em_anthropic_choice)}"
retry_result = await inst.client.chat_complete(
retry_prompt,
model,
ask_mode,
session_id=None,
is_reply=False,
tool_config=tool_config,
)
retry_text = retry_result.get("text") or ""
parsed_calls, remaining = parse_action_blocks(retry_text, em_anthropic_tools)
if parsed_calls:
content_blocks = []
if remaining:
content_blocks.append({"type": "text", "text": remaining})
for call in parsed_calls:
content_blocks.append(
{
"type": "tool_use",
"id": call.id,
"name": call.name,
"input": call.arguments,
}
)
saw_tool_event = True
saw_pending_tool_use = True
text = remaining
else:
inferred_call = infer_declared_tool_call_from_text(retry_text, em_anthropic_tools)
if inferred_call is None:
inferred_calls = infer_tool_calls_from_text(retry_text, em_anthropic_tools)
inferred_call = inferred_calls[0] if inferred_calls else None
if inferred_call is not None:
content_blocks = [
{
"type": "tool_use",
"id": inferred_call.id,
"name": inferred_call.name,
"input": inferred_call.arguments,
}
]
saw_tool_event = True
saw_pending_tool_use = True
text = ""
if not saw_tool_event and em_anthropic_tools and text.strip():
retry_prompt = f"{prompt}\n\n{force_tooling_prompt(em_anthropic_choice)}"
retry_result = await inst.client.chat_complete(
retry_prompt,
model,
ask_mode,
session_id=None,
is_reply=False,
tool_config=tool_config,
)
retry_text = retry_result.get("text") or ""
parsed_calls, remaining = parse_action_blocks(retry_text, em_anthropic_tools)
if parsed_calls:
content_blocks = []
if remaining:
content_blocks.append({"type": "text", "text": remaining})
for call in parsed_calls:
content_blocks.append(
{
"type": "tool_use",
"id": call.id,
"name": call.name,
"input": call.arguments,
}
)
saw_tool_event = True
saw_pending_tool_use = True
text = remaining
else:
inferred_call = infer_declared_tool_call_from_text(retry_text, em_anthropic_tools)
if inferred_call is None:
inferred_calls = infer_tool_calls_from_text(retry_text, em_anthropic_tools)
inferred_call = inferred_calls[0] if inferred_calls else None
if inferred_call is not None:
content_blocks = [
{
"type": "tool_use",
"id": inferred_call.id,
"name": inferred_call.name,
"input": inferred_call.arguments,
}
]
saw_tool_event = True
saw_pending_tool_use = True
text = ""
if not saw_tool_event and forced_tool_name:
inferred = _extract_function_call_event_from_text(
text,