feat: add emulated tool-calling bridge for Lingma

Add a proxy-side tool emulation layer so Lingma requests can surface stable OpenAI tool_calls and Anthropic tool_use blocks even when upstream tool events are missing or inconsistent.

Constraint: Keep native Lingma tool event bridging as the first path and layer emulation as a fallback

Rejected: Depend exclusively on Lingma native tool/invoke events | tool visibility remains inconsistent across models and transports

Confidence: high

Scope-risk: moderate
This commit is contained in:
mmc
2026-05-07 18:10:01 +08:00
parent 5911e4322e
commit 94a8025ae5
11 changed files with 1808 additions and 4 deletions

View File

@@ -36,6 +36,20 @@ from .http.execution_core import (
release_execution,
start_execution,
)
from .http.tool_emulation import (
action_output_prompt,
extract_anthropic_tool_choice as _em_extract_anthropic_tool_choice,
extract_anthropic_tools as _em_extract_anthropic_tools,
extract_openai_tool_choice as _em_extract_openai_tool_choice,
extract_openai_tools as _em_extract_openai_tools,
force_tooling_prompt,
has_tool_request as _em_has_tool_request,
infer_declared_tool_call_from_text,
infer_tool_calls_from_text,
inject_tooling,
openai_tool_call_from_emulated,
parse_action_blocks,
)
from .http.openai_responses import handle_responses
from .http.tool_bridge import (
_allowed_stream_tool_event,
@@ -44,8 +58,10 @@ from .http.tool_bridge import (
_anthropic_tool_result_block,
_anthropic_tool_use_block,
_extract_function_call_event_from_text,
_extract_hash_tool_call_event_from_text,
_extract_tool_calls_from_text,
_forced_tool_fallback_event,
_infer_tool_event_from_declared_tools,
_json_string,
_openai_forced_tool_name,
_openai_tool_call,
@@ -417,6 +433,82 @@ def _messages_to_prompt(messages: list[dict]) -> str:
return "\n".join(parts).strip()
def _messages_to_emulation_prompt(
messages: list[dict[str, Any]],
*,
system_text: str,
tools: list[dict[str, Any]] | None,
tool_choice: Any,
) -> str:
filtered: list[tuple[str, str]] = []
for message in messages:
role = str(message.get("role") or "").strip().lower()
if role in {"system", "developer"}:
continue
text = flatten_content(message.get("content"))
if role == "tool":
text = action_output_prompt(message.get("tool_call_id"), text)
role = "user"
if not text:
continue
if role not in {"user", "assistant"}:
continue
filtered.append((role, text))
if not filtered:
return system_text.strip()
em_tools = _em_extract_openai_tools(tools)
em_choice = _em_extract_openai_tool_choice(tool_choice)
injected_system = inject_tooling(system_text, em_tools, em_choice)
parts: list[str] = []
for role, text in filtered:
label = "User" if role == "user" else "Assistant"
parts.append(f"{label}: {text}")
if injected_system:
parts.append(injected_system)
parts.append("Assistant:")
return "\n\n".join(parts).strip()
def _anthropic_messages_to_emulation_prompt(
messages: list[dict[str, Any]],
*,
system_text: str,
tools: list[dict[str, Any]] | None,
tool_choice: Any,
) -> str:
filtered: list[tuple[str, str]] = []
for message in messages:
role = str(message.get("role") or "").strip().lower()
text = str(message.get("content") or "").strip()
if role == "tool":
text = action_output_prompt(message.get("tool_call_id"), text)
role = "user"
if not text:
continue
if role not in {"user", "assistant"}:
continue
filtered.append((role, text))
if not filtered:
return system_text.strip()
em_tools = _em_extract_anthropic_tools(tools)
em_choice = _em_extract_anthropic_tool_choice(tool_choice)
injected_system = inject_tooling(system_text, em_tools, em_choice)
parts: list[str] = []
for role, text in filtered:
label = "User" if role == "user" else "Assistant"
parts.append(f"{label}: {text}")
if injected_system:
parts.append(injected_system)
parts.append("Assistant:")
return "\n\n".join(parts).strip()
def _include_usage(stream_options: dict | None) -> bool:
if not isinstance(stream_options, dict):
return False
@@ -525,6 +617,20 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
is_reply = execution.is_reply
include_usage = _include_usage(req.stream_options)
em_tools = _em_extract_openai_tools(req.tools)
em_choice = _em_extract_openai_tool_choice(req.tool_choice)
if _em_has_tool_request(em_tools, em_choice):
system_parts = [
flatten_content(m.content)
for m in req.messages
if m.role in {"system", "developer"} and flatten_content(m.content)
]
prompt = _messages_to_emulation_prompt(
messages_dump,
system_text="\n\n".join(system_parts),
tools=req.tools,
tool_choice=req.tool_choice,
)
try:
started = await start_execution(
@@ -709,6 +815,11 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
merged_text,
forced_tool_name=forced_tool_name,
)
if inferred is None:
inferred = _extract_hash_tool_call_event_from_text(
merged_text,
forced_tool_name=forced_tool_name,
)
if inferred is None:
inferred = _forced_tool_fallback_event(
merged_text,
@@ -747,6 +858,72 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
if buffered_text_parts and forced_tool_name and saw_tool_call:
buffered_text_parts.clear()
if buffered_text_parts and req.tools and not saw_tool_call:
merged_text = "".join(buffered_text_parts)
inferred = _infer_tool_event_from_declared_tools(
merged_text,
tools=req.tools,
)
if inferred is not None:
tool_id = "call_inferred_0"
tool_call_indexes[tool_id] = 0
saw_tool_call = True
payload = {
"id": completion_id,
"object": "chat.completion.chunk",
"created": created,
"model": model,
"choices": [
{
"index": 0,
"delta": {
"tool_calls": [
{
"index": 0,
**_openai_tool_call(
inferred, forced_id=tool_id
),
}
]
},
"finish_reason": None,
}
],
}
buffered_text_parts.clear()
yield f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"
if buffered_text_parts and req.tools and not saw_tool_call:
merged_text = "".join(buffered_text_parts)
parsed_calls, remaining = parse_action_blocks(merged_text, em_tools)
if parsed_calls:
saw_tool_call = True
for i, call in enumerate(parsed_calls):
tool_id = call.id or f"call_inferred_{i}"
tool_call_indexes[tool_id] = i
payload = {
"id": completion_id,
"object": "chat.completion.chunk",
"created": created,
"model": model,
"choices": [
{
"index": 0,
"delta": {
"tool_calls": [
{
"index": i,
**openai_tool_call_from_emulated(call),
}
]
},
"finish_reason": None,
}
],
}
yield f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"
buffered_text_parts = [remaining] if remaining else []
done_payload = {
"id": completion_id,
"object": "chat.completion.chunk",
@@ -866,6 +1043,11 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
message_content,
forced_tool_name=forced_tool_name,
)
if inferred is None:
inferred = _extract_hash_tool_call_event_from_text(
message_content,
forced_tool_name=forced_tool_name,
)
if inferred is None:
inferred = _forced_tool_fallback_event(
message_content,
@@ -878,6 +1060,59 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
)
saw_tool_call = True
message_content = ""
if not saw_tool_call and req.tools:
inferred = _infer_tool_event_from_declared_tools(
message_content,
tools=req.tools,
)
if inferred is not None:
tool_calls.append(
_openai_tool_call(inferred, forced_id="call_inferred_0")
)
saw_tool_call = True
message_content = ""
if not saw_tool_call and em_tools:
parsed_calls, remaining = parse_action_blocks(message_content, em_tools)
if parsed_calls:
for call in parsed_calls:
tool_calls.append(openai_tool_call_from_emulated(call))
saw_tool_call = True
message_content = remaining
if not saw_tool_call and em_tools:
inferred_call = infer_declared_tool_call_from_text(message_content, em_tools)
if inferred_call is None:
inferred_calls = infer_tool_calls_from_text(message_content, em_tools)
inferred_call = inferred_calls[0] if inferred_calls else None
if inferred_call is not None:
tool_calls.append(openai_tool_call_from_emulated(inferred_call))
saw_tool_call = True
message_content = ""
if not saw_tool_call and em_tools:
retry_prompt = f"{prompt}\n\n{force_tooling_prompt(em_choice)}"
retry_result = await inst.client.chat_complete(
retry_prompt,
model,
ask_mode,
session_id=None,
is_reply=False,
tool_config=tool_config,
)
retry_text = retry_result.get("text") or ""
parsed_calls, remaining = parse_action_blocks(retry_text, em_tools)
if parsed_calls:
for call in parsed_calls:
tool_calls.append(openai_tool_call_from_emulated(call))
saw_tool_call = True
message_content = remaining
else:
inferred_call = infer_declared_tool_call_from_text(retry_text, em_tools)
if inferred_call is None:
inferred_calls = infer_tool_calls_from_text(retry_text, em_tools)
inferred_call = inferred_calls[0] if inferred_calls else None
if inferred_call is not None:
tool_calls.append(openai_tool_call_from_emulated(inferred_call))
saw_tool_call = True
message_content = ""
response = ChatCompletionResponse(
id=f"chatcmpl-{uuid.uuid4().hex}",
created=int(time.time()),
@@ -1049,6 +1284,16 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
model = execution.model
prompt = execution.prompt
is_reply = execution.is_reply
em_anthropic_tools = _em_extract_anthropic_tools(req.tools)
em_anthropic_choice = _em_extract_anthropic_tool_choice(req.tool_choice)
if _em_has_tool_request(em_anthropic_tools, em_anthropic_choice):
system_text = flatten_anthropic_content(req.system) if req.system else ""
prompt = _anthropic_messages_to_emulation_prompt(
messages_dump,
system_text=system_text,
tools=req.tools,
tool_choice=req.tool_choice,
)
try:
started = await start_execution(
@@ -1090,12 +1335,14 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
stream_meta: dict = {}
max_tokens = req.max_tokens
forced_tool_name = _anthropic_forced_tool_name(req.tool_choice)
aggregate_emulated_tools = bool(em_anthropic_tools)
async def event_stream(_ticket=ticket, _inst=inst, _meta=stream_meta):
success = False
block_index = 0
text_block_open = False
saw_pending_tool_use = False
buffered_text_parts: list[str] = []
try:
# 1) message_start — Anthropic SDKs read this first to get
# the message envelope (id/model/initial usage).
@@ -1196,7 +1443,78 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
text = _stream_text(chunk)
if not text:
continue
completion_tokens_holder["n"] += estimate_tokens(text)
if aggregate_emulated_tools:
buffered_text_parts.append(text)
completion_tokens_holder["n"] += estimate_tokens(text)
continue
buffered_text_parts.append(text)
merged_text = "".join(buffered_text_parts)
parsed_calls, remaining = parse_action_blocks(
merged_text, em_anthropic_tools
)
if not parsed_calls:
inferred = infer_declared_tool_call_from_text(
merged_text,
em_anthropic_tools,
)
if inferred is None:
inferred_calls = infer_tool_calls_from_text(
merged_text,
em_anthropic_tools,
)
inferred = inferred_calls[0] if inferred_calls else None
if inferred is not None:
parsed_calls = [inferred]
remaining = ""
if parsed_calls:
if text_block_open:
yield _sse(
"content_block_stop",
{"type": "content_block_stop", "index": block_index},
)
block_index += 1
text_block_open = False
saw_pending_tool_use = True
for call in parsed_calls:
yield _sse(
"content_block_start",
{
"type": "content_block_start",
"index": block_index,
"content_block": {
"type": "tool_use",
"id": call.id,
"name": call.name,
"input": {},
},
},
)
yield _sse(
"content_block_delta",
{
"type": "content_block_delta",
"index": block_index,
"delta": {
"type": "input_json_delta",
"partial_json": json.dumps(call.arguments, ensure_ascii=False),
},
},
)
yield _sse(
"content_block_stop",
{"type": "content_block_stop", "index": block_index},
)
block_index += 1
buffered_text_parts = [remaining] if remaining else []
if not buffered_text_parts:
continue
text_to_emit = "".join(buffered_text_parts)
buffered_text_parts.clear()
completion_tokens_holder["n"] += estimate_tokens(text_to_emit)
if not text_block_open:
yield _sse(
"content_block_start",
@@ -1213,10 +1531,106 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
{
"type": "content_block_delta",
"index": block_index,
"delta": {"type": "text_delta", "text": text},
"delta": {"type": "text_delta", "text": text_to_emit},
},
)
if aggregate_emulated_tools:
merged_text = "".join(buffered_text_parts)
parsed_calls, remaining = parse_action_blocks(
merged_text, em_anthropic_tools
)
if not parsed_calls:
inferred = infer_declared_tool_call_from_text(
merged_text,
em_anthropic_tools,
)
if inferred is None:
inferred_calls = infer_tool_calls_from_text(
merged_text,
em_anthropic_tools,
)
inferred = inferred_calls[0] if inferred_calls else None
if inferred is not None:
parsed_calls = [inferred]
remaining = ""
if parsed_calls:
if remaining.strip():
yield _sse(
"content_block_start",
{
"type": "content_block_start",
"index": block_index,
"content_block": {"type": "text", "text": ""},
},
)
yield _sse(
"content_block_delta",
{
"type": "content_block_delta",
"index": block_index,
"delta": {"type": "text_delta", "text": remaining},
},
)
yield _sse(
"content_block_stop",
{"type": "content_block_stop", "index": block_index},
)
block_index += 1
for call in parsed_calls:
saw_pending_tool_use = True
yield _sse(
"content_block_start",
{
"type": "content_block_start",
"index": block_index,
"content_block": {
"type": "tool_use",
"id": call.id,
"name": call.name,
"input": {},
},
},
)
yield _sse(
"content_block_delta",
{
"type": "content_block_delta",
"index": block_index,
"delta": {
"type": "input_json_delta",
"partial_json": json.dumps(call.arguments, ensure_ascii=False),
},
},
)
yield _sse(
"content_block_stop",
{"type": "content_block_stop", "index": block_index},
)
block_index += 1
elif merged_text.strip():
yield _sse(
"content_block_start",
{
"type": "content_block_start",
"index": block_index,
"content_block": {"type": "text", "text": ""},
},
)
yield _sse(
"content_block_delta",
{
"type": "content_block_delta",
"index": block_index,
"delta": {"type": "text_delta", "text": merged_text},
},
)
yield _sse(
"content_block_stop",
{"type": "content_block_stop", "index": block_index},
)
if text_block_open:
yield _sse(
"content_block_stop",
@@ -1323,6 +1737,135 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
else:
saw_pending_tool_use = True
if not saw_tool_event and em_anthropic_tools:
parsed_calls, remaining = parse_action_blocks(text, em_anthropic_tools)
if parsed_calls:
content_blocks = []
if remaining:
content_blocks.append({"type": "text", "text": remaining})
for call in parsed_calls:
content_blocks.append(
{
"type": "tool_use",
"id": call.id,
"name": call.name,
"input": call.arguments,
}
)
saw_tool_event = True
saw_pending_tool_use = True
text = remaining
if not saw_tool_event and em_anthropic_tools:
inferred_call = infer_declared_tool_call_from_text(text, em_anthropic_tools)
if inferred_call is None:
inferred_calls = infer_tool_calls_from_text(text, em_anthropic_tools)
inferred_call = inferred_calls[0] if inferred_calls else None
if inferred_call is not None:
content_blocks = [
{
"type": "tool_use",
"id": inferred_call.id,
"name": inferred_call.name,
"input": inferred_call.arguments,
}
]
saw_tool_event = True
saw_pending_tool_use = True
text = ""
if not saw_tool_event and em_anthropic_tools:
retry_prompt = f"{prompt}\n\n{force_tooling_prompt(em_anthropic_choice)}"
retry_result = await inst.client.chat_complete(
retry_prompt,
model,
ask_mode,
session_id=None,
is_reply=False,
tool_config=tool_config,
)
retry_text = retry_result.get("text") or ""
parsed_calls, remaining = parse_action_blocks(retry_text, em_anthropic_tools)
if parsed_calls:
content_blocks = []
if remaining:
content_blocks.append({"type": "text", "text": remaining})
for call in parsed_calls:
content_blocks.append(
{
"type": "tool_use",
"id": call.id,
"name": call.name,
"input": call.arguments,
}
)
saw_tool_event = True
saw_pending_tool_use = True
text = remaining
else:
inferred_call = infer_declared_tool_call_from_text(retry_text, em_anthropic_tools)
if inferred_call is None:
inferred_calls = infer_tool_calls_from_text(retry_text, em_anthropic_tools)
inferred_call = inferred_calls[0] if inferred_calls else None
if inferred_call is not None:
content_blocks = [
{
"type": "tool_use",
"id": inferred_call.id,
"name": inferred_call.name,
"input": inferred_call.arguments,
}
]
saw_tool_event = True
saw_pending_tool_use = True
text = ""
if not saw_tool_event and em_anthropic_tools and text.strip():
retry_prompt = f"{prompt}\n\n{force_tooling_prompt(em_anthropic_choice)}"
retry_result = await inst.client.chat_complete(
retry_prompt,
model,
ask_mode,
session_id=None,
is_reply=False,
tool_config=tool_config,
)
retry_text = retry_result.get("text") or ""
parsed_calls, remaining = parse_action_blocks(retry_text, em_anthropic_tools)
if parsed_calls:
content_blocks = []
if remaining:
content_blocks.append({"type": "text", "text": remaining})
for call in parsed_calls:
content_blocks.append(
{
"type": "tool_use",
"id": call.id,
"name": call.name,
"input": call.arguments,
}
)
saw_tool_event = True
saw_pending_tool_use = True
text = remaining
else:
inferred_call = infer_declared_tool_call_from_text(retry_text, em_anthropic_tools)
if inferred_call is None:
inferred_calls = infer_tool_calls_from_text(retry_text, em_anthropic_tools)
inferred_call = inferred_calls[0] if inferred_calls else None
if inferred_call is not None:
content_blocks = [
{
"type": "tool_use",
"id": inferred_call.id,
"name": inferred_call.name,
"input": inferred_call.arguments,
}
]
saw_tool_event = True
saw_pending_tool_use = True
text = ""
if not saw_tool_event and forced_tool_name:
inferred = _extract_function_call_event_from_text(
text,