feat: add emulated tool-calling bridge for Lingma

Add a proxy-side tool emulation layer so Lingma requests can surface stable OpenAI tool_calls and Anthropic tool_use blocks even when upstream tool events are missing or inconsistent. Constraint: Keep native Lingma tool event bridging as the first path and layer emulation as a fallback Rejected: Depend exclusively on Lingma native tool/invoke events | tool visibility remains inconsistent across models and transports Confidence: high Scope-risk: moderate
2026-05-07 18:10:01 +08:00
parent 5911e4322e
commit 94a8025ae5
11 changed files with 1808 additions and 4 deletions
--- a/app/main.py
+++ b/app/main.py
@@ -36,6 +36,20 @@ from .http.execution_core import (
    release_execution,
    start_execution,
 )
+from .http.tool_emulation import (
+    action_output_prompt,
+    extract_anthropic_tool_choice as _em_extract_anthropic_tool_choice,
+    extract_anthropic_tools as _em_extract_anthropic_tools,
+    extract_openai_tool_choice as _em_extract_openai_tool_choice,
+    extract_openai_tools as _em_extract_openai_tools,
+    force_tooling_prompt,
+    has_tool_request as _em_has_tool_request,
+    infer_declared_tool_call_from_text,
+    infer_tool_calls_from_text,
+    inject_tooling,
+    openai_tool_call_from_emulated,
+    parse_action_blocks,
+)
 from .http.openai_responses import handle_responses
 from .http.tool_bridge import (
    _allowed_stream_tool_event,
@@ -44,8 +58,10 @@ from .http.tool_bridge import (
    _anthropic_tool_result_block,
    _anthropic_tool_use_block,
    _extract_function_call_event_from_text,
+    _extract_hash_tool_call_event_from_text,
    _extract_tool_calls_from_text,
    _forced_tool_fallback_event,
+    _infer_tool_event_from_declared_tools,
    _json_string,
    _openai_forced_tool_name,
    _openai_tool_call,
@@ -417,6 +433,82 @@ def _messages_to_prompt(messages: list[dict]) -> str:
    return "\n".join(parts).strip()


+def _messages_to_emulation_prompt(
+    messages: list[dict[str, Any]],
+    *,
+    system_text: str,
+    tools: list[dict[str, Any]] | None,
+    tool_choice: Any,
+) -> str:
+    filtered: list[tuple[str, str]] = []
+    for message in messages:
+        role = str(message.get("role") or "").strip().lower()
+        if role in {"system", "developer"}:
+            continue
+        text = flatten_content(message.get("content"))
+        if role == "tool":
+            text = action_output_prompt(message.get("tool_call_id"), text)
+            role = "user"
+        if not text:
+            continue
+        if role not in {"user", "assistant"}:
+            continue
+        filtered.append((role, text))
+
+    if not filtered:
+        return system_text.strip()
+
+    em_tools = _em_extract_openai_tools(tools)
+    em_choice = _em_extract_openai_tool_choice(tool_choice)
+    injected_system = inject_tooling(system_text, em_tools, em_choice)
+
+    parts: list[str] = []
+    for role, text in filtered:
+        label = "User" if role == "user" else "Assistant"
+        parts.append(f"{label}: {text}")
+    if injected_system:
+        parts.append(injected_system)
+    parts.append("Assistant:")
+    return "\n\n".join(parts).strip()
+
+
+def _anthropic_messages_to_emulation_prompt(
+    messages: list[dict[str, Any]],
+    *,
+    system_text: str,
+    tools: list[dict[str, Any]] | None,
+    tool_choice: Any,
+) -> str:
+    filtered: list[tuple[str, str]] = []
+    for message in messages:
+        role = str(message.get("role") or "").strip().lower()
+        text = str(message.get("content") or "").strip()
+        if role == "tool":
+            text = action_output_prompt(message.get("tool_call_id"), text)
+            role = "user"
+        if not text:
+            continue
+        if role not in {"user", "assistant"}:
+            continue
+        filtered.append((role, text))
+
+    if not filtered:
+        return system_text.strip()
+
+    em_tools = _em_extract_anthropic_tools(tools)
+    em_choice = _em_extract_anthropic_tool_choice(tool_choice)
+    injected_system = inject_tooling(system_text, em_tools, em_choice)
+
+    parts: list[str] = []
+    for role, text in filtered:
+        label = "User" if role == "user" else "Assistant"
+        parts.append(f"{label}: {text}")
+    if injected_system:
+        parts.append(injected_system)
+    parts.append("Assistant:")
+    return "\n\n".join(parts).strip()
+
+
 def _include_usage(stream_options: dict | None) -> bool:
    if not isinstance(stream_options, dict):
        return False
@@ -525,6 +617,20 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
    is_reply = execution.is_reply

    include_usage = _include_usage(req.stream_options)
+    em_tools = _em_extract_openai_tools(req.tools)
+    em_choice = _em_extract_openai_tool_choice(req.tool_choice)
+    if _em_has_tool_request(em_tools, em_choice):
+        system_parts = [
+            flatten_content(m.content)
+            for m in req.messages
+            if m.role in {"system", "developer"} and flatten_content(m.content)
+        ]
+        prompt = _messages_to_emulation_prompt(
+            messages_dump,
+            system_text="\n\n".join(system_parts),
+            tools=req.tools,
+            tool_choice=req.tool_choice,
+        )

    try:
        started = await start_execution(
@@ -709,6 +815,11 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
                            merged_text,
                            forced_tool_name=forced_tool_name,
                        )
+                        if inferred is None:
+                            inferred = _extract_hash_tool_call_event_from_text(
+                                merged_text,
+                                forced_tool_name=forced_tool_name,
+                            )
                        if inferred is None:
                            inferred = _forced_tool_fallback_event(
                                merged_text,
@@ -747,6 +858,72 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
                    if buffered_text_parts and forced_tool_name and saw_tool_call:
                        buffered_text_parts.clear()

+                    if buffered_text_parts and req.tools and not saw_tool_call:
+                        merged_text = "".join(buffered_text_parts)
+                        inferred = _infer_tool_event_from_declared_tools(
+                            merged_text,
+                            tools=req.tools,
+                        )
+                        if inferred is not None:
+                            tool_id = "call_inferred_0"
+                            tool_call_indexes[tool_id] = 0
+                            saw_tool_call = True
+                            payload = {
+                                "id": completion_id,
+                                "object": "chat.completion.chunk",
+                                "created": created,
+                                "model": model,
+                                "choices": [
+                                    {
+                                        "index": 0,
+                                        "delta": {
+                                            "tool_calls": [
+                                                {
+                                                    "index": 0,
+                                                    **_openai_tool_call(
+                                                        inferred, forced_id=tool_id
+                                                    ),
+                                                }
+                                            ]
+                                        },
+                                        "finish_reason": None,
+                                    }
+                                ],
+                            }
+                            buffered_text_parts.clear()
+                            yield f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"
+
+                    if buffered_text_parts and req.tools and not saw_tool_call:
+                        merged_text = "".join(buffered_text_parts)
+                        parsed_calls, remaining = parse_action_blocks(merged_text, em_tools)
+                        if parsed_calls:
+                            saw_tool_call = True
+                            for i, call in enumerate(parsed_calls):
+                                tool_id = call.id or f"call_inferred_{i}"
+                                tool_call_indexes[tool_id] = i
+                                payload = {
+                                    "id": completion_id,
+                                    "object": "chat.completion.chunk",
+                                    "created": created,
+                                    "model": model,
+                                    "choices": [
+                                        {
+                                            "index": 0,
+                                            "delta": {
+                                                "tool_calls": [
+                                                    {
+                                                        "index": i,
+                                                        **openai_tool_call_from_emulated(call),
+                                                    }
+                                                ]
+                                            },
+                                            "finish_reason": None,
+                                        }
+                                    ],
+                                }
+                                yield f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"
+                            buffered_text_parts = [remaining] if remaining else []
+
                    done_payload = {
                        "id": completion_id,
                        "object": "chat.completion.chunk",
@@ -866,6 +1043,11 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
                message_content,
                forced_tool_name=forced_tool_name,
            )
+            if inferred is None:
+                inferred = _extract_hash_tool_call_event_from_text(
+                    message_content,
+                    forced_tool_name=forced_tool_name,
+                )
            if inferred is None:
                inferred = _forced_tool_fallback_event(
                    message_content,
@@ -878,6 +1060,59 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
                )
                saw_tool_call = True
                message_content = ""
+        if not saw_tool_call and req.tools:
+            inferred = _infer_tool_event_from_declared_tools(
+                message_content,
+                tools=req.tools,
+            )
+            if inferred is not None:
+                tool_calls.append(
+                    _openai_tool_call(inferred, forced_id="call_inferred_0")
+                )
+                saw_tool_call = True
+                message_content = ""
+        if not saw_tool_call and em_tools:
+            parsed_calls, remaining = parse_action_blocks(message_content, em_tools)
+            if parsed_calls:
+                for call in parsed_calls:
+                    tool_calls.append(openai_tool_call_from_emulated(call))
+                saw_tool_call = True
+                message_content = remaining
+        if not saw_tool_call and em_tools:
+            inferred_call = infer_declared_tool_call_from_text(message_content, em_tools)
+            if inferred_call is None:
+                inferred_calls = infer_tool_calls_from_text(message_content, em_tools)
+                inferred_call = inferred_calls[0] if inferred_calls else None
+            if inferred_call is not None:
+                tool_calls.append(openai_tool_call_from_emulated(inferred_call))
+                saw_tool_call = True
+                message_content = ""
+        if not saw_tool_call and em_tools:
+            retry_prompt = f"{prompt}\n\n{force_tooling_prompt(em_choice)}"
+            retry_result = await inst.client.chat_complete(
+                retry_prompt,
+                model,
+                ask_mode,
+                session_id=None,
+                is_reply=False,
+                tool_config=tool_config,
+            )
+            retry_text = retry_result.get("text") or ""
+            parsed_calls, remaining = parse_action_blocks(retry_text, em_tools)
+            if parsed_calls:
+                for call in parsed_calls:
+                    tool_calls.append(openai_tool_call_from_emulated(call))
+                saw_tool_call = True
+                message_content = remaining
+            else:
+                inferred_call = infer_declared_tool_call_from_text(retry_text, em_tools)
+                if inferred_call is None:
+                    inferred_calls = infer_tool_calls_from_text(retry_text, em_tools)
+                    inferred_call = inferred_calls[0] if inferred_calls else None
+                if inferred_call is not None:
+                    tool_calls.append(openai_tool_call_from_emulated(inferred_call))
+                    saw_tool_call = True
+                    message_content = ""
        response = ChatCompletionResponse(
            id=f"chatcmpl-{uuid.uuid4().hex}",
            created=int(time.time()),
@@ -1049,6 +1284,16 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
    model = execution.model
    prompt = execution.prompt
    is_reply = execution.is_reply
+    em_anthropic_tools = _em_extract_anthropic_tools(req.tools)
+    em_anthropic_choice = _em_extract_anthropic_tool_choice(req.tool_choice)
+    if _em_has_tool_request(em_anthropic_tools, em_anthropic_choice):
+        system_text = flatten_anthropic_content(req.system) if req.system else ""
+        prompt = _anthropic_messages_to_emulation_prompt(
+            messages_dump,
+            system_text=system_text,
+            tools=req.tools,
+            tool_choice=req.tool_choice,
+        )

    try:
        started = await start_execution(
@@ -1090,12 +1335,14 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
            stream_meta: dict = {}
            max_tokens = req.max_tokens
            forced_tool_name = _anthropic_forced_tool_name(req.tool_choice)
+            aggregate_emulated_tools = bool(em_anthropic_tools)

            async def event_stream(_ticket=ticket, _inst=inst, _meta=stream_meta):
                success = False
                block_index = 0
                text_block_open = False
                saw_pending_tool_use = False
+                buffered_text_parts: list[str] = []
                try:
                    # 1) message_start — Anthropic SDKs read this first to get
                    #    the message envelope (id/model/initial usage).
@@ -1196,7 +1443,78 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
                        text = _stream_text(chunk)
                        if not text:
                            continue
-                        completion_tokens_holder["n"] += estimate_tokens(text)
+                        if aggregate_emulated_tools:
+                            buffered_text_parts.append(text)
+                            completion_tokens_holder["n"] += estimate_tokens(text)
+                            continue
+
+                        buffered_text_parts.append(text)
+                        merged_text = "".join(buffered_text_parts)
+
+                        parsed_calls, remaining = parse_action_blocks(
+                            merged_text, em_anthropic_tools
+                        )
+                        if not parsed_calls:
+                            inferred = infer_declared_tool_call_from_text(
+                                merged_text,
+                                em_anthropic_tools,
+                            )
+                            if inferred is None:
+                                inferred_calls = infer_tool_calls_from_text(
+                                    merged_text,
+                                    em_anthropic_tools,
+                                )
+                                inferred = inferred_calls[0] if inferred_calls else None
+                            if inferred is not None:
+                                parsed_calls = [inferred]
+                                remaining = ""
+
+                        if parsed_calls:
+                            if text_block_open:
+                                yield _sse(
+                                    "content_block_stop",
+                                    {"type": "content_block_stop", "index": block_index},
+                                )
+                                block_index += 1
+                                text_block_open = False
+                            saw_pending_tool_use = True
+                            for call in parsed_calls:
+                                yield _sse(
+                                    "content_block_start",
+                                    {
+                                        "type": "content_block_start",
+                                        "index": block_index,
+                                        "content_block": {
+                                            "type": "tool_use",
+                                            "id": call.id,
+                                            "name": call.name,
+                                            "input": {},
+                                        },
+                                    },
+                                )
+                                yield _sse(
+                                    "content_block_delta",
+                                    {
+                                        "type": "content_block_delta",
+                                        "index": block_index,
+                                        "delta": {
+                                            "type": "input_json_delta",
+                                            "partial_json": json.dumps(call.arguments, ensure_ascii=False),
+                                        },
+                                    },
+                                )
+                                yield _sse(
+                                    "content_block_stop",
+                                    {"type": "content_block_stop", "index": block_index},
+                                )
+                                block_index += 1
+                            buffered_text_parts = [remaining] if remaining else []
+                            if not buffered_text_parts:
+                                continue
+
+                        text_to_emit = "".join(buffered_text_parts)
+                        buffered_text_parts.clear()
+                        completion_tokens_holder["n"] += estimate_tokens(text_to_emit)
                        if not text_block_open:
                            yield _sse(
                                "content_block_start",
@@ -1213,10 +1531,106 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
                            {
                                "type": "content_block_delta",
                                "index": block_index,
-                                "delta": {"type": "text_delta", "text": text},
+                                "delta": {"type": "text_delta", "text": text_to_emit},
                            },
                        )

+                    if aggregate_emulated_tools:
+                        merged_text = "".join(buffered_text_parts)
+                        parsed_calls, remaining = parse_action_blocks(
+                            merged_text, em_anthropic_tools
+                        )
+                        if not parsed_calls:
+                            inferred = infer_declared_tool_call_from_text(
+                                merged_text,
+                                em_anthropic_tools,
+                            )
+                            if inferred is None:
+                                inferred_calls = infer_tool_calls_from_text(
+                                    merged_text,
+                                    em_anthropic_tools,
+                                )
+                                inferred = inferred_calls[0] if inferred_calls else None
+                            if inferred is not None:
+                                parsed_calls = [inferred]
+                                remaining = ""
+
+                        if parsed_calls:
+                            if remaining.strip():
+                                yield _sse(
+                                    "content_block_start",
+                                    {
+                                        "type": "content_block_start",
+                                        "index": block_index,
+                                        "content_block": {"type": "text", "text": ""},
+                                    },
+                                )
+                                yield _sse(
+                                    "content_block_delta",
+                                    {
+                                        "type": "content_block_delta",
+                                        "index": block_index,
+                                        "delta": {"type": "text_delta", "text": remaining},
+                                    },
+                                )
+                                yield _sse(
+                                    "content_block_stop",
+                                    {"type": "content_block_stop", "index": block_index},
+                                )
+                                block_index += 1
+                            for call in parsed_calls:
+                                saw_pending_tool_use = True
+                                yield _sse(
+                                    "content_block_start",
+                                    {
+                                        "type": "content_block_start",
+                                        "index": block_index,
+                                        "content_block": {
+                                            "type": "tool_use",
+                                            "id": call.id,
+                                            "name": call.name,
+                                            "input": {},
+                                        },
+                                    },
+                                )
+                                yield _sse(
+                                    "content_block_delta",
+                                    {
+                                        "type": "content_block_delta",
+                                        "index": block_index,
+                                        "delta": {
+                                            "type": "input_json_delta",
+                                            "partial_json": json.dumps(call.arguments, ensure_ascii=False),
+                                        },
+                                    },
+                                )
+                                yield _sse(
+                                    "content_block_stop",
+                                    {"type": "content_block_stop", "index": block_index},
+                                )
+                                block_index += 1
+                        elif merged_text.strip():
+                            yield _sse(
+                                "content_block_start",
+                                {
+                                    "type": "content_block_start",
+                                    "index": block_index,
+                                    "content_block": {"type": "text", "text": ""},
+                                },
+                            )
+                            yield _sse(
+                                "content_block_delta",
+                                {
+                                    "type": "content_block_delta",
+                                    "index": block_index,
+                                    "delta": {"type": "text_delta", "text": merged_text},
+                                },
+                            )
+                            yield _sse(
+                                "content_block_stop",
+                                {"type": "content_block_stop", "index": block_index},
+                            )
+
                    if text_block_open:
                        yield _sse(
                            "content_block_stop",
@@ -1323,6 +1737,135 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
            else:
                saw_pending_tool_use = True

+        if not saw_tool_event and em_anthropic_tools:
+            parsed_calls, remaining = parse_action_blocks(text, em_anthropic_tools)
+            if parsed_calls:
+                content_blocks = []
+                if remaining:
+                    content_blocks.append({"type": "text", "text": remaining})
+                for call in parsed_calls:
+                    content_blocks.append(
+                        {
+                            "type": "tool_use",
+                            "id": call.id,
+                            "name": call.name,
+                            "input": call.arguments,
+                        }
+                    )
+                saw_tool_event = True
+                saw_pending_tool_use = True
+                text = remaining
+
+        if not saw_tool_event and em_anthropic_tools:
+            inferred_call = infer_declared_tool_call_from_text(text, em_anthropic_tools)
+            if inferred_call is None:
+                inferred_calls = infer_tool_calls_from_text(text, em_anthropic_tools)
+                inferred_call = inferred_calls[0] if inferred_calls else None
+            if inferred_call is not None:
+                content_blocks = [
+                    {
+                        "type": "tool_use",
+                        "id": inferred_call.id,
+                        "name": inferred_call.name,
+                        "input": inferred_call.arguments,
+                    }
+                ]
+                saw_tool_event = True
+                saw_pending_tool_use = True
+                text = ""
+
+        if not saw_tool_event and em_anthropic_tools:
+            retry_prompt = f"{prompt}\n\n{force_tooling_prompt(em_anthropic_choice)}"
+            retry_result = await inst.client.chat_complete(
+                retry_prompt,
+                model,
+                ask_mode,
+                session_id=None,
+                is_reply=False,
+                tool_config=tool_config,
+            )
+            retry_text = retry_result.get("text") or ""
+            parsed_calls, remaining = parse_action_blocks(retry_text, em_anthropic_tools)
+            if parsed_calls:
+                content_blocks = []
+                if remaining:
+                    content_blocks.append({"type": "text", "text": remaining})
+                for call in parsed_calls:
+                    content_blocks.append(
+                        {
+                            "type": "tool_use",
+                            "id": call.id,
+                            "name": call.name,
+                            "input": call.arguments,
+                        }
+                    )
+                saw_tool_event = True
+                saw_pending_tool_use = True
+                text = remaining
+            else:
+                inferred_call = infer_declared_tool_call_from_text(retry_text, em_anthropic_tools)
+                if inferred_call is None:
+                    inferred_calls = infer_tool_calls_from_text(retry_text, em_anthropic_tools)
+                    inferred_call = inferred_calls[0] if inferred_calls else None
+                if inferred_call is not None:
+                    content_blocks = [
+                        {
+                            "type": "tool_use",
+                            "id": inferred_call.id,
+                            "name": inferred_call.name,
+                            "input": inferred_call.arguments,
+                        }
+                    ]
+                    saw_tool_event = True
+                    saw_pending_tool_use = True
+                    text = ""
+
+        if not saw_tool_event and em_anthropic_tools and text.strip():
+            retry_prompt = f"{prompt}\n\n{force_tooling_prompt(em_anthropic_choice)}"
+            retry_result = await inst.client.chat_complete(
+                retry_prompt,
+                model,
+                ask_mode,
+                session_id=None,
+                is_reply=False,
+                tool_config=tool_config,
+            )
+            retry_text = retry_result.get("text") or ""
+            parsed_calls, remaining = parse_action_blocks(retry_text, em_anthropic_tools)
+            if parsed_calls:
+                content_blocks = []
+                if remaining:
+                    content_blocks.append({"type": "text", "text": remaining})
+                for call in parsed_calls:
+                    content_blocks.append(
+                        {
+                            "type": "tool_use",
+                            "id": call.id,
+                            "name": call.name,
+                            "input": call.arguments,
+                        }
+                    )
+                saw_tool_event = True
+                saw_pending_tool_use = True
+                text = remaining
+            else:
+                inferred_call = infer_declared_tool_call_from_text(retry_text, em_anthropic_tools)
+                if inferred_call is None:
+                    inferred_calls = infer_tool_calls_from_text(retry_text, em_anthropic_tools)
+                    inferred_call = inferred_calls[0] if inferred_calls else None
+                if inferred_call is not None:
+                    content_blocks = [
+                        {
+                            "type": "tool_use",
+                            "id": inferred_call.id,
+                            "name": inferred_call.name,
+                            "input": inferred_call.arguments,
+                        }
+                    ]
+                    saw_tool_event = True
+                    saw_pending_tool_use = True
+                    text = ""
+
        if not saw_tool_event and forced_tool_name:
            inferred = _extract_function_call_event_from_text(
                text,