refactor: share execution prep for tool-call phase

Keep the current tool-call bridge contract stable while extracting shared
execution setup and tightening Anthropic forwarding regressions.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
GitHub Actions
2026-04-22 07:37:00 +08:00
parent 4748432501
commit 5a7553b35b
5 changed files with 319 additions and 209 deletions

View File

@@ -52,10 +52,9 @@ class AnthropicMessagesRequest(BaseModel):
stop_sequences: list[str] | None = None
# metadata.user_id is the official hint for per-user routing / abuse tracking.
metadata: dict[str, Any] | None = None
# Tools / tool_choice are accepted but we can't forward them to Lingma yet —
# they're preserved here so the request doesn't 422, and the flattener
# surfaces any tool_use blocks as `[tool_use] {...}` text so the assistant
# still sees the context.
# Tools / tool_choice are accepted for compatibility and, when forwarding is
# enabled, are passed upstream as tool_config; tool_use / tool_result blocks
# are still flattened into text so the assistant can see prior tool context.
tools: list[dict[str, Any]] | None = None
tool_choice: dict[str, Any] | None = None

148
app/http/execution_core.py Normal file
View File

@@ -0,0 +1,148 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import Any, Awaitable, Callable
from ..lingma_pool import LingmaPool, PoolInstance
from ..model_map import build_model_name_map, flatten_model_keys, resolve_model
from ..session_cache import SessionCache, hash_branch_context
@dataclass
class ExecutionContext:
    """Per-request execution state assembled by ``prepare_execution_context``.

    Instances are built with keyword arguments, but the field order below is
    still part of the generated ``__init__`` signature and must stay stable.
    """

    # Ask mode chosen for this request: "agent" or the configured default.
    ask_mode: str
    # Session-cache key that was looked up (prefix of the conversation);
    # None when session reuse was not attempted.
    lookup_key: str | None
    # Session-cache key built from the full message list; None when session
    # reuse was not attempted.
    write_key: str | None
    # Session id taken from a cache hit; None on a miss or after the cached
    # entry was invalidated because a different instance was picked.
    cached_session_id: str | None
    # Pool instance selected to serve the request.
    inst: PoolInstance
    # Model name resolved against the models the instance reports.
    model: str
    # Prompt text to send: last user turn on reuse, full conversation otherwise.
    prompt: str
    # True when the prompt continues a cached session.
    is_reply: bool
    # Affinity key that was handed to the pool when picking the instance.
    affinity: str | None
def _resolve_ask_mode(model: str, has_tooling_context: bool, *, default_ask_mode: str) -> str:
    """Pick the ask mode for a request.

    Returns ``"agent"`` when the requested model is one of the agent aliases
    (compared case-insensitively) or when the request carries tooling
    context; otherwise returns ``default_ask_mode`` unchanged.
    """
    # A missing/empty model is treated as the empty string before lowering.
    normalized = (model or "").lower()
    wants_agent = has_tooling_context or normalized in ("lingma-agent", "agent")
    return "agent" if wants_agent else default_ask_mode
async def _apply_cached_instance_or_invalidate(
    *,
    protocol: str,
    logger: Any,
    session_cache: SessionCache,
    inst: PoolInstance,
    cached_instance_name: str | None,
    cached_session_id: str | None,
    lookup_key: str | None,
) -> str | None:
    """Keep the cached session only if the picked instance is the cached one.

    When a cached instance name is known and ``inst`` is a different
    instance, the mismatch is logged, the cache entry under ``lookup_key``
    (if any) is invalidated, and ``None`` is returned so the caller starts a
    fresh session. In every other case ``cached_session_id`` is returned
    unchanged.
    """
    picked_matches_cache = not cached_instance_name or inst.name == cached_instance_name
    if picked_matches_cache:
        return cached_session_id

    logger.info(
        "%s session cache instance %s unhealthy, falling back to %s",
        protocol,
        cached_instance_name,
        inst.name,
    )
    # Drop the stale entry so later requests do not retry the same instance.
    if lookup_key:
        await session_cache.invalidate(lookup_key)
    return None
async def prepare_execution_context(
    *,
    protocol: str,
    requested_model: str,
    has_tooling_context: bool,
    tool_config: dict[str, Any] | None,
    messages_dump: list[dict[str, Any]],
    api_key: str,
    affinity_key: str | None,
    pool: LingmaPool,
    session_cache: SessionCache,
    logger: Any,
    default_model: str,
    default_ask_mode: str,
    ensure_instance_logged_in: Callable[[PoolInstance], Awaitable[Any]],
    last_user_text: Callable[[list[dict[str, Any]]], str],
    messages_to_prompt: Callable[[list[dict[str, Any]]], str],
) -> ExecutionContext:
    """Run the execution setup shared by the chat and Anthropic endpoints.

    Steps, in order:

    1. Resolve the ask mode from the requested model and tooling context.
    2. If session reuse is eligible (cache enabled, ask mode ``"chat"``, at
       least two messages, no tooling context), build the lookup/write cache
       keys and look up a cached session, falling back to the legacy key
       built without a branch context.
    3. Pick a pool instance, preferring the cached instance name over
       ``affinity_key``, and drop the cached session if a different instance
       was returned.
    4. Ensure the instance is logged in, query its models and resolve the
       model name.
    5. Build the prompt: only the last user text when a cached session is
       reused, the full conversation otherwise.

    No exception is caught here; errors raised by the awaited collaborators
    propagate to the caller.
    """
    ask_mode = _resolve_ask_mode(
        requested_model,
        has_tooling_context,
        default_ask_mode=default_ask_mode,
    )

    lookup_key: str | None = None
    write_key: str | None = None
    cached_session_id: str | None = None
    cached_instance_name: str | None = None

    can_reuse_session = (
        session_cache.enabled
        and ask_mode == "chat"
        and len(messages_dump) >= 2
        and not has_tooling_context
    )
    if can_reuse_session:
        # The lookup key covers everything before the newest message; the
        # write key covers the whole conversation including it.
        prior_messages = messages_dump[:-1]
        lookup_key = session_cache.build_key(
            api_key,
            prior_messages,
            tool_config=tool_config,
            branch_context=hash_branch_context(prior_messages),
        )
        write_key = session_cache.build_key(
            api_key,
            messages_dump,
            tool_config=tool_config,
            branch_context=hash_branch_context(messages_dump),
        )

        hit = await session_cache.get(lookup_key)
        if hit is None:
            # Fall back to the key format without a branch context; on a hit
            # that key becomes the lookup key reported to the caller.
            legacy_key = session_cache.build_key(
                api_key,
                prior_messages,
                tool_config=tool_config,
            )
            hit = await session_cache.get(legacy_key)
            if hit is not None:
                lookup_key = legacy_key

        if hit is not None:
            cached_session_id = hit.session_id
            cached_instance_name = hit.instance_name or None

    # Prefer the instance that served the cached session, if there is one.
    affinity = cached_instance_name or affinity_key
    inst = pool.pick(affinity_key=affinity)
    cached_session_id = await _apply_cached_instance_or_invalidate(
        protocol=protocol,
        logger=logger,
        session_cache=session_cache,
        inst=inst,
        cached_instance_name=cached_instance_name,
        cached_session_id=cached_session_id,
        lookup_key=lookup_key,
    )

    await ensure_instance_logged_in(inst)
    models = await inst.client.query_models()
    model = resolve_model(
        requested_model,
        flatten_model_keys(models),
        default_model,
        build_model_name_map(models),
    )

    # On a cache hit send only the last user turn so the context already
    # stored upstream is not duplicated.
    is_reply = bool(cached_session_id)
    prompt = last_user_text(messages_dump) if is_reply else messages_to_prompt(messages_dump)

    return ExecutionContext(
        ask_mode=ask_mode,
        lookup_key=lookup_key,
        write_key=write_key,
        cached_session_id=cached_session_id,
        inst=inst,
        model=model,
        prompt=prompt,
        is_reply=is_reply,
        affinity=affinity,
    )

View File

@@ -15,6 +15,49 @@ def _json_string(value: Any) -> str:
return "{}"
def _openai_tool_name(tool: Any) -> str | None:
    """Return the stripped name of an OpenAI-style tool definition, or None.

    For ``{"type": "function", "function": {...}}`` tools the nested
    ``function.name`` is tried first; a top-level ``name`` is the fallback
    for every dict. Non-dict input, non-string names and blank names yield
    ``None``.
    """
    if not isinstance(tool, dict):
        return None

    # Candidates in priority order: nested function name, then flat name.
    candidates: list[Any] = []
    if tool.get("type") == "function":
        spec = tool.get("function")
        if isinstance(spec, dict):
            candidates.append(spec.get("name"))
    candidates.append(tool.get("name"))

    for candidate in candidates:
        if isinstance(candidate, str):
            trimmed = candidate.strip()
            if trimmed:
                return trimmed
    return None
def _anthropic_tool_name(tool: Any) -> str | None:
    """Return the stripped name of an Anthropic-style tool definition, or None.

    The top-level ``name`` is preferred; a nested ``function.name`` is used
    only when the top-level one is missing, not a string, or blank. Non-dict
    input yields ``None``.
    """
    if not isinstance(tool, dict):
        return None

    top_level = tool.get("name")
    if isinstance(top_level, str):
        trimmed = top_level.strip()
        if trimmed:
            return trimmed

    # Fall back to the OpenAI-style nested layout.
    spec = tool.get("function")
    nested = spec.get("name") if isinstance(spec, dict) else None
    if isinstance(nested, str):
        trimmed = nested.strip()
        if trimmed:
            return trimmed
    return None
def _tool_event_allowed(
    tool_name: str,
    tool_config: dict[str, Any] | None,
    *,
    forced_tool_name: str | None = None,
) -> bool:
    """Decide whether a tool event named ``tool_name`` may be emitted.

    Everything is allowed when ``tool_config`` carries no non-empty
    ``"tools"`` list. Otherwise the name must match one of the declared
    tools (under either the Anthropic or the OpenAI naming layout) or equal
    a non-empty ``forced_tool_name``.
    """
    declared = tool_config.get("tools") if tool_config else None
    if not isinstance(declared, list) or not declared:
        # No declared tools: nothing to filter against.
        return True

    if any(
        tool_name == _anthropic_tool_name(entry) or tool_name == _openai_tool_name(entry)
        for entry in declared
    ):
        return True

    # A forced tool is accepted even when it is missing from the declared list.
    return bool(forced_tool_name and tool_name == forced_tool_name)
def _openai_forced_tool_name(tool_choice: Any) -> str | None:
if not isinstance(tool_choice, dict):
return None

View File

@@ -25,6 +25,11 @@ from .auth import (
)
from .concurrency import BackpressureRejected, InFlightGuard
from .config import Settings, load_settings
from .http.execution_core import (
_apply_cached_instance_or_invalidate as _shared_apply_cached_instance_or_invalidate,
_resolve_ask_mode as _shared_resolve_ask_mode,
prepare_execution_context,
)
from .http.responses_adapter import (
_responses_id_from_chat_id,
_responses_input_to_messages,
@@ -35,6 +40,7 @@ from .http.responses_adapter import (
)
from .http.tool_bridge import (
_anthropic_forced_tool_name,
_anthropic_tool_name as _shared_anthropic_tool_name,
_anthropic_tool_result_block,
_anthropic_tool_use_block,
_forced_tool_event_from_text,
@@ -42,8 +48,10 @@ from .http.tool_bridge import (
_json_string,
_openai_forced_tool_name,
_openai_tool_call,
_openai_tool_name as _shared_openai_tool_name,
_tool_code_object_from_text,
_tool_code_single_arg_name,
_tool_event_allowed,
)
from .lingma_pool import LingmaPool, PoolInstance
from .logging_config import configure_logging, get_logger, request_id_var
@@ -383,32 +391,11 @@ def _tool_allowlist() -> set[str]:
def _openai_tool_name(tool: Any) -> str | None:
if not isinstance(tool, dict):
return None
if tool.get("type") == "function":
fn = tool.get("function")
if isinstance(fn, dict):
name = fn.get("name")
if isinstance(name, str) and name.strip():
return name.strip()
name = tool.get("name")
if isinstance(name, str) and name.strip():
return name.strip()
return None
return _shared_openai_tool_name(tool)
def _anthropic_tool_name(tool: Any) -> str | None:
if not isinstance(tool, dict):
return None
name = tool.get("name")
if isinstance(name, str) and name.strip():
return name.strip()
fn = tool.get("function")
if isinstance(fn, dict):
nested_name = fn.get("name")
if isinstance(nested_name, str) and nested_name.strip():
return nested_name.strip()
return None
return _shared_anthropic_tool_name(tool)
def _filter_allowed_tools(tools: list[dict[str, Any]], *, provider: str) -> list[dict[str, Any]]:
@@ -509,10 +496,11 @@ def _anthropic_has_tooling_context(req: AnthropicMessagesRequest) -> bool:
def _resolve_ask_mode(model: str, has_tooling_context: bool) -> str:
model_name = (model or "").lower()
if model_name in {"lingma-agent", "agent"} or has_tooling_context:
return "agent"
return settings.default_ask_mode
return _shared_resolve_ask_mode(
model,
has_tooling_context,
default_ask_mode=settings.default_ask_mode,
)
async def _apply_cached_instance_or_invalidate(
@@ -523,17 +511,15 @@ async def _apply_cached_instance_or_invalidate(
cached_session_id: str | None,
lookup_key: str | None,
) -> str | None:
if cached_instance_name and inst.name != cached_instance_name:
logger.info(
"%s session cache instance %s unhealthy, falling back to %s",
protocol,
cached_instance_name,
inst.name,
)
if lookup_key:
await session_cache.invalidate(lookup_key)
return None
return cached_session_id
return await _shared_apply_cached_instance_or_invalidate(
protocol=protocol,
logger=logger,
session_cache=session_cache,
inst=inst,
cached_instance_name=cached_instance_name,
cached_session_id=cached_session_id,
lookup_key=lookup_key,
)
@@ -588,68 +574,32 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
# 3. Stick the request to the pool instance that originally served it.
tool_config = _openai_tool_config(req)
has_tooling_context = _openai_has_tooling_context(req, messages_dump)
ask_mode = _resolve_ask_mode(req.model, has_tooling_context)
reuse_eligible = (
session_cache.enabled
and ask_mode == "chat"
and len(messages_dump) >= 2
and not has_tooling_context
)
lookup_key: str | None = None
write_key: str | None = None
cached_session_id: str | None = None
cached_instance_name: str | None = None
if reuse_eligible:
prefix_branch_context = hash_branch_context(messages_dump[:-1])
lookup_key = session_cache.build_key(
api_key,
messages_dump[:-1],
tool_config=tool_config,
branch_context=prefix_branch_context,
)
write_key = session_cache.build_key(
api_key,
messages_dump,
tool_config=tool_config,
branch_context=hash_branch_context(messages_dump),
)
entry = await session_cache.get(lookup_key)
if entry is None:
legacy_lookup_key = session_cache.build_key(api_key, messages_dump[:-1], tool_config=tool_config)
entry = await session_cache.get(legacy_lookup_key)
if entry is not None:
lookup_key = legacy_lookup_key
if entry is not None:
cached_session_id = entry.session_id
cached_instance_name = entry.instance_name or None
affinity = cached_instance_name or _affinity_key_for(req)
inst = p.pick(affinity_key=affinity)
cached_session_id = await _apply_cached_instance_or_invalidate(
execution = await prepare_execution_context(
protocol="chat",
inst=inst,
cached_instance_name=cached_instance_name,
cached_session_id=cached_session_id,
lookup_key=lookup_key,
requested_model=req.model,
has_tooling_context=has_tooling_context,
tool_config=tool_config,
messages_dump=messages_dump,
api_key=api_key,
affinity_key=_affinity_key_for(req),
pool=p,
session_cache=session_cache,
logger=logger,
default_model=settings.default_model,
default_ask_mode=settings.default_ask_mode,
ensure_instance_logged_in=_ensure_instance_logged_in,
last_user_text=_last_user_text,
messages_to_prompt=_messages_to_prompt,
)
await _ensure_instance_logged_in(inst)
models = await inst.client.query_models()
available = flatten_model_keys(models)
name_map = build_model_name_map(models)
model = resolve_model(req.model, available, settings.default_model, name_map)
# Prompt construction: on cache hit send only the last user turn so Lingma's
# stored context isn't duplicated.
if cached_session_id:
prompt = _last_user_text(messages_dump)
is_reply = True
else:
prompt = _messages_to_prompt(messages_dump)
is_reply = False
ask_mode = execution.ask_mode
lookup_key = execution.lookup_key
write_key = execution.write_key
cached_session_id = execution.cached_session_id
inst = execution.inst
model = execution.model
prompt = execution.prompt
is_reply = execution.is_reply
affinity = execution.affinity
if not prompt:
raise HTTPException(
@@ -748,16 +698,11 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
continue
tool_name = str(tool.get("name") or "")
allowed = True
if tool_config and isinstance(tool_config.get("tools"), list) and tool_config.get("tools"):
allowed = False
for t in tool_config.get("tools"):
if tool_name == _anthropic_tool_name(t) or tool_name == _openai_tool_name(t):
allowed = True
break
if not allowed and forced_tool_name and tool_name == forced_tool_name:
allowed = True
if not allowed:
if not _tool_event_allowed(
tool_name,
tool_config,
forced_tool_name=forced_tool_name,
):
continue
if buffered_text_parts:
@@ -956,16 +901,11 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
for idx, item in enumerate(tool_events):
if isinstance(item, dict):
tool_name = str(item.get("name") or "")
allowed = True
if tool_config and isinstance(tool_config.get("tools"), list) and tool_config.get("tools"):
allowed = False
for t in tool_config.get("tools"):
if tool_name == _anthropic_tool_name(t) or tool_name == _openai_tool_name(t):
allowed = True
break
if not allowed and forced_tool_name and tool_name == forced_tool_name:
allowed = True
if not allowed:
if not _tool_event_allowed(
tool_name,
tool_config,
forced_tool_name=forced_tool_name,
):
continue
tool_id = str(item.get("id") or f"call_{idx}")
@@ -1414,77 +1354,38 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
message = error.get("message") or str(detail) or "invalid tool configuration"
return _anthropic_error(exc.status_code, "invalid_request_error", message)
has_tooling_context = _anthropic_has_tooling_context(req)
ask_mode = _resolve_ask_mode(req.model, has_tooling_context)
reuse_eligible = (
session_cache.enabled and ask_mode == "chat" and len(messages_dump) >= 2 and not has_tooling_context
)
lookup_key: str | None = None
write_key: str | None = None
cached_session_id: str | None = None
cached_instance_name: str | None = None
if reuse_eligible:
prefix_branch_context = hash_branch_context(messages_dump[:-1])
lookup_key = session_cache.build_key(
api_key,
messages_dump[:-1],
tool_config=tool_config,
branch_context=prefix_branch_context,
)
write_key = session_cache.build_key(
api_key,
messages_dump,
tool_config=tool_config,
branch_context=hash_branch_context(messages_dump),
)
entry = await session_cache.get(lookup_key)
if entry is None:
legacy_lookup_key = session_cache.build_key(api_key, messages_dump[:-1], tool_config=tool_config)
entry = await session_cache.get(legacy_lookup_key)
if entry is not None:
lookup_key = legacy_lookup_key
if entry is not None:
cached_session_id = entry.session_id
cached_instance_name = entry.instance_name or None
affinity = cached_instance_name or affinity_key_for_anthropic(req)
inst = p.pick(affinity_key=affinity)
if cached_instance_name and inst.name != cached_instance_name:
logger.info(
"anthropic session cache instance %s unhealthy, falling back to %s",
cached_instance_name,
inst.name,
)
cached_session_id = None
if lookup_key:
await session_cache.invalidate(lookup_key)
try:
await _ensure_instance_logged_in(inst)
execution = await prepare_execution_context(
protocol="anthropic",
requested_model=req.model,
has_tooling_context=has_tooling_context,
tool_config=tool_config,
messages_dump=messages_dump,
api_key=api_key,
affinity_key=affinity_key_for_anthropic(req),
pool=p,
session_cache=session_cache,
logger=logger,
default_model=settings.default_model,
default_ask_mode=settings.default_ask_mode,
ensure_instance_logged_in=_ensure_instance_logged_in,
last_user_text=_last_user_text,
messages_to_prompt=_messages_to_prompt,
)
except HTTPException as exc:
# 503/401/502 from login: map to closest Anthropic kind.
err_type = "authentication_error" if exc.status_code == 401 else "overloaded_error"
detail = exc.detail if isinstance(exc.detail, dict) else {}
msg = (detail.get("error") or {}).get("message") or str(detail) or "upstream error"
return _anthropic_error(exc.status_code, err_type, msg)
# ------------------------------------------------------------- prompt & model
models = await inst.client.query_models()
available = flatten_model_keys(models)
name_map = build_model_name_map(models)
# Anthropic callers send `claude-*` model names. resolve_model's
# final fallback (default_model / first available) handles that cleanly
# without us having to hard-code a mapping table.
model = resolve_model(req.model, available, settings.default_model, name_map)
if cached_session_id:
prompt = _last_user_text(messages_dump)
is_reply = True
else:
prompt = _messages_to_prompt(messages_dump)
is_reply = False
ask_mode = execution.ask_mode
lookup_key = execution.lookup_key
write_key = execution.write_key
cached_session_id = execution.cached_session_id
inst = execution.inst
model = execution.model
prompt = execution.prompt
is_reply = execution.is_reply
affinity = execution.affinity
if not prompt:
return _anthropic_error(400, "invalid_request_error", "messages is empty")
@@ -1588,17 +1489,11 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
continue
tool_name = str(tool.get("name") or "")
allowed = True
if tool_config and isinstance(tool_config.get("tools"), list) and tool_config.get("tools"):
allowed = False
for t in tool_config.get("tools"):
if tool_name == _anthropic_tool_name(t) or tool_name == _openai_tool_name(t):
allowed = True
break
forced_tool_name = _anthropic_forced_tool_name(req.tool_choice)
if not allowed and forced_tool_name and tool_name == forced_tool_name:
allowed = True
if not allowed:
if not _tool_event_allowed(
tool_name,
tool_config,
forced_tool_name=_anthropic_forced_tool_name(req.tool_choice),
):
continue
tool_id = str(tool.get("id") or f"toolu_stream_{block_index}")
@@ -1778,17 +1673,11 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
continue
tool_name = str(item.get("name") or "")
allowed = True
if tool_config and isinstance(tool_config.get("tools"), list) and tool_config.get("tools"):
allowed = False
for t in tool_config.get("tools"):
if tool_name == _anthropic_tool_name(t) or tool_name == _openai_tool_name(t):
allowed = True
break
forced_tool_name = _anthropic_forced_tool_name(req.tool_choice)
if not allowed and forced_tool_name and tool_name == forced_tool_name:
allowed = True
if not allowed:
if not _tool_event_allowed(
tool_name,
tool_config,
forced_tool_name=_anthropic_forced_tool_name(req.tool_choice),
):
continue
saw_tool_event = True