refactor: share execution prep for tool-call phase
Keep the current tool-call bridge contract stable while extracting shared execution setup and tightening Anthropic forwarding regressions. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -52,10 +52,9 @@ class AnthropicMessagesRequest(BaseModel):
|
|||||||
stop_sequences: list[str] | None = None
|
stop_sequences: list[str] | None = None
|
||||||
# metadata.user_id is the official hint for per-user routing / abuse tracking.
|
# metadata.user_id is the official hint for per-user routing / abuse tracking.
|
||||||
metadata: dict[str, Any] | None = None
|
metadata: dict[str, Any] | None = None
|
||||||
# Tools / tool_choice are accepted but we can't forward them to Lingma yet —
|
# Tools / tool_choice are accepted for compatibility and, when forwarding is
|
||||||
# they're preserved here so the request doesn't 422, and the flattener
|
# enabled, are passed upstream as tool_config; tool_use / tool_result blocks
|
||||||
# surfaces any tool_use blocks as `[tool_use] {...}` text so the assistant
|
# are still flattened into text so the assistant can see prior tool context.
|
||||||
# still sees the context.
|
|
||||||
tools: list[dict[str, Any]] | None = None
|
tools: list[dict[str, Any]] | None = None
|
||||||
tool_choice: dict[str, Any] | None = None
|
tool_choice: dict[str, Any] | None = None
|
||||||
|
|
||||||
|
|||||||
148
app/http/execution_core.py
Normal file
148
app/http/execution_core.py
Normal file
@@ -0,0 +1,148 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Any, Awaitable, Callable
|
||||||
|
|
||||||
|
from ..lingma_pool import LingmaPool, PoolInstance
|
||||||
|
from ..model_map import build_model_name_map, flatten_model_keys, resolve_model
|
||||||
|
from ..session_cache import SessionCache, hash_branch_context
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class ExecutionContext:
|
||||||
|
ask_mode: str
|
||||||
|
lookup_key: str | None
|
||||||
|
write_key: str | None
|
||||||
|
cached_session_id: str | None
|
||||||
|
inst: PoolInstance
|
||||||
|
model: str
|
||||||
|
prompt: str
|
||||||
|
is_reply: bool
|
||||||
|
affinity: str | None
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_ask_mode(model: str, has_tooling_context: bool, *, default_ask_mode: str) -> str:
|
||||||
|
model_name = (model or "").lower()
|
||||||
|
if model_name in {"lingma-agent", "agent"} or has_tooling_context:
|
||||||
|
return "agent"
|
||||||
|
return default_ask_mode
|
||||||
|
|
||||||
|
|
||||||
|
async def _apply_cached_instance_or_invalidate(
|
||||||
|
*,
|
||||||
|
protocol: str,
|
||||||
|
logger: Any,
|
||||||
|
session_cache: SessionCache,
|
||||||
|
inst: PoolInstance,
|
||||||
|
cached_instance_name: str | None,
|
||||||
|
cached_session_id: str | None,
|
||||||
|
lookup_key: str | None,
|
||||||
|
) -> str | None:
|
||||||
|
if cached_instance_name and inst.name != cached_instance_name:
|
||||||
|
logger.info(
|
||||||
|
"%s session cache instance %s unhealthy, falling back to %s",
|
||||||
|
protocol,
|
||||||
|
cached_instance_name,
|
||||||
|
inst.name,
|
||||||
|
)
|
||||||
|
if lookup_key:
|
||||||
|
await session_cache.invalidate(lookup_key)
|
||||||
|
return None
|
||||||
|
return cached_session_id
|
||||||
|
|
||||||
|
|
||||||
|
async def prepare_execution_context(
|
||||||
|
*,
|
||||||
|
protocol: str,
|
||||||
|
requested_model: str,
|
||||||
|
has_tooling_context: bool,
|
||||||
|
tool_config: dict[str, Any] | None,
|
||||||
|
messages_dump: list[dict[str, Any]],
|
||||||
|
api_key: str,
|
||||||
|
affinity_key: str | None,
|
||||||
|
pool: LingmaPool,
|
||||||
|
session_cache: SessionCache,
|
||||||
|
logger: Any,
|
||||||
|
default_model: str,
|
||||||
|
default_ask_mode: str,
|
||||||
|
ensure_instance_logged_in: Callable[[PoolInstance], Awaitable[Any]],
|
||||||
|
last_user_text: Callable[[list[dict[str, Any]]], str],
|
||||||
|
messages_to_prompt: Callable[[list[dict[str, Any]]], str],
|
||||||
|
) -> ExecutionContext:
|
||||||
|
ask_mode = _resolve_ask_mode(
|
||||||
|
requested_model,
|
||||||
|
has_tooling_context,
|
||||||
|
default_ask_mode=default_ask_mode,
|
||||||
|
)
|
||||||
|
|
||||||
|
reuse_eligible = (
|
||||||
|
session_cache.enabled
|
||||||
|
and ask_mode == "chat"
|
||||||
|
and len(messages_dump) >= 2
|
||||||
|
and not has_tooling_context
|
||||||
|
)
|
||||||
|
lookup_key: str | None = None
|
||||||
|
write_key: str | None = None
|
||||||
|
cached_session_id: str | None = None
|
||||||
|
cached_instance_name: str | None = None
|
||||||
|
if reuse_eligible:
|
||||||
|
prefix_branch_context = hash_branch_context(messages_dump[:-1])
|
||||||
|
lookup_key = session_cache.build_key(
|
||||||
|
api_key,
|
||||||
|
messages_dump[:-1],
|
||||||
|
tool_config=tool_config,
|
||||||
|
branch_context=prefix_branch_context,
|
||||||
|
)
|
||||||
|
write_key = session_cache.build_key(
|
||||||
|
api_key,
|
||||||
|
messages_dump,
|
||||||
|
tool_config=tool_config,
|
||||||
|
branch_context=hash_branch_context(messages_dump),
|
||||||
|
)
|
||||||
|
entry = await session_cache.get(lookup_key)
|
||||||
|
if entry is None:
|
||||||
|
legacy_lookup_key = session_cache.build_key(api_key, messages_dump[:-1], tool_config=tool_config)
|
||||||
|
entry = await session_cache.get(legacy_lookup_key)
|
||||||
|
if entry is not None:
|
||||||
|
lookup_key = legacy_lookup_key
|
||||||
|
if entry is not None:
|
||||||
|
cached_session_id = entry.session_id
|
||||||
|
cached_instance_name = entry.instance_name or None
|
||||||
|
|
||||||
|
affinity = cached_instance_name or affinity_key
|
||||||
|
inst = pool.pick(affinity_key=affinity)
|
||||||
|
cached_session_id = await _apply_cached_instance_or_invalidate(
|
||||||
|
protocol=protocol,
|
||||||
|
logger=logger,
|
||||||
|
session_cache=session_cache,
|
||||||
|
inst=inst,
|
||||||
|
cached_instance_name=cached_instance_name,
|
||||||
|
cached_session_id=cached_session_id,
|
||||||
|
lookup_key=lookup_key,
|
||||||
|
)
|
||||||
|
|
||||||
|
await ensure_instance_logged_in(inst)
|
||||||
|
|
||||||
|
models = await inst.client.query_models()
|
||||||
|
available = flatten_model_keys(models)
|
||||||
|
name_map = build_model_name_map(models)
|
||||||
|
model = resolve_model(requested_model, available, default_model, name_map)
|
||||||
|
|
||||||
|
if cached_session_id:
|
||||||
|
prompt = last_user_text(messages_dump)
|
||||||
|
is_reply = True
|
||||||
|
else:
|
||||||
|
prompt = messages_to_prompt(messages_dump)
|
||||||
|
is_reply = False
|
||||||
|
|
||||||
|
return ExecutionContext(
|
||||||
|
ask_mode=ask_mode,
|
||||||
|
lookup_key=lookup_key,
|
||||||
|
write_key=write_key,
|
||||||
|
cached_session_id=cached_session_id,
|
||||||
|
inst=inst,
|
||||||
|
model=model,
|
||||||
|
prompt=prompt,
|
||||||
|
is_reply=is_reply,
|
||||||
|
affinity=affinity,
|
||||||
|
)
|
||||||
@@ -15,6 +15,49 @@ def _json_string(value: Any) -> str:
|
|||||||
return "{}"
|
return "{}"
|
||||||
|
|
||||||
|
|
||||||
|
def _openai_tool_name(tool: Any) -> str | None:
|
||||||
|
if not isinstance(tool, dict):
|
||||||
|
return None
|
||||||
|
if tool.get("type") == "function":
|
||||||
|
fn = tool.get("function")
|
||||||
|
if isinstance(fn, dict):
|
||||||
|
name = fn.get("name")
|
||||||
|
if isinstance(name, str) and name.strip():
|
||||||
|
return name.strip()
|
||||||
|
name = tool.get("name")
|
||||||
|
if isinstance(name, str) and name.strip():
|
||||||
|
return name.strip()
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _anthropic_tool_name(tool: Any) -> str | None:
|
||||||
|
if not isinstance(tool, dict):
|
||||||
|
return None
|
||||||
|
name = tool.get("name")
|
||||||
|
if isinstance(name, str) and name.strip():
|
||||||
|
return name.strip()
|
||||||
|
fn = tool.get("function")
|
||||||
|
if isinstance(fn, dict):
|
||||||
|
nested_name = fn.get("name")
|
||||||
|
if isinstance(nested_name, str) and nested_name.strip():
|
||||||
|
return nested_name.strip()
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _tool_event_allowed(
|
||||||
|
tool_name: str,
|
||||||
|
tool_config: dict[str, Any] | None,
|
||||||
|
*,
|
||||||
|
forced_tool_name: str | None = None,
|
||||||
|
) -> bool:
|
||||||
|
if not (tool_config and isinstance(tool_config.get("tools"), list) and tool_config.get("tools")):
|
||||||
|
return True
|
||||||
|
for tool in tool_config.get("tools") or []:
|
||||||
|
if tool_name == _anthropic_tool_name(tool) or tool_name == _openai_tool_name(tool):
|
||||||
|
return True
|
||||||
|
return bool(forced_tool_name and tool_name == forced_tool_name)
|
||||||
|
|
||||||
|
|
||||||
def _openai_forced_tool_name(tool_choice: Any) -> str | None:
|
def _openai_forced_tool_name(tool_choice: Any) -> str | None:
|
||||||
if not isinstance(tool_choice, dict):
|
if not isinstance(tool_choice, dict):
|
||||||
return None
|
return None
|
||||||
|
|||||||
297
app/main.py
297
app/main.py
@@ -25,6 +25,11 @@ from .auth import (
|
|||||||
)
|
)
|
||||||
from .concurrency import BackpressureRejected, InFlightGuard
|
from .concurrency import BackpressureRejected, InFlightGuard
|
||||||
from .config import Settings, load_settings
|
from .config import Settings, load_settings
|
||||||
|
from .http.execution_core import (
|
||||||
|
_apply_cached_instance_or_invalidate as _shared_apply_cached_instance_or_invalidate,
|
||||||
|
_resolve_ask_mode as _shared_resolve_ask_mode,
|
||||||
|
prepare_execution_context,
|
||||||
|
)
|
||||||
from .http.responses_adapter import (
|
from .http.responses_adapter import (
|
||||||
_responses_id_from_chat_id,
|
_responses_id_from_chat_id,
|
||||||
_responses_input_to_messages,
|
_responses_input_to_messages,
|
||||||
@@ -35,6 +40,7 @@ from .http.responses_adapter import (
|
|||||||
)
|
)
|
||||||
from .http.tool_bridge import (
|
from .http.tool_bridge import (
|
||||||
_anthropic_forced_tool_name,
|
_anthropic_forced_tool_name,
|
||||||
|
_anthropic_tool_name as _shared_anthropic_tool_name,
|
||||||
_anthropic_tool_result_block,
|
_anthropic_tool_result_block,
|
||||||
_anthropic_tool_use_block,
|
_anthropic_tool_use_block,
|
||||||
_forced_tool_event_from_text,
|
_forced_tool_event_from_text,
|
||||||
@@ -42,8 +48,10 @@ from .http.tool_bridge import (
|
|||||||
_json_string,
|
_json_string,
|
||||||
_openai_forced_tool_name,
|
_openai_forced_tool_name,
|
||||||
_openai_tool_call,
|
_openai_tool_call,
|
||||||
|
_openai_tool_name as _shared_openai_tool_name,
|
||||||
_tool_code_object_from_text,
|
_tool_code_object_from_text,
|
||||||
_tool_code_single_arg_name,
|
_tool_code_single_arg_name,
|
||||||
|
_tool_event_allowed,
|
||||||
)
|
)
|
||||||
from .lingma_pool import LingmaPool, PoolInstance
|
from .lingma_pool import LingmaPool, PoolInstance
|
||||||
from .logging_config import configure_logging, get_logger, request_id_var
|
from .logging_config import configure_logging, get_logger, request_id_var
|
||||||
@@ -383,32 +391,11 @@ def _tool_allowlist() -> set[str]:
|
|||||||
|
|
||||||
|
|
||||||
def _openai_tool_name(tool: Any) -> str | None:
|
def _openai_tool_name(tool: Any) -> str | None:
|
||||||
if not isinstance(tool, dict):
|
return _shared_openai_tool_name(tool)
|
||||||
return None
|
|
||||||
if tool.get("type") == "function":
|
|
||||||
fn = tool.get("function")
|
|
||||||
if isinstance(fn, dict):
|
|
||||||
name = fn.get("name")
|
|
||||||
if isinstance(name, str) and name.strip():
|
|
||||||
return name.strip()
|
|
||||||
name = tool.get("name")
|
|
||||||
if isinstance(name, str) and name.strip():
|
|
||||||
return name.strip()
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def _anthropic_tool_name(tool: Any) -> str | None:
|
def _anthropic_tool_name(tool: Any) -> str | None:
|
||||||
if not isinstance(tool, dict):
|
return _shared_anthropic_tool_name(tool)
|
||||||
return None
|
|
||||||
name = tool.get("name")
|
|
||||||
if isinstance(name, str) and name.strip():
|
|
||||||
return name.strip()
|
|
||||||
fn = tool.get("function")
|
|
||||||
if isinstance(fn, dict):
|
|
||||||
nested_name = fn.get("name")
|
|
||||||
if isinstance(nested_name, str) and nested_name.strip():
|
|
||||||
return nested_name.strip()
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def _filter_allowed_tools(tools: list[dict[str, Any]], *, provider: str) -> list[dict[str, Any]]:
|
def _filter_allowed_tools(tools: list[dict[str, Any]], *, provider: str) -> list[dict[str, Any]]:
|
||||||
@@ -509,10 +496,11 @@ def _anthropic_has_tooling_context(req: AnthropicMessagesRequest) -> bool:
|
|||||||
|
|
||||||
|
|
||||||
def _resolve_ask_mode(model: str, has_tooling_context: bool) -> str:
|
def _resolve_ask_mode(model: str, has_tooling_context: bool) -> str:
|
||||||
model_name = (model or "").lower()
|
return _shared_resolve_ask_mode(
|
||||||
if model_name in {"lingma-agent", "agent"} or has_tooling_context:
|
model,
|
||||||
return "agent"
|
has_tooling_context,
|
||||||
return settings.default_ask_mode
|
default_ask_mode=settings.default_ask_mode,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
async def _apply_cached_instance_or_invalidate(
|
async def _apply_cached_instance_or_invalidate(
|
||||||
@@ -523,17 +511,15 @@ async def _apply_cached_instance_or_invalidate(
|
|||||||
cached_session_id: str | None,
|
cached_session_id: str | None,
|
||||||
lookup_key: str | None,
|
lookup_key: str | None,
|
||||||
) -> str | None:
|
) -> str | None:
|
||||||
if cached_instance_name and inst.name != cached_instance_name:
|
return await _shared_apply_cached_instance_or_invalidate(
|
||||||
logger.info(
|
protocol=protocol,
|
||||||
"%s session cache instance %s unhealthy, falling back to %s",
|
logger=logger,
|
||||||
protocol,
|
session_cache=session_cache,
|
||||||
cached_instance_name,
|
inst=inst,
|
||||||
inst.name,
|
cached_instance_name=cached_instance_name,
|
||||||
|
cached_session_id=cached_session_id,
|
||||||
|
lookup_key=lookup_key,
|
||||||
)
|
)
|
||||||
if lookup_key:
|
|
||||||
await session_cache.invalidate(lookup_key)
|
|
||||||
return None
|
|
||||||
return cached_session_id
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -588,68 +574,32 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
|
|||||||
# 3. Stick the request to the pool instance that originally served it.
|
# 3. Stick the request to the pool instance that originally served it.
|
||||||
tool_config = _openai_tool_config(req)
|
tool_config = _openai_tool_config(req)
|
||||||
has_tooling_context = _openai_has_tooling_context(req, messages_dump)
|
has_tooling_context = _openai_has_tooling_context(req, messages_dump)
|
||||||
|
execution = await prepare_execution_context(
|
||||||
ask_mode = _resolve_ask_mode(req.model, has_tooling_context)
|
|
||||||
|
|
||||||
reuse_eligible = (
|
|
||||||
session_cache.enabled
|
|
||||||
and ask_mode == "chat"
|
|
||||||
and len(messages_dump) >= 2
|
|
||||||
and not has_tooling_context
|
|
||||||
)
|
|
||||||
lookup_key: str | None = None
|
|
||||||
write_key: str | None = None
|
|
||||||
cached_session_id: str | None = None
|
|
||||||
cached_instance_name: str | None = None
|
|
||||||
if reuse_eligible:
|
|
||||||
prefix_branch_context = hash_branch_context(messages_dump[:-1])
|
|
||||||
lookup_key = session_cache.build_key(
|
|
||||||
api_key,
|
|
||||||
messages_dump[:-1],
|
|
||||||
tool_config=tool_config,
|
|
||||||
branch_context=prefix_branch_context,
|
|
||||||
)
|
|
||||||
write_key = session_cache.build_key(
|
|
||||||
api_key,
|
|
||||||
messages_dump,
|
|
||||||
tool_config=tool_config,
|
|
||||||
branch_context=hash_branch_context(messages_dump),
|
|
||||||
)
|
|
||||||
entry = await session_cache.get(lookup_key)
|
|
||||||
if entry is None:
|
|
||||||
legacy_lookup_key = session_cache.build_key(api_key, messages_dump[:-1], tool_config=tool_config)
|
|
||||||
entry = await session_cache.get(legacy_lookup_key)
|
|
||||||
if entry is not None:
|
|
||||||
lookup_key = legacy_lookup_key
|
|
||||||
if entry is not None:
|
|
||||||
cached_session_id = entry.session_id
|
|
||||||
cached_instance_name = entry.instance_name or None
|
|
||||||
affinity = cached_instance_name or _affinity_key_for(req)
|
|
||||||
inst = p.pick(affinity_key=affinity)
|
|
||||||
|
|
||||||
cached_session_id = await _apply_cached_instance_or_invalidate(
|
|
||||||
protocol="chat",
|
protocol="chat",
|
||||||
inst=inst,
|
requested_model=req.model,
|
||||||
cached_instance_name=cached_instance_name,
|
has_tooling_context=has_tooling_context,
|
||||||
cached_session_id=cached_session_id,
|
tool_config=tool_config,
|
||||||
lookup_key=lookup_key,
|
messages_dump=messages_dump,
|
||||||
|
api_key=api_key,
|
||||||
|
affinity_key=_affinity_key_for(req),
|
||||||
|
pool=p,
|
||||||
|
session_cache=session_cache,
|
||||||
|
logger=logger,
|
||||||
|
default_model=settings.default_model,
|
||||||
|
default_ask_mode=settings.default_ask_mode,
|
||||||
|
ensure_instance_logged_in=_ensure_instance_logged_in,
|
||||||
|
last_user_text=_last_user_text,
|
||||||
|
messages_to_prompt=_messages_to_prompt,
|
||||||
)
|
)
|
||||||
|
ask_mode = execution.ask_mode
|
||||||
await _ensure_instance_logged_in(inst)
|
lookup_key = execution.lookup_key
|
||||||
|
write_key = execution.write_key
|
||||||
models = await inst.client.query_models()
|
cached_session_id = execution.cached_session_id
|
||||||
available = flatten_model_keys(models)
|
inst = execution.inst
|
||||||
name_map = build_model_name_map(models)
|
model = execution.model
|
||||||
model = resolve_model(req.model, available, settings.default_model, name_map)
|
prompt = execution.prompt
|
||||||
|
is_reply = execution.is_reply
|
||||||
# Prompt construction: on cache hit send only the last user turn so Lingma's
|
affinity = execution.affinity
|
||||||
# stored context isn't duplicated.
|
|
||||||
if cached_session_id:
|
|
||||||
prompt = _last_user_text(messages_dump)
|
|
||||||
is_reply = True
|
|
||||||
else:
|
|
||||||
prompt = _messages_to_prompt(messages_dump)
|
|
||||||
is_reply = False
|
|
||||||
|
|
||||||
if not prompt:
|
if not prompt:
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
@@ -748,16 +698,11 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
tool_name = str(tool.get("name") or "")
|
tool_name = str(tool.get("name") or "")
|
||||||
allowed = True
|
if not _tool_event_allowed(
|
||||||
if tool_config and isinstance(tool_config.get("tools"), list) and tool_config.get("tools"):
|
tool_name,
|
||||||
allowed = False
|
tool_config,
|
||||||
for t in tool_config.get("tools"):
|
forced_tool_name=forced_tool_name,
|
||||||
if tool_name == _anthropic_tool_name(t) or tool_name == _openai_tool_name(t):
|
):
|
||||||
allowed = True
|
|
||||||
break
|
|
||||||
if not allowed and forced_tool_name and tool_name == forced_tool_name:
|
|
||||||
allowed = True
|
|
||||||
if not allowed:
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if buffered_text_parts:
|
if buffered_text_parts:
|
||||||
@@ -956,16 +901,11 @@ async def v1_chat_completions(req: ChatCompletionsRequest, request: Request):
|
|||||||
for idx, item in enumerate(tool_events):
|
for idx, item in enumerate(tool_events):
|
||||||
if isinstance(item, dict):
|
if isinstance(item, dict):
|
||||||
tool_name = str(item.get("name") or "")
|
tool_name = str(item.get("name") or "")
|
||||||
allowed = True
|
if not _tool_event_allowed(
|
||||||
if tool_config and isinstance(tool_config.get("tools"), list) and tool_config.get("tools"):
|
tool_name,
|
||||||
allowed = False
|
tool_config,
|
||||||
for t in tool_config.get("tools"):
|
forced_tool_name=forced_tool_name,
|
||||||
if tool_name == _anthropic_tool_name(t) or tool_name == _openai_tool_name(t):
|
):
|
||||||
allowed = True
|
|
||||||
break
|
|
||||||
if not allowed and forced_tool_name and tool_name == forced_tool_name:
|
|
||||||
allowed = True
|
|
||||||
if not allowed:
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
tool_id = str(item.get("id") or f"call_{idx}")
|
tool_id = str(item.get("id") or f"call_{idx}")
|
||||||
@@ -1414,77 +1354,38 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
|
|||||||
message = error.get("message") or str(detail) or "invalid tool configuration"
|
message = error.get("message") or str(detail) or "invalid tool configuration"
|
||||||
return _anthropic_error(exc.status_code, "invalid_request_error", message)
|
return _anthropic_error(exc.status_code, "invalid_request_error", message)
|
||||||
has_tooling_context = _anthropic_has_tooling_context(req)
|
has_tooling_context = _anthropic_has_tooling_context(req)
|
||||||
|
|
||||||
ask_mode = _resolve_ask_mode(req.model, has_tooling_context)
|
|
||||||
|
|
||||||
reuse_eligible = (
|
|
||||||
session_cache.enabled and ask_mode == "chat" and len(messages_dump) >= 2 and not has_tooling_context
|
|
||||||
)
|
|
||||||
lookup_key: str | None = None
|
|
||||||
write_key: str | None = None
|
|
||||||
cached_session_id: str | None = None
|
|
||||||
cached_instance_name: str | None = None
|
|
||||||
if reuse_eligible:
|
|
||||||
prefix_branch_context = hash_branch_context(messages_dump[:-1])
|
|
||||||
lookup_key = session_cache.build_key(
|
|
||||||
api_key,
|
|
||||||
messages_dump[:-1],
|
|
||||||
tool_config=tool_config,
|
|
||||||
branch_context=prefix_branch_context,
|
|
||||||
)
|
|
||||||
write_key = session_cache.build_key(
|
|
||||||
api_key,
|
|
||||||
messages_dump,
|
|
||||||
tool_config=tool_config,
|
|
||||||
branch_context=hash_branch_context(messages_dump),
|
|
||||||
)
|
|
||||||
entry = await session_cache.get(lookup_key)
|
|
||||||
if entry is None:
|
|
||||||
legacy_lookup_key = session_cache.build_key(api_key, messages_dump[:-1], tool_config=tool_config)
|
|
||||||
entry = await session_cache.get(legacy_lookup_key)
|
|
||||||
if entry is not None:
|
|
||||||
lookup_key = legacy_lookup_key
|
|
||||||
if entry is not None:
|
|
||||||
cached_session_id = entry.session_id
|
|
||||||
cached_instance_name = entry.instance_name or None
|
|
||||||
|
|
||||||
affinity = cached_instance_name or affinity_key_for_anthropic(req)
|
|
||||||
inst = p.pick(affinity_key=affinity)
|
|
||||||
|
|
||||||
if cached_instance_name and inst.name != cached_instance_name:
|
|
||||||
logger.info(
|
|
||||||
"anthropic session cache instance %s unhealthy, falling back to %s",
|
|
||||||
cached_instance_name,
|
|
||||||
inst.name,
|
|
||||||
)
|
|
||||||
cached_session_id = None
|
|
||||||
if lookup_key:
|
|
||||||
await session_cache.invalidate(lookup_key)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
await _ensure_instance_logged_in(inst)
|
execution = await prepare_execution_context(
|
||||||
|
protocol="anthropic",
|
||||||
|
requested_model=req.model,
|
||||||
|
has_tooling_context=has_tooling_context,
|
||||||
|
tool_config=tool_config,
|
||||||
|
messages_dump=messages_dump,
|
||||||
|
api_key=api_key,
|
||||||
|
affinity_key=affinity_key_for_anthropic(req),
|
||||||
|
pool=p,
|
||||||
|
session_cache=session_cache,
|
||||||
|
logger=logger,
|
||||||
|
default_model=settings.default_model,
|
||||||
|
default_ask_mode=settings.default_ask_mode,
|
||||||
|
ensure_instance_logged_in=_ensure_instance_logged_in,
|
||||||
|
last_user_text=_last_user_text,
|
||||||
|
messages_to_prompt=_messages_to_prompt,
|
||||||
|
)
|
||||||
except HTTPException as exc:
|
except HTTPException as exc:
|
||||||
# 503/401/502 from login: map to closest Anthropic kind.
|
|
||||||
err_type = "authentication_error" if exc.status_code == 401 else "overloaded_error"
|
err_type = "authentication_error" if exc.status_code == 401 else "overloaded_error"
|
||||||
detail = exc.detail if isinstance(exc.detail, dict) else {}
|
detail = exc.detail if isinstance(exc.detail, dict) else {}
|
||||||
msg = (detail.get("error") or {}).get("message") or str(detail) or "upstream error"
|
msg = (detail.get("error") or {}).get("message") or str(detail) or "upstream error"
|
||||||
return _anthropic_error(exc.status_code, err_type, msg)
|
return _anthropic_error(exc.status_code, err_type, msg)
|
||||||
|
ask_mode = execution.ask_mode
|
||||||
# ------------------------------------------------------------- prompt & model
|
lookup_key = execution.lookup_key
|
||||||
models = await inst.client.query_models()
|
write_key = execution.write_key
|
||||||
available = flatten_model_keys(models)
|
cached_session_id = execution.cached_session_id
|
||||||
name_map = build_model_name_map(models)
|
inst = execution.inst
|
||||||
# Anthropic callers send `claude-*` model names. resolve_model's
|
model = execution.model
|
||||||
# final fallback (default_model / first available) handles that cleanly
|
prompt = execution.prompt
|
||||||
# without us having to hard-code a mapping table.
|
is_reply = execution.is_reply
|
||||||
model = resolve_model(req.model, available, settings.default_model, name_map)
|
affinity = execution.affinity
|
||||||
|
|
||||||
if cached_session_id:
|
|
||||||
prompt = _last_user_text(messages_dump)
|
|
||||||
is_reply = True
|
|
||||||
else:
|
|
||||||
prompt = _messages_to_prompt(messages_dump)
|
|
||||||
is_reply = False
|
|
||||||
|
|
||||||
if not prompt:
|
if not prompt:
|
||||||
return _anthropic_error(400, "invalid_request_error", "messages is empty")
|
return _anthropic_error(400, "invalid_request_error", "messages is empty")
|
||||||
@@ -1588,17 +1489,11 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
tool_name = str(tool.get("name") or "")
|
tool_name = str(tool.get("name") or "")
|
||||||
allowed = True
|
if not _tool_event_allowed(
|
||||||
if tool_config and isinstance(tool_config.get("tools"), list) and tool_config.get("tools"):
|
tool_name,
|
||||||
allowed = False
|
tool_config,
|
||||||
for t in tool_config.get("tools"):
|
forced_tool_name=_anthropic_forced_tool_name(req.tool_choice),
|
||||||
if tool_name == _anthropic_tool_name(t) or tool_name == _openai_tool_name(t):
|
):
|
||||||
allowed = True
|
|
||||||
break
|
|
||||||
forced_tool_name = _anthropic_forced_tool_name(req.tool_choice)
|
|
||||||
if not allowed and forced_tool_name and tool_name == forced_tool_name:
|
|
||||||
allowed = True
|
|
||||||
if not allowed:
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
tool_id = str(tool.get("id") or f"toolu_stream_{block_index}")
|
tool_id = str(tool.get("id") or f"toolu_stream_{block_index}")
|
||||||
@@ -1778,17 +1673,11 @@ async def v1_messages(req: AnthropicMessagesRequest, request: Request):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
tool_name = str(item.get("name") or "")
|
tool_name = str(item.get("name") or "")
|
||||||
allowed = True
|
if not _tool_event_allowed(
|
||||||
if tool_config and isinstance(tool_config.get("tools"), list) and tool_config.get("tools"):
|
tool_name,
|
||||||
allowed = False
|
tool_config,
|
||||||
for t in tool_config.get("tools"):
|
forced_tool_name=_anthropic_forced_tool_name(req.tool_choice),
|
||||||
if tool_name == _anthropic_tool_name(t) or tool_name == _openai_tool_name(t):
|
):
|
||||||
allowed = True
|
|
||||||
break
|
|
||||||
forced_tool_name = _anthropic_forced_tool_name(req.tool_choice)
|
|
||||||
if not allowed and forced_tool_name and tool_name == forced_tool_name:
|
|
||||||
allowed = True
|
|
||||||
if not allowed:
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
saw_tool_event = True
|
saw_tool_event = True
|
||||||
|
|||||||
@@ -991,6 +991,37 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase):
|
|||||||
self.assertEqual(openai_spy.last_complete_args[2], "chat")
|
self.assertEqual(openai_spy.last_complete_args[2], "chat")
|
||||||
self.assertEqual(anthropic_spy.last_complete_args[2], "chat")
|
self.assertEqual(anthropic_spy.last_complete_args[2], "chat")
|
||||||
|
|
||||||
|
async def test_anthropic_non_stream_does_not_forward_tool_config_when_disabled(self) -> None:
|
||||||
|
spy_client = _SpyClient(stream_events=[], complete_result={"text": "ok", "toolEvents": []})
|
||||||
|
req = AnthropicMessagesRequest(
|
||||||
|
model="claude-3-5-sonnet-20241022",
|
||||||
|
max_tokens=128,
|
||||||
|
messages=[{"role": "user", "content": "hi"}],
|
||||||
|
stream=False,
|
||||||
|
tools=[{"name": "lookup", "input_schema": {"type": "object", "properties": {}}}],
|
||||||
|
tool_choice={"type": "tool", "name": "lookup"},
|
||||||
|
)
|
||||||
|
|
||||||
|
with (
|
||||||
|
patch.object(main, "pool", _FakePool(_FakeInstance(spy_client))),
|
||||||
|
patch.object(main, "chat_guard", _FakeGuard()),
|
||||||
|
patch.object(main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})),
|
||||||
|
patch.object(main.stats_collector, "record_chat", AsyncMock(return_value=None)),
|
||||||
|
patch.object(main.settings, "api_keys", ["test-key"]),
|
||||||
|
_SettingsPatch(tool_forward_enabled=False, default_ask_mode="chat"),
|
||||||
|
):
|
||||||
|
await main.v1_messages(
|
||||||
|
req,
|
||||||
|
_make_request(
|
||||||
|
"/v1/messages",
|
||||||
|
headers={"x-api-key": "test-key", "anthropic-version": "2023-06-01"},
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertIn("tool_config", spy_client.last_complete_kwargs)
|
||||||
|
self.assertIsNone(spy_client.last_complete_kwargs["tool_config"])
|
||||||
|
self.assertEqual(spy_client.last_complete_args[2], "agent")
|
||||||
|
|
||||||
async def test_anthropic_non_stream_with_tools_uses_agent_mode(self) -> None:
|
async def test_anthropic_non_stream_with_tools_uses_agent_mode(self) -> None:
|
||||||
spy_client = _SpyClient(stream_events=[], complete_result={"text": "ok", "toolEvents": []})
|
spy_client = _SpyClient(stream_events=[], complete_result={"text": "ok", "toolEvents": []})
|
||||||
req = AnthropicMessagesRequest(
|
req = AnthropicMessagesRequest(
|
||||||
|
|||||||
Reference in New Issue
Block a user