diff --git a/DESIGN.md b/DESIGN.md index a600d0c..7b7e027 100644 --- a/DESIGN.md +++ b/DESIGN.md @@ -47,7 +47,7 @@ - **逆向 Lingma 后端协议**:之前评估过(曾经的"B1 终极方案"),需要反编译二进制,维护成本高、政策风险大,放弃。 - **多租户 / 水平扩缩**:单容器即可;真要大规模部署 → 套层反代 + N 个网关副本就够,不在进程内解决。 -- **请求侧完整 function calling / tools 语义**:仍不是当前目标;现阶段仅支持 `tools`/`tool_choice` 在 `TOOL_FORWARD_ENABLED` 开关下灰度透传(默认关闭)。 +- **请求侧完整 function calling / tools 语义**:仍不是当前目标;现阶段仅支持 `tools`/`tool_choice` 在 `TOOL_FORWARD_ENABLED` 开关下灰度透传(默认开启,可显式关闭)。 - **响应侧工具事件桥接**:若 Lingma 上游产出 tool 事件,网关会向 OpenAI 输出 `tool_calls`,向 Anthropic 输出 `tool_use` / `tool_result`(stream + non-stream)。 - **强制工具回退闭环**:OpenAI 在 stream + non-stream 下都支持从文本里解析严格 JSON / `tool_code` 并合成 `tool_calls`;Anthropic 当前只在 non-stream 下合成 `tool_use` / `tool_result`,stream 仍保持原始文本流。 diff --git a/README.md b/README.md index 336bb89..9ab3234 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ - OpenAI:`/v1/models`、`/v1/chat/completions`(含 stream) - Anthropic:`/v1/messages`、`/v1/messages/count_tokens`(含 stream) - 内置:多实例池、会话复用、Prometheus 指标、登录态 bundle 注入 -- 工具事件桥接:Lingma 上游返回 `tool` 事件时,网关会输出为 OpenAI `tool_calls`(stream/non-stream)和 Anthropic `tool_use` / `tool_result`(stream/non-stream);请求侧 `tools` / `tool_choice` 仅在 `TOOL_FORWARD_ENABLED=true` 时透传(默认关闭) +- 工具事件桥接:Lingma 上游返回 `tool` 事件时,网关会输出为 OpenAI `tool_calls`(stream/non-stream)和 Anthropic `tool_use` / `tool_result`(stream/non-stream);请求侧 `tools` / `tool_choice` 仅在 `TOOL_FORWARD_ENABLED=true` 时透传(默认开启,可显式关闭) - 多模态降级:OpenAI `image_url` / `input_image` 转 `[image]`,`input_audio` 转 `[audio]`;Anthropic `image` 转 `[image]` > 架构设计与二开细节请看 [`DESIGN.md`](./DESIGN.md)。 diff --git a/tests/test_tool_call_bridge.py b/tests/test_tool_call_bridge.py index 6efdd34..066587b 100644 --- a/tests/test_tool_call_bridge.py +++ b/tests/test_tool_call_bridge.py @@ -1197,6 +1197,104 @@ class ToolCallBridgeTests(unittest.IsolatedAsyncioTestCase): self.assertEqual(fake_cache.keys, []) self.assertEqual(fake_cache.get_calls, []) self.assertEqual(fake_cache.put_calls, []) + + async def test_anthropic_count_tokens_returns_input_tokens(self) -> None: + req = AnthropicMessagesRequest( + model="claude-3-5-sonnet-20241022", + max_tokens=64, + messages=[{"role": "user", "content": "count me"}], + ) + + with patch.object(main.settings, "api_keys", ["test-key"]): + response = await main.v1_messages_count_tokens( + req, + _make_request( + "/v1/messages/count_tokens", + headers={"x-api-key": "test-key", "anthropic-version": "2023-06-01"}, + ), + ) + + payload = json.loads(response.body) + self.assertEqual(response.status_code, 200) + self.assertEqual(payload, {"input_tokens": main.estimate_tokens(main._messages_to_prompt(main.anthropic_to_internal_messages(req)))}) + + async def test_anthropic_count_tokens_requires_authentication(self) -> None: + req = AnthropicMessagesRequest( + model="claude-3-5-sonnet-20241022", + max_tokens=64, + messages=[{"role": "user", "content": "count me"}], + ) + + with patch.object(main.settings, "api_keys", ["test-key"]): + response = await main.v1_messages_count_tokens( + req, + _make_request( + "/v1/messages/count_tokens", + headers={"anthropic-version": "2023-06-01"}, + ), + ) + + payload = json.loads(response.body) + self.assertEqual(response.status_code, 401) + self.assertEqual(payload["type"], "error") + self.assertEqual(payload["error"]["type"], "authentication_error") + + async def test_anthropic_messages_requires_authentication(self) -> None: + req = AnthropicMessagesRequest( + model="claude-3-5-sonnet-20241022", + max_tokens=64, + messages=[{"role": "user", "content": "hi"}], + stream=False, + ) + + with patch.object(main.settings, "api_keys", ["test-key"]): + response = await main.v1_messages( + req, + _make_request( + "/v1/messages", + headers={"anthropic-version": "2023-06-01"}, + ), + ) + + payload = json.loads(response.body) + self.assertEqual(response.status_code, 401) + self.assertEqual(payload["type"], "error") + self.assertEqual(payload["error"]["type"], "authentication_error") + + async def test_anthropic_messages_backpressure_returns_overloaded_error(self) -> None: + fake_client = _FakeClient(stream_events=[], complete_result={"text": "ok", "toolEvents": []}) + req = AnthropicMessagesRequest( + model="claude-3-5-sonnet-20241022", + max_tokens=64, + messages=[{"role": "user", "content": "hi"}], + stream=False, + ) + fake_guard = types.SimpleNamespace( + in_flight=0, + try_acquire=AsyncMock(side_effect=main.BackpressureRejected(2.4)), + ) + + with ( + patch.object(main, "pool", _FakePool(_FakeInstance(fake_client))), + patch.object(main, "chat_guard", fake_guard), + patch.object(main, "_ensure_instance_logged_in", AsyncMock(return_value={"id": "u"})), + patch.object(main.settings, "api_keys", ["test-key"]), + ): + response = await main.v1_messages( + req, + _make_request( + "/v1/messages", + headers={"x-api-key": "test-key", "anthropic-version": "2023-06-01"}, + ), + ) + + payload = json.loads(response.body) + self.assertEqual(response.status_code, 429) + self.assertEqual(response.headers["Retry-After"], "2") + self.assertEqual(payload["type"], "error") + self.assertEqual(payload["error"]["type"], "overloaded_error") + self.assertIn("retry later", payload["error"]["message"]) + async def test_responses_non_stream_maps_chat_payload_shape_and_input(self) -> None: req = ResponsesRequest( model="org_auto",