Implement a thin responses layer that reuses existing chat/completions execution so auth, pooling, streaming, tool passthrough, and error semantics stay aligned across APIs. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
105 lines
3.0 KiB
Python
105 lines
3.0 KiB
Python
from __future__ import annotations
|
|
|
|
from typing import Any, Literal
|
|
|
|
from pydantic import BaseModel, Field
|
|
|
|
|
|
# Keep permissive: OpenAI clients routinely send a list of parts (multi-modal)
# or None (for tool calls) instead of a plain string. All three shapes are
# accepted here and reduced to plain text downstream (see flatten_content).
MessageContent = str | list[dict[str, Any]] | None
|
|
|
|
|
|
class ChatMessage(BaseModel):
    """One entry of a chat ``messages`` list.

    Only ``role`` is required; every other field defaults to ``None``.
    """

    # OpenAI accepts "developer" on newer API versions on top of the classic
    # role set, so it is allowed here as well.
    role: Literal["system", "user", "assistant", "tool", "developer", "function"]

    # Plain string, list of content-part dicts, or None (see MessageContent).
    content: MessageContent = None

    name: str | None = None

    # Tool-related fields are carried as loosely typed data; the inner
    # structure of each tool call dict is not validated by this model.
    tool_call_id: str | None = None
    tool_calls: list[dict[str, Any]] | None = None
|
|
|
|
|
|
class ChatCompletionsRequest(BaseModel):
    """Request payload for a chat-completions call.

    ``model`` and ``messages`` are required. ``stream`` defaults to ``False``
    and every remaining field defaults to ``None``.
    """

    # --- required ---
    model: str
    messages: list[ChatMessage]

    # --- delivery mode ---
    stream: bool = False

    # --- sampling / length controls ---
    temperature: float | None = None
    top_p: float | None = None
    max_tokens: int | None = None

    # --- caller metadata and streaming extras ---
    user: str | None = None
    stream_options: dict[str, Any] | None = None

    # --- tool definitions; kept as loosely typed data, not validated here ---
    tools: list[dict[str, Any]] | None = None
    tool_choice: Any | None = None
|
|
|
|
|
|
class ResponsesRequest(BaseModel):
    """Request payload for a responses-style call.

    Only ``model`` is required. ``input`` is typed as ``Any`` so its shape is
    not validated by this model. ``stream`` defaults to ``False`` and all
    other fields default to ``None``.
    """

    # --- required ---
    model: str

    # --- prompt material; accepted as-is without shape validation ---
    input: Any | None = None

    # --- delivery mode ---
    stream: bool = False

    # --- sampling / length controls ---
    temperature: float | None = None
    top_p: float | None = None
    max_output_tokens: int | None = None

    # --- caller metadata ---
    user: str | None = None

    # --- tool definitions; kept as loosely typed data, not validated here ---
    tools: list[dict[str, Any]] | None = None
    tool_choice: Any | None = None

    # --- optional instruction text ---
    instructions: str | None = None
|
|
|
|
|
|
class ModelData(BaseModel):
    """Description of a single model entry.

    Only ``id`` is required; the remaining fields carry fixed defaults.
    """

    id: str
    name: str | None = None

    # Constant-like defaults: object tag "model", creation time 0, and the
    # owner label "lingma".
    object: str = "model"
    created: int = 0
    owned_by: str = "lingma"
|
|
|
|
|
|
class ModelsResponse(BaseModel):
    """Envelope holding a list of ``ModelData`` entries."""

    # Object tag for the envelope; defaults to "list".
    object: str = "list"

    # The model entries themselves (required).
    data: list[ModelData]
|
|
|
|
|
|
class ChatCompletionChoice(BaseModel):
    """One choice inside a chat-completion response.

    Every field has a default, so the model can be built with no arguments.
    """

    index: int = 0

    # Defaults to "stop"; None is also accepted.
    finish_reason: str | None = "stop"

    # default_factory gives each instance its own fresh empty dict.
    message: dict = Field(default_factory=dict)

    logprobs: Any | None = None
|
|
|
|
|
|
class ChatCompletionResponse(BaseModel):
    """Top-level body of a non-streaming chat-completion response.

    ``id``, ``created``, ``model`` and ``choices`` are required. ``object``
    defaults to ``"chat.completion"`` and ``system_fingerprint`` to ``None``.
    """

    id: str

    # Object tag for this response type.
    object: str = "chat.completion"

    created: int
    model: str
    choices: list[ChatCompletionChoice]

    system_fingerprint: str | None = None
|
|
|
|
|
|
def flatten_content(content: MessageContent) -> str:
    """Reduce OpenAI multi-part content to a plain string prompt for Lingma.

    Args:
        content: ``None``, a plain string, or a list of content parts
            (normally dicts carrying a ``"type"`` key).

    Returns:
        ``""`` for ``None``; the string unchanged for ``str``; for a list, the
        non-empty fragments joined with newlines. Image parts are rendered
        as ``"[image]"`` and audio parts as ``"[audio]"``. Non-dict list
        items are passed through ``str()``. Any other top-level value is
        returned as ``str(content)``.
    """
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if not isinstance(content, list):
        return str(content)

    parts: list[str] = []
    for item in content:
        if not isinstance(item, dict):
            parts.append(str(item))
            continue

        kind = item.get("type")
        if kind in ("image_url", "input_image"):
            # Lingma does not support multi-modal input; degrade to a
            # placeholder so the semantic signal is retained.
            parts.append("[image]")
            continue
        if kind == "input_audio":
            parts.append("[audio]")
            continue

        if kind == "text":
            text = item.get("text")
        else:
            # Unknown part types: fall back to whichever of "text" /
            # "content" carries a value.
            text = item.get("text") or item.get("content")

        # Only real, non-empty strings may reach the join below; a non-string
        # payload (number, dict, list) would otherwise raise TypeError.
        if isinstance(text, str) and text:
            parts.append(text)

    return "\n".join(p for p in parts if p)
|