Release v1.4.9 remote image routing

This commit is contained in:
lutc5
2026-05-07 16:44:59 +08:00
parent 68e7843a45
commit 86fbdbc40c
12 changed files with 892 additions and 89 deletions

View File

@@ -62,9 +62,11 @@ type Image struct {
}
type ChatMessage struct {
Role string
Text string
Images []Image
Role string
Text string
Images []Image
ToolCallID string
ToolCalls []toolemulation.ToolCall
}
type ChatRequest struct {
@@ -353,11 +355,17 @@ func (s *Service) generateRemote(
req ChatRequest,
onDelta func(string),
) (*ChatResult, error) {
if requestHasImages(req) {
if len(req.Tools) > 0 && req.ToolChoice.Mode != "none" {
return s.generateRemoteWithImageContext(ctx, req, onDelta)
}
return s.generateWithReconnect(ctx, req, onDelta)
}
if strings.TrimSpace(req.Model) == "" {
req.Model = s.DefaultModel()
}
req.Model = normalizeModelForBackend(BackendRemote, req.Model)
prompt, err := buildLingmaPrompt(req, SessionModeFresh)
prompt, err := buildLingmaPrompt(req, SessionModeFresh, false)
if err != nil {
return nil, err
}
@@ -383,6 +391,23 @@ func (s *Service) generateRemote(
return nil, lastErr
}
// generateRemoteWithImageContext serves a request that carries images while
// tools are enabled by splitting it into two passes.
//
// Pass one sends a copy of the request with tools disabled (no tool list,
// tool choice "none", no parallel-tool setting) through generateWithReconnect
// without a streaming callback. The text of that reply is treated as a
// description of the images.
//
// Pass two hands the original request to generateRemote, with all images
// removed and the description folded into the conversation by
// requestWithImageContext. onDelta is only used for this second pass.
//
// An error from the first pass is wrapped and returned; the second pass's
// result and error are returned unchanged.
func (s *Service) generateRemoteWithImageContext(
	ctx context.Context,
	req ChatRequest,
	onDelta func(string),
) (*ChatResult, error) {
	// Work on a copy so the caller's tool configuration survives for pass two.
	describeReq := req
	describeReq.ParallelToolCalls = nil
	describeReq.ToolChoice = toolemulation.ToolChoice{Mode: "none"}
	describeReq.Tools = nil

	described, err := s.generateWithReconnect(ctx, describeReq, nil)
	if err != nil {
		return nil, fmt.Errorf("image context extraction through IPC failed: %w", err)
	}

	return s.generateRemote(ctx, requestWithImageContext(req, described.Text), onDelta)
}
func (s *Service) generateRemoteWithModel(
ctx context.Context,
client *remote.Client,
@@ -403,12 +428,32 @@ func (s *Service) generateRemoteWithModel(
remoteResult, err := client.Chat(ctx, remote.ChatRequest{
Model: model,
Prompt: prompt,
Messages: remoteMessagesFromRequest(req),
Images: remoteImagesFromRequest(req),
Stream: onDelta != nil,
Temperature: req.Temperature,
Tools: req.Tools,
ToolChoice: req.ToolChoice,
}, delta)
if err != nil {
return nil, emitted, err
}
if len(remoteResult.ToolCalls) == 0 && shouldRetryRemoteNativeTool(req, remoteResult.Text) {
retryResult, retryErr := client.Chat(ctx, remote.ChatRequest{
Model: model,
Prompt: prompt,
Messages: remoteMessagesFromRequest(req),
Images: remoteImagesFromRequest(req),
Stream: false,
Temperature: req.Temperature,
Tools: req.Tools,
ToolChoice: toolemulation.ToolChoice{Mode: "any"},
}, nil)
if retryErr == nil && len(retryResult.ToolCalls) > 0 {
remoteResult = retryResult
emitted = false
}
}
result := &ChatResult{
Text: remoteResult.Text,
@@ -422,25 +467,133 @@ func (s *Service) generateRemoteWithModel(
Endpoint: remote.ResolveBaseURL(s.cfg.RemoteBaseURL),
Transport: "remote",
EffectiveSession: SessionModeFresh,
ToolCalls: remoteResult.ToolCalls,
}
s.applyToolEmulation(ctx, req, prompt, result, onDelta, func(hintPrompt string) (string, int, error) {
retryResult, retryErr := client.Chat(ctx, remote.ChatRequest{
Model: model,
Prompt: hintPrompt,
Stream: onDelta != nil,
Temperature: req.Temperature,
}, onDelta)
if retryErr != nil {
return "", 0, retryErr
}
if retryResult == nil {
return "", 0, nil
}
return retryResult.Text, retryResult.OutputTokens, nil
})
return result, emitted, nil
}
// remoteMessagesFromRequest converts a ChatRequest into the message list sent
// to the remote client.
//
// A non-blank req.System becomes a leading "system" message. Each request
// message is then converted with its role lower-cased and trimmed and its text
// trimmed. Messages are dropped when the role is blank, or when they carry no
// text, no images and no tool calls.
func remoteMessagesFromRequest(req ChatRequest) []remote.Message {
	converted := make([]remote.Message, 0, len(req.Messages)+1)

	if sys := strings.TrimSpace(req.System); sys != "" {
		converted = append(converted, remote.Message{Role: "system", Content: sys})
	}

	for i := range req.Messages {
		msg := req.Messages[i]

		role := strings.ToLower(strings.TrimSpace(msg.Role))
		text := strings.TrimSpace(msg.Text)
		hasPayload := text != "" || len(msg.Images) > 0 || len(msg.ToolCalls) > 0
		if role == "" || !hasPayload {
			continue
		}

		converted = append(converted, remote.Message{
			Role:       role,
			Content:    text,
			Images:     remoteImagesFromChatMessage(msg),
			ToolCallID: strings.TrimSpace(msg.ToolCallID),
			ToolCalls:  msg.ToolCalls,
		})
	}

	return converted
}
// remoteImagesFromChatMessage converts the images attached to one message into
// remote.Image values.
//
// Images whose Data and URL are both blank are skipped. MediaType and URL are
// trimmed; Data is passed through untouched. The result is nil when the message
// has no images at all, and an empty non-nil slice when every image was skipped.
func remoteImagesFromChatMessage(message ChatMessage) []remote.Image {
	count := len(message.Images)
	if count == 0 {
		return nil
	}

	kept := make([]remote.Image, 0, count)
	for i := 0; i < count; i++ {
		src := message.Images[i]
		link := strings.TrimSpace(src.URL)
		if link == "" && strings.TrimSpace(src.Data) == "" {
			continue
		}
		kept = append(kept, remote.Image{
			MediaType: strings.TrimSpace(src.MediaType),
			Data:      src.Data,
			URL:       link,
		})
	}
	return kept
}
// remoteImagesFromRequest gathers every usable image from all messages of the
// request, in message order, as remote.Image values.
//
// An image is usable when its Data or its URL is non-blank. MediaType and URL
// are trimmed; Data is copied as-is. The result is nil when nothing qualifies.
func remoteImagesFromRequest(req ChatRequest) []remote.Image {
	var collected []remote.Image

	for m := range req.Messages {
		attached := req.Messages[m].Images
		for i := range attached {
			payload := attached[i].Data
			link := strings.TrimSpace(attached[i].URL)
			if strings.TrimSpace(payload) == "" && link == "" {
				continue
			}
			collected = append(collected, remote.Image{
				MediaType: strings.TrimSpace(attached[i].MediaType),
				Data:      payload,
				URL:       link,
			})
		}
	}

	return collected
}
// requestHasImages reports whether at least one message of the request yields
// a usable image according to remoteImagesFromChatMessage. Scanning stops at
// the first message that does.
func requestHasImages(req ChatRequest) bool {
	found := false
	for i := 0; i < len(req.Messages) && !found; i++ {
		found = len(remoteImagesFromChatMessage(req.Messages[i])) > 0
	}
	return found
}
// requestWithImageContext returns a copy of req in which no message carries
// images and, when imageContext is non-blank, the trimmed context is attached
// as text under a "[图片上下文]" heading.
//
// The context is appended to the last message whose role is "user" (compared
// case-insensitively after trimming); that message's combined text is trimmed.
// If there is no user message, a new user message holding only the heading and
// context is appended. The message slice is freshly allocated, so req.Messages
// itself is not modified.
func requestWithImageContext(req ChatRequest, imageContext string) ChatRequest {
	const header = "[图片上下文]\n"

	// Build the image-free copy and remember where the last user turn is.
	stripped := make([]ChatMessage, 0, len(req.Messages))
	lastUser := -1
	for idx, msg := range req.Messages {
		msg.Images = nil
		if strings.EqualFold(strings.TrimSpace(msg.Role), "user") {
			lastUser = idx
		}
		stripped = append(stripped, msg)
	}

	result := req
	result.Messages = stripped

	summary := strings.TrimSpace(imageContext)
	switch {
	case summary == "":
		// Nothing to attach; only the images were removed.
	case lastUser >= 0:
		target := &result.Messages[lastUser]
		target.Text = strings.TrimSpace(target.Text + "\n\n" + header + summary)
	default:
		result.Messages = append(result.Messages, ChatMessage{
			Role: "user",
			Text: strings.TrimSpace(header + summary),
		})
	}
	return result
}
// shouldRetryRemoteNativeTool reports whether a reply that came back without
// tool calls looks like the model merely announced an action ("let me
// check ...:") instead of performing it.
//
// It returns false when the request offers no tools, when tool choice is
// "none", when the trimmed text is empty, or when it is longer than 180 runes.
// Otherwise the text must satisfy both of the following:
//
//   - it contains one of the Chinese or English cue phrases below
//     (matched against the lower-cased text), and
//   - it has a structural hint of an unfinished action: it ends with an ASCII
//     or full-width colon, spans several lines, or mentions using/calling a
//     tool.
func shouldRetryRemoteNativeTool(req ChatRequest, text string) bool {
	if len(req.Tools) == 0 || req.ToolChoice.Mode == "none" {
		return false
	}

	trimmed := strings.TrimSpace(text)
	if trimmed == "" || len([]rune(trimmed)) > 180 {
		return false
	}

	lower := strings.ToLower(trimmed)
	cues := []string{
		"让我", "我来", "我将", "接下来", "继续", "查看", "检查", "搜索", "读取", "运行", "执行",
		"let me", "i'll", "i will", "next", "continue", "check", "inspect", "search", "read", "run",
	}
	hasCue := false
	for _, cue := range cues {
		if strings.Contains(lower, cue) {
			hasCue = true
			break
		}
	}
	if !hasCue {
		return false
	}

	// The second suffix is the full-width colon (U+FF1A), written as an escape
	// so it cannot be lost to character normalization. An empty suffix here
	// would make HasSuffix always true and turn this whole check into a no-op.
	return strings.HasSuffix(trimmed, ":") ||
		strings.HasSuffix(trimmed, "\uff1a") ||
		strings.Contains(trimmed, "\n") ||
		strings.Contains(lower, "use ") ||
		strings.Contains(lower, "call ") ||
		strings.Contains(trimmed, "工具")
}
func (s *Service) remoteAttemptModels(ctx context.Context, primary string) []string {
primary = normalizeModelForBackend(BackendRemote, primary)
models := []string{primary}
@@ -526,7 +679,7 @@ func (s *Service) generateLocked(
}
effectiveMode := resolveSessionMode(req, s.cfg.SessionMode)
prompt, err := buildLingmaPrompt(req, effectiveMode)
prompt, err := buildLingmaPrompt(req, effectiveMode, true)
if err != nil {
return nil, err
}
@@ -1078,14 +1231,14 @@ func resolveSessionMode(req ChatRequest, configured SessionMode) SessionMode {
func extractLastUserImages(messages []ChatMessage) []Image {
for i := len(messages) - 1; i >= 0; i-- {
if messages[i].Role == "user" {
if messages[i].Role == "user" && len(messages[i].Images) > 0 {
return messages[i].Images
}
}
return nil
}
func buildLingmaPrompt(req ChatRequest, mode SessionMode) (string, error) {
func buildLingmaPrompt(req ChatRequest, mode SessionMode, emulateTools bool) (string, error) {
messages := filteredMessages(req.Messages)
var lastUser string
for i := len(messages) - 1; i >= 0; i-- {
@@ -1102,7 +1255,7 @@ func buildLingmaPrompt(req ChatRequest, mode SessionMode) (string, error) {
}
system := strings.TrimSpace(req.System)
if len(req.Tools) > 0 && req.ToolChoice.Mode != "none" {
if emulateTools && len(req.Tools) > 0 && req.ToolChoice.Mode != "none" {
system = toolemulation.InjectTooling(system, req.Tools, req.ToolChoice, req.ParallelToolCalls)
}
@@ -1110,7 +1263,7 @@ func buildLingmaPrompt(req ChatRequest, mode SessionMode) (string, error) {
return lastUser, nil
}
if len(req.Tools) > 0 {
if emulateTools && len(req.Tools) > 0 {
parts := make([]string, 0, len(messages)+3)
for _, message := range messages {
role := "User"
@@ -1152,6 +1305,10 @@ func filteredMessages(messages []ChatMessage) []ChatMessage {
if text == "" {
continue
}
if role == "tool" {
text = toolemulation.ActionOutputPrompt(message.ToolCallID, text)
role = "user"
}
if role != "user" && role != "assistant" {
continue
}

View File

@@ -3,8 +3,11 @@ package service
import (
"context"
"errors"
"strings"
"testing"
"time"
"lingma-ipc-proxy/internal/toolemulation"
)
func TestIsRecoverableIPCError(t *testing.T) {
@@ -48,3 +51,126 @@ func TestContextWithOptionalTimeoutPositiveSetsDeadline(t *testing.T) {
t.Fatal("positive timeout should set a deadline")
}
}
// TestBuildLingmaPromptOnlyInjectsToolingWhenEmulationEnabled checks that the
// tool-emulation instructions appear in the prompt only when buildLingmaPrompt
// is called with emulation enabled.
func TestBuildLingmaPromptOnlyInjectsToolingWhenEmulationEnabled(t *testing.T) {
	markers := []string{"```json action", "DIRECT tool access"}

	bashSchema := map[string]any{
		"properties": map[string]any{
			"command": map[string]any{"type": "string"},
		},
		"required": []any{"command"},
	}
	req := ChatRequest{
		Messages:   []ChatMessage{{Role: "user", Text: "查看项目结构"}},
		Tools:      []toolemulation.ToolDef{{Name: "Bash", InputSchema: bashSchema}},
		ToolChoice: toolemulation.ToolChoice{Mode: "auto"},
	}

	// Emulation off: none of the markers may be present.
	plain, err := buildLingmaPrompt(req, SessionModeFresh, false)
	if err != nil {
		t.Fatal(err)
	}
	for _, marker := range markers {
		if strings.Contains(plain, marker) {
			t.Fatalf("remote prompt should not include tool emulation:\n%s", plain)
		}
	}

	// Emulation on: every marker must be present.
	emulated, err := buildLingmaPrompt(req, SessionModeFresh, true)
	if err != nil {
		t.Fatal(err)
	}
	for _, marker := range markers {
		if !strings.Contains(emulated, marker) {
			t.Fatalf("ipc prompt should include tool emulation:\n%s", emulated)
		}
	}
}
// TestShouldRetryRemoteNativeToolForContinuationText checks that an
// announcement-style reply triggers the native tool retry, while a substantive
// answer or a request with tool choice "none" does not.
func TestShouldRetryRemoteNativeToolForContinuationText(t *testing.T) {
	scenarios := []struct {
		mode    string
		reply   string
		want    bool
		failure string
	}{
		{"auto", "让我查看一下项目的整体结构,特别是源代码目录:", true, "expected continuation text to trigger native tool retry"},
		{"auto", "这是一个 uni-app 项目,核心目录是 src。", false, "substantive answer should not trigger retry"},
		{"none", "让我查看一下:", false, "tool_choice none should not trigger retry"},
	}

	req := ChatRequest{Tools: []toolemulation.ToolDef{{Name: "Bash"}}}
	for _, sc := range scenarios {
		req.ToolChoice = toolemulation.ToolChoice{Mode: sc.mode}
		if shouldRetryRemoteNativeTool(req, sc.reply) != sc.want {
			t.Fatal(sc.failure)
		}
	}
}
func TestBuildLingmaPromptKeepsToolResultsForIPC(t *testing.T) {
req := ChatRequest{
Messages: []ChatMessage{
{Role: "user", Text: "查看项目"},
{Role: "assistant", ToolCalls: []toolemulation.ToolCall{{ID: "call_1", Name: "Bash", Arguments: map[string]any{"command": "pwd"}}}},
{Role: "tool", ToolCallID: "call_1", Text: "/tmp/project"},
},
Tools: []toolemulation.ToolDef{{Name: "Bash"}},
ToolChoice: toolemulation.ToolChoice{Mode: "auto"},
}
prompt, err := buildLingmaPrompt(req, SessionModeFresh, true)
if err != nil {
t.Fatal(err)
}
if !strings.Contains(prompt, "Tool result for call_1") || !strings.Contains(prompt, "/tmp/project") {
t.Fatalf("ipc prompt should include tool result:\n%s", prompt)
}
if strings.Contains(prompt, "Assistant used tool") {
t.Fatalf("ipc prompt should not include textualized assistant tool calls:\n%s", prompt)
}
}
func TestRemoteImagesFromRequest(t *testing.T) {
req := ChatRequest{Messages: []ChatMessage{{Role: "user", Text: "see", Images: []Image{{MediaType: "image/png", Data: "AAAA"}}}}}
images := remoteImagesFromRequest(req)
if len(images) != 1 {
t.Fatalf("images = %#v", images)
}
if images[0].MediaType != "image/png" || images[0].Data != "AAAA" {
t.Fatalf("unexpected image = %#v", images[0])
}
}
func TestRequestHasImages(t *testing.T) {
if requestHasImages(ChatRequest{Messages: []ChatMessage{{Role: "user", Text: "plain"}}}) {
t.Fatal("plain request should not have images")
}
if !requestHasImages(ChatRequest{Messages: []ChatMessage{{Role: "user", Images: []Image{{URL: "file:///tmp/a.png"}}}}}) {
t.Fatal("image URL request should have images")
}
}
func TestExtractLastUserImagesFindsPreviousImageTurn(t *testing.T) {
images := extractLastUserImages([]ChatMessage{
{Role: "user", Text: "看这张图", Images: []Image{{URL: "file:///tmp/a.png"}}},
{Role: "assistant", Text: "这是一张图片"},
{Role: "user", Text: "继续基于上图分析"},
})
if len(images) != 1 || images[0].URL != "file:///tmp/a.png" {
t.Fatalf("images = %#v", images)
}
}
// TestRequestWithImageContextRemovesImagesAndAppendsContext checks that every
// message loses its images and that the latest user message receives the image
// context under its heading.
func TestRequestWithImageContextRemovesImagesAndAppendsContext(t *testing.T) {
	const description = "海边礁石和海浪"

	input := ChatRequest{
		Messages: []ChatMessage{
			{Role: "user", Text: "看图", Images: []Image{{URL: "file:///tmp/a.png"}}},
			{Role: "assistant", Text: "好的"},
			{Role: "user", Text: "继续分析"},
		},
	}

	got := requestWithImageContext(input, description)

	for i := range got.Messages {
		if len(got.Messages[i].Images) > 0 {
			t.Fatalf("images should be removed: %#v", got.Messages)
		}
	}

	latest := got.Messages[2]
	hasHeading := strings.Contains(latest.Text, "[图片上下文]")
	hasDescription := strings.Contains(latest.Text, description)
	if !hasHeading || !hasDescription {
		t.Fatalf("latest user message missing image context: %#v", latest)
	}
}