Release v1.4.9 remote image routing
This commit is contained in:
@@ -62,9 +62,11 @@ type Image struct {
|
||||
}
|
||||
|
||||
type ChatMessage struct {
|
||||
Role string
|
||||
Text string
|
||||
Images []Image
|
||||
Role string
|
||||
Text string
|
||||
Images []Image
|
||||
ToolCallID string
|
||||
ToolCalls []toolemulation.ToolCall
|
||||
}
|
||||
|
||||
type ChatRequest struct {
|
||||
@@ -353,11 +355,17 @@ func (s *Service) generateRemote(
|
||||
req ChatRequest,
|
||||
onDelta func(string),
|
||||
) (*ChatResult, error) {
|
||||
if requestHasImages(req) {
|
||||
if len(req.Tools) > 0 && req.ToolChoice.Mode != "none" {
|
||||
return s.generateRemoteWithImageContext(ctx, req, onDelta)
|
||||
}
|
||||
return s.generateWithReconnect(ctx, req, onDelta)
|
||||
}
|
||||
if strings.TrimSpace(req.Model) == "" {
|
||||
req.Model = s.DefaultModel()
|
||||
}
|
||||
req.Model = normalizeModelForBackend(BackendRemote, req.Model)
|
||||
prompt, err := buildLingmaPrompt(req, SessionModeFresh)
|
||||
prompt, err := buildLingmaPrompt(req, SessionModeFresh, false)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -383,6 +391,23 @@ func (s *Service) generateRemote(
|
||||
return nil, lastErr
|
||||
}
|
||||
|
||||
func (s *Service) generateRemoteWithImageContext(
|
||||
ctx context.Context,
|
||||
req ChatRequest,
|
||||
onDelta func(string),
|
||||
) (*ChatResult, error) {
|
||||
imageReq := req
|
||||
imageReq.Tools = nil
|
||||
imageReq.ToolChoice = toolemulation.ToolChoice{Mode: "none"}
|
||||
imageReq.ParallelToolCalls = nil
|
||||
imageResult, err := s.generateWithReconnect(ctx, imageReq, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("image context extraction through IPC failed: %w", err)
|
||||
}
|
||||
remoteReq := requestWithImageContext(req, imageResult.Text)
|
||||
return s.generateRemote(ctx, remoteReq, onDelta)
|
||||
}
|
||||
|
||||
func (s *Service) generateRemoteWithModel(
|
||||
ctx context.Context,
|
||||
client *remote.Client,
|
||||
@@ -403,12 +428,32 @@ func (s *Service) generateRemoteWithModel(
|
||||
remoteResult, err := client.Chat(ctx, remote.ChatRequest{
|
||||
Model: model,
|
||||
Prompt: prompt,
|
||||
Messages: remoteMessagesFromRequest(req),
|
||||
Images: remoteImagesFromRequest(req),
|
||||
Stream: onDelta != nil,
|
||||
Temperature: req.Temperature,
|
||||
Tools: req.Tools,
|
||||
ToolChoice: req.ToolChoice,
|
||||
}, delta)
|
||||
if err != nil {
|
||||
return nil, emitted, err
|
||||
}
|
||||
if len(remoteResult.ToolCalls) == 0 && shouldRetryRemoteNativeTool(req, remoteResult.Text) {
|
||||
retryResult, retryErr := client.Chat(ctx, remote.ChatRequest{
|
||||
Model: model,
|
||||
Prompt: prompt,
|
||||
Messages: remoteMessagesFromRequest(req),
|
||||
Images: remoteImagesFromRequest(req),
|
||||
Stream: false,
|
||||
Temperature: req.Temperature,
|
||||
Tools: req.Tools,
|
||||
ToolChoice: toolemulation.ToolChoice{Mode: "any"},
|
||||
}, nil)
|
||||
if retryErr == nil && len(retryResult.ToolCalls) > 0 {
|
||||
remoteResult = retryResult
|
||||
emitted = false
|
||||
}
|
||||
}
|
||||
|
||||
result := &ChatResult{
|
||||
Text: remoteResult.Text,
|
||||
@@ -422,25 +467,133 @@ func (s *Service) generateRemoteWithModel(
|
||||
Endpoint: remote.ResolveBaseURL(s.cfg.RemoteBaseURL),
|
||||
Transport: "remote",
|
||||
EffectiveSession: SessionModeFresh,
|
||||
ToolCalls: remoteResult.ToolCalls,
|
||||
}
|
||||
s.applyToolEmulation(ctx, req, prompt, result, onDelta, func(hintPrompt string) (string, int, error) {
|
||||
retryResult, retryErr := client.Chat(ctx, remote.ChatRequest{
|
||||
Model: model,
|
||||
Prompt: hintPrompt,
|
||||
Stream: onDelta != nil,
|
||||
Temperature: req.Temperature,
|
||||
}, onDelta)
|
||||
if retryErr != nil {
|
||||
return "", 0, retryErr
|
||||
}
|
||||
if retryResult == nil {
|
||||
return "", 0, nil
|
||||
}
|
||||
return retryResult.Text, retryResult.OutputTokens, nil
|
||||
})
|
||||
return result, emitted, nil
|
||||
}
|
||||
|
||||
func remoteMessagesFromRequest(req ChatRequest) []remote.Message {
|
||||
out := make([]remote.Message, 0, len(req.Messages)+1)
|
||||
if system := strings.TrimSpace(req.System); system != "" {
|
||||
out = append(out, remote.Message{Role: "system", Content: system})
|
||||
}
|
||||
for _, message := range req.Messages {
|
||||
role := strings.ToLower(strings.TrimSpace(message.Role))
|
||||
if role == "" {
|
||||
continue
|
||||
}
|
||||
content := strings.TrimSpace(message.Text)
|
||||
if content == "" && len(message.Images) == 0 && len(message.ToolCalls) == 0 {
|
||||
continue
|
||||
}
|
||||
out = append(out, remote.Message{
|
||||
Role: role,
|
||||
Content: content,
|
||||
Images: remoteImagesFromChatMessage(message),
|
||||
ToolCallID: strings.TrimSpace(message.ToolCallID),
|
||||
ToolCalls: message.ToolCalls,
|
||||
})
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func remoteImagesFromChatMessage(message ChatMessage) []remote.Image {
|
||||
if len(message.Images) == 0 {
|
||||
return nil
|
||||
}
|
||||
images := make([]remote.Image, 0, len(message.Images))
|
||||
for _, img := range message.Images {
|
||||
if strings.TrimSpace(img.Data) == "" && strings.TrimSpace(img.URL) == "" {
|
||||
continue
|
||||
}
|
||||
images = append(images, remote.Image{
|
||||
MediaType: strings.TrimSpace(img.MediaType),
|
||||
Data: img.Data,
|
||||
URL: strings.TrimSpace(img.URL),
|
||||
})
|
||||
}
|
||||
return images
|
||||
}
|
||||
|
||||
func remoteImagesFromRequest(req ChatRequest) []remote.Image {
|
||||
var images []remote.Image
|
||||
for _, message := range req.Messages {
|
||||
for _, img := range message.Images {
|
||||
if strings.TrimSpace(img.Data) == "" && strings.TrimSpace(img.URL) == "" {
|
||||
continue
|
||||
}
|
||||
images = append(images, remote.Image{
|
||||
MediaType: strings.TrimSpace(img.MediaType),
|
||||
Data: img.Data,
|
||||
URL: strings.TrimSpace(img.URL),
|
||||
})
|
||||
}
|
||||
}
|
||||
return images
|
||||
}
|
||||
|
||||
func requestHasImages(req ChatRequest) bool {
|
||||
for _, message := range req.Messages {
|
||||
if len(remoteImagesFromChatMessage(message)) > 0 {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func requestWithImageContext(req ChatRequest, imageContext string) ChatRequest {
|
||||
out := req
|
||||
out.Messages = make([]ChatMessage, len(req.Messages))
|
||||
copy(out.Messages, req.Messages)
|
||||
for i := range out.Messages {
|
||||
out.Messages[i].Images = nil
|
||||
}
|
||||
contextText := strings.TrimSpace(imageContext)
|
||||
if contextText == "" {
|
||||
return out
|
||||
}
|
||||
addition := "\n\n[图片上下文]\n" + contextText
|
||||
for i := len(out.Messages) - 1; i >= 0; i-- {
|
||||
if strings.EqualFold(strings.TrimSpace(out.Messages[i].Role), "user") {
|
||||
out.Messages[i].Text = strings.TrimSpace(out.Messages[i].Text + addition)
|
||||
return out
|
||||
}
|
||||
}
|
||||
out.Messages = append(out.Messages, ChatMessage{Role: "user", Text: strings.TrimSpace("[图片上下文]\n" + contextText)})
|
||||
return out
|
||||
}
|
||||
|
||||
func shouldRetryRemoteNativeTool(req ChatRequest, text string) bool {
|
||||
if len(req.Tools) == 0 || req.ToolChoice.Mode == "none" {
|
||||
return false
|
||||
}
|
||||
trimmed := strings.TrimSpace(text)
|
||||
if trimmed == "" || len([]rune(trimmed)) > 180 {
|
||||
return false
|
||||
}
|
||||
lower := strings.ToLower(trimmed)
|
||||
cues := []string{
|
||||
"让我", "我来", "我将", "接下来", "继续", "查看", "检查", "搜索", "读取", "运行", "执行",
|
||||
"let me", "i'll", "i will", "next", "continue", "check", "inspect", "search", "read", "run",
|
||||
}
|
||||
hasCue := false
|
||||
for _, cue := range cues {
|
||||
if strings.Contains(lower, cue) {
|
||||
hasCue = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !hasCue {
|
||||
return false
|
||||
}
|
||||
return strings.HasSuffix(trimmed, ":") ||
|
||||
strings.HasSuffix(trimmed, ":") ||
|
||||
strings.Contains(trimmed, ":\n") ||
|
||||
strings.Contains(lower, "use ") ||
|
||||
strings.Contains(lower, "call ") ||
|
||||
strings.Contains(trimmed, "工具")
|
||||
}
|
||||
|
||||
func (s *Service) remoteAttemptModels(ctx context.Context, primary string) []string {
|
||||
primary = normalizeModelForBackend(BackendRemote, primary)
|
||||
models := []string{primary}
|
||||
@@ -526,7 +679,7 @@ func (s *Service) generateLocked(
|
||||
}
|
||||
|
||||
effectiveMode := resolveSessionMode(req, s.cfg.SessionMode)
|
||||
prompt, err := buildLingmaPrompt(req, effectiveMode)
|
||||
prompt, err := buildLingmaPrompt(req, effectiveMode, true)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -1078,14 +1231,14 @@ func resolveSessionMode(req ChatRequest, configured SessionMode) SessionMode {
|
||||
|
||||
func extractLastUserImages(messages []ChatMessage) []Image {
|
||||
for i := len(messages) - 1; i >= 0; i-- {
|
||||
if messages[i].Role == "user" {
|
||||
if messages[i].Role == "user" && len(messages[i].Images) > 0 {
|
||||
return messages[i].Images
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func buildLingmaPrompt(req ChatRequest, mode SessionMode) (string, error) {
|
||||
func buildLingmaPrompt(req ChatRequest, mode SessionMode, emulateTools bool) (string, error) {
|
||||
messages := filteredMessages(req.Messages)
|
||||
var lastUser string
|
||||
for i := len(messages) - 1; i >= 0; i-- {
|
||||
@@ -1102,7 +1255,7 @@ func buildLingmaPrompt(req ChatRequest, mode SessionMode) (string, error) {
|
||||
}
|
||||
|
||||
system := strings.TrimSpace(req.System)
|
||||
if len(req.Tools) > 0 && req.ToolChoice.Mode != "none" {
|
||||
if emulateTools && len(req.Tools) > 0 && req.ToolChoice.Mode != "none" {
|
||||
system = toolemulation.InjectTooling(system, req.Tools, req.ToolChoice, req.ParallelToolCalls)
|
||||
}
|
||||
|
||||
@@ -1110,7 +1263,7 @@ func buildLingmaPrompt(req ChatRequest, mode SessionMode) (string, error) {
|
||||
return lastUser, nil
|
||||
}
|
||||
|
||||
if len(req.Tools) > 0 {
|
||||
if emulateTools && len(req.Tools) > 0 {
|
||||
parts := make([]string, 0, len(messages)+3)
|
||||
for _, message := range messages {
|
||||
role := "User"
|
||||
@@ -1152,6 +1305,10 @@ func filteredMessages(messages []ChatMessage) []ChatMessage {
|
||||
if text == "" {
|
||||
continue
|
||||
}
|
||||
if role == "tool" {
|
||||
text = toolemulation.ActionOutputPrompt(message.ToolCallID, text)
|
||||
role = "user"
|
||||
}
|
||||
if role != "user" && role != "assistant" {
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -3,8 +3,11 @@ package service
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"lingma-ipc-proxy/internal/toolemulation"
|
||||
)
|
||||
|
||||
func TestIsRecoverableIPCError(t *testing.T) {
|
||||
@@ -48,3 +51,126 @@ func TestContextWithOptionalTimeoutPositiveSetsDeadline(t *testing.T) {
|
||||
t.Fatal("positive timeout should set a deadline")
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildLingmaPromptOnlyInjectsToolingWhenEmulationEnabled(t *testing.T) {
|
||||
req := ChatRequest{
|
||||
Messages: []ChatMessage{{Role: "user", Text: "查看项目结构"}},
|
||||
Tools: []toolemulation.ToolDef{{
|
||||
Name: "Bash",
|
||||
InputSchema: map[string]any{
|
||||
"properties": map[string]any{
|
||||
"command": map[string]any{"type": "string"},
|
||||
},
|
||||
"required": []any{"command"},
|
||||
},
|
||||
}},
|
||||
ToolChoice: toolemulation.ToolChoice{Mode: "auto"},
|
||||
}
|
||||
|
||||
remotePrompt, err := buildLingmaPrompt(req, SessionModeFresh, false)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if strings.Contains(remotePrompt, "```json action") || strings.Contains(remotePrompt, "DIRECT tool access") {
|
||||
t.Fatalf("remote prompt should not include tool emulation:\n%s", remotePrompt)
|
||||
}
|
||||
|
||||
ipcPrompt, err := buildLingmaPrompt(req, SessionModeFresh, true)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if !strings.Contains(ipcPrompt, "```json action") || !strings.Contains(ipcPrompt, "DIRECT tool access") {
|
||||
t.Fatalf("ipc prompt should include tool emulation:\n%s", ipcPrompt)
|
||||
}
|
||||
}
|
||||
|
||||
func TestShouldRetryRemoteNativeToolForContinuationText(t *testing.T) {
|
||||
req := ChatRequest{
|
||||
Tools: []toolemulation.ToolDef{{Name: "Bash"}},
|
||||
ToolChoice: toolemulation.ToolChoice{
|
||||
Mode: "auto",
|
||||
},
|
||||
}
|
||||
if !shouldRetryRemoteNativeTool(req, "让我查看一下项目的整体结构,特别是源代码目录:") {
|
||||
t.Fatal("expected continuation text to trigger native tool retry")
|
||||
}
|
||||
if shouldRetryRemoteNativeTool(req, "这是一个 uni-app 项目,核心目录是 src。") {
|
||||
t.Fatal("substantive answer should not trigger retry")
|
||||
}
|
||||
req.ToolChoice = toolemulation.ToolChoice{Mode: "none"}
|
||||
if shouldRetryRemoteNativeTool(req, "让我查看一下:") {
|
||||
t.Fatal("tool_choice none should not trigger retry")
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildLingmaPromptKeepsToolResultsForIPC(t *testing.T) {
|
||||
req := ChatRequest{
|
||||
Messages: []ChatMessage{
|
||||
{Role: "user", Text: "查看项目"},
|
||||
{Role: "assistant", ToolCalls: []toolemulation.ToolCall{{ID: "call_1", Name: "Bash", Arguments: map[string]any{"command": "pwd"}}}},
|
||||
{Role: "tool", ToolCallID: "call_1", Text: "/tmp/project"},
|
||||
},
|
||||
Tools: []toolemulation.ToolDef{{Name: "Bash"}},
|
||||
ToolChoice: toolemulation.ToolChoice{Mode: "auto"},
|
||||
}
|
||||
prompt, err := buildLingmaPrompt(req, SessionModeFresh, true)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if !strings.Contains(prompt, "Tool result for call_1") || !strings.Contains(prompt, "/tmp/project") {
|
||||
t.Fatalf("ipc prompt should include tool result:\n%s", prompt)
|
||||
}
|
||||
if strings.Contains(prompt, "Assistant used tool") {
|
||||
t.Fatalf("ipc prompt should not include textualized assistant tool calls:\n%s", prompt)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRemoteImagesFromRequest(t *testing.T) {
|
||||
req := ChatRequest{Messages: []ChatMessage{{Role: "user", Text: "see", Images: []Image{{MediaType: "image/png", Data: "AAAA"}}}}}
|
||||
images := remoteImagesFromRequest(req)
|
||||
if len(images) != 1 {
|
||||
t.Fatalf("images = %#v", images)
|
||||
}
|
||||
if images[0].MediaType != "image/png" || images[0].Data != "AAAA" {
|
||||
t.Fatalf("unexpected image = %#v", images[0])
|
||||
}
|
||||
}
|
||||
|
||||
func TestRequestHasImages(t *testing.T) {
|
||||
if requestHasImages(ChatRequest{Messages: []ChatMessage{{Role: "user", Text: "plain"}}}) {
|
||||
t.Fatal("plain request should not have images")
|
||||
}
|
||||
if !requestHasImages(ChatRequest{Messages: []ChatMessage{{Role: "user", Images: []Image{{URL: "file:///tmp/a.png"}}}}}) {
|
||||
t.Fatal("image URL request should have images")
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractLastUserImagesFindsPreviousImageTurn(t *testing.T) {
|
||||
images := extractLastUserImages([]ChatMessage{
|
||||
{Role: "user", Text: "看这张图", Images: []Image{{URL: "file:///tmp/a.png"}}},
|
||||
{Role: "assistant", Text: "这是一张图片"},
|
||||
{Role: "user", Text: "继续基于上图分析"},
|
||||
})
|
||||
if len(images) != 1 || images[0].URL != "file:///tmp/a.png" {
|
||||
t.Fatalf("images = %#v", images)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRequestWithImageContextRemovesImagesAndAppendsContext(t *testing.T) {
|
||||
req := ChatRequest{
|
||||
Messages: []ChatMessage{
|
||||
{Role: "user", Text: "看图", Images: []Image{{URL: "file:///tmp/a.png"}}},
|
||||
{Role: "assistant", Text: "好的"},
|
||||
{Role: "user", Text: "继续分析"},
|
||||
},
|
||||
}
|
||||
out := requestWithImageContext(req, "海边礁石和海浪")
|
||||
for _, message := range out.Messages {
|
||||
if len(message.Images) > 0 {
|
||||
t.Fatalf("images should be removed: %#v", out.Messages)
|
||||
}
|
||||
}
|
||||
if !strings.Contains(out.Messages[2].Text, "[图片上下文]") || !strings.Contains(out.Messages[2].Text, "海边礁石和海浪") {
|
||||
t.Fatalf("latest user message missing image context: %#v", out.Messages[2])
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user