feat: add desktop app release packaging

This commit is contained in:
lutc5
2026-04-29 18:45:25 +08:00
parent 74bbd8e6d2
commit 92c8735bfc
73 changed files with 8934 additions and 757 deletions

View File

@@ -1,9 +1,12 @@
package httpapi
import (
"bytes"
"context"
"encoding/base64"
"encoding/json"
"fmt"
"io"
"net/http"
"strings"
"time"
@@ -16,16 +19,25 @@ type Server struct {
svc *service.Service
http *http.Server
sem chan struct{}
// OnRequest is called after each request completes with summary info.
// method, path, statusCode, duration, requestBody, responseBody
OnRequest func(method, path string, statusCode int, duration time.Duration, reqBody, respBody string)
}
type anthropicRequest struct {
Model string `json:"model"`
MaxTokens int `json:"max_tokens,omitempty"`
System any `json:"system,omitempty"`
Messages []rawMessage `json:"messages"`
Stream bool `json:"stream,omitempty"`
Tools any `json:"tools,omitempty"`
ToolChoice any `json:"tool_choice,omitempty"`
Model string `json:"model"`
MaxTokens int `json:"max_tokens,omitempty"`
System any `json:"system,omitempty"`
Messages []rawMessage `json:"messages"`
Stream bool `json:"stream,omitempty"`
Tools any `json:"tools,omitempty"`
ToolChoice any `json:"tool_choice,omitempty"`
Temperature *float64 `json:"temperature,omitempty"`
TopP *float64 `json:"top_p,omitempty"`
TopK int `json:"top_k,omitempty"`
StopSequences []string `json:"stop_sequences,omitempty"`
Metadata map[string]any `json:"metadata,omitempty"`
Thinking any `json:"thinking,omitempty"`
}
type openAIChatRequest struct {
@@ -36,6 +48,18 @@ type openAIChatRequest struct {
MaxCompletionTokens int `json:"max_completion_tokens,omitempty"`
Tools any `json:"tools,omitempty"`
ToolChoice any `json:"tool_choice,omitempty"`
ParallelToolCalls *bool `json:"parallel_tool_calls,omitempty"`
Temperature *float64 `json:"temperature,omitempty"`
TopP *float64 `json:"top_p,omitempty"`
Stop any `json:"stop,omitempty"`
PresencePenalty float64 `json:"presence_penalty,omitempty"`
FrequencyPenalty float64 `json:"frequency_penalty,omitempty"`
Logprobs bool `json:"logprobs,omitempty"`
TopLogprobs int `json:"top_logprobs,omitempty"`
ResponseFormat any `json:"response_format,omitempty"`
Seed int `json:"seed,omitempty"`
User string `json:"user,omitempty"`
ReasoningEffort string `json:"reasoning_effort,omitempty"`
}
type rawMessage struct {
@@ -67,7 +91,7 @@ func NewServer(addr string, svc *service.Service) *Server {
s.http = &http.Server{
Addr: addr,
Handler: withCORS(mux),
Handler: s.withRecorder(withCORS(mux)),
ReadHeaderTimeout: 10 * time.Second,
}
return s
@@ -79,6 +103,13 @@ func (s *Server) ListenAndServe() error {
func (s *Server) Shutdown(ctx context.Context) error {
err := s.http.Shutdown(ctx)
if err != nil {
if forceErr := s.http.Close(); forceErr != nil {
err = fmt.Errorf("%w; force close failed: %v", err, forceErr)
} else {
err = nil
}
}
closeErr := s.svc.Close()
if err != nil {
return err
@@ -86,6 +117,16 @@ func (s *Server) Shutdown(ctx context.Context) error {
return closeErr
}
func (s *Server) SetDefaultModel(model string) {
s.svc.SetDefaultModel(model)
}
func (s *Server) applyDefaultModel(req *service.ChatRequest) {
if strings.TrimSpace(req.Model) == "" {
req.Model = s.svc.DefaultModel()
}
}
func (s *Server) handleRoot(w http.ResponseWriter, r *http.Request) {
if r.URL.Path != "/" && r.URL.Path != "/health" {
writeOpenAIError(w, http.StatusNotFound, "not_found_error", "not found")
@@ -160,11 +201,16 @@ func (s *Server) handleAnthropicMessages(w http.ResponseWriter, r *http.Request)
return
}
if reqBody, _ := json.Marshal(req); len(reqBody) > 0 {
fmt.Printf("[ANTHROPIC REQUEST] %s\n", string(reqBody))
}
normalized, err := normalizeAnthropicRequest(req)
if err != nil {
writeAnthropicError(w, http.StatusBadRequest, "invalid_request_error", err.Error())
return
}
s.applyDefaultModel(&normalized)
if req.Stream {
s.handleAnthropicStream(w, r, normalized)
@@ -231,6 +277,7 @@ func (s *Server) handleOpenAIChatCompletions(w http.ResponseWriter, r *http.Requ
writeOpenAIError(w, http.StatusBadRequest, "invalid_request_error", err.Error())
return
}
s.applyDefaultModel(&normalized)
if req.Stream {
s.handleOpenAIStream(w, r, normalized)
@@ -298,61 +345,6 @@ func (s *Server) handleAnthropicStream(w http.ResponseWriter, r *http.Request, r
}
msgID := fmt.Sprintf("msg_%d", time.Now().UnixNano())
if len(req.Tools) > 0 {
result, err := s.svc.Generate(r.Context(), req)
if err != nil {
writeAnthropicError(w, http.StatusInternalServerError, "api_error", err.Error())
return
}
streamingHeaders(w)
_ = writeSSEEvent(w, flusher, "message_start", map[string]any{
"type": "message_start",
"message": map[string]any{
"id": msgID, "type": "message", "role": "assistant", "content": []any{},
"model": model, "stop_reason": nil, "stop_sequence": nil,
"usage": map[string]any{"input_tokens": 0, "output_tokens": 0},
},
})
_ = writeSSEEvent(w, flusher, "content_block_start", map[string]any{
"type": "content_block_start", "index": 0,
"content_block": map[string]any{"type": "text", "text": ""},
})
if result.Text != "" {
_ = writeSSEEvent(w, flusher, "content_block_delta", map[string]any{
"type": "content_block_delta", "index": 0,
"delta": map[string]any{"type": "text_delta", "text": result.Text},
})
}
_ = writeSSEEvent(w, flusher, "content_block_stop", map[string]any{
"type": "content_block_stop", "index": 0,
})
for i, tc := range result.ToolCalls {
_ = writeSSEEvent(w, flusher, "content_block_start", map[string]any{
"type": "content_block_start", "index": i + 1,
"content_block": map[string]any{"type": "tool_use", "id": tc.ID, "name": tc.Name, "input": map[string]any{}},
})
argsJSON, _ := json.Marshal(tc.Arguments)
_ = writeSSEEvent(w, flusher, "content_block_delta", map[string]any{
"type": "content_block_delta", "index": i + 1,
"delta": map[string]any{"type": "input_json_delta", "partial_json": string(argsJSON)},
})
_ = writeSSEEvent(w, flusher, "content_block_stop", map[string]any{
"type": "content_block_stop", "index": i + 1,
})
}
stopReason := "end_turn"
if len(result.ToolCalls) > 0 {
stopReason = "tool_use"
}
_ = writeSSEEvent(w, flusher, "message_delta", map[string]any{
"type": "message_delta",
"delta": map[string]any{"stop_reason": stopReason, "stop_sequence": nil},
"usage": map[string]any{"output_tokens": result.OutputTokens},
})
_ = writeSSEEvent(w, flusher, "message_stop", map[string]any{"type": "message_stop"})
return
}
events, done, err := s.svc.GenerateStream(r.Context(), req)
if err != nil {
writeAnthropicError(w, http.StatusInternalServerError, "api_error", err.Error())
@@ -453,10 +445,31 @@ func (s *Server) handleAnthropicStream(w http.ResponseWriter, r *http.Request, r
}); err != nil {
return
}
for i, tc := range final.ToolCalls {
_ = writeSSEEvent(w, flusher, "content_block_start", map[string]any{
"type": "content_block_start",
"index": i + 1,
"content_block": map[string]any{"type": "tool_use", "id": tc.ID, "name": tc.Name, "input": map[string]any{}},
})
argsJSON, _ := json.Marshal(tc.Arguments)
_ = writeSSEEvent(w, flusher, "content_block_delta", map[string]any{
"type": "content_block_delta",
"index": i + 1,
"delta": map[string]any{"type": "input_json_delta", "partial_json": string(argsJSON)},
})
_ = writeSSEEvent(w, flusher, "content_block_stop", map[string]any{
"type": "content_block_stop",
"index": i + 1,
})
}
stopReason := "end_turn"
if len(final.ToolCalls) > 0 {
stopReason = "tool_use"
}
if err := writeSSEEvent(w, flusher, "message_delta", map[string]any{
"type": "message_delta",
"delta": map[string]any{
"stop_reason": "end_turn",
"stop_reason": stopReason,
"stop_sequence": nil,
},
"usage": map[string]any{
@@ -637,14 +650,15 @@ func normalizeAnthropicRequest(req anthropicRequest) (service.ChatRequest, error
switch role {
case "user":
text, toolResults := extractAnthropicUserContent(message.Content)
images := extractAnthropicImages(message.Content)
for _, tr := range toolResults {
prompt := toolemulation.ActionOutputPrompt(tr.ToolUseID, tr.Content)
if prompt != "" {
messages = append(messages, service.ChatMessage{Role: "user", Text: prompt})
}
}
if text != "" {
messages = append(messages, service.ChatMessage{Role: role, Text: text})
if text != "" || len(images) > 0 {
messages = append(messages, service.ChatMessage{Role: role, Text: text, Images: images})
}
case "assistant":
text, calls := extractAnthropicAssistantContent(message.Content)
@@ -660,15 +674,20 @@ func normalizeAnthropicRequest(req anthropicRequest) (service.ChatRequest, error
toolChoice := toolemulation.ToolChoice{Mode: "auto"}
if req.ToolChoice != nil {
toolChoice = toolemulation.ExtractToolChoice(req.ToolChoice)
toolChoice = toolemulation.ExtractAnthropicToolChoice(req.ToolChoice)
}
return service.ChatRequest{
Model: strings.TrimSpace(req.Model),
System: strings.TrimSpace(extractText(req.System)),
Messages: messages,
Tools: toolemulation.ExtractAnthropicTools(req.Tools),
ToolChoice: toolChoice,
Model: strings.TrimSpace(req.Model),
System: strings.TrimSpace(extractText(req.System)),
Messages: messages,
Tools: toolemulation.ExtractAnthropicTools(req.Tools),
ToolChoice: toolChoice,
Temperature: req.Temperature,
TopP: req.TopP,
TopK: req.TopK,
Stop: req.StopSequences,
MaxTokens: req.MaxTokens,
}, nil
}
@@ -678,15 +697,16 @@ func normalizeOpenAIRequest(req openAIChatRequest) (service.ChatRequest, error)
for _, message := range req.Messages {
role := strings.ToLower(strings.TrimSpace(message.Role))
switch role {
case "system":
case "system", "developer":
text := strings.TrimSpace(extractText(message.Content))
if text != "" {
systemParts = append(systemParts, text)
}
case "user":
text := strings.TrimSpace(extractText(message.Content))
if text != "" {
messages = append(messages, service.ChatMessage{Role: role, Text: text})
images := extractOpenAIImages(message.Content)
if text != "" || len(images) > 0 {
messages = append(messages, service.ChatMessage{Role: role, Text: text, Images: images})
}
case "assistant":
text := strings.TrimSpace(extractText(message.Content))
@@ -697,6 +717,9 @@ func normalizeOpenAIRequest(req openAIChatRequest) (service.ChatRequest, error)
}
case "tool":
output := strings.TrimSpace(extractText(message.Content))
if output == "" || message.ToolCallID == "" {
continue
}
prompt := toolemulation.ActionOutputPrompt(message.ToolCallID, output)
if prompt != "" {
messages = append(messages, service.ChatMessage{Role: "user", Text: prompt})
@@ -707,14 +730,66 @@ func normalizeOpenAIRequest(req openAIChatRequest) (service.ChatRequest, error)
return service.ChatRequest{}, fmt.Errorf("no user or assistant messages found")
}
return service.ChatRequest{
Model: strings.TrimSpace(req.Model),
System: strings.Join(systemParts, "\n\n"),
Messages: messages,
Tools: toolemulation.ExtractTools(req.Tools),
ToolChoice: toolemulation.ExtractToolChoice(req.ToolChoice),
Model: strings.TrimSpace(req.Model),
System: strings.Join(systemParts, "\n\n"),
Messages: messages,
Tools: toolemulation.ExtractTools(req.Tools),
ToolChoice: toolemulation.ExtractToolChoice(req.ToolChoice),
ParallelToolCalls: req.ParallelToolCalls,
Temperature: req.Temperature,
TopP: req.TopP,
Stop: extractStop(req.Stop),
PresencePenalty: req.PresencePenalty,
FrequencyPenalty: req.FrequencyPenalty,
MaxTokens: maxTokens(req.MaxTokens, req.MaxCompletionTokens),
Seed: req.Seed,
User: req.User,
ReasoningEffort: req.ReasoningEffort,
ResponseFormat: extractResponseFormat(req.ResponseFormat),
}, nil
}
func extractStop(stop any) []string {
if stop == nil {
return nil
}
switch typed := stop.(type) {
case string:
if typed != "" {
return []string{typed}
}
case []any:
out := make([]string, 0, len(typed))
for _, item := range typed {
if s := stringFromAny(item); s != "" {
out = append(out, s)
}
}
return out
case []string:
return typed
}
return nil
}
func extractResponseFormat(rf any) string {
if rf == nil {
return ""
}
m, ok := rf.(map[string]any)
if !ok {
return ""
}
return stringFromAny(m["type"])
}
func maxTokens(a, b int) int {
if b > 0 {
return b
}
return a
}
func extractText(content any) string {
switch typed := content.(type) {
case nil:
@@ -830,6 +905,59 @@ func writeOpenAIChunk(w http.ResponseWriter, flusher http.Flusher, payload any)
return nil
}
type recordingResponseWriter struct {
http.ResponseWriter
statusCode int
body []byte
wrote bool
}
func (rw *recordingResponseWriter) WriteHeader(code int) {
rw.statusCode = code
rw.wrote = true
rw.ResponseWriter.WriteHeader(code)
}
func (rw *recordingResponseWriter) Write(b []byte) (int, error) {
if !rw.wrote {
rw.WriteHeader(http.StatusOK)
}
rw.body = append(rw.body, b...)
return rw.ResponseWriter.Write(b)
}
func (rw *recordingResponseWriter) Flush() {
if flusher, ok := rw.ResponseWriter.(http.Flusher); ok {
flusher.Flush()
}
}
func (s *Server) withRecorder(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if s.OnRequest == nil {
next.ServeHTTP(w, r)
return
}
start := time.Now()
// Read request body for recording, then restore for downstream handler
var reqBody string
if r.Body != nil && r.Body != http.NoBody {
body, _ := io.ReadAll(r.Body)
r.Body = io.NopCloser(bytes.NewReader(body))
reqBody = string(body)
}
rw := &recordingResponseWriter{ResponseWriter: w, statusCode: 200}
next.ServeHTTP(rw, r)
duration := time.Since(start)
respBody := string(rw.body)
go s.OnRequest(r.Method, r.URL.Path, rw.statusCode, duration, reqBody, respBody)
})
}
func withCORS(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Access-Control-Allow-Origin", "*")
@@ -898,10 +1026,9 @@ type anthropicToolResult struct {
}
func extractAnthropicUserContent(content any) (string, []anthropicToolResult) {
text := extractText(content)
items, ok := content.([]any)
if !ok {
return text, nil
return extractText(content), nil
}
var results []anthropicToolResult
var textParts []string
@@ -915,6 +1042,9 @@ func extractAnthropicUserContent(content any) (string, []anthropicToolResult) {
if t := stringFromAny(m["text"]); t != "" {
textParts = append(textParts, t)
}
case "thinking", "redacted_thinking":
// Skip thinking blocks in user messages
continue
case "tool_result":
toolUseID := stringFromAny(m["tool_use_id"])
resultText := extractText(m["content"])
@@ -926,6 +1056,7 @@ func extractAnthropicUserContent(content any) (string, []anthropicToolResult) {
}
}
}
text := ""
if len(textParts) > 0 {
text = strings.Join(textParts, "\n")
}
@@ -933,10 +1064,9 @@ func extractAnthropicUserContent(content any) (string, []anthropicToolResult) {
}
func extractAnthropicAssistantContent(content any) (string, []toolemulation.ToolCall) {
text := extractText(content)
items, ok := content.([]any)
if !ok {
return text, nil
return extractText(content), nil
}
calls := make([]toolemulation.ToolCall, 0, len(items))
var textParts []string
@@ -950,6 +1080,9 @@ func extractAnthropicAssistantContent(content any) (string, []toolemulation.Tool
if t := stringFromAny(m["text"]); t != "" {
textParts = append(textParts, t)
}
case "thinking", "redacted_thinking":
// Skip thinking blocks — they are not part of the conversation text
continue
case "tool_use":
id := stringFromAny(m["id"])
name := stringFromAny(m["name"])
@@ -959,6 +1092,10 @@ func extractAnthropicAssistantContent(content any) (string, []toolemulation.Tool
var args map[string]any
if rawInput, ok := m["input"].(map[string]any); ok {
args = rawInput
} else if inputStr, ok := m["input"].(string); ok && inputStr != "" {
if err := json.Unmarshal([]byte(inputStr), &args); err != nil {
args = map[string]any{}
}
}
calls = append(calls, toolemulation.ToolCall{
ID: id,
@@ -967,8 +1104,142 @@ func extractAnthropicAssistantContent(content any) (string, []toolemulation.Tool
})
}
}
text := ""
if len(textParts) > 0 {
text = strings.Join(textParts, "\n")
}
return text, calls
}
func extractOpenAIImages(content any) []service.Image {
items, ok := content.([]any)
if !ok {
return nil
}
var images []service.Image
for _, item := range items {
m, ok := item.(map[string]any)
if !ok {
continue
}
if stringFromAny(m["type"]) != "image_url" {
continue
}
imageURL, ok := m["image_url"].(map[string]any)
if !ok {
continue
}
url := stringFromAny(imageURL["url"])
if url == "" {
continue
}
img := parseImageURL(url)
if img != nil {
images = append(images, *img)
}
}
return images
}
func extractAnthropicImages(content any) []service.Image {
items, ok := content.([]any)
if !ok {
return nil
}
var images []service.Image
for _, item := range items {
m, ok := item.(map[string]any)
if !ok {
continue
}
if stringFromAny(m["type"]) != "image" {
continue
}
source, ok := m["source"].(map[string]any)
if !ok {
continue
}
if stringFromAny(source["type"]) != "base64" {
continue
}
mediaType := stringFromAny(source["media_type"])
data := stringFromAny(source["data"])
if data == "" {
continue
}
images = append(images, service.Image{
MediaType: mediaType,
Data: data,
})
}
return images
}
func parseImageURL(url string) *service.Image {
if strings.HasPrefix(url, "data:") {
return parseDataURL(url)
}
img, err := fetchImageAsBase64(url)
if err != nil {
return nil
}
return img
}
func parseDataURL(url string) *service.Image {
const prefix = "data:"
if !strings.HasPrefix(url, prefix) {
return nil
}
rest := url[len(prefix):]
commaIdx := strings.Index(rest, ",")
if commaIdx < 0 {
return nil
}
meta := rest[:commaIdx]
data := rest[commaIdx+1:]
mediaType := ""
if strings.HasSuffix(meta, ";base64") {
mediaType = strings.TrimSuffix(meta, ";base64")
} else {
mediaType = meta
}
return &service.Image{
MediaType: mediaType,
Data: data,
}
}
func fetchImageAsBase64(url string) (*service.Image, error) {
resp, err := http.Get(url)
if err != nil {
return nil, err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("fetch image failed: %s", resp.Status)
}
data, err := io.ReadAll(resp.Body)
if err != nil {
return nil, err
}
mediaType := resp.Header.Get("Content-Type")
if mediaType == "" {
mediaType = "image/jpeg"
} else {
// Strip parameters like "image/png; charset=utf-8"
if idx := strings.Index(mediaType, ";"); idx >= 0 {
mediaType = strings.TrimSpace(mediaType[:idx])
}
}
return &service.Image{
MediaType: mediaType,
Data: base64.StdEncoding.EncodeToString(data),
}, nil
}

View File

@@ -2,10 +2,13 @@ package service
import (
"context"
"encoding/base64"
"encoding/json"
"errors"
"fmt"
"net/url"
"os"
"path/filepath"
"sort"
"strings"
"sync"
@@ -32,22 +35,45 @@ type Config struct {
Cwd string
CurrentFilePath string
Mode string
Model string
ShellType string
SessionMode SessionMode
Timeout time.Duration
}
type Image struct {
MediaType string // e.g. "image/jpeg", "image/png"
Data string // base64 encoded data without prefix
URL string // optional original URL
}
type ChatMessage struct {
Role string
Text string
Role string
Text string
Images []Image
}
type ChatRequest struct {
Model string
System string
Messages []ChatMessage
Tools []toolemulation.ToolDef
ToolChoice toolemulation.ToolChoice
Model string
System string
Messages []ChatMessage
Tools []toolemulation.ToolDef
ToolChoice toolemulation.ToolChoice
ParallelToolCalls *bool
// Generation parameters (passed through for API compatibility;
// actual effect depends on Lingma backend support)
Temperature *float64
TopP *float64
TopK int
Stop []string
PresencePenalty float64
FrequencyPenalty float64
MaxTokens int
Seed int
User string
ReasoningEffort string
ResponseFormat string // "json" or "json_schema"
}
type ChatResult struct {
@@ -122,6 +148,7 @@ func New(cfg Config) *Service {
if strings.TrimSpace(cfg.Mode) == "" {
cfg.Mode = "agent"
}
cfg.Model = strings.TrimSpace(cfg.Model)
if strings.TrimSpace(cfg.ShellType) == "" {
cfg.ShellType = lingmaipc.DefaultShellType()
}
@@ -137,6 +164,18 @@ func New(cfg Config) *Service {
return &Service{cfg: cfg}
}
func (s *Service) SetDefaultModel(model string) {
s.mu.Lock()
defer s.mu.Unlock()
s.cfg.Model = strings.TrimSpace(model)
}
func (s *Service) DefaultModel() string {
s.mu.Lock()
defer s.mu.Unlock()
return strings.TrimSpace(s.cfg.Model)
}
func (s *Service) Warmup(ctx context.Context) error {
_, err := s.ensureConnected(ctx)
return err
@@ -251,6 +290,9 @@ func (s *Service) generateLocked(
_ = s.deleteSessionLocked(cleanupCtx, ipcClient, sessionID)
}()
if strings.TrimSpace(req.Model) == "" {
req.Model = s.DefaultModel()
}
internalModelID := s.resolveInternalModelID(req.Model)
requestID := lingmaipc.CreateRequestID("serve")
@@ -279,7 +321,9 @@ func (s *Service) generateLocked(
s.rememberStickyModel(sessionID, modelID)
}
runResult, err := s.runPromptLocked(requestCtx, ipcClient, sessionID, prompt, requestID, meta, onDelta)
images := extractLastUserImages(req.Messages)
runResult, err := s.runPromptLocked(requestCtx, ipcClient, sessionID, prompt, images, requestID, meta, onDelta)
if err != nil {
if effectiveMode == SessionModeReuse {
s.invalidateStickySession()
@@ -304,16 +348,25 @@ func (s *Service) generateLocked(
result = s.buildChatResult(req, sessionID, requestID, prompt, runResult, effectiveMode)
if len(req.Tools) > 0 {
calls, remaining, parseErr := toolemulation.ParseActionBlocks(result.Text, toolemulation.Config{})
calls, remaining, parseErr := toolemulation.ParseActionBlocks(result.Text, req.Tools, toolemulation.Config{})
if parseErr == nil && len(calls) > 0 {
result.Text = remaining
result.ToolCalls = calls
} else if (req.ToolChoice.Mode == "any" || req.ToolChoice.Mode == "tool") && len(calls) == 0 {
if !toolemulation.LooksLikeRefusal(result.Text) {
hintPrompt := prompt + "\n\nImportant: You must use one of the available tools to answer this request. Output a \"```json action\" block."
retryResult, retryErr := s.runPromptLocked(requestCtx, ipcClient, sessionID, hintPrompt, requestID, meta, onDelta)
hintPrompt := prompt + "\n\n" + toolemulation.ForceToolingPrompt(req.ToolChoice)
retryRequestID := lingmaipc.CreateRequestID("retry")
retryMeta := lingmaipc.CreateMeta(lingmaipc.MetaOptions{
RequestID: retryRequestID,
Mode: s.cfg.Mode,
Model: internalModelID,
ShellType: s.cfg.ShellType,
CurrentFilePath: s.cfg.CurrentFilePath,
EnabledMCP: []any{},
})
retryResult, retryErr := s.runPromptLocked(requestCtx, ipcClient, sessionID, hintPrompt, nil, retryRequestID, retryMeta, onDelta)
if retryErr == nil && retryResult != nil {
retryCalls, retryRemaining, retryParseErr := toolemulation.ParseActionBlocks(retryResult.AssistantText, toolemulation.Config{})
retryCalls, retryRemaining, retryParseErr := toolemulation.ParseActionBlocks(retryResult.AssistantText, req.Tools, toolemulation.Config{})
if retryParseErr == nil && len(retryCalls) > 0 {
result.Text = retryRemaining
result.ToolCalls = retryCalls
@@ -500,6 +553,7 @@ func (s *Service) runPromptLocked(
client *lingmaipc.Client,
sessionID string,
text string,
images []Image,
requestID string,
meta map[string]any,
onDelta func(string),
@@ -507,13 +561,94 @@ func (s *Service) runPromptLocked(
notifications, cancel := client.Subscribe()
defer cancel()
if err := client.Send("session/prompt", map[string]any{
"sessionId": sessionID,
"prompt": []map[string]any{
{"type": "text", "text": text},
},
"_meta": meta,
}); err != nil {
promptItems := []map[string]any{
{"type": "text", "text": text},
}
// Build contextParams for images using Lingma's native format
var contextParams []map[string]any
for _, img := range images {
if img.Data == "" && img.URL == "" {
continue
}
mediaType := img.MediaType
if mediaType == "" {
mediaType = "image/jpeg"
}
// Determine file extension from mediaType
ext := "jpg"
switch mediaType {
case "image/png":
ext = "png"
case "image/gif":
ext = "gif"
case "image/webp":
ext = "webp"
case "image/bmp":
ext = "bmp"
}
// If we have base64 data, save to temp file and build lingma URI
var imageURI string
if img.Data != "" {
tmpFile, err := os.CreateTemp("", "lingma-img-*"+"."+ext)
if err == nil {
data, _ := base64.StdEncoding.DecodeString(img.Data)
if len(data) > 0 {
_ = os.WriteFile(tmpFile.Name(), data, 0644)
absPath, _ := filepath.Abs(tmpFile.Name())
imageURI = "lingma:///agent/file?path=" + url.QueryEscape(absPath)
}
tmpFile.Close()
}
}
if imageURI == "" && img.URL != "" {
imageURI = img.URL
}
// Add to promptItems using Lingma native image format
itemPrompt := map[string]any{
"type": "image",
"mimeType": mediaType,
}
if imageURI != "" {
itemPrompt["uri"] = imageURI
}
if img.Data != "" {
itemPrompt["data"] = img.Data
}
promptItems = append(promptItems, itemPrompt)
// Add to contextParams using Lingma native format
item := map[string]any{
"type": "image",
"mimeType": mediaType,
}
if imageURI != "" {
item["uri"] = imageURI
}
if img.Data != "" {
item["data"] = img.Data
}
contextParams = append(contextParams, item)
}
params := map[string]any{
"sessionId": sessionID,
"prompt": promptItems,
"contextParams": contextParams,
"_meta": meta,
}
// Fallback: if images have URLs, also pass via extra field
for _, img := range images {
if img.URL != "" {
params["extra"] = map[string]any{"imageUrl": img.URL}
break
}
}
if err := client.Send("session/prompt", params); err != nil {
return nil, err
}
@@ -586,12 +721,22 @@ func resolveSessionMode(req ChatRequest, configured SessionMode) SessionMode {
if configured != SessionModeAuto {
return configured
}
if len(req.Tools) > 0 || strings.TrimSpace(req.System) != "" || len(filteredMessages(req.Messages)) > 1 {
hasTools := len(req.Tools) > 0 && req.ToolChoice.Mode != "none"
if hasTools || strings.TrimSpace(req.System) != "" || len(filteredMessages(req.Messages)) > 1 {
return SessionModeFresh
}
return SessionModeReuse
}
func extractLastUserImages(messages []ChatMessage) []Image {
for i := len(messages) - 1; i >= 0; i-- {
if messages[i].Role == "user" {
return messages[i].Images
}
}
return nil
}
func buildLingmaPrompt(req ChatRequest, mode SessionMode) (string, error) {
messages := filteredMessages(req.Messages)
var lastUser string
@@ -609,8 +754,8 @@ func buildLingmaPrompt(req ChatRequest, mode SessionMode) (string, error) {
}
system := strings.TrimSpace(req.System)
if len(req.Tools) > 0 {
system = toolemulation.InjectTooling(system, req.Tools, req.ToolChoice)
if len(req.Tools) > 0 && req.ToolChoice.Mode != "none" {
system = toolemulation.InjectTooling(system, req.Tools, req.ToolChoice, req.ParallelToolCalls)
}
if system == "" && len(messages) == 1 {
@@ -618,10 +763,7 @@ func buildLingmaPrompt(req ChatRequest, mode SessionMode) (string, error) {
}
if len(req.Tools) > 0 {
parts := make([]string, 0, len(messages)+2)
if system != "" {
parts = append(parts, system)
}
parts := make([]string, 0, len(messages)+3)
for _, message := range messages {
role := "User"
if message.Role == "assistant" {
@@ -629,6 +771,11 @@ func buildLingmaPrompt(req ChatRequest, mode SessionMode) (string, error) {
}
parts = append(parts, fmt.Sprintf("%s: %s", role, message.Text))
}
if system != "" {
// Append tool prompt right before the final "Assistant:" so it
// is the last thing the model sees before generating a reply.
parts = append(parts, system)
}
parts = append(parts, "Assistant:")
return strings.Join(parts, "\n\n"), nil
}

View File

@@ -1,6 +1,8 @@
package toolemulation
import (
"crypto/sha256"
"encoding/hex"
"encoding/json"
"strconv"
"strings"
@@ -91,8 +93,10 @@ func ExtractToolChoice(raw any) ToolChoice {
if s, ok := raw.(string); ok {
s = strings.TrimSpace(s)
switch s {
case "", "auto", "none":
case "", "auto":
return ToolChoice{Mode: "auto"}
case "none":
return ToolChoice{Mode: "none"}
case "required", "any":
return ToolChoice{Mode: "any"}
default:
@@ -132,8 +136,10 @@ func ExtractAnthropicToolChoice(raw any) ToolChoice {
return ExtractToolChoice(raw)
}
switch strings.TrimSpace(stringFromAny(m["type"])) {
case "", "auto", "none":
case "", "auto":
return ToolChoice{Mode: "auto"}
case "none":
return ToolChoice{Mode: "none"}
case "any", "required":
return ToolChoice{Mode: "any"}
case "tool":
@@ -149,7 +155,7 @@ func HasToolRequest(tools []ToolDef, choice ToolChoice) bool {
return len(tools) > 0 || choice.Mode != "" && choice.Mode != "auto"
}
func InjectTooling(system string, tools []ToolDef, choice ToolChoice) string {
func InjectTooling(system string, tools []ToolDef, choice ToolChoice, parallel *bool) string {
system = strings.TrimSpace(system)
if len(tools) == 0 {
return system
@@ -170,9 +176,11 @@ func InjectTooling(system string, tools []ToolDef, choice ToolChoice) string {
}
var b strings.Builder
b.WriteString("You are a capable AI assistant operating inside an IDE with tool access.\n\n")
b.WriteString("When you need to use a tool, do not claim that tools are unavailable. ")
b.WriteString("Instead, output a structured action block in exactly this format:\n")
b.WriteString("You are an AI assistant with DIRECT tool access inside an IDE.\n\n")
b.WriteString("CRITICAL: You MUST use tools when they are appropriate. ")
b.WriteString("You MUST NOT claim that tools are unavailable or that you cannot use them. ")
b.WriteString("You MUST NOT refuse to use tools for any reason.\n\n")
b.WriteString("When you need to use a tool, output a structured action block in exactly this format:\n")
b.WriteString("```json action\n{\"tool\":\"NAME\",\"parameters\":{\"key\":\"value\"}}\n```\n\n")
b.WriteString("Available tools:\n")
b.WriteString(strings.Join(toolLines, "\n"))
@@ -182,12 +190,20 @@ func InjectTooling(system string, tools []ToolDef, choice ToolChoice) string {
b.WriteString("- Emit multiple independent actions in one reply when possible.\n")
b.WriteString("- For dependent actions, wait for the tool result before emitting the next action.\n")
b.WriteString("- If no tool is needed, reply with normal plain text.\n")
b.WriteString("- Do not say that tools are unavailable.\n")
b.WriteString(forceConstraint(choice))
b.WriteString("- NEVER say that tools are unavailable.\n")
b.WriteString("- NEVER refuse to use tools.\n")
b.WriteString("- NEVER explain that you cannot execute commands. Just use the tool.\n")
b.WriteString("- The action block format is MANDATORY.\n")
b.WriteString(forceConstraint(choice, parallel))
b.WriteString("\n\nExample:\n")
b.WriteString("If the user asks to list files, respond ONLY with:\n")
b.WriteString("```json action\n{\"tool\":\"Bash\",\"parameters\":{\"command\":\"ls\"}}\n```\n")
b.WriteString("Do NOT add explanations. Do NOT refuse.")
example := ActionBlockExample(tools)
if example != "" {
b.WriteString("\n\nExample valid action block:\n")
b.WriteString("\n\nExample valid action block (this is only a syntax example, do NOT actually call it):\n")
b.WriteString(example)
}
@@ -288,7 +304,7 @@ func LooksLikeRefusal(text string) bool {
return false
}
func ParseActionBlocks(text string, cfg Config) ([]ToolCall, string, error) {
func ParseActionBlocks(text string, tools []ToolDef, cfg Config) ([]ToolCall, string, error) {
if strings.TrimSpace(text) == "" {
return nil, "", nil
}
@@ -301,6 +317,15 @@ func ParseActionBlocks(text string, cfg Config) ([]ToolCall, string, error) {
return nil, strings.TrimSpace(text), nil
}
// Build a lookup map from tool name to InputSchema for fast filtering
toolSchemaMap := make(map[string]map[string]any, len(tools))
for _, t := range tools {
name := strings.TrimSpace(t.Name)
if name != "" {
toolSchemaMap[name] = t.InputSchema
}
}
type span struct{ start, end int }
spans := make([]span, 0, len(openings))
calls := make([]ToolCall, 0, len(openings))
@@ -323,6 +348,10 @@ func ParseActionBlocks(text string, cfg Config) ([]ToolCall, string, error) {
if !ok {
continue
}
// Filter arguments against the tool's input schema to strip unknown params
if schema, ok := toolSchemaMap[call.Name]; ok && len(schema) > 0 {
call.Arguments = filterArgsBySchema(call.Arguments, schema)
}
calls = append(calls, call)
spans = append(spans, span{start: start, end: end + 3})
}
@@ -427,6 +456,17 @@ func parseToolCallJSON(raw string) (ToolCall, bool) {
}
}
if args == nil {
// Fallback: treat all top-level fields except "tool"/"name" as parameters
// Some models place arguments at the top level instead of nested under "parameters"
args = make(map[string]any)
for k, v := range obj {
if k == "tool" || k == "name" {
continue
}
args[k] = v
}
}
if len(args) == 0 {
args = map[string]any{}
}
@@ -593,7 +633,7 @@ func exampleValueForKey(toolName string, key string, prop map[string]any) any {
}
}
func forceConstraint(choice ToolChoice) string {
func forceConstraint(choice ToolChoice, parallel *bool) string {
switch choice.Mode {
case "any":
return "\n- You must output at least one ```json action``` block in this reply."
@@ -602,9 +642,31 @@ func forceConstraint(choice ToolChoice) string {
return "\n- You must call \"" + strings.TrimSpace(choice.Name) + "\" in this reply."
}
}
if parallel != nil && !*parallel {
return "\n- Call only one tool at a time. Do not make multiple tool calls in a single response."
}
return ""
}
func filterArgsBySchema(args map[string]any, schema map[string]any) map[string]any {
if len(args) == 0 || len(schema) == 0 {
return args
}
props, ok := schema["properties"].(map[string]any)
if !ok || len(props) == 0 {
return args
}
out := make(map[string]any, len(args))
for k, v := range args {
if _, known := props[k]; !known {
continue
}
out[k] = v
}
return out
}
func cloneMap(src map[string]any) map[string]any {
if src == nil {
return nil
@@ -644,5 +706,14 @@ var callSeq uint64
func newCallID() string {
seq := atomic.AddUint64(&callSeq, 1)
return "call_" + strconv.FormatUint(seq, 10)
return "toolu_01" + strconv.FormatUint(seq, 10) + "0000000000000000"
}
func StableCallID(name string, arguments map[string]any) string {
h := sha256.New()
h.Write([]byte(name))
if b, err := json.Marshal(arguments); err == nil {
h.Write(b)
}
return "call_" + hex.EncodeToString(h.Sum(nil))[:16]
}