Skip to content

Commit 456acf7

Browse files
committed
Bring back the image support
1 parent ff89544 commit 456acf7

2 files changed

Lines changed: 50 additions & 30 deletions

File tree

src/repl/llm/llamacpp.go

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ func (p *LlamaCppProvider) SendMessage(messages []Message, stream bool, images [
8888
}
8989

9090
request := map[string]interface{}{
91-
"messages": buildRequestMessages(messages, images),
91+
"messages": mergeImagesIntoLastUser(messages, images),
9292
}
9393
if effectiveModel != "" {
9494
request["model"] = effectiveModel
@@ -355,3 +355,51 @@ func parseLlamaCppModels(body []byte) []Model {
355355
}}
356356
}
357357

358+
// mergeImagesIntoLastUser returns a JSON-ready messages slice that matches
359+
// the standard OpenAI vision format: image blocks live inside the same user
360+
// turn as the prompt text. The last user message's content is replaced with
361+
// a combined [text, image_url...] block array; if no user message exists yet
362+
// a new one is appended. Shared by all OpenAI-compatible providers (OpenAI,
363+
// llama.cpp, ...) that accept structured content blocks for images.
364+
func mergeImagesIntoLastUser(messages []Message, images []string) []interface{} {
365+
out := make([]interface{}, 0, len(messages)+1)
366+
for _, m := range messages {
367+
out = append(out, m)
368+
}
369+
if len(images) == 0 {
370+
return out
371+
}
372+
373+
blocks := make([]ContentBlock, 0, len(images)+1)
374+
for _, uri := range images {
375+
blocks = append(blocks, ContentBlock{
376+
Type: "image_url",
377+
ImageURL: &struct {
378+
URL string `json:"url"`
379+
}{URL: uri},
380+
})
381+
}
382+
383+
for i := len(out) - 1; i >= 0; i-- {
384+
msg, ok := out[i].(Message)
385+
if !ok || msg.Role != "user" {
386+
continue
387+
}
388+
combined := blocks
389+
if strings.TrimSpace(msg.Content) != "" {
390+
combined = append([]ContentBlock{{Type: "text", Text: msg.Content}}, blocks...)
391+
}
392+
out[i] = map[string]interface{}{
393+
"role": "user",
394+
"content": combined,
395+
}
396+
return out
397+
}
398+
399+
out = append(out, map[string]interface{}{
400+
"role": "user",
401+
"content": blocks,
402+
})
403+
return out
404+
}
405+

src/repl/llm/openai.go

Lines changed: 1 addition & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -333,34 +333,6 @@ func (p *OpenAIProvider) ListModels(ctx context.Context) ([]Model, error) {
333333
// nothing more to do; parsing helper already returned results or an error
334334
}
335335

336-
// buildRequestMessages returns a JSON-ready slice of chat messages. Regular
337-
// messages are forwarded as-is (their Content is a plain string). When image
338-
// URIs are supplied, a user message carrying the OpenAI-style content blocks
339-
// is prepended — that payload is assembled locally as a map so the public
340-
// Message type can keep a flat string content.
341-
func buildRequestMessages(messages []Message, images []string) []interface{} {
342-
out := make([]interface{}, 0, len(messages)+1)
343-
if len(images) > 0 {
344-
blocks := make([]ContentBlock, 0, len(images))
345-
for _, uri := range images {
346-
blocks = append(blocks, ContentBlock{
347-
Type: "image_url",
348-
ImageURL: &struct {
349-
URL string `json:"url"`
350-
}{URL: uri},
351-
})
352-
}
353-
out = append(out, map[string]interface{}{
354-
"role": "user",
355-
"content": blocks,
356-
})
357-
}
358-
for _, m := range messages {
359-
out = append(out, m)
360-
}
361-
return out
362-
}
363-
364336
func (p *OpenAIProvider) SendMessage(messages []Message, stream bool, images []string, tools []OpenAITool) (string, error) {
365337
provider := strings.ToLower(p.config.PROVIDER)
366338

@@ -386,7 +358,7 @@ func (p *OpenAIProvider) SendMessage(messages []Message, stream bool, images []s
386358
}
387359
request := map[string]interface{}{
388360
"model": effectiveModel,
389-
"messages": buildRequestMessages(messages, images),
361+
"messages": mergeImagesIntoLastUser(messages, images),
390362
}
391363

392364
// Add tools if provided

0 commit comments

Comments
 (0)