Skip to content

Commit 681cc8c

Browse files
committed
Fix OpenAI Chat transforms silently dropping :document shorthand
The normalize_message method in OpenAI Chat transforms handled the :image shorthand but not :document. When { document: "data:..." } was passed, the document data was silently dropped from the message content and the :document key leaked through as an extra param. Changes: (1) add :document handling with URL and data URI support; (2) add :document to extra_params.except() to prevent leaking; (3) add a build_file_content helper for file content blocks; (4) add 4 tests covering document shorthand scenarios. Fixes #331.
1 parent 9a5c6a2 commit 681cc8c

2 files changed

Lines changed: 84 additions & 1 deletion

File tree

lib/active_agent/providers/open_ai/chat/transforms.rb

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# frozen_string_literal: true
22

33
require "active_support/core_ext/hash/keys"
4+
require "uri"
45

56
module ActiveAgent
67
module Providers
@@ -124,10 +125,19 @@ def normalize_message(message)
124125
{ type: "text", text: msg_hash[:text] },
125126
{ type: "image_url", image_url: { url: msg_hash[:image] } }
126127
]
128+
elsif msg_hash.key?(:text) && msg_hash.key?(:document)
129+
# Shorthand with both text and document: { text: "...", document: "url" }
130+
[
131+
{ type: "text", text: msg_hash[:text] },
132+
build_file_content(msg_hash[:document])
133+
]
127134
elsif msg_hash.key?(:image)
128135
# Shorthand with only image: { image: "url" }
129136
# Text comes from adjacent prompt arguments
130137
[ { type: "image_url", image_url: { url: msg_hash[:image] } } ]
138+
elsif msg_hash.key?(:document)
139+
# Shorthand with only document: { document: "url" }
140+
[ build_file_content(msg_hash[:document]) ]
131141
elsif msg_hash.key?(:text)
132142
# Shorthand: { text: "..." } or { role: "...", text: "..." }
133143
msg_hash[:text]
@@ -137,13 +147,39 @@ def normalize_message(message)
137147
end
138148

139149
# Create appropriate message param based on role and content
140-
extra_params = msg_hash.except(:role, :content, :text, :image)
150+
extra_params = msg_hash.except(:role, :content, :text, :image, :document)
141151
create_message_param(role, content, extra_params)
142152
else
143153
raise ArgumentError, "Cannot normalize #{message.class} to message"
144154
end
145155
end
146156

157+
# Builds a file content block for the Chat API file type.
#
# Handles both URL and data URI formats:
# - URL: "http://example.com/document.pdf" → { type: "file", file: { filename: "...", url: "..." } }
# - Data URI: "data:application/pdf;base64,..." → { type: "file", file: { filename: "...", file_data: "..." } }
#
# Filename selection:
# - Data URI: "document.<ext>", where <ext> is the media subtype with every
#   non-alphanumeric character replaced by "_" (the media type defaults to
#   "application/octet-stream" when the URI does not declare one).
# - URL: the last path segment, or "document.pdf" when the URL has no usable
#   segment (empty path, root path "/") or cannot be parsed.
#
# NOTE(review): confirm the target API accepts a `url` key inside the file
# part; this helper only builds the hash and does not validate it.
#
# @param document [String] URL or data URI
# @return [Hash] file content block
def build_file_content(document)
  if document.start_with?("data:")
    # Data URI - infer a filename from the declared media type.
    media_type = document[%r{\Adata:([^;,]+)}, 1] || "application/octet-stream"
    extension = media_type.split("/").last&.gsub(/[^a-zA-Z0-9]/, "_") || "bin"
    { type: "file", file: { filename: "document.#{extension}", file_data: document } }
  else
    # Regular URL - use the last path segment as the filename.
    filename = File.basename(URI.parse(document).path.to_s)
    # File.basename returns the separator itself for a root path ("/"),
    # not an empty string, so treat that as "no filename" as well.
    filename = "document.pdf" if filename.empty? || filename == File::SEPARATOR
    { type: "file", file: { filename: filename, url: document } }
  end
rescue URI::Error
  # Fallback for invalid URLs: still forward the raw string, with a generic name.
  { type: "file", file: { filename: "document.pdf", url: document } }
end
182+
147183
# Creates the appropriate gem message param class for the given role
148184
#
149185
# @param role [String] message role (developer, system, user, assistant, tool, function)

test/providers/open_ai/chat/transforms_test.rb

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,53 @@ def transforms
203203
assert_equal "image_url", result.content[1][:type]
204204
end
205205

206+
test "normalize_message handles shorthand document format with URL" do
  # Only the :document key is given, pointing at a plain HTTP URL.
  shorthand = { document: "http://example.com/document.pdf" }

  result = transforms.normalize_message(shorthand)

  assert_equal :user, result.role
  assert_equal 1, result.content.size

  # The single content part should be a file part carrying the URL and its basename.
  file_part = result.content[0]
  assert_equal "file", file_part[:type]
  assert_equal "http://example.com/document.pdf", file_part[:file][:url]
  assert_equal "document.pdf", file_part[:file][:filename]
end
217+
218+
test "normalize_message handles shorthand document format with data URI" do
  # Only the :document key is given, carrying an inline base64 data URI.
  shorthand = { document: "data:application/pdf;base64,JVBERi0xL..." }

  result = transforms.normalize_message(shorthand)

  assert_equal :user, result.role
  assert_equal 1, result.content.size

  # The data URI is expected under :file_data, with a filename derived from the media type.
  file_part = result.content[0]
  assert_equal "file", file_part[:type]
  assert_equal "data:application/pdf;base64,JVBERi0xL...", file_part[:file][:file_data]
  assert_equal "document.pdf", file_part[:file][:filename]
end
229+
230+
test "normalize_message handles text and document shorthand" do
  # Both :text and :document are given; the expected content is text first, then file.
  shorthand = { text: "Analyze this", document: "http://example.com/document.pdf" }

  result = transforms.normalize_message(shorthand)

  assert_equal :user, result.role
  assert_equal 2, result.content.size

  text_part = result.content[0]
  assert_equal "text", text_part[:type]
  assert_equal "Analyze this", text_part[:text]

  file_part = result.content[1]
  assert_equal "file", file_part[:type]
  assert_equal "http://example.com/document.pdf", file_part[:file][:url]
end
242+
243+
test "normalize_message does not leak document as extra param" do
  shorthand = { role: "user", document: "http://example.com/document.pdf" }

  result = transforms.normalize_message(shorthand)

  # The document belongs in the message content; the serialized message
  # must not carry a top-level :document key.
  assert_nil transforms.gem_to_hash(result)[:document], "document key should not leak as extra param"
end
252+
206253
test "normalize_message handles hash without role as user" do
207254
message = { content: "hello" }
208255

0 commit comments

Comments
 (0)