Skip to content

Commit e5a3140

Browse files
Merge pull request #1185 from Kiln-AI/dchiang/KIL-467/openai-sdg-required-missing-v2
Fix OpenAI SDG structured output schemas missing required fields
2 parents 02d0912 + aacc754 commit e5a3140

File tree

4 files changed: 181 additions (+181) and 5 deletions (-5).

4 files changed: 181 additions (+181) and 5 deletions (-5).

libs/core/kiln_ai/adapters/model_adapters/litellm_adapter.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -430,7 +430,7 @@ def json_schema_response_format(self) -> dict[str, Any]:
430430
raise ValueError(
431431
"Invalid output schema for this task. Cannot use JSON schema response format."
432432
)
433-
output_schema = close_object_schemas(output_schema)
433+
output_schema = close_object_schemas(output_schema, strict=True)
434434
return {
435435
"response_format": {
436436
"type": "json_schema",
@@ -448,7 +448,7 @@ def tool_call_params(self, strict: bool) -> dict[str, Any]:
448448
raise ValueError(
449449
"Invalid output schema for this task. Can not use tool calls."
450450
)
451-
output_schema = close_object_schemas(output_schema)
451+
output_schema = close_object_schemas(output_schema, strict=strict)
452452

453453
function_params = {
454454
"name": "task_response",

libs/core/kiln_ai/adapters/model_adapters/test_litellm_adapter.py

Lines changed: 77 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -200,7 +200,7 @@ async def test_response_format_options_json_schema(config, mock_task):
200200
patch.object(adapter, "has_structured_output", return_value=True),
201201
):
202202
options = await adapter.response_format_options()
203-
expected_schema = close_object_schemas(mock_task.output_schema())
203+
expected_schema = close_object_schemas(mock_task.output_schema(), strict=True)
204204
assert options == {
205205
"response_format": {
206206
"type": "json_schema",
@@ -240,7 +240,7 @@ def test_tool_call_params_strict(config, mock_task):
240240
adapter = LiteLlmAdapter(config=config, kiln_task=mock_task)
241241

242242
params = adapter.tool_call_params(strict=True)
243-
expected_schema = close_object_schemas(mock_task.output_schema())
243+
expected_schema = close_object_schemas(mock_task.output_schema(), strict=True)
244244

245245
assert params == {
246246
"tools": [
@@ -260,6 +260,81 @@ def test_tool_call_params_strict(config, mock_task):
260260
}
261261

262262

263+
def test_tool_call_params_strict_adds_required_to_nested(config, tmp_path):
264+
project_path = tmp_path / "test_project_nested" / "project.kiln"
265+
project_path.parent.mkdir()
266+
project = Project(name="Nested Project", path=str(project_path))
267+
project.save_to_file()
268+
269+
nested_schema = {
270+
"type": "object",
271+
"properties": {
272+
"user": {
273+
"type": "object",
274+
"properties": {
275+
"name": {"type": "string"},
276+
"age": {"type": "integer"},
277+
},
278+
},
279+
"status": {"type": "string"},
280+
},
281+
}
282+
task = Task(
283+
name="Nested Task",
284+
instruction="Test instruction",
285+
parent=project,
286+
output_json_schema=json.dumps(nested_schema),
287+
)
288+
task.save_to_file()
289+
290+
adapter = LiteLlmAdapter(config=config, kiln_task=task)
291+
params = adapter.tool_call_params(strict=True)
292+
293+
result_schema = params["tools"][0]["function"]["parameters"]
294+
assert result_schema["required"] == ["user", "status"]
295+
assert result_schema["properties"]["user"]["required"] == ["name", "age"]
296+
297+
298+
@pytest.mark.asyncio
299+
async def test_json_schema_response_format_adds_required_to_nested(config, tmp_path):
300+
project_path = tmp_path / "test_project_nested2" / "project.kiln"
301+
project_path.parent.mkdir()
302+
project = Project(name="Nested Project 2", path=str(project_path))
303+
project.save_to_file()
304+
305+
nested_schema = {
306+
"type": "object",
307+
"properties": {
308+
"result": {
309+
"type": "object",
310+
"properties": {
311+
"value": {"type": "number"},
312+
"unit": {"type": "string"},
313+
},
314+
},
315+
},
316+
}
317+
task = Task(
318+
name="Nested Task 2",
319+
instruction="Test instruction",
320+
parent=project,
321+
output_json_schema=json.dumps(nested_schema),
322+
)
323+
task.save_to_file()
324+
325+
config.run_config_properties.structured_output_mode = (
326+
StructuredOutputMode.json_schema
327+
)
328+
adapter = LiteLlmAdapter(config=config, kiln_task=task)
329+
330+
with patch.object(adapter, "has_structured_output", return_value=True):
331+
options = await adapter.response_format_options()
332+
333+
result_schema = options["response_format"]["json_schema"]["schema"]
334+
assert result_schema["required"] == ["result"]
335+
assert result_schema["properties"]["result"]["required"] == ["value", "unit"]
336+
337+
263338
@pytest.mark.parametrize(
264339
"provider_name,expected_prefix",
265340
[

libs/core/kiln_ai/datamodel/json_schema.py

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -157,14 +157,18 @@ def single_string_field_name(schema: Dict) -> str | None:
157157
return None
158158

159159

160-
def close_object_schemas(schema: Dict) -> Dict:
160+
def close_object_schemas(schema: Dict, strict: bool = False) -> Dict:
161161
"""Return a deep-copied schema with object nodes closed by default.
162162
163163
Any schema node with 'type == "object"' gets 'additionalProperties: false'
164164
if it is not already set. This normalization is recursive and walks nested
165165
schema structures such as 'properties', 'items', '$defs', and composed
166166
schemas like 'anyOf'/'oneOf'/'allOf'. Existing explicit
167167
'additionalProperties' values are preserved.
168+
169+
When strict=True, also sets 'required' to list all property keys on every
170+
object node with 'properties'. This is needed for OpenAI's strict
171+
structured output mode, which does not support optional properties.
168172
"""
169173

170174
def _normalize(node: Any) -> Any:
@@ -203,6 +207,13 @@ def _normalize(node: Any) -> Any:
203207
):
204208
normalized["additionalProperties"] = False
205209

210+
if (
211+
strict
212+
and normalized.get("type") == "object"
213+
and isinstance(normalized.get("properties"), dict)
214+
):
215+
normalized["required"] = list(normalized["properties"].keys())
216+
206217
return normalized
207218

208219
return _normalize(schema)

libs/core/kiln_ai/datamodel/test_json_schema.py

Lines changed: 90 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -248,6 +248,96 @@ def test_close_object_schemas_preserves_explicit_additional_properties():
248248
assert normalized["properties"]["metadata"]["additionalProperties"] is True
249249

250250

251+
def test_close_object_schemas_strict_adds_required():
252+
schema = {
253+
"type": "object",
254+
"properties": {
255+
"name": {"type": "string"},
256+
"age": {"type": "integer"},
257+
},
258+
}
259+
result = close_object_schemas(schema, strict=True)
260+
assert result["required"] == ["name", "age"]
261+
assert result["additionalProperties"] is False
262+
263+
264+
def test_close_object_schemas_strict_overwrites_partial_required():
265+
schema = {
266+
"type": "object",
267+
"properties": {
268+
"name": {"type": "string"},
269+
"age": {"type": "integer"},
270+
"email": {"type": "string"},
271+
},
272+
"required": ["name"],
273+
}
274+
result = close_object_schemas(schema, strict=True)
275+
assert result["required"] == ["name", "age", "email"]
276+
277+
278+
def test_close_object_schemas_strict_nested():
279+
schema = {
280+
"type": "object",
281+
"properties": {
282+
"user": {
283+
"type": "object",
284+
"properties": {
285+
"name": {"type": "string"},
286+
"address": {
287+
"type": "object",
288+
"properties": {
289+
"street": {"type": "string"},
290+
"city": {"type": "string"},
291+
},
292+
},
293+
},
294+
},
295+
"tags": {
296+
"type": "array",
297+
"items": {
298+
"type": "object",
299+
"properties": {
300+
"key": {"type": "string"},
301+
"value": {"type": "string"},
302+
},
303+
},
304+
},
305+
},
306+
}
307+
result = close_object_schemas(schema, strict=True)
308+
assert result["required"] == ["user", "tags"]
309+
assert result["properties"]["user"]["required"] == ["name", "address"]
310+
assert result["properties"]["user"]["properties"]["address"]["required"] == [
311+
"street",
312+
"city",
313+
]
314+
assert result["properties"]["tags"]["items"]["required"] == ["key", "value"]
315+
316+
317+
def test_close_object_schemas_non_strict_no_required():
318+
schema = {
319+
"type": "object",
320+
"properties": {
321+
"name": {"type": "string"},
322+
"age": {"type": "integer"},
323+
},
324+
}
325+
result = close_object_schemas(schema)
326+
assert "required" not in result
327+
328+
result_explicit = close_object_schemas(schema, strict=False)
329+
assert "required" not in result_explicit
330+
331+
332+
def test_close_object_schemas_strict_no_properties():
333+
schema = {
334+
"type": "object",
335+
"additionalProperties": {"type": "string"},
336+
}
337+
result = close_object_schemas(schema, strict=True)
338+
assert "required" not in result
339+
340+
251341
@pytest.mark.parametrize(
252342
"schema,expected",
253343
[

0 commit comments

Comments
 (0)