Skip to content

Commit 5d1e8bc

Browse files
author
lucasliu
committed
feat(api): add reasoning_effort parameter (OpenAI standard)
Maps `reasoning_effort` (high/medium/low/none) to the internal `thinkingBudget` on both the OpenAI and Anthropic endpoints:
- high → 32768 tokens (deep thinking)
- medium → 8192 tokens (balanced)
- low → 1024 tokens (brief thinking)
- none → 0 (disables thinking entirely)

An explicit `thinking_budget` takes precedence over `reasoning_effort`. Effort values are matched case-insensitively. Adds 3 new tests covering decoding, mapping, and precedence.
1 parent 075a344 commit 5d1e8bc

4 files changed

Lines changed: 91 additions & 5 deletions

File tree

Sources/NovaMLXAPI/APIServer.swift

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -481,7 +481,7 @@ public final class NovaMLXAPIServer: @unchecked Sendable {
481481
maxTokens: ocrSampling.maxTokens,
482482
topP: anthropicReq.topP, topK: anthropicReq.topK,
483483
stream: false, stop: ocrStop,
484-
thinkingBudget: anthropicReq.thinkingBudget,
484+
thinkingBudget: anthropicReq.resolvedThinkingBudget,
485485
enableThinking: anthropicReq.resolvedEnableThinking,
486486
preserveThinking: anthropicReq.resolvedPreserveThinking
487487
)
@@ -1726,7 +1726,7 @@ public final class NovaMLXAPIServer: @unchecked Sendable {
17261726
sessionId: sessionId, responseFormat: responseFormat,
17271727
jsonSchemaDef: jsonSchemaDef,
17281728
regexPattern: regexPattern, gbnfGrammar: gbnfGrammar,
1729-
thinkingBudget: openAIReq.thinkingBudget,
1729+
thinkingBudget: openAIReq.resolvedThinkingBudget,
17301730
enableThinking: openAIReq.resolvedEnableThinking,
17311731
preserveThinking: openAIReq.resolvedPreserveThinking
17321732
)
@@ -1863,7 +1863,7 @@ public final class NovaMLXAPIServer: @unchecked Sendable {
18631863
sessionId: sessionId, responseFormat: responseFormat,
18641864
jsonSchemaDef: jsonSchemaDef,
18651865
regexPattern: regexPattern, gbnfGrammar: gbnfGrammar,
1866-
thinkingBudget: openAIReq.thinkingBudget,
1866+
thinkingBudget: openAIReq.resolvedThinkingBudget,
18671867
enableThinking: openAIReq.resolvedEnableThinking,
18681868
preserveThinking: openAIReq.resolvedPreserveThinking
18691869
)

Sources/NovaMLXAPI/AnthropicTypes.swift

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ public struct AnthropicRequest: Codable, Sendable {
3535
public let enableThinking: Bool?
3636
public let preserveThinking: Bool?
3737
public let chatTemplateKwargs: [String: AnyCodable]?
38+
public let reasoningEffort: String?
3839

3940
private enum CodingKeys: String, CodingKey {
4041
case model, messages, maxTokens = "max_tokens", system, temperature
@@ -44,6 +45,7 @@ public struct AnthropicRequest: Codable, Sendable {
4445
case enableThinking = "enable_thinking"
4546
case preserveThinking = "preserve_thinking"
4647
case chatTemplateKwargs = "chat_template_kwargs"
48+
case reasoningEffort = "reasoning_effort"
4749
}
4850

4951
public init(
@@ -61,7 +63,8 @@ public struct AnthropicRequest: Codable, Sendable {
6163
thinkingBudget: Int? = nil,
6264
enableThinking: Bool? = nil,
6365
preserveThinking: Bool? = nil,
64-
chatTemplateKwargs: [String: AnyCodable]? = nil
66+
chatTemplateKwargs: [String: AnyCodable]? = nil,
67+
reasoningEffort: String? = nil
6568
) {
6669
self.model = model
6770
self.messages = messages
@@ -78,6 +81,7 @@ public struct AnthropicRequest: Codable, Sendable {
7881
self.enableThinking = enableThinking
7982
self.preserveThinking = preserveThinking
8083
self.chatTemplateKwargs = chatTemplateKwargs
84+
self.reasoningEffort = reasoningEffort
8185
}
8286

8387
/// Resolve thinking toggle from multiple client formats, mirrors OpenAI pattern
@@ -104,6 +108,19 @@ public struct AnthropicRequest: Codable, Sendable {
104108
}
105109
return nil
106110
}
111+
112+
/// Effective thinking budget for this request, in tokens.
///
/// An explicit `thinkingBudget` always wins. Otherwise `reasoningEffort` is
/// matched case-insensitively against the same table the OpenAI request uses:
/// - "high" → 32768
/// - "medium" → 8192
/// - "low" → 1024
/// - "none" → 0
///
/// Returns `nil` when neither field is set, or when the effort string is not
/// one of the four recognized values.
public var resolvedThinkingBudget: Int? {
    if let explicitBudget = thinkingBudget {
        return explicitBudget
    }
    guard let rawEffort = reasoningEffort else {
        return nil
    }
    let budgetsByEffort: [String: Int] = [
        "high": 32768,
        "medium": 8192,
        "low": 1024,
        "none": 0,
    ]
    // Unrecognized effort strings fall through to nil (no budget).
    return budgetsByEffort[rawEffort.lowercased()]
}
107124
}
108125

109126
public struct AnthropicTokenCountRequest: Codable, Sendable {

Sources/NovaMLXAPI/OpenAITypes.swift

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,7 @@ public struct OpenAIRequest: Codable, Sendable {
179179
public let enableThinking: Bool?
180180
public let preserveThinking: Bool?
181181
public let chatTemplateKwargs: [String: AnyCodable]?
182+
public let reasoningEffort: String?
182183

183184
private enum CodingKeys: String, CodingKey {
184185
case model, messages, temperature, stream, stop, n, seed, tools
@@ -197,6 +198,7 @@ public struct OpenAIRequest: Codable, Sendable {
197198
case enableThinking = "enable_thinking"
198199
case preserveThinking = "preserve_thinking"
199200
case chatTemplateKwargs = "chat_template_kwargs"
201+
case reasoningEffort = "reasoning_effort"
200202
}
201203

202204
public init(
@@ -222,7 +224,8 @@ public struct OpenAIRequest: Codable, Sendable {
222224
thinkingBudget: Int? = nil,
223225
enableThinking: Bool? = nil,
224226
preserveThinking: Bool? = nil,
225-
chatTemplateKwargs: [String: AnyCodable]? = nil
227+
chatTemplateKwargs: [String: AnyCodable]? = nil,
228+
reasoningEffort: String? = nil
226229
) {
227230
self.model = model
228231
self.messages = messages
@@ -247,6 +250,7 @@ public struct OpenAIRequest: Codable, Sendable {
247250
self.enableThinking = enableThinking
248251
self.preserveThinking = preserveThinking
249252
self.chatTemplateKwargs = chatTemplateKwargs
253+
self.reasoningEffort = reasoningEffort
250254
}
251255

252256
/// Resolve thinking toggle from multiple client formats:
@@ -286,6 +290,25 @@ public struct OpenAIRequest: Codable, Sendable {
286290
return nil
287291
}
288292

293+
/// Thinking budget (in tokens) derived from the request.
///
/// Resolution order:
/// 1. An explicit `thinkingBudget` is returned unchanged.
/// 2. Otherwise the OpenAI-standard `reasoning_effort` string is lowercased
///    and mapped:
///    - "high" → 32768 tokens (deep thinking)
///    - "medium" → 8192 tokens (balanced)
///    - "low" → 1024 tokens (brief thinking)
///    - "none" → 0 (disables thinking entirely)
/// 3. A missing or unrecognized effort yields `nil`.
public var resolvedThinkingBudget: Int? {
    // `??` evaluates its right-hand side lazily, so the effort lookup only
    // runs when no explicit budget was supplied.
    thinkingBudget ?? reasoningEffort.flatMap { requestedEffort -> Int? in
        let normalizedEffort = requestedEffort.lowercased()
        if normalizedEffort == "high" { return 32768 }
        if normalizedEffort == "medium" { return 8192 }
        if normalizedEffort == "low" { return 1024 }
        if normalizedEffort == "none" { return 0 }
        return nil
    }
}
311+
289312
}
290313

291314
public enum MessageContent: Codable, Sendable {

Tests/NovaMLXAPITests/APITests.swift

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -442,4 +442,50 @@ struct APITypesTests {
442442
#expect(img.url == "https://example.com/photo.jpg")
443443
#expect(img.detail == "high")
444444
}
445+
446+
// MARK: - reasoning_effort mapping
447+
448+
/// Verifies that the snake_case `reasoning_effort` JSON key decodes into
/// `OpenAIRequest.reasoningEffort`.
@Test("reasoning_effort decodes from JSON")
func reasoningEffortDecodes() throws {
    let json = """
    {"model":"test","messages":[],"reasoning_effort":"high"}
    """
    // `Data(json.utf8)` cannot fail, so no force-unwrap is needed
    // (unlike `json.data(using: .utf8)!`).
    let data = Data(json.utf8)
    let req = try JSONDecoder().decode(OpenAIRequest.self, from: data)
    #expect(req.reasoningEffort == "high")
}
457+
458+
/// Checks every supported `reasoning_effort` value (plus an upper-case
/// variant, a missing value, and an unrecognized value) against the budget
/// that `resolvedThinkingBudget` is expected to produce.
@Test("reasoning_effort maps to thinking budget")
func reasoningEffortMapping() throws {
    // Each row pairs a reasoning_effort input with its expected budget.
    let expectations: [(effort: String?, expected: Int?)] = [
        (effort: "high", expected: 32768),
        (effort: "medium", expected: 8192),
        (effort: "low", expected: 1024),
        (effort: "none", expected: 0),
        (effort: "HIGH", expected: 32768),  // case-insensitive
        (effort: nil, expected: nil),
        (effort: "unknown", expected: nil),
    ]
    for (effort, expected) in expectations {
        let request = OpenAIRequest(model: "test", messages: [], reasoningEffort: effort)
        let resolved = request.resolvedThinkingBudget
        #expect(resolved == expected,
                "reasoning_effort=\(effort ?? "nil") should map to \(String(describing: expected))")
    }
}
479+
480+
/// Confirms that when both fields are supplied, the explicit
/// `thinkingBudget` is returned instead of the value `reasoningEffort`
/// would map to.
@Test("explicit thinking_budget wins over reasoning_effort")
func thinkingBudgetPrecedence() throws {
    // 2048 matches none of the effort-derived budgets, so a pass cannot be
    // explained by the "high" mapping.
    let explicitBudget = 2048
    let request = OpenAIRequest(
        model: "test",
        messages: [],
        thinkingBudget: explicitBudget,
        reasoningEffort: "high"
    )
    #expect(request.resolvedThinkingBudget == 2048,
            "Explicit thinking_budget should take precedence over reasoning_effort")
}
}
445491
}

0 commit comments

Comments
 (0)