Skip to content

Commit 0422061

Browse files
committed
fix: extract reasoning from tool calls if not available in response
Closes coder/balatrobench#10
1 parent 2e3be75 commit 0422061

1 file changed

Lines changed: 28 additions & 3 deletions

File tree

src/balatrollm/benchmark.py

Lines changed: 28 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -304,6 +304,26 @@ def extract_request_content(self, requests_file: Path) -> dict[str, str]:
304304
)
305305
return content_by_id
306306

307+
def extract_reasoning_from_tool_calls(self, tool_calls: list) -> str:
    """Extract reasoning from tool call arguments if available.

    Looks for a "reasoning" field in the arguments of each tool call and
    returns the first non-empty one.

    Args:
        tool_calls: Tool-call entries shaped like
            ``{"function": {"arguments": ...}}``. ``None`` or an empty
            list is treated as "no tool calls". ``arguments`` is normally
            a JSON-encoded string, but an already-parsed dict is accepted
            as well.

    Returns:
        The first truthy "reasoning" value found, or an empty string if
        no tool call carries one.
    """
    # A message may carry ``"tool_calls": null``; ``dict.get(key, [])`` then
    # yields None rather than the default, so guard against it here.
    for tool_call in tool_calls or ():
        if not isinstance(tool_call, dict):
            continue

        function = tool_call.get("function")
        if not isinstance(function, dict) or "arguments" not in function:
            continue

        args = function["arguments"]
        if isinstance(args, (str, bytes, bytearray)):
            try:
                # arguments is a JSON string that needs to be parsed
                args = json.loads(args)
            except ValueError:
                # Covers json.JSONDecodeError (and undecodable bytes):
                # skip entries whose arguments are not valid JSON.
                continue

        # Valid JSON is not necessarily an object (could be a list, number,
        # string, ...); only a mapping can hold a "reasoning" field.
        if isinstance(args, dict) and args.get("reasoning"):
            return args["reasoning"]

    return ""
326+
307327
def extract_response_data(self, responses_file: Path) -> dict[str, dict]:
308328
"""Extract reasoning and tool_call from responses.jsonl by custom_id."""
309329
response_by_id = {}
@@ -320,10 +340,15 @@ def extract_response_data(self, responses_file: Path) -> dict[str, dict]:
320340
choice = body["choices"][0]
321341
message = choice.get("message", {})
322342

343+
tool_calls = message.get("tool_calls", [])
344+
# Try explicit reasoning first, then fall back to tool call arguments
345+
reasoning = message.get(
346+
"reasoning", ""
347+
) or self.extract_reasoning_from_tool_calls(tool_calls)
348+
323349
response_data = {
324-
"reasoning": message.get("reasoning", "")
325-
or "Reasoning not available for the model",
326-
"tool_call": message.get("tool_calls", []),
350+
"reasoning": reasoning,
351+
"tool_call": tool_calls,
327352
}
328353
response_by_id[custom_id] = response_data
329354
return response_by_id

0 commit comments

Comments
 (0)