1+ {
2+ "generated_at" : " 2025-09-04T10:11:00.427201" ,
3+ "version" : " 0.7.0" ,
4+ "strategy" : " default" ,
5+ "total_entries" : 3 ,
6+ "total_runs_analyzed" : 14 ,
7+ "entries" : [
8+ {
9+ "rank" : 1 ,
10+ "config" : {
11+ "model" : " qwen/qwen3-235b-a22b-thinking-2507" ,
12+ "strategy" : " default" ,
13+ "deck" : " Red Deck" ,
14+ "stake" : 1 ,
15+ "seed" : " OOOO155" ,
16+ "challenge" : null ,
17+ "version" : " 0.7.0" ,
18+ "name" : " Unknown Name" ,
19+ "description" : " Unknown Description" ,
20+ "author" : " BalatroBench" ,
21+ "tags" : []
22+ },
23+ "total_runs" : 5 ,
24+ "completed_runs" : 5 ,
25+ "won_runs" : 0 ,
26+ "averaged_stats" : {
27+ "avg_final_round" : 3.4 ,
28+ "avg_ante_reached" : 1.6 ,
29+ "avg_jokers_bought" : 0 ,
30+ "avg_jokers_sold" : 0.4 ,
31+ "avg_consumables_used" : 0.6 ,
32+ "avg_rerolls" : 0.6 ,
33+ "avg_money_spent" : 12.8 ,
34+ "avg_successful_calls" : 29 ,
35+ "avg_invalid_responses" : 0 ,
36+ "avg_failed_calls" : 2 ,
37+ "avg_avg_input_tokens" : 2749.81 ,
38+ "avg_avg_output_tokens" : 2643.91 ,
39+ "avg_avg_reasoning_tokens" : 2507.57 ,
40+ "avg_avg_total_tokens" : 5393.72 ,
41+ "avg_avg_response_time_ms" : 2319.17 ,
42+ "avg_total_input_tokens" : 81573.4 ,
43+ "avg_total_output_tokens" : 78361.6 ,
44+ "avg_total_reasoning_tokens" : 74651.2 ,
45+ "avg_total_tokens" : 159935 ,
46+ "avg_total_response_time_ms" : 62944 ,
47+ "avg_total_cost" : 0.26 ,
48+ "avg_avg_cost_per_call" : 0.01 ,
49+ "avg_total_upstream_inference_cost" : 0 ,
50+ "avg_total_upstream_prompt_cost" : 0.02 ,
51+ "avg_total_upstream_completion_cost" : 0.24 ,
52+ "avg_providers_used_count" : 1 ,
53+ "avg_reasoning_calls" : 0 ,
54+ "avg_avg_reasoning_content_length" : 0.0 ,
55+ "avg_total_reasoning_content_length" : 0 ,
56+ "avg_request_ids_count" : 29
57+ }
58+ },
59+ {
60+ "rank" : 2 ,
61+ "config" : {
62+ "model" : " openai/gpt-oss-120b" ,
63+ "strategy" : " default" ,
64+ "deck" : " Red Deck" ,
65+ "stake" : 1 ,
66+ "seed" : " OOOO155" ,
67+ "challenge" : null ,
68+ "version" : " 0.7.0" ,
69+ "name" : " Unknown Name" ,
70+ "description" : " Unknown Description" ,
71+ "author" : " BalatroBench" ,
72+ "tags" : []
73+ },
74+ "total_runs" : 4 ,
75+ "completed_runs" : 4 ,
76+ "won_runs" : 0 ,
77+ "averaged_stats" : {
78+ "avg_final_round" : 3 ,
79+ "avg_ante_reached" : 2 ,
80+ "avg_jokers_bought" : 0 ,
81+ "avg_jokers_sold" : 0.25 ,
82+ "avg_consumables_used" : 0.75 ,
83+ "avg_rerolls" : 0 ,
84+ "avg_money_spent" : 10 ,
85+ "avg_successful_calls" : 24.75 ,
86+ "avg_invalid_responses" : 0.5 ,
87+ "avg_failed_calls" : 0.25 ,
88+ "avg_avg_input_tokens" : 2210.44 ,
89+ "avg_avg_output_tokens" : 610.14 ,
90+ "avg_avg_reasoning_tokens" : 484.28 ,
91+ "avg_avg_total_tokens" : 2820.58 ,
92+ "avg_avg_response_time_ms" : 2566.22 ,
93+ "avg_total_input_tokens" : 54700.5 ,
94+ "avg_total_output_tokens" : 15144 ,
95+ "avg_total_reasoning_tokens" : 12037 ,
96+ "avg_total_tokens" : 69844.5 ,
97+ "avg_total_response_time_ms" : 64102 ,
98+ "avg_total_cost" : 0.01 ,
99+ "avg_avg_cost_per_call" : 0.0 ,
100+ "avg_total_upstream_inference_cost" : 0 ,
101+ "avg_total_upstream_prompt_cost" : 0.0 ,
102+ "avg_total_upstream_completion_cost" : 0.01 ,
103+ "avg_providers_used_count" : 1 ,
104+ "avg_reasoning_calls" : 0 ,
105+ "avg_avg_reasoning_content_length" : 0.0 ,
106+ "avg_total_reasoning_content_length" : 0 ,
107+ "avg_request_ids_count" : 24.75
108+ }
109+ },
110+ {
111+ "rank" : 3 ,
112+ "config" : {
113+ "model" : " openai/gpt-oss-20b" ,
114+ "strategy" : " default" ,
115+ "deck" : " Red Deck" ,
116+ "stake" : 1 ,
117+ "seed" : " OOOO155" ,
118+ "challenge" : null ,
119+ "version" : " 0.7.0" ,
120+ "name" : " Unknown Name" ,
121+ "description" : " Unknown Description" ,
122+ "author" : " BalatroBench" ,
123+ "tags" : []
124+ },
125+ "total_runs" : 5 ,
126+ "completed_runs" : 4 ,
127+ "won_runs" : 0 ,
128+ "averaged_stats" : {
129+ "avg_final_round" : 2 ,
130+ "avg_ante_reached" : 1.2 ,
131+ "avg_jokers_bought" : 0 ,
132+ "avg_jokers_sold" : 0 ,
133+ "avg_consumables_used" : 0.4 ,
134+ "avg_rerolls" : 0 ,
135+ "avg_money_spent" : 1.2 ,
136+ "avg_successful_calls" : 12 ,
137+ "avg_invalid_responses" : 2 ,
138+ "avg_failed_calls" : 0 ,
139+ "avg_avg_input_tokens" : 2284.9 ,
140+ "avg_avg_output_tokens" : 725.17 ,
141+ "avg_avg_reasoning_tokens" : 605.28 ,
142+ "avg_avg_total_tokens" : 3010.07 ,
143+ "avg_avg_response_time_ms" : 2795.64 ,
144+ "avg_total_input_tokens" : 27975.2 ,
145+ "avg_total_output_tokens" : 9010.4 ,
146+ "avg_total_reasoning_tokens" : 7446 ,
147+ "avg_total_tokens" : 36985.6 ,
148+ "avg_total_response_time_ms" : 29405.6 ,
149+ "avg_total_cost" : 0.0 ,
150+ "avg_avg_cost_per_call" : 0.0 ,
151+ "avg_total_upstream_inference_cost" : 0 ,
152+ "avg_total_upstream_prompt_cost" : 0.0 ,
153+ "avg_total_upstream_completion_cost" : 0.0 ,
154+ "avg_providers_used_count" : 1.2 ,
155+ "avg_reasoning_calls" : 0 ,
156+ "avg_avg_reasoning_content_length" : 0.0 ,
157+ "avg_total_reasoning_content_length" : 0 ,
158+ "avg_request_ids_count" : 12
159+ }
160+ }
161+ ]
162+ }
0 commit comments