Skip to content

Commit 4a3d833

Browse files
committed
feat: add benchmarks data for v0.6.0
1 parent e09cd19 commit 4a3d833

5 files changed

Lines changed: 904 additions & 0 deletions

File tree

Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
{
2+
"config": {
3+
"model": "cerebras/gpt-oss-120b",
4+
"strategy": "default",
5+
"deck": "Red Deck",
6+
"stake": 1,
7+
"seed": "OOOO155",
8+
"challenge": null,
9+
"version": "0.6.0",
10+
"name": "Unknown Name",
11+
"description": "Unknown Description",
12+
"author": "BalatroBench",
13+
"tags": []
14+
},
15+
"total_runs": 5,
16+
"completed_runs": 5,
17+
"won_runs": 0,
18+
"averaged_stats": {
19+
"avg_final_round": 3.4,
20+
"avg_ante_reached": 1.6,
21+
"avg_jokers_bought": 0,
22+
"avg_jokers_sold": 0,
23+
"avg_consumables_used": 0.6,
24+
"avg_rerolls": 0,
25+
"avg_money_spent": 12,
26+
"avg_successful_calls": 27,
27+
"avg_error_calls": 0.2,
28+
"avg_failed_calls": 1,
29+
"avg_total_input_tokens": 61645,
30+
"avg_total_output_tokens": 17184.4,
31+
"avg_total_reasoning_tokens": 0,
32+
"avg_total_tokens": 78829.4,
33+
"avg_total_response_time_ms": 73676.8
34+
},
35+
"stats": [
36+
{
37+
"run_won": false,
38+
"completed": true,
39+
"ante_reached": 1,
40+
"final_round": 2,
41+
"jokers_bought": [],
42+
"jokers_sold": [],
43+
"consumables_used": [
44+
"Jupiter"
45+
],
46+
"rerolls": 0,
47+
"money_spent": 9,
48+
"hands_played": {
49+
"High Card": 8
50+
},
51+
"successful_calls": 17,
52+
"invalid_responses": 1,
53+
"failed_calls": [
54+
"E011: Card index out of range"
55+
],
56+
"avg_input_tokens": 2139.9411764705883,
57+
"avg_output_tokens": 561.7058823529412,
58+
"avg_reasoning_tokens": 0.0,
59+
"avg_total_tokens": 2701.6470588235293,
60+
"avg_response_time_ms": 2656.0,
61+
"total_input_tokens": 36379,
62+
"total_output_tokens": 9549,
63+
"total_reasoning_tokens": 0,
64+
"total_tokens": 45928,
65+
"total_response_time_ms": 42496
66+
},
67+
{
68+
"run_won": false,
69+
"completed": true,
70+
"ante_reached": 3,
71+
"final_round": 8,
72+
"jokers_bought": [],
73+
"jokers_sold": [],
74+
"consumables_used": [
75+
"Jupiter"
76+
],
77+
"rerolls": 0,
78+
"money_spent": 27,
79+
"hands_played": {
80+
"High Card": 34
81+
},
82+
"successful_calls": 65,
83+
"invalid_responses": 0,
84+
"failed_calls": [
85+
"E011: Card index out of range",
86+
"E011: Card index out of range"
87+
],
88+
"avg_input_tokens": 2308.3846153846152,
89+
"avg_output_tokens": 666.9692307692308,
90+
"avg_reasoning_tokens": 0.0,
91+
"avg_total_tokens": 2975.353846153846,
92+
"avg_response_time_ms": 2972.234375,
93+
"total_input_tokens": 150045,
94+
"total_output_tokens": 43353,
95+
"total_reasoning_tokens": 0,
96+
"total_tokens": 193398,
97+
"total_response_time_ms": 190223
98+
},
99+
{
100+
"run_won": false,
101+
"completed": true,
102+
"ante_reached": 1,
103+
"final_round": 2,
104+
"jokers_bought": [],
105+
"jokers_sold": [],
106+
"consumables_used": [],
107+
"rerolls": 0,
108+
"money_spent": 9,
109+
"hands_played": {
110+
"High Card": 8
111+
},
112+
"successful_calls": 15,
113+
"invalid_responses": 0,
114+
"failed_calls": [
115+
"E011: Invalid number of cards"
116+
],
117+
"avg_input_tokens": 2282.8,
118+
"avg_output_tokens": 665.4,
119+
"avg_reasoning_tokens": 0.0,
120+
"avg_total_tokens": 2948.2,
121+
"avg_response_time_ms": 2354.0666666666666,
122+
"total_input_tokens": 34242,
123+
"total_output_tokens": 9981,
124+
"total_reasoning_tokens": 0,
125+
"total_tokens": 44223,
126+
"total_response_time_ms": 35311
127+
},
128+
{
129+
"run_won": false,
130+
"completed": true,
131+
"ante_reached": 2,
132+
"final_round": 3,
133+
"jokers_bought": [],
134+
"jokers_sold": [],
135+
"consumables_used": [
136+
"Jupiter"
137+
],
138+
"rerolls": 0,
139+
"money_spent": 9,
140+
"hands_played": {
141+
"High Card": 13
142+
},
143+
"successful_calls": 23,
144+
"invalid_responses": 0,
145+
"failed_calls": [],
146+
"avg_input_tokens": 2323.7391304347825,
147+
"avg_output_tokens": 638.5217391304348,
148+
"avg_reasoning_tokens": 0.0,
149+
"avg_total_tokens": 2962.2608695652175,
150+
"avg_response_time_ms": 2596.0833333333335,
151+
"total_input_tokens": 53446,
152+
"total_output_tokens": 14686,
153+
"total_reasoning_tokens": 0,
154+
"total_tokens": 68132,
155+
"total_response_time_ms": 62306
156+
},
157+
{
158+
"run_won": false,
159+
"completed": true,
160+
"ante_reached": 1,
161+
"final_round": 2,
162+
"jokers_bought": [],
163+
"jokers_sold": [],
164+
"consumables_used": [],
165+
"rerolls": 0,
166+
"money_spent": 6,
167+
"hands_played": {
168+
"High Card": 8
169+
},
170+
"successful_calls": 15,
171+
"invalid_responses": 0,
172+
"failed_calls": [
173+
"E010: Cannot skip Boss blind. Use select instead"
174+
],
175+
"avg_input_tokens": 2274.2,
176+
"avg_output_tokens": 556.8666666666667,
177+
"avg_reasoning_tokens": 0.0,
178+
"avg_total_tokens": 2831.0666666666666,
179+
"avg_response_time_ms": 2536.5333333333333,
180+
"total_input_tokens": 34113,
181+
"total_output_tokens": 8353,
182+
"total_reasoning_tokens": 0,
183+
"total_tokens": 42466,
184+
"total_response_time_ms": 38048
185+
}
186+
]
187+
}
Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
{
2+
"config": {
3+
"model": "cerebras/qwen-3-235b-a22b-instruct-2507",
4+
"strategy": "default",
5+
"deck": "Red Deck",
6+
"stake": 1,
7+
"seed": "OOOO155",
8+
"challenge": null,
9+
"version": "0.6.0",
10+
"name": "Unknown Name",
11+
"description": "Unknown Description",
12+
"author": "BalatroBench",
13+
"tags": []
14+
},
15+
"total_runs": 5,
16+
"completed_runs": 5,
17+
"won_runs": 0,
18+
"averaged_stats": {
19+
"avg_final_round": 2,
20+
"avg_ante_reached": 1,
21+
"avg_jokers_bought": 0,
22+
"avg_jokers_sold": 0,
23+
"avg_consumables_used": 0,
24+
"avg_rerolls": 0,
25+
"avg_money_spent": 7.8,
26+
"avg_successful_calls": 14.4,
27+
"avg_error_calls": 0,
28+
"avg_failed_calls": 0,
29+
"avg_total_input_tokens": 43841.8,
30+
"avg_total_output_tokens": 2736.8,
31+
"avg_total_reasoning_tokens": 0,
32+
"avg_total_tokens": 46578.6,
33+
"avg_total_response_time_ms": 33654.2
34+
},
35+
"stats": [
36+
{
37+
"run_won": false,
38+
"completed": true,
39+
"ante_reached": 1,
40+
"final_round": 2,
41+
"jokers_bought": [],
42+
"jokers_sold": [],
43+
"consumables_used": [],
44+
"rerolls": 0,
45+
"money_spent": 9,
46+
"hands_played": {
47+
"High Card": 8
48+
},
49+
"successful_calls": 14,
50+
"invalid_responses": 0,
51+
"failed_calls": [],
52+
"avg_input_tokens": 3193.785714285714,
53+
"avg_output_tokens": 183.71428571428572,
54+
"avg_reasoning_tokens": 0.0,
55+
"avg_total_tokens": 3377.5,
56+
"avg_response_time_ms": 2291.4,
57+
"total_input_tokens": 44713,
58+
"total_output_tokens": 2572,
59+
"total_reasoning_tokens": 0,
60+
"total_tokens": 47285,
61+
"total_response_time_ms": 34371
62+
},
63+
{
64+
"run_won": false,
65+
"completed": true,
66+
"ante_reached": 1,
67+
"final_round": 2,
68+
"jokers_bought": [],
69+
"jokers_sold": [],
70+
"consumables_used": [],
71+
"rerolls": 0,
72+
"money_spent": 9,
73+
"hands_played": {
74+
"High Card": 8
75+
},
76+
"successful_calls": 14,
77+
"invalid_responses": 0,
78+
"failed_calls": [],
79+
"avg_input_tokens": 2761.3571428571427,
80+
"avg_output_tokens": 166.85714285714286,
81+
"avg_reasoning_tokens": 0.0,
82+
"avg_total_tokens": 2928.214285714286,
83+
"avg_response_time_ms": 2338.4666666666667,
84+
"total_input_tokens": 38659,
85+
"total_output_tokens": 2336,
86+
"total_reasoning_tokens": 0,
87+
"total_tokens": 40995,
88+
"total_response_time_ms": 35077
89+
},
90+
{
91+
"run_won": false,
92+
"completed": true,
93+
"ante_reached": 1,
94+
"final_round": 2,
95+
"jokers_bought": [],
96+
"jokers_sold": [],
97+
"consumables_used": [],
98+
"rerolls": 0,
99+
"money_spent": 3,
100+
"hands_played": {
101+
"High Card": 5
102+
},
103+
"successful_calls": 14,
104+
"invalid_responses": 0,
105+
"failed_calls": [],
106+
"avg_input_tokens": 3210.1428571428573,
107+
"avg_output_tokens": 238.78571428571428,
108+
"avg_reasoning_tokens": 0.0,
109+
"avg_total_tokens": 3448.9285714285716,
110+
"avg_response_time_ms": 1987.2142857142858,
111+
"total_input_tokens": 44942,
112+
"total_output_tokens": 3343,
113+
"total_reasoning_tokens": 0,
114+
"total_tokens": 48285,
115+
"total_response_time_ms": 27821
116+
},
117+
{
118+
"run_won": false,
119+
"completed": true,
120+
"ante_reached": 1,
121+
"final_round": 2,
122+
"jokers_bought": [],
123+
"jokers_sold": [],
124+
"consumables_used": [],
125+
"rerolls": 0,
126+
"money_spent": 9,
127+
"hands_played": {
128+
"High Card": 8
129+
},
130+
"successful_calls": 14,
131+
"invalid_responses": 0,
132+
"failed_calls": [],
133+
"avg_input_tokens": 3006.1428571428573,
134+
"avg_output_tokens": 171.92857142857142,
135+
"avg_reasoning_tokens": 0.0,
136+
"avg_total_tokens": 3178.0714285714284,
137+
"avg_response_time_ms": 2253.3333333333335,
138+
"total_input_tokens": 42086,
139+
"total_output_tokens": 2407,
140+
"total_reasoning_tokens": 0,
141+
"total_tokens": 44493,
142+
"total_response_time_ms": 33800
143+
},
144+
{
145+
"run_won": false,
146+
"completed": true,
147+
"ante_reached": 1,
148+
"final_round": 2,
149+
"jokers_bought": [],
150+
"jokers_sold": [],
151+
"consumables_used": [],
152+
"rerolls": 0,
153+
"money_spent": 9,
154+
"hands_played": {
155+
"High Card": 8
156+
},
157+
"successful_calls": 16,
158+
"invalid_responses": 0,
159+
"failed_calls": [],
160+
"avg_input_tokens": 3050.5625,
161+
"avg_output_tokens": 189.125,
162+
"avg_reasoning_tokens": 0.0,
163+
"avg_total_tokens": 3239.6875,
164+
"avg_response_time_ms": 2480.133333333333,
165+
"total_input_tokens": 48809,
166+
"total_output_tokens": 3026,
167+
"total_reasoning_tokens": 0,
168+
"total_tokens": 51835,
169+
"total_response_time_ms": 37202
170+
}
171+
]
172+
}

0 commit comments

Comments
 (0)