Skip to content

Commit e09cd19

Browse files
committed
feat: update benchmark data
1 parent cc62f4c commit e09cd19

5 files changed

Lines changed: 853 additions & 15 deletions

File tree

Lines changed: 178 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,178 @@
1-
version https://git-lfs.github.com/spec/v1
2-
oid sha256:d8f90b2e35197a701445e8dfab125a81377e518a302219c0373948da83fd9d50
3-
size 3154
1+
{
2+
"config": {
3+
"model": "cerebras/gpt-oss-120b",
4+
"strategy": "default",
5+
"deck": "Red Deck",
6+
"stake": 1,
7+
"seed": "OOOO155",
8+
"challenge": null,
9+
"version": "0.4.0",
10+
"name": "Unknown Name",
11+
"description": "Unknown Description",
12+
"author": "BalatroBench",
13+
"tags": []
14+
},
15+
"total_runs": 5,
16+
"completed_runs": 2,
17+
"won_runs": 0,
18+
"averaged_stats": {
19+
"avg_final_round": 2.4,
20+
"avg_ante_reached": 1.6,
21+
"avg_jokers_bought": 0,
22+
"avg_jokers_sold": 0,
23+
"avg_consumables_used": 0.6,
24+
"avg_rerolls": 0.2,
25+
"avg_money_spent": 10.8,
26+
"avg_successful_calls": 18,
27+
"avg_error_calls": 0,
28+
"avg_failed_calls": 0,
29+
"avg_total_input_tokens": 37237.4,
30+
"avg_total_output_tokens": 9526,
31+
"avg_total_reasoning_tokens": 0,
32+
"avg_total_tokens": 46763.4,
33+
"avg_total_response_time_ms": 48158
34+
},
35+
"stats": [
36+
{
37+
"run_won": false,
38+
"completed": true,
39+
"ante_reached": 2,
40+
"final_round": 3,
41+
"jokers_bought": [],
42+
"jokers_sold": [],
43+
"consumables_used": [
44+
"Jupiter"
45+
],
46+
"rerolls": 0,
47+
"money_spent": 9,
48+
"hands_played": {
49+
"High Card": 13
50+
},
51+
"successful_calls": 23,
52+
"error_calls": [],
53+
"failed_calls": [],
54+
"avg_input_tokens": 2084.608695652174,
55+
"avg_output_tokens": 545.9130434782609,
56+
"avg_reasoning_tokens": 0.0,
57+
"avg_total_tokens": 2630.521739130435,
58+
"avg_response_time_ms": 2723.75,
59+
"total_input_tokens": 47946,
60+
"total_output_tokens": 12556,
61+
"total_reasoning_tokens": 0,
62+
"total_tokens": 60502,
63+
"total_response_time_ms": 65370
64+
},
65+
{
66+
"run_won": false,
67+
"completed": false,
68+
"ante_reached": 1,
69+
"final_round": 1,
70+
"jokers_bought": [],
71+
"jokers_sold": [],
72+
"consumables_used": [],
73+
"rerolls": 0,
74+
"money_spent": 0,
75+
"hands_played": {
76+
"High Card": 1
77+
},
78+
"successful_calls": 3,
79+
"error_calls": [],
80+
"failed_calls": [],
81+
"avg_input_tokens": 1953.0,
82+
"avg_output_tokens": 268.3333333333333,
83+
"avg_reasoning_tokens": 0.0,
84+
"avg_total_tokens": 2221.3333333333335,
85+
"avg_response_time_ms": 3357.0,
86+
"total_input_tokens": 5859,
87+
"total_output_tokens": 805,
88+
"total_reasoning_tokens": 0,
89+
"total_tokens": 6664,
90+
"total_response_time_ms": 10071
91+
},
92+
{
93+
"run_won": false,
94+
"completed": false,
95+
"ante_reached": 2,
96+
"final_round": 4,
97+
"jokers_bought": [],
98+
"jokers_sold": [],
99+
"consumables_used": [
100+
"Jupiter"
101+
],
102+
"rerolls": 1,
103+
"money_spent": 30,
104+
"hands_played": {
105+
"High Card": 14
106+
},
107+
"successful_calls": 35,
108+
"error_calls": [],
109+
"failed_calls": [],
110+
"avg_input_tokens": 2089.457142857143,
111+
"avg_output_tokens": 559.1142857142858,
112+
"avg_reasoning_tokens": 0.0,
113+
"avg_total_tokens": 2648.5714285714284,
114+
"avg_response_time_ms": 2455.8,
115+
"total_input_tokens": 73131,
116+
"total_output_tokens": 19569,
117+
"total_reasoning_tokens": 0,
118+
"total_tokens": 92700,
119+
"total_response_time_ms": 85953
120+
},
121+
{
122+
"run_won": false,
123+
"completed": true,
124+
"ante_reached": 2,
125+
"final_round": 3,
126+
"jokers_bought": [],
127+
"jokers_sold": [],
128+
"consumables_used": [
129+
"Jupiter"
130+
],
131+
"rerolls": 0,
132+
"money_spent": 9,
133+
"hands_played": {
134+
"High Card": 14
135+
},
136+
"successful_calls": 24,
137+
"error_calls": [],
138+
"failed_calls": [],
139+
"avg_input_tokens": 2081.0833333333335,
140+
"avg_output_tokens": 518.7083333333334,
141+
"avg_reasoning_tokens": 0.0,
142+
"avg_total_tokens": 2599.7916666666665,
143+
"avg_response_time_ms": 2717.2,
144+
"total_input_tokens": 49946,
145+
"total_output_tokens": 12449,
146+
"total_reasoning_tokens": 0,
147+
"total_tokens": 62395,
148+
"total_response_time_ms": 67930
149+
},
150+
{
151+
"run_won": false,
152+
"completed": false,
153+
"ante_reached": 1,
154+
"final_round": 1,
155+
"jokers_bought": [],
156+
"jokers_sold": [],
157+
"consumables_used": [],
158+
"rerolls": 0,
159+
"money_spent": 6,
160+
"hands_played": {
161+
"High Card": 1
162+
},
163+
"successful_calls": 5,
164+
"error_calls": [],
165+
"failed_calls": [],
166+
"avg_input_tokens": 1861.0,
167+
"avg_output_tokens": 450.2,
168+
"avg_reasoning_tokens": 0.0,
169+
"avg_total_tokens": 2311.2,
170+
"avg_response_time_ms": 2293.2,
171+
"total_input_tokens": 9305,
172+
"total_output_tokens": 2251,
173+
"total_reasoning_tokens": 0,
174+
"total_tokens": 11556,
175+
"total_response_time_ms": 11466
176+
}
177+
]
178+
}
Lines changed: 172 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,172 @@
1-
version https://git-lfs.github.com/spec/v1
2-
oid sha256:bcf61c79a670f0b20f347c0195269606e4e6911aaca19a4ec1a6d1be2c3c004e
3-
size 3190
1+
{
2+
"config": {
3+
"model": "cerebras/qwen-3-235b-a22b-instruct-2507",
4+
"strategy": "default",
5+
"deck": "Red Deck",
6+
"stake": 1,
7+
"seed": "OOOO155",
8+
"challenge": null,
9+
"version": "0.4.0",
10+
"name": "Unknown Name",
11+
"description": "Unknown Description",
12+
"author": "BalatroBench",
13+
"tags": []
14+
},
15+
"total_runs": 5,
16+
"completed_runs": 5,
17+
"won_runs": 0,
18+
"averaged_stats": {
19+
"avg_final_round": 2,
20+
"avg_ante_reached": 1,
21+
"avg_jokers_bought": 0,
22+
"avg_jokers_sold": 0,
23+
"avg_consumables_used": 0,
24+
"avg_rerolls": 0,
25+
"avg_money_spent": 9,
26+
"avg_successful_calls": 14.2,
27+
"avg_error_calls": 0,
28+
"avg_failed_calls": 0,
29+
"avg_total_input_tokens": 37420.6,
30+
"avg_total_output_tokens": 2200,
31+
"avg_total_reasoning_tokens": 0,
32+
"avg_total_tokens": 39620.6,
33+
"avg_total_response_time_ms": 38168
34+
},
35+
"stats": [
36+
{
37+
"run_won": false,
38+
"completed": true,
39+
"ante_reached": 1,
40+
"final_round": 2,
41+
"jokers_bought": [],
42+
"jokers_sold": [],
43+
"consumables_used": [],
44+
"rerolls": 0,
45+
"money_spent": 9,
46+
"hands_played": {
47+
"High Card": 8
48+
},
49+
"successful_calls": 14,
50+
"error_calls": [],
51+
"failed_calls": [],
52+
"avg_input_tokens": 2670.1428571428573,
53+
"avg_output_tokens": 136.42857142857142,
54+
"avg_reasoning_tokens": 0.0,
55+
"avg_total_tokens": 2806.5714285714284,
56+
"avg_response_time_ms": 2436.8,
57+
"total_input_tokens": 37382,
58+
"total_output_tokens": 1910,
59+
"total_reasoning_tokens": 0,
60+
"total_tokens": 39292,
61+
"total_response_time_ms": 36552
62+
},
63+
{
64+
"run_won": false,
65+
"completed": true,
66+
"ante_reached": 1,
67+
"final_round": 2,
68+
"jokers_bought": [],
69+
"jokers_sold": [],
70+
"consumables_used": [],
71+
"rerolls": 0,
72+
"money_spent": 9,
73+
"hands_played": {
74+
"High Card": 9
75+
},
76+
"successful_calls": 15,
77+
"error_calls": [],
78+
"failed_calls": [],
79+
"avg_input_tokens": 2544.2,
80+
"avg_output_tokens": 153.26666666666668,
81+
"avg_reasoning_tokens": 0.0,
82+
"avg_total_tokens": 2697.4666666666667,
83+
"avg_response_time_ms": 2555.5625,
84+
"total_input_tokens": 38163,
85+
"total_output_tokens": 2299,
86+
"total_reasoning_tokens": 0,
87+
"total_tokens": 40462,
88+
"total_response_time_ms": 40889
89+
},
90+
{
91+
"run_won": false,
92+
"completed": true,
93+
"ante_reached": 1,
94+
"final_round": 2,
95+
"jokers_bought": [],
96+
"jokers_sold": [],
97+
"consumables_used": [],
98+
"rerolls": 0,
99+
"money_spent": 9,
100+
"hands_played": {
101+
"High Card": 8
102+
},
103+
"successful_calls": 14,
104+
"error_calls": [],
105+
"failed_calls": [],
106+
"avg_input_tokens": 2712.285714285714,
107+
"avg_output_tokens": 177.78571428571428,
108+
"avg_reasoning_tokens": 0.0,
109+
"avg_total_tokens": 2890.0714285714284,
110+
"avg_response_time_ms": 2582.6,
111+
"total_input_tokens": 37972,
112+
"total_output_tokens": 2489,
113+
"total_reasoning_tokens": 0,
114+
"total_tokens": 40461,
115+
"total_response_time_ms": 38739
116+
},
117+
{
118+
"run_won": false,
119+
"completed": true,
120+
"ante_reached": 1,
121+
"final_round": 2,
122+
"jokers_bought": [],
123+
"jokers_sold": [],
124+
"consumables_used": [],
125+
"rerolls": 0,
126+
"money_spent": 9,
127+
"hands_played": {
128+
"High Card": 8
129+
},
130+
"successful_calls": 14,
131+
"error_calls": [],
132+
"failed_calls": [],
133+
"avg_input_tokens": 2644.5,
134+
"avg_output_tokens": 149.0,
135+
"avg_reasoning_tokens": 0.0,
136+
"avg_total_tokens": 2793.5,
137+
"avg_response_time_ms": 2459.0,
138+
"total_input_tokens": 37023,
139+
"total_output_tokens": 2086,
140+
"total_reasoning_tokens": 0,
141+
"total_tokens": 39109,
142+
"total_response_time_ms": 36885
143+
},
144+
{
145+
"run_won": false,
146+
"completed": true,
147+
"ante_reached": 1,
148+
"final_round": 2,
149+
"jokers_bought": [],
150+
"jokers_sold": [],
151+
"consumables_used": [],
152+
"rerolls": 0,
153+
"money_spent": 9,
154+
"hands_played": {
155+
"High Card": 8
156+
},
157+
"successful_calls": 14,
158+
"error_calls": [],
159+
"failed_calls": [],
160+
"avg_input_tokens": 2611.6428571428573,
161+
"avg_output_tokens": 158.28571428571428,
162+
"avg_reasoning_tokens": 0.0,
163+
"avg_total_tokens": 2769.9285714285716,
164+
"avg_response_time_ms": 2518.3333333333335,
165+
"total_input_tokens": 36563,
166+
"total_output_tokens": 2216,
167+
"total_reasoning_tokens": 0,
168+
"total_tokens": 38779,
169+
"total_response_time_ms": 37775
170+
}
171+
]
172+
}

0 commit comments

Comments
 (0)