Skip to content

Commit fbb8007

Browse files
S1M0N38claude
andcommitted
feat: add v0.4.0 data structure and initial content
- Introduce new benchmark and community data organization - Add v0.4.0 directory structure for enhanced data management - Prepare platform for next generation benchmark results 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 393ad06 commit fbb8007

9 files changed

Lines changed: 1111 additions & 0 deletions
Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
{
2+
"config": {
3+
"model": "cerebras/gpt-oss-120b",
4+
"strategy": "default",
5+
"deck": "Red Deck",
6+
"stake": 1,
7+
"seed": "OOOO155",
8+
"challenge": null,
9+
"version": "0.4.0",
10+
"name": "Unknown Name",
11+
"description": "Unknown Description",
12+
"author": "BalatroBench",
13+
"tags": []
14+
},
15+
"total_runs": 3,
16+
"completed_runs": 0,
17+
"won_runs": 0,
18+
"averaged_stats": {
19+
"avg_final_round": 2.33,
20+
"avg_ante_reached": 1.33,
21+
"avg_jokers_bought": 0,
22+
"avg_jokers_sold": 0.33,
23+
"avg_consumables_used": 0.67,
24+
"avg_rerolls": 0,
25+
"avg_money_spent": 13.33,
26+
"avg_successful_calls": 16.33,
27+
"avg_error_calls": 0,
28+
"avg_failed_calls": 0,
29+
"avg_total_input_tokens": 33409.33,
30+
"avg_total_output_tokens": 9150.33,
31+
"avg_total_reasoning_tokens": 0,
32+
"avg_total_tokens": 42559.67,
33+
"avg_total_response_time_ms": 37359.33
34+
},
35+
"stats": [
36+
{
37+
"run_won": false,
38+
"completed": false,
39+
"ante_reached": 1,
40+
"final_round": 2,
41+
"jokers_bought": [],
42+
"jokers_sold": [],
43+
"consumables_used": [
44+
"Jupiter"
45+
],
46+
"rerolls": 0,
47+
"money_spent": 9,
48+
"hands_played": {
49+
"High Card": 7
50+
},
51+
"successful_calls": 15,
52+
"error_calls": [],
53+
"failed_calls": [],
54+
"avg_input_tokens": 2057.4666666666667,
55+
"avg_output_tokens": 551.0666666666667,
56+
"avg_reasoning_tokens": 0.0,
57+
"avg_total_tokens": 2608.5333333333333,
58+
"avg_response_time_ms": 2279.4,
59+
"total_input_tokens": 30862,
60+
"total_output_tokens": 8266,
61+
"total_reasoning_tokens": 0,
62+
"total_tokens": 39128,
63+
"total_response_time_ms": 34191
64+
},
65+
{
66+
"run_won": false,
67+
"completed": false,
68+
"ante_reached": 1,
69+
"final_round": 1,
70+
"jokers_bought": [],
71+
"jokers_sold": [],
72+
"consumables_used": [],
73+
"rerolls": 0,
74+
"money_spent": 0,
75+
"hands_played": {},
76+
"successful_calls": 2,
77+
"error_calls": [],
78+
"failed_calls": [],
79+
"avg_input_tokens": 2030.0,
80+
"avg_output_tokens": 504.5,
81+
"avg_reasoning_tokens": 0.0,
82+
"avg_total_tokens": 2534.5,
83+
"avg_response_time_ms": 1760.0,
84+
"total_input_tokens": 4060,
85+
"total_output_tokens": 1009,
86+
"total_reasoning_tokens": 0,
87+
"total_tokens": 5069,
88+
"total_response_time_ms": 3520
89+
},
90+
{
91+
"run_won": false,
92+
"completed": false,
93+
"ante_reached": 2,
94+
"final_round": 4,
95+
"jokers_bought": [],
96+
"jokers_sold": [
97+
"Burglar"
98+
],
99+
"consumables_used": [
100+
"Jupiter"
101+
],
102+
"rerolls": 0,
103+
"money_spent": 31,
104+
"hands_played": {
105+
"High Card": 11
106+
},
107+
"successful_calls": 32,
108+
"error_calls": [],
109+
"failed_calls": [],
110+
"avg_input_tokens": 2040.8125,
111+
"avg_output_tokens": 568.0,
112+
"avg_reasoning_tokens": 0.0,
113+
"avg_total_tokens": 2608.8125,
114+
"avg_response_time_ms": 2323.96875,
115+
"total_input_tokens": 65306,
116+
"total_output_tokens": 18176,
117+
"total_reasoning_tokens": 0,
118+
"total_tokens": 83482,
119+
"total_response_time_ms": 74367
120+
}
121+
]
122+
}
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
{
2+
"config": {
3+
"model": "cerebras/qwen-3-235b-a22b-instruct-2507",
4+
"strategy": "default",
5+
"deck": "Red Deck",
6+
"stake": 1,
7+
"seed": "OOOO155",
8+
"challenge": null,
9+
"version": "0.4.0",
10+
"name": "Unknown Name",
11+
"description": "Unknown Description",
12+
"author": "BalatroBench",
13+
"tags": []
14+
},
15+
"total_runs": 3,
16+
"completed_runs": 2,
17+
"won_runs": 0,
18+
"averaged_stats": {
19+
"avg_final_round": 2,
20+
"avg_ante_reached": 1,
21+
"avg_jokers_bought": 0,
22+
"avg_jokers_sold": 0,
23+
"avg_consumables_used": 0,
24+
"avg_rerolls": 0,
25+
"avg_money_spent": 5,
26+
"avg_successful_calls": 13.33,
27+
"avg_error_calls": 0,
28+
"avg_failed_calls": 0,
29+
"avg_total_input_tokens": 35451,
30+
"avg_total_output_tokens": 2233.67,
31+
"avg_total_reasoning_tokens": 0,
32+
"avg_total_tokens": 37684.67,
33+
"avg_total_response_time_ms": 29097.67
34+
},
35+
"stats": [
36+
{
37+
"run_won": false,
38+
"completed": true,
39+
"ante_reached": 1,
40+
"final_round": 2,
41+
"jokers_bought": [],
42+
"jokers_sold": [],
43+
"consumables_used": [],
44+
"rerolls": 0,
45+
"money_spent": 3,
46+
"hands_played": {
47+
"High Card": 5
48+
},
49+
"successful_calls": 11,
50+
"error_calls": [],
51+
"failed_calls": [],
52+
"avg_input_tokens": 2674.818181818182,
53+
"avg_output_tokens": 159.0,
54+
"avg_reasoning_tokens": 0.0,
55+
"avg_total_tokens": 2833.818181818182,
56+
"avg_response_time_ms": 2641.3636363636365,
57+
"total_input_tokens": 29423,
58+
"total_output_tokens": 1749,
59+
"total_reasoning_tokens": 0,
60+
"total_tokens": 31172,
61+
"total_response_time_ms": 29055
62+
},
63+
{
64+
"run_won": false,
65+
"completed": true,
66+
"ante_reached": 1,
67+
"final_round": 2,
68+
"jokers_bought": [],
69+
"jokers_sold": [],
70+
"consumables_used": [],
71+
"rerolls": 0,
72+
"money_spent": 3,
73+
"hands_played": {
74+
"High Card": 7
75+
},
76+
"successful_calls": 18,
77+
"error_calls": [],
78+
"failed_calls": [],
79+
"avg_input_tokens": 2711.222222222222,
80+
"avg_output_tokens": 185.55555555555554,
81+
"avg_reasoning_tokens": 0.0,
82+
"avg_total_tokens": 2896.777777777778,
83+
"avg_response_time_ms": 2104.0588235294117,
84+
"total_input_tokens": 48802,
85+
"total_output_tokens": 3340,
86+
"total_reasoning_tokens": 0,
87+
"total_tokens": 52142,
88+
"total_response_time_ms": 35769
89+
},
90+
{
91+
"run_won": false,
92+
"completed": false,
93+
"ante_reached": 1,
94+
"final_round": 2,
95+
"jokers_bought": [],
96+
"jokers_sold": [],
97+
"consumables_used": [],
98+
"rerolls": 0,
99+
"money_spent": 9,
100+
"hands_played": {
101+
"High Card": 4
102+
},
103+
"successful_calls": 11,
104+
"error_calls": [],
105+
"failed_calls": [],
106+
"avg_input_tokens": 2557.090909090909,
107+
"avg_output_tokens": 146.54545454545453,
108+
"avg_reasoning_tokens": 0.0,
109+
"avg_total_tokens": 2703.6363636363635,
110+
"avg_response_time_ms": 2042.6363636363637,
111+
"total_input_tokens": 28128,
112+
"total_output_tokens": 1612,
113+
"total_reasoning_tokens": 0,
114+
"total_tokens": 29740,
115+
"total_response_time_ms": 22469
116+
}
117+
]
118+
}
Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
{
2+
"config": {
3+
"model": "cerebras/qwen-3-235b-a22b-thinking-2507",
4+
"strategy": "default",
5+
"deck": "Red Deck",
6+
"stake": 1,
7+
"seed": "OOOO155",
8+
"challenge": null,
9+
"version": "0.4.0",
10+
"name": "Unknown Name",
11+
"description": "Unknown Description",
12+
"author": "BalatroBench",
13+
"tags": []
14+
},
15+
"total_runs": 3,
16+
"completed_runs": 2,
17+
"won_runs": 0,
18+
"averaged_stats": {
19+
"avg_final_round": 1.67,
20+
"avg_ante_reached": 1,
21+
"avg_jokers_bought": 0,
22+
"avg_jokers_sold": 0,
23+
"avg_consumables_used": 0.67,
24+
"avg_rerolls": 0,
25+
"avg_money_spent": 6,
26+
"avg_successful_calls": 11,
27+
"avg_error_calls": 0,
28+
"avg_failed_calls": 0,
29+
"avg_total_input_tokens": 27529.33,
30+
"avg_total_output_tokens": 23245.67,
31+
"avg_total_reasoning_tokens": 0,
32+
"avg_total_tokens": 50775,
33+
"avg_total_response_time_ms": 28993
34+
},
35+
"stats": [
36+
{
37+
"run_won": false,
38+
"completed": true,
39+
"ante_reached": 1,
40+
"final_round": 2,
41+
"jokers_bought": [],
42+
"jokers_sold": [],
43+
"consumables_used": [
44+
"Jupiter"
45+
],
46+
"rerolls": 0,
47+
"money_spent": 9,
48+
"hands_played": {
49+
"High Card": 8
50+
},
51+
"successful_calls": 15,
52+
"error_calls": [],
53+
"failed_calls": [],
54+
"avg_input_tokens": 2551.8,
55+
"avg_output_tokens": 1917.0666666666666,
56+
"avg_reasoning_tokens": 0.0,
57+
"avg_total_tokens": 4468.866666666667,
58+
"avg_response_time_ms": 2319.375,
59+
"total_input_tokens": 38277,
60+
"total_output_tokens": 28756,
61+
"total_reasoning_tokens": 0,
62+
"total_tokens": 67033,
63+
"total_response_time_ms": 37110
64+
},
65+
{
66+
"run_won": false,
67+
"completed": false,
68+
"ante_reached": 1,
69+
"final_round": 1,
70+
"jokers_bought": [],
71+
"jokers_sold": [],
72+
"consumables_used": [],
73+
"rerolls": 0,
74+
"money_spent": 0,
75+
"hands_played": {
76+
"High Card": 1
77+
},
78+
"successful_calls": 3,
79+
"error_calls": [],
80+
"failed_calls": [],
81+
"avg_input_tokens": 2412.3333333333335,
82+
"avg_output_tokens": 2757.6666666666665,
83+
"avg_reasoning_tokens": 0.0,
84+
"avg_total_tokens": 5170.0,
85+
"avg_response_time_ms": 3312.3333333333335,
86+
"total_input_tokens": 7237,
87+
"total_output_tokens": 8273,
88+
"total_reasoning_tokens": 0,
89+
"total_tokens": 15510,
90+
"total_response_time_ms": 9937
91+
},
92+
{
93+
"run_won": false,
94+
"completed": true,
95+
"ante_reached": 1,
96+
"final_round": 2,
97+
"jokers_bought": [],
98+
"jokers_sold": [],
99+
"consumables_used": [
100+
"Jupiter"
101+
],
102+
"rerolls": 0,
103+
"money_spent": 9,
104+
"hands_played": {
105+
"High Card": 8
106+
},
107+
"successful_calls": 15,
108+
"error_calls": [],
109+
"failed_calls": [],
110+
"avg_input_tokens": 2471.6,
111+
"avg_output_tokens": 2180.5333333333333,
112+
"avg_reasoning_tokens": 0.0,
113+
"avg_total_tokens": 4652.133333333333,
114+
"avg_response_time_ms": 2495.75,
115+
"total_input_tokens": 37074,
116+
"total_output_tokens": 32708,
117+
"total_reasoning_tokens": 0,
118+
"total_tokens": 69782,
119+
"total_response_time_ms": 39932
120+
}
121+
]
122+
}

0 commit comments

Comments
 (0)