Skip to content

Commit be5e381

Browse files
committed
feat: report averaged stats in leaderboard
1 parent dcf0c56 commit be5e381

1 file changed

Lines changed: 12 additions & 14 deletions

File tree

src/balatrollm/benchmark.py

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -39,15 +39,15 @@ class AveragedStats:
3939
avg_final_round: float
4040
avg_ante_reached: float
4141

42-
avg_jokers_bought: float # count the elements in the lists
43-
avg_jokers_sold: float # count the elements in the lists
44-
avg_consumables_used: float # count the elements in the lists
42+
avg_jokers_bought: float
43+
avg_jokers_sold: float
44+
avg_consumables_used: float
4545
avg_rerolls: float
4646
avg_money_spent: float
4747

4848
avg_successful_calls: float
49-
avg_error_calls: float # count the elements in the lists
50-
avg_failed_calls: float # count the elements in the lists
49+
avg_error_calls: float
50+
avg_failed_calls: float
5151

5252
avg_total_input_tokens: float
5353
avg_total_output_tokens: float
@@ -361,21 +361,19 @@ def generate_leaderboard(self, output_dir: Path = Path("benchmarks")) -> None:
361361
leaderboard_entries = []
362362
for rank, data in enumerate(sorted_data, 1):
363363
# Convert config dataclass to dict for JSON serialization
364+
averaged_stats_dict = asdict(data.averaged_stats)
365+
# Round all float values in averaged_stats
366+
for key, value in averaged_stats_dict.items():
367+
if isinstance(value, float):
368+
averaged_stats_dict[key] = round(value, 2)
369+
364370
entry = {
365371
"rank": rank,
366372
"config": asdict(data.config),
367373
"total_runs": data.total_runs,
368374
"completed_runs": data.completed_runs,
369375
"won_runs": data.won_runs,
370-
"avg_final_round": round(
371-
data.averaged_stats.avg_final_round, 2
372-
),
373-
"avg_ante_reached": round(
374-
data.averaged_stats.avg_ante_reached, 2
375-
),
376-
"avg_total_tokens": round(
377-
data.averaged_stats.avg_total_tokens, 2
378-
),
376+
"averaged_stats": averaged_stats_dict,
379377
}
380378
leaderboard_entries.append(entry)
381379

0 commit comments

Comments
 (0)