Skip to content

Commit b9a1284

Browse files
committed
feat: new separate command for benchmarking
1 parent e51b9ca commit b9a1284

2 files changed

Lines changed: 98 additions & 71 deletions

File tree

src/balatrollm/balatrobench_cli.py

Lines changed: 94 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,94 @@
1+
"""BalatroLLM Benchmark CLI module."""
2+
3+
import sys
4+
from pathlib import Path
5+
6+
from . import __version__
7+
from .benchmark import BenchmarkAnalyzer
8+
9+
10+
def main() -> None:
    """Main CLI entry point for balatrobench.

    Parses command line arguments and runs benchmark analysis for a single version.

    Exactly one of ``--models`` or ``--strategies`` is required and selects
    which ``BenchmarkAnalyzer`` analysis method is invoked. When
    ``--input-dir`` / ``--output-dir`` are omitted they default to
    ``runs/v<version>`` and ``benchmarks/<models|strategies>/v<version>``
    respectively; every path is resolved to an absolute path before use.

    After the analysis (and the optional ``--avif`` conversion step) the
    analyzer is asked to generate a manifest in the parent directory of the
    output directory.

    Exits the process with status 1 if the analysis raises; the error message
    is written to stderr so it does not mix with regular stdout output.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Analyze BalatroLLM runs and generate benchmark leaderboards",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    # Mutually exclusive group for analysis type
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument(
        "--models",
        action="store_true",
        help="Analyze by models (compare models within strategies)",
    )
    group.add_argument(
        "--strategies",
        action="store_true",
        help="Analyze by strategies (compare strategies for each model)",
    )

    parser.add_argument(
        "--input-dir",
        type=Path,
        default=None,
        help=f"Input directory with run data (default: runs/v{__version__})",
    )
    parser.add_argument(
        "--output-dir",
        type=Path,
        default=None,
        help=(
            "Output directory for benchmark results "
            f"(default: benchmarks/[models|strategies]/v{__version__})"
        ),
    )
    parser.add_argument(
        "--avif",
        action="store_true",
        help="Convert PNG screenshots to AVIF format after analysis",
    )

    args = parser.parse_args()

    # Fall back to the version-specific runs directory when none was given.
    input_dir = args.input_dir
    if input_dir is None:
        input_dir = Path(f"runs/v{__version__}")
    args.input_dir = input_dir.resolve()

    # The default output location depends on which analysis was requested.
    # The group is required and mutually exclusive, so "not --models" means
    # "--strategies".
    output_dir = args.output_dir
    if output_dir is None:
        analysis_kind = "models" if args.models else "strategies"
        output_dir = Path(f"benchmarks/{analysis_kind}/v{__version__}")
    args.output_dir = output_dir.resolve()

    try:
        analyzer = BenchmarkAnalyzer(args.input_dir, args.output_dir)

        if args.models:
            analyzer.analyze_version_by_models(args.input_dir)
        else:  # args.strategies
            analyzer.analyze_version_by_strategies(args.input_dir)

        # Convert PNGs to AVIF if requested
        if args.avif:
            print("Converting PNG screenshots to AVIF format...")
            analyzer.convert_pngs_to_avif(args.output_dir)

        # Generate manifest.json in the base benchmark directory
        manifest_base_dir = args.output_dir.parent
        analyzer.generate_manifest(manifest_base_dir, __version__)

        print(f"Benchmark analysis complete. Results saved to {args.output_dir}")
        print(f"Manifest updated at {manifest_base_dir / 'manifest.json'}")
    except FileNotFoundError as e:
        # Diagnostics go to stderr so piped/captured stdout stays clean.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # Top-level boundary: report any other failure and exit non-zero.
        print(f"Benchmark analysis failed: {e}", file=sys.stderr)
        sys.exit(1)
Lines changed: 4 additions & 71 deletions
Original file line number | Diff line number | Diff line change
@@ -6,30 +6,20 @@
66
import sys
77
from pathlib import Path
88

9-
from .benchmark import BenchmarkAnalyzer
109
from .bot import LLMBot, setup_logging
1110
from .config import Config
1211

1312

1413
def main() -> None:
1514
"""Main CLI entry point for balatrollm.
1615
17-
Parses command line arguments and executes the appropriate command.
18-
Supports both the main balatrollm command and benchmark subcommand.
16+
Parses command line arguments and executes the game.
1917
"""
2018
parser = _create_argument_parser()
2119
args = parser.parse_args()
2220

2321
setup_logging()
24-
25-
match args.command:
26-
case "benchmark":
27-
cmd_benchmark(args)
28-
case None:
29-
asyncio.run(cmd_balatrollm(args))
30-
case _:
31-
print(f"Unknown command: {args.command}")
32-
sys.exit(1)
22+
asyncio.run(cmd_balatrollm(args))
3323

3424

3525
async def cmd_balatrollm(args) -> None:
@@ -178,52 +168,19 @@ async def _port_worker(
178168
await asyncio.sleep(1)
179169

180170

181-
def cmd_benchmark(args) -> None:
182-
"""Run the benchmark command.
183-
184-
Analyzes run data and generates comprehensive leaderboards.
185-
186-
Args:
187-
args: Parsed command line arguments containing runs_dir, output_dir, and avif flag.
188-
189-
Raises:
190-
FileNotFoundError: If the runs directory doesn't exist.
191-
Exception: If benchmark analysis fails for any other reason.
192-
"""
193-
try:
194-
analyzer = BenchmarkAnalyzer(args.runs_dir, args.output_dir)
195-
analyzer.analyze_all_runs()
196-
197-
# Convert PNGs to AVIF if requested
198-
if args.avif:
199-
print("Converting PNG screenshots to AVIF format...")
200-
analyzer.convert_pngs_to_avif(args.output_dir)
201-
except FileNotFoundError as e:
202-
print(f"Error: {e}")
203-
sys.exit(1)
204-
except Exception as e:
205-
print(f"Benchmark analysis failed: {e}")
206-
sys.exit(1)
207-
208-
209171
def _create_argument_parser() -> argparse.ArgumentParser:
210172
"""Create and configure argument parser.
211173
212-
Sets up command line argument parsing for both the main balatrollm
213-
command and the benchmark subcommand.
174+
Sets up command line argument parsing for the balatrollm command.
214175
215176
Returns:
216-
Configured ArgumentParser instance with all commands and options.
177+
Configured ArgumentParser instance with all options.
217178
"""
218179
parser = argparse.ArgumentParser(
219180
description="LLM-powered Balatro bot",
220181
formatter_class=argparse.RawDescriptionHelpFormatter,
221182
)
222183

223-
# Add subcommands
224-
subparsers = parser.add_subparsers(dest="command", help="Available commands")
225-
226-
# Default command (play game) - no subcommand needed, just use main parser
227184
parser.add_argument(
228185
"-m",
229186
"--model",
@@ -300,28 +257,4 @@ def _create_argument_parser() -> argparse.ArgumentParser:
300257
help="Use BalatroBot's default storage paths for screenshots and game logs",
301258
)
302259

303-
# Benchmark subcommand
304-
benchmark_parser = subparsers.add_parser(
305-
"benchmark",
306-
help="Analyze runs and generate leaderboards",
307-
description="Analyze BalatroLLM runs and generate comprehensive leaderboards",
308-
)
309-
benchmark_parser.add_argument(
310-
"--runs-dir",
311-
type=lambda p: Path(p).resolve(),
312-
default=Path("runs").resolve(),
313-
help="Directory containing run data (default: runs)",
314-
)
315-
benchmark_parser.add_argument(
316-
"--output-dir",
317-
type=Path,
318-
default=Path("benchmarks"),
319-
help="Output directory for benchmark results (default: benchmarks)",
320-
)
321-
benchmark_parser.add_argument(
322-
"--avif",
323-
action="store_true",
324-
help="Convert PNG screenshots to AVIF format after analysis",
325-
)
326-
327260
return parser

0 commit comments

Comments
 (0)