feat: new separate command for benchmarking

S1M0N38 · S1M0N38 · commit b9a12841111b · 2025-10-25T01:02:50.000+02:00
diff --git a/src/balatrollm/balatrobench_cli.py b/src/balatrollm/balatrobench_cli.py
@@ -0,0 +1,94 @@
+"""BalatroLLM Benchmark CLI module."""
+
+import sys
+from pathlib import Path
+
+from . import __version__
+from .benchmark import BenchmarkAnalyzer
+
+
+def main() -> None:
+    """Run the ``balatrobench`` command-line interface.
+
+    Parses the command line, analyzes the runs of a single version either by
+    models or by strategies, optionally converts PNG screenshots to AVIF, and
+    asks the analyzer to generate the manifest in the output directory's parent.
+
+    Exits with status 1, reporting the error on stderr, if the analysis raises.
+    """
+    import argparse
+
+    parser = argparse.ArgumentParser(
+        description="Analyze BalatroLLM runs and generate benchmark leaderboards",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+
+    # Exactly one analysis mode must be chosen; argparse enforces this.
+    group = parser.add_mutually_exclusive_group(required=True)
+    group.add_argument(
+        "--models",
+        action="store_true",
+        help="Analyze by models (compare models within strategies)",
+    )
+    group.add_argument(
+        "--strategies",
+        action="store_true",
+        help="Analyze by strategies (compare strategies for each model)",
+    )
+
+    parser.add_argument(
+        "--input-dir",
+        type=Path,
+        default=None,
+        help=f"Input directory with run data (default: runs/v{__version__})",
+    )
+    parser.add_argument(
+        "--output-dir",
+        type=Path,
+        default=None,
+        help=f"Output directory for benchmark results (default: benchmarks/[models|strategies]/v{__version__})",
+    )
+    parser.add_argument(
+        "--avif",
+        action="store_true",
+        help="Convert PNG screenshots to AVIF format after analysis",
+    )
+
+    args = parser.parse_args()
+
+    # Defaults depend on the package version and the chosen mode, so they are
+    # filled in here rather than through argparse's ``default=``.
+    mode = "models" if args.models else "strategies"
+    if args.input_dir is None:
+        args.input_dir = Path(f"runs/v{__version__}")
+    if args.output_dir is None:
+        args.output_dir = Path(f"benchmarks/{mode}/v{__version__}")
+    args.input_dir = args.input_dir.resolve()
+    args.output_dir = args.output_dir.resolve()
+
+    try:
+        analyzer = BenchmarkAnalyzer(args.input_dir, args.output_dir)
+
+        if args.models:
+            analyzer.analyze_version_by_models(args.input_dir)
+        else:  # args.strategies
+            analyzer.analyze_version_by_strategies(args.input_dir)
+
+        if args.avif:
+            print("Converting PNG screenshots to AVIF format...")
+            analyzer.convert_pngs_to_avif(args.output_dir)
+
+        # The manifest is generated one level above the output directory.
+        manifest_base_dir = args.output_dir.parent
+        analyzer.generate_manifest(manifest_base_dir, __version__)
+
+        print(f"Benchmark analysis complete. Results saved to {args.output_dir}")
+        print(f"Manifest updated at {manifest_base_dir / 'manifest.json'}")
+    except FileNotFoundError as e:
+        # Diagnostics go to stderr so they never mix with redirected stdout.
+        print(f"Error: {e}", file=sys.stderr)
+        sys.exit(1)
+    except Exception as e:
+        # Top-level boundary: report any other failure and exit non-zero.
+        print(f"Benchmark analysis failed: {e}", file=sys.stderr)
+        sys.exit(1)
diff --git a/src/balatrollm/balatrollm_cli.py b/src/balatrollm/balatrollm_cli.py
@@ -6,30 +6,20 @@
 import sys
 from pathlib import Path
 
-from .benchmark import BenchmarkAnalyzer
 from .bot import LLMBot, setup_logging
 from .config import Config
 
 
 def main() -> None:
     """Main CLI entry point for balatrollm.
 
-    Parses command line arguments and executes the appropriate command.
-    Supports both the main balatrollm command and benchmark subcommand.
+    Parses command line arguments and executes the game.
     """
     parser = _create_argument_parser()
     args = parser.parse_args()
 
     setup_logging()
-
-    match args.command:
-        case "benchmark":
-            cmd_benchmark(args)
-        case None:
-            asyncio.run(cmd_balatrollm(args))
-        case _:
-            print(f"Unknown command: {args.command}")
-            sys.exit(1)
+    asyncio.run(cmd_balatrollm(args))
 
 
 async def cmd_balatrollm(args) -> None:
@@ -178,52 +168,19 @@ async def _port_worker(
         await asyncio.sleep(1)
 
 
-def cmd_benchmark(args) -> None:
-    """Run the benchmark command.
-
-    Analyzes run data and generates comprehensive leaderboards.
-
-    Args:
-        args: Parsed command line arguments containing runs_dir, output_dir, and avif flag.
-
-    Raises:
-        FileNotFoundError: If the runs directory doesn't exist.
-        Exception: If benchmark analysis fails for any other reason.
-    """
-    try:
-        analyzer = BenchmarkAnalyzer(args.runs_dir, args.output_dir)
-        analyzer.analyze_all_runs()
-
-        # Convert PNGs to AVIF if requested
-        if args.avif:
-            print("Converting PNG screenshots to AVIF format...")
-            analyzer.convert_pngs_to_avif(args.output_dir)
-    except FileNotFoundError as e:
-        print(f"Error: {e}")
-        sys.exit(1)
-    except Exception as e:
-        print(f"Benchmark analysis failed: {e}")
-        sys.exit(1)
-
-
 def _create_argument_parser() -> argparse.ArgumentParser:
     """Create and configure argument parser.
 
-    Sets up command line argument parsing for both the main balatrollm
-    command and the benchmark subcommand.
+    Sets up command line argument parsing for the balatrollm command.
 
     Returns:
-        Configured ArgumentParser instance with all commands and options.
+        Configured ArgumentParser instance with all options.
     """
     parser = argparse.ArgumentParser(
         description="LLM-powered Balatro bot",
         formatter_class=argparse.RawDescriptionHelpFormatter,
     )
 
-    # Add subcommands
-    subparsers = parser.add_subparsers(dest="command", help="Available commands")
-
-    # Default command (play game) - no subcommand needed, just use main parser
     parser.add_argument(
         "-m",
         "--model",
@@ -300,28 +257,4 @@ def _create_argument_parser() -> argparse.ArgumentParser:
         help="Use BalatroBot's default storage paths for screenshots and game logs",
     )
 
-    # Benchmark subcommand
-    benchmark_parser = subparsers.add_parser(
-        "benchmark",
-        help="Analyze runs and generate leaderboards",
-        description="Analyze BalatroLLM runs and generate comprehensive leaderboards",
-    )
-    benchmark_parser.add_argument(
-        "--runs-dir",
-        type=lambda p: Path(p).resolve(),
-        default=Path("runs").resolve(),
-        help="Directory containing run data (default: runs)",
-    )
-    benchmark_parser.add_argument(
-        "--output-dir",
-        type=Path,
-        default=Path("benchmarks"),
-        help="Output directory for benchmark results (default: benchmarks)",
-    )
-    benchmark_parser.add_argument(
-        "--avif",
-        action="store_true",
-        help="Convert PNG screenshots to AVIF format after analysis",
-    )
-
     return parser