@@ -13,6 +13,7 @@ PYTHON := python3
# Tool commands used by the development targets.
UV := uv
RUFF := ruff
TYPECHECK := basedpyright

# Number of benchmark runs per model for `make balatrobench`.
# `?=` keeps any value supplied by the caller, e.g. `make balatrobench RUNS=10`.
RUNS ?= 5
1617
1718help : # # Show this help message
1819 @echo " $( BLUE) BalatroLLM Development Makefile$( RESET) "
@@ -92,16 +93,16 @@ teardown: ## Stop LiteLLM server and Balatro processes
9293 @./balatro.sh --kill 2> /dev/null || true
9394 @echo " $( GREEN) ✓ Services stopped$( RESET) "
9495
# Models exercised by `make balatrobench`, in run order.
BENCH_MODELS := \
    cerebras/gpt-oss-120b \
    cerebras/qwen-3-235b-a22b-thinking-2507 \
    cerebras/qwen-3-235b-a22b-instruct-2507 \
    groq/openai/gpt-oss-20b

# The target shares its name with the ./balatrobench directory used by the
# recipe below; declaring it phony stops make from treating an existing
# directory of that name as an up-to-date target. (Repeating a .PHONY
# declaration is harmless if the file already lists it elsewhere.)
.PHONY: balatrobench
balatrobench: ## Run benchmark for all models and generate analysis (RUNS=5)
	@echo "$(YELLOW)Starting benchmark runs for all models ($(RUNS) runs each)...$(RESET)"
	@# Best effort: a failing model is reported on stderr but does not abort
	@# the remaining models or the final analysis step.
	@for model in $(BENCH_MODELS); do \
		echo "$(YELLOW)Running $$model...$(RESET)"; \
		balatrollm --runs-dir ./balatrobench --runs $(RUNS) --model "$$model" \
			|| echo "$(YELLOW)⚠ $$model exited with an error; continuing$(RESET)" >&2; \
	done
	@echo "$(YELLOW)Generating benchmark analysis...$(RESET)"
	@balatrollm benchmark --runs-dir balatrobench/runs --output-dir balatrobench/benchmarks
	@echo "$(GREEN)✓ Benchmark completed$(RESET)"
0 commit comments