<!-- Core document metadata: encoding, responsive viewport, title, description, canonical URL. -->
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>BalatroBench</title>
<meta name="description" content="Leaderboard benchmarking LLMs playing Balatro: rounds, tool-call reliability, cost, and speed.">
<link rel="canonical" href="https://coder.github.io/balatrobench/">

<!-- Open Graph tags used for link previews. -->
<meta property="og:type" content="website">
<meta property="og:title" content="BalatroBench">
<meta property="og:description" content="Leaderboard benchmarking LLMs playing Balatro: rounds, tool-call reliability, cost, and speed.">
<meta property="og:url" content="https://coder.github.io/balatrobench/">
<meta property="og:image" content="https://github.com/user-attachments/assets/33a52df0-a7f8-4784-a640-0212267ed199">

<!-- Twitter card tags; they mirror the Open Graph title, description and image above. -->
<meta name="twitter:card" content="summary_large_image">
<meta name="twitter:title" content="BalatroBench">
<meta name="twitter:description" content="Leaderboard benchmarking LLMs playing Balatro: rounds, tool-call reliability, cost, and speed.">
<meta name="twitter:image" content="https://github.com/user-attachments/assets/33a52df0-a7f8-4784-a640-0212267ed199">

<!--
  Preconnect hints. A preconnect is only reused when its CORS mode matches the
  request that follows. The two script CDNs below are loaded by classic
  <script src> tags without a crossorigin attribute (no-CORS requests), so their
  hints must not carry crossorigin either.
  NOTE(review): balatrobench.b-cdn.net keeps crossorigin on the assumption that it
  is fetched in CORS mode by scripts not shown here - confirm; drop the attribute
  if that origin is only used for plain <img>/<script> loads.
-->
<link rel="preconnect" href="https://balatrobench.b-cdn.net" crossorigin>
<link rel="preconnect" href="https://cdn.jsdelivr.net">
<link rel="preconnect" href="https://cdn.tailwindcss.com">

<!-- Third-party runtime scripts, then the local configuration script. -->
<script src="https://cdn.tailwindcss.com"></script>
<!-- NOTE(review): the Chart.js URL carries no version; consider pinning one and adding an integrity hash. -->
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
<script src="config.js"></script>
1428</ head >
1529
1630< body class ="bg-zinc-50 dark:bg-zinc-800 min-h-screen pb-8 ">
31+ < a href ="#main " class ="sr-only focus:not-sr-only focus:absolute focus:top-2 focus:left-2 bg-white dark:bg-zinc-700 text-zinc-800 dark:text-zinc-100 px-3 py-2 rounded "> Skip to main content</ a >
1732 < nav
1833 class ="w-full bg-white dark:bg-zinc-800 border-b border-zinc-200 dark:border-zinc-600 shadow-sm dark:shadow-md dark:ring-1 dark:ring-white/5 mb-8 ">
1934 < div class ="container mx-auto px-2 xl:px-4 ">
2742 </ div >
2843 </ div >
2944 </ nav >
30- < div class ="container mx-auto px-4 ">
45+ < div id =" main " class ="container mx-auto px-4 " role =" main ">
3146 <!-- Performance Bar Chart -->
3247 < div
3348 class ="bg-white dark:bg-zinc-800 rounded-lg shadow-lg dark:shadow-2xl dark:ring-1 dark:ring-white/5 mb-8 p-6 hidden lg:block ">
@@ -40,23 +55,24 @@ <h2 class="text-3xl font-semibold text-zinc-800 dark:text-zinc-100 mb-4 text-cen
4055 < div
4156 class ="overflow-x-auto bg-white dark:bg-zinc-800 rounded-lg shadow-lg dark:shadow-2xl dark:ring-1 dark:ring-white/5 ">
4257 < table id ="leaderboard " class ="w-full table-auto ">
58+ < caption class ="sr-only "> Model leaderboard with rounds, tool-call reliability, tokens, time, and cost</ caption >
4359 < thead class ="bg-zinc-100 dark:bg-zinc-700 ">
4460 < tr class ="border-b border-zinc-200 dark:border-zinc-600 ">
4561 <!-- Rank Section -->
46- < th
62+ < th scope =" col "
4763 class ="px-4 py-3 text-left text-sm font-semibold text-zinc-700 dark:text-zinc-300 border-r-2 border-zinc-300 dark:border-zinc-600 ">
4864 #</ th >
4965 <!-- Model Section -->
50- < th class ="px-4 py-3 text-center text-sm font-semibold text-zinc-700 dark:text-zinc-300 "> Model</ th >
51- < th
66+ < th scope =" col " class ="px-4 py-3 text-center text-sm font-semibold text-zinc-700 dark:text-zinc-300 "> Model</ th >
67+ < th scope =" col "
5268 class ="px-4 py-3 text-center text-sm font-semibold text-zinc-700 dark:text-zinc-300 hidden xl:table-cell ">
5369 Vendor</ th >
5470 <!-- Stats Section -->
55- < th
71+ < th scope =" col " aria-label =" Average final round "
5672 class ="px-4 py-3 text-center text-sm font-semibold text-zinc-700 dark:text-zinc-300 border-l-2 border-zinc-300 dark:border-zinc-600 ">
5773 Round</ th >
5874 <!-- Tool Calls Section -->
59- < th
75+ < th scope =" col " aria-label =" Valid tool calls executable in state "
6076 class ="px-4 py-3 text-center text-sm font-semibold text-zinc-700 dark:text-zinc-300 hidden sm:table-cell border-l-2 border-zinc-300 dark:border-zinc-600 ">
6177 < div class ="flex items-center justify-center space-x-1 ">
6278 < svg xmlns ="http://www.w3.org/2000/svg " viewBox ="0 0 16 16 " fill ="currentColor "
@@ -74,7 +90,7 @@ <h2 class="text-3xl font-semibold text-zinc-800 dark:text-zinc-100 mb-4 text-cen
7490 </ svg >
7591 </ div >
7692 </ th >
77- < th
93+ < th scope =" col " aria-label =" Valid tool calls not executable in state "
7894 class ="px-4 py-3 text-center text-sm font-semibold text-zinc-700 dark:text-zinc-300 hidden sm:table-cell ">
7995 < div class ="flex items-center justify-center space-x-1 ">
8096 < svg xmlns ="http://www.w3.org/2000/svg " viewBox ="0 0 16 16 " fill ="currentColor "
@@ -92,7 +108,7 @@ <h2 class="text-3xl font-semibold text-zinc-800 dark:text-zinc-100 mb-4 text-cen
92108 </ svg >
93109 </ div >
94110 </ th >
95- < th
111+ < th scope =" col " aria-label =" Responses without valid tool calls "
96112 class ="px-4 py-3 text-center text-sm font-semibold text-zinc-700 dark:text-zinc-300 hidden sm:table-cell ">
97113 < div class ="flex items-center justify-center space-x-1 ">
98114 < svg xmlns ="http://www.w3.org/2000/svg " viewBox ="0 0 16 16 " fill ="currentColor "
@@ -111,7 +127,7 @@ <h2 class="text-3xl font-semibold text-zinc-800 dark:text-zinc-100 mb-4 text-cen
111127 </ div >
112128 </ th >
113129 <!-- Tokens Section -->
114- < th
130+ < th scope =" col " aria-label =" Average input tokens "
115131 class ="px-4 py-3 text-center text-sm font-semibold text-zinc-700 dark:text-zinc-300 hidden lg:table-cell border-l-2 border-zinc-300 dark:border-zinc-600 ">
116132 < div class ="flex items-center justify-center space-x-1 ">
117133 < span > In</ span >
@@ -132,7 +148,7 @@ <h2 class="text-3xl font-semibold text-zinc-800 dark:text-zinc-100 mb-4 text-cen
132148 </ svg >
133149 </ div >
134150 </ th >
135- < th
151+ < th scope =" col " aria-label =" Average output tokens "
136152 class ="px-4 py-3 text-center text-sm font-semibold text-zinc-700 dark:text-zinc-300 hidden lg:table-cell ">
137153 < div class ="flex items-center justify-center space-x-1 ">
138154 < span > Out</ span >
@@ -154,7 +170,7 @@ <h2 class="text-3xl font-semibold text-zinc-800 dark:text-zinc-100 mb-4 text-cen
154170 </ div >
155171 </ th >
156172 <!-- Performance Section -->
157- < th
173+ < th scope =" col " aria-label =" Average time per tool call "
158174 class ="px-4 py-3 text-center text-sm font-semibold text-zinc-700 dark:text-zinc-300 hidden md:table-cell border-l-2 border-zinc-300 dark:border-zinc-600 ">
159175 < div class ="flex items-center justify-center space-x-1 ">
160176 < svg xmlns ="http://www.w3.org/2000/svg " viewBox ="0 0 16 16 " fill ="currentColor " class ="w-4 h-4 ">
@@ -173,7 +189,7 @@ <h2 class="text-3xl font-semibold text-zinc-800 dark:text-zinc-100 mb-4 text-cen
173189 < span class ="text-xs "> [s]</ span >
174190 </ div >
175191 </ th >
176- < th
192+ < th scope =" col " aria-label =" Average cost per tool call "
177193 class ="px-4 py-3 text-center text-sm font-semibold text-zinc-700 dark:text-zinc-300 hidden md:table-cell ">
178194 < div class ="flex items-center justify-center space-x-1 ">
179195 < svg xmlns ="http://www.w3.org/2000/svg " viewBox ="0 0 16 16 " fill ="currentColor " class ="w-4 h-4 ">
@@ -213,7 +229,7 @@ <h2 class="text-3xl font-semibold text-zinc-800 dark:text-zinc-100 mb-4 text-cen
213229 < div class ="container mx-auto px-4 ">
214230 < div class ="flex items-center justify-center space-x-2 text-zinc-400 dark:text-zinc-500 ">
215231 < span class ="text-sm "> Brought to you by</ span >
216- < a href ="https://coder.com/ " target ="_blank " class ="inline-block transition-colors hover:opacity-75 ">
232+ < a href ="https://coder.com/ " target ="_blank " rel =" noopener " class ="inline-block transition-colors hover:opacity-75 ">
<!--
  Inline logo SVG for the footer link. The XML declaration that used to precede
  this tag is dropped: it is only valid as the very first bytes of an XML document
  and is a parse error inside an HTML body. The start tag previously declared
  viewBox twice; HTML parsers keep the first occurrence and ignore the rest, so
  the effective value "0 0 1320.81 200" is the one retained here.
-->
<svg id="Layer_2" data-name="Layer 2" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 1320.81 200"
  class="h-3 fill-zinc-400 dark:fill-zinc-500">
0 commit comments