Skip to content

Commit d83ac7b

Browse files
committed
feat: add time column to table and headers card
1 parent 9a43930 commit d83ac7b

2 files changed

Lines changed: 59 additions & 20 deletions

File tree

index.html

Lines changed: 55 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,18 @@ <h1 class="text-2xl sm:text-4xl font-bold text-white mb-4">
132132
</svg> (s)
133133
</div>
134134
</th>
135+
<th
136+
class="px-2 py-3 text-center text-xs sm:text-sm font-medium text-gray-300 w-32 sm:w-36 hidden xl:table-cell">
137+
<div class="flex items-center justify-center gap-1">Cost per 1K <svg class="w-4 h-4"
138+
xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16" fill="currentColor" className="size-4">
139+
<path fill-rule="evenodd" class="w-4 h-4"
140+
d="M15 4.5A3.5 3.5 0 0 1 11.435 8c-.99-.019-2.093.132-2.7.913l-4.13 5.31a2.015 2.015 0 1 1-2.827-2.828l5.309-4.13c.78-.607.932-1.71.914-2.7L8 4.5a3.5 3.5 0 0 1 4.477-3.362c.325.094.39.497.15.736L10.6 3.902a.48.48 0 0 0-.033.653c.271.314.565.608.879.879a.48.48 0 0 0 .653-.033l2.027-2.027c.239-.24.642-.175.736.15.09.31.138.637.138.976ZM3.75 13a.75.75 0 1 1-1.5 0 .75.75 0 0 1 1.5 0Z"
141+
clip-rule="evenodd" />
142+
<path
143+
d="M11.5 9.5c.313 0 .62-.029.917-.084l1.962 1.962a2.121 2.121 0 0 1-3 3l-2.81-2.81 1.35-1.734c.05-.064.158-.158.426-.233.278-.078.639-.11 1.062-.102l.093.001ZM5 4l1.446 1.445a2.256 2.256 0 0 1-.047.21c-.075.268-.169.377-.233.427l-.61.474L4 5H2.655a.25.25 0 0 1-.224-.139l-1.35-2.7a.25.25 0 0 1 .047-.289l.745-.745a.25.25 0 0 1 .289-.047l2.7 1.35A.25.25 0 0 1 5 2.654V4Z" />
144+
</svg> ($)
145+
</div>
146+
</th>
135147
</tr>
136148
</thead>
137149
<tbody id="leaderboard-body" class="divide-y divide-gray-700">
@@ -142,27 +154,51 @@ <h1 class="text-2xl sm:text-4xl font-bold text-white mb-4">
142154
</div>
143155

144156

145-
<!-- Methodology -->
157+
<!-- Leaderboard Columns Explained -->
146158
<div class="bg-gray-800 rounded-lg p-4 sm:p-6 lg:p-8 border border-gray-700">
147-
<h2 class="text-xl sm:text-2xl font-bold mb-4">Methodology</h2>
148-
<div class="grid sm:grid-cols-1 md:grid-cols-2 gap-4 sm:gap-6 text-gray-300">
149-
<div>
150-
<h3 class="font-semibold text-white mb-2 text-base sm:text-lg">Game Parameters</h3>
151-
<ul class="space-y-1 text-sm sm:text-base">
152-
<li>• Balatro v1.0.1n</li>
153-
<li>• 100 consistent seeds</li>
154-
<li>• Standard deck configuration</li>
155-
<li>• No modifications or cheats</li>
156-
</ul>
159+
<h2 class="text-xl sm:text-2xl font-bold mb-4">Leaderboard Columns Explained</h2>
160+
<div class="grid sm:grid-cols-1 lg:grid-cols-2 gap-4 sm:gap-6 text-gray-300">
161+
<div class="space-y-4">
162+
<div>
163+
<h3 class="font-semibold text-white mb-2 text-base sm:text-lg">Ranking & Model Info</h3>
164+
<div class="space-y-2 text-sm sm:text-base">
165+
<div><strong class="text-blue-400">Rank:</strong> Sorted by average rounds reached (highest first)</div>
166+
<div><strong class="text-blue-400">Model:</strong> OpenRouter model names with creator-suggested
167+
parameters for open-source models, defaults for closed-source</div>
168+
<div><strong class="text-blue-400">Provider:</strong> Model developer/organization</div>
169+
</div>
170+
</div>
171+
<div>
172+
<h3 class="font-semibold text-white mb-2 text-base sm:text-lg">Performance Metrics</h3>
173+
<div class="space-y-2 text-sm sm:text-base">
174+
<div><strong class="text-blue-400">Rounds:</strong> Average rounds reached across multiple games</div>
175+
<div><strong class="text-blue-400">Completed:</strong> Success rate for round completion (rounds stopped
176+
at 3 consecutive errors/failed calls)</div>
177+
</div>
178+
</div>
157179
</div>
158-
<div>
159-
<h3 class="font-semibold text-white mb-2 text-base sm:text-lg">Evaluation Criteria</h3>
160-
<ul class="space-y-1 text-sm sm:text-base">
161-
<li>• Average ante reached</li>
162-
<li>• Win rate across seeds</li>
163-
<li>• Token efficiency</li>
164-
<li>• Decision quality scoring</li>
165-
</ul>
180+
<div class="space-y-4">
181+
<div>
182+
<h3 class="font-semibold text-white mb-2 text-base sm:text-lg">Call Quality Breakdown</h3>
183+
<div class="space-y-2 text-sm sm:text-base">
184+
<div><strong class="text-green-400">Success:</strong> Valid tool calls that execute successfully in game
185+
state</div>
186+
<div><strong class="text-red-400">Error:</strong> Invalid responses (no tool call, JSON errors, plain text
187+
responses)</div>
188+
<div><strong class="text-yellow-400">Failed:</strong> Valid tool calls that can't execute (e.g.,
189+
discarding 6 cards when limit is 5)</div>
190+
</div>
191+
</div>
192+
<div>
193+
<h3 class="font-semibold text-white mb-2 text-base sm:text-lg">Efficiency Metrics</h3>
194+
<div class="space-y-2 text-sm sm:text-base">
195+
<div><strong class="text-blue-400">Input/Output Tokens:</strong> Token counts per tool call (including
196+
reasoning and tool call tokens)</div>
197+
<div><strong class="text-blue-400">Time per Call:</strong> Average LLM response generation time</div>
198+
<div><strong class="text-blue-400">Cost per 1K Calls:</strong> Pricing based on cheapest OpenRouter option
199+
(scaled to 1K for visual comparison)</div>
200+
</div>
201+
</div>
166202
</div>
167203
</div>
168204
</div>

js/app.js

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,9 +116,12 @@ function renderLeaderboard(entries, metadata) {
116116
<td class="px-2 py-3 hidden lg:table-cell text-center">
117117
<div class="font-medium text-white text-xs sm:text-sm">${(stats.avg_total_response_time_ms / totalToolCalls / 1000).toFixed(2)}<span class="font-normal text-gray-400"> ± ${(stdStats.std_avg_response_time_ms / 1000).toFixed(2)}</span></div>
118118
</td>
119+
<td class="px-2 py-3 hidden xl:table-cell text-center">
120+
<div class="font-medium text-white text-xs sm:text-sm">${(stats.avg_avg_cost_per_call * 1000).toFixed(2)}<span class="font-normal text-gray-400"> ± ${(stdStats.std_avg_cost_per_call * 1000).toFixed(2)}</span></div>
121+
</td>
119122
</tr>
120123
<tr id="stats-row-${index}" class="hidden">
121-
<td colspan="10" class="px-2 py-4 bg-gray-800">
124+
<td colspan="11" class="px-2 py-4 bg-gray-800">
122125
<!-- Details will be loaded dynamically -->
123126
</td>
124127
</tr>

0 commit comments

Comments
 (0)