Skip to content

Commit f77ce8c

Browse files
committed
feat: add run viewer and version selector
1 parent 37680f3 commit f77ce8c

2 files changed

Lines changed: 263 additions & 6 deletions

File tree

index.html

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,20 @@ <h2 class="text-3xl font-semibold text-zinc-800 dark:text-zinc-100 mb-4 text-cen
197197
</tbody>
198198
</table>
199199
</div>
200+
201+
<!-- Version selector: manual fixed list -->
202+
<div class="flex justify-center mt-8">
203+
<div
204+
class="inline-flex items-center space-x-3 bg-white dark:bg-zinc-800 rounded-lg shadow-lg dark:shadow-2xl dark:ring-1 dark:ring-white/5 px-4 py-2">
205+
<select id="version-select"
206+
class="text-sm bg-zinc-100 dark:bg-zinc-700 text-zinc-800 dark:text-zinc-100 rounded-md border border-zinc-300 dark:border-zinc-600 px-3 py-2 focus:outline-none focus:ring-2 focus:ring-blue-500">
207+
<option value="v0.10.1">v0.10.1</option>
208+
<option value="v0.10.0" selected>v0.10.0</option>
209+
<option value="v0.9.0">v0.9.0</option>
210+
<option value="v0.8.2">v0.8.2</option>
211+
</select>
212+
</div>
213+
</div>
200214
</div>
201215

202216
<footer class="mt-12 py-6">

script.js

Lines changed: 249 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,12 @@ function getCurrentTheme() {
3232
}
3333
}
3434

35+
// Global state for main leaderboard chart
36+
let performanceChart = null;
37+
const DEFAULT_BENCHMARK_VERSION = 'v0.10.0';
38+
3539
// Load details for a specific model
36-
async function loadDetails(vendor, model, basePath = 'data/benchmarks/v0.8.1/default') {
40+
async function loadDetails(vendor, model, basePath = 'data/benchmarks/v0.10.0/default') {
3741
try {
3842
const response = await fetch(`${basePath}/${vendor}/${model}.json`);
3943
const data = await response.json();
@@ -162,7 +166,10 @@ function createPerformanceBarChart(entries) {
162166
const maxWithError = Math.max(...avgRounds.map((avg, i) => avg + stdDevs[i]));
163167
const yAxisMax = Math.ceil(maxWithError + 0.5); // Add padding
164168

165-
new Chart(ctx, {
169+
if (performanceChart) {
170+
performanceChart.destroy();
171+
}
172+
performanceChart = new Chart(ctx, {
166173
type: 'bar',
167174
data: {
168175
labels: models,
@@ -337,7 +344,7 @@ function createProviderPieChart(data, canvasId) {
337344
}
338345

339346
// Create inline detail row after clicked row
340-
function createDetailRow(stats, modelName, data) {
347+
function createDetailRow(stats, modelName, data, vendor, model, basePath) {
341348
const detailRow = document.createElement('tr');
342349
detailRow.className = 'detail-row bg-zinc-50 dark:bg-zinc-800';
343350

@@ -626,17 +633,55 @@ function createDetailRow(stats, modelName, data) {
626633
createProviderPieChart(data, pieChartCanvasId);
627634
}, 0);
628635

636+
// Make each per-run row clickable to open Run Viewer (if runs mapping exists)
637+
const perRunTable = detailRow.querySelector('table.table-auto');
638+
const tbody = perRunTable ? perRunTable.querySelector('tbody') : null;
639+
if (tbody) {
640+
const rows = Array.from(tbody.querySelectorAll('tr'));
641+
const runs = Array.isArray(data.runs) ? data.runs : null;
642+
if (runs && runs.length > 0 && rows.length > 0) {
643+
const count = Math.min(runs.length, rows.length);
644+
for (let i = 0; i < count; i++) {
645+
const tr = rows[i];
646+
tr.classList.add('cursor-pointer');
647+
tr.title = 'Open run viewer';
648+
tr.setAttribute('role', 'button');
649+
tr.addEventListener('click', async (e) => {
650+
e.stopPropagation();
651+
const runId = runs[i];
652+
if (!runId) return;
653+
// Simple availability check before opening the viewer
654+
const reqId = '00001';
655+
const probeUrl =
656+
`${basePath}/${vendor}/${model}/${runId}/request-${reqId}/tool_call.json`;
657+
const exists = await fetchJsonSafe(probeUrl);
658+
if (!exists) return; // Data missing: do not open the card
659+
660+
openRunViewer({
661+
basePath,
662+
vendor,
663+
model,
664+
runId,
665+
startIndex: 1
666+
});
667+
});
668+
}
669+
}
670+
}
671+
629672
return detailRow;
630673
}
631674

632675
// Load and display leaderboard data
633-
async function loadLeaderboard(basePath = 'data/benchmarks/v0.8.1/default', displayMode = 'model',
676+
async function loadLeaderboard(basePath = 'data/benchmarks/v0.10.0/default', displayMode = 'model',
634677
showChart = true) {
635678
try {
636679
const response = await fetch(`${basePath}/leaderboard.json`);
637680
const data = await response.json();
638681

639682
const tableBody = document.getElementById('leaderboard-body');
683+
// Clear previous rows if reloading
684+
tableBody.innerHTML = '';
640685

641686
// Create the performance bar chart (only on main leaderboard page)
642687
if (showChart) {
@@ -685,7 +730,10 @@ async function loadLeaderboard(basePath = 'data/benchmarks/v0.8.1/default', disp
685730
const detailRow = createDetailRow(
686731
data.stats,
687732
displayMode === 'community' ? primaryValue : model,
688-
data
733+
data,
734+
vendor,
735+
model,
736+
basePath
689737
);
690738
row.insertAdjacentElement('afterend', detailRow);
691739
}
@@ -777,6 +825,201 @@ document.addEventListener('DOMContentLoaded', () => {
777825
if (isCommunityPage) {
778826
loadLeaderboard('data/community/v0.8.1/default', 'community', false);
779827
} else {
780-
loadLeaderboard();
828+
initBenchmarkVersionSelector();
781829
}
782830
});
831+
832+
// ===== Run Viewer (modal) =====
833+
/**
 * Format a request index as the zero-padded id used in the
 * `request-<id>` path segment of run artifacts (e.g. 1 -> "00001").
 *
 * @param {number|string} n - Request index.
 * @param {number} [width=5] - Minimum number of characters; shorter ids are
 *     left-padded with "0". Longer ids are returned unchanged.
 * @returns {string} The padded id.
 */
function formatRequestId(n, width = 5) {
    return String(n).padStart(width, '0');
}
836+
/**
 * Fetch a URL and return its body as text without ever throwing.
 *
 * @param {string} url - Resource to fetch.
 * @returns {Promise<string|null>} The response text, or null when the
 *     response status is not ok or the request/read fails.
 */
async function fetchTextSafe(url) {
    try {
        const response = await fetch(url);
        if (!response.ok) return null;
        return await response.text();
    } catch (err) {
        // Best-effort by design: callers treat null as "not available".
        // Log at debug level so real network failures remain diagnosable.
        console.debug(`fetchTextSafe: could not load ${url}`, err);
        return null;
    }
}
845+
/**
 * Fetch a URL and return its body parsed as JSON without ever throwing.
 *
 * @param {string} url - Resource to fetch.
 * @returns {Promise<*|null>} The parsed JSON value, or null when the response
 *     status is not ok, the request fails, or the body is not valid JSON.
 */
async function fetchJsonSafe(url) {
    try {
        const response = await fetch(url);
        if (!response.ok) return null;
        return await response.json();
    } catch (err) {
        // Best-effort by design: callers treat null as "not available".
        // Log at debug level so network failures and malformed JSON remain
        // diagnosable instead of vanishing silently.
        console.debug(`fetchJsonSafe: could not load ${url}`, err);
        return null;
    }
}
854+
855+
/**
 * Open the modal run viewer for one benchmark run and render its first
 * request.
 *
 * Builds a full-screen overlay, appends it to the document body, disables
 * page scrolling while it is open, and wires up close / previous / next
 * controls (buttons, backdrop click, and keyboard shortcuts).
 *
 * @param {Object} options
 * @param {string} options.basePath - Base path of the benchmark data set.
 * @param {string} options.vendor - Vendor path segment.
 * @param {string} options.model - Model path segment.
 * @param {string} options.runId - Run identifier path segment.
 * @param {number} [options.startIndex=1] - Request index to show first.
 * @returns {void}
 */
function openRunViewer({
    basePath,
    vendor,
    model,
    runId,
    startIndex = 1
}) {
    // Only one viewer at a time: the markup below uses fixed element ids and
    // each viewer installs a global keydown listener, so stacking a second
    // overlay (e.g. after a rapid double click) would duplicate both.
    if (document.querySelector('[data-run-viewer]')) return;

    const state = {
        basePath,
        vendor,
        model,
        runId,
        index: startIndex,
        overlay: null,
        keyHandler: null
    };
    const overlay = document.createElement('div');
    overlay.className = 'fixed inset-0 z-50 bg-black/70 flex items-center justify-center p-2 sm:p-4';
    overlay.setAttribute('data-run-viewer', '');
    overlay.innerHTML = `
        <div role="dialog" aria-modal="true" aria-labelledby="run-title" class="relative w-full max-w-7xl max-h-[95vh] bg-white dark:bg-zinc-800 rounded-lg shadow-2xl ring-1 ring-white/10 overflow-hidden">
            <div class="flex items-center justify-between px-4 py-2 border-b border-zinc-200 dark:border-zinc-700">
                <div class="text-sm text-zinc-600 dark:text-zinc-300 font-mono truncate" id="run-title"></div>
                <button id="run-close" class="p-1 rounded hover:bg-zinc-100 dark:hover:bg-zinc-700" aria-label="Close">✕</button>
            </div>
            <div class="p-3 space-y-3">
                <div class="flex flex-col lg:flex-row gap-3">
                    <div class="lg:w-1/2 w-full bg-zinc-50 dark:bg-zinc-900 rounded-md overflow-hidden flex items-center justify-center h-[45vh] lg:h-[45vh] p-2">
                        <img id="run-screenshot" class="max-h-full max-w-full object-contain" alt="Screenshot" />
                    </div>
                    <div class="lg:w-1/2 w-full flex flex-col">
                        <pre id="run-reasoning" class="h-[45vh] lg:h-[45vh] bg-zinc-50 dark:bg-zinc-900 rounded-md p-3 text-xs text-zinc-800 dark:text-zinc-200 whitespace-pre-wrap overflow-auto"></pre>
                    </div>
                </div>
                <div>
                    <div id="run-tool" class="bg-zinc-50 dark:bg-zinc-900 rounded-md p-3 text-xs text-zinc-800 dark:text-zinc-200 overflow-auto h-[25vh] lg:h-[25vh]"></div>
                </div>
                <div class="flex items-center justify-center gap-4 py-1">
                    <button id="run-prev" class="px-3 py-1.5 rounded bg-white/80 dark:bg-zinc-700/80 hover:bg-white dark:hover:bg-zinc-700 border border-zinc-200 dark:border-zinc-600" title="Previous (← or h)" aria-label="Previous">◀</button>
                    <button id="run-next" class="px-3 py-1.5 rounded bg-white/80 dark:bg-zinc-700/80 hover:bg-white dark:hover:bg-zinc-700 border border-zinc-200 dark:border-zinc-600" title="Next (→ or l)" aria-label="Next">▶</button>
                </div>
            </div>
        </div>`;

    document.body.appendChild(overlay);
    // Prevent the page behind the modal from scrolling while it is open.
    document.body.style.overflow = 'hidden';
    state.overlay = overlay;

    overlay.querySelector('#run-close').addEventListener('click', () => closeRunViewer(state));
    overlay.addEventListener('click', (e) => {
        // Close only when the backdrop itself is clicked, not the dialog content.
        if (e.target === overlay) closeRunViewer(state);
    });
    overlay.querySelector('#run-prev').addEventListener('click', () => navigateRun(state, -1));
    overlay.querySelector('#run-next').addEventListener('click', () => navigateRun(state, +1));

    state.keyHandler = (e) => {
        if (e.key === 'Escape') return closeRunViewer(state);
        // Leave browser/OS chords (e.g. Ctrl+L, Cmd+L, Alt+ArrowLeft) alone.
        if (e.ctrlKey || e.metaKey || e.altKey) return;
        if (e.key === 'ArrowLeft' || e.key === 'h') return navigateRun(state, -1);
        if (e.key === 'ArrowRight' || e.key === 'l') return navigateRun(state, +1);
    };
    window.addEventListener('keydown', state.keyHandler);

    // Initial render is fire-and-forget; surface failures instead of leaving
    // an unhandled rejection.
    loadAndRenderRequest(state).catch((err) => {
        console.error('Run viewer: failed to render request', err);
    });
}
918+
919+
/**
 * Pretty-print the `arguments` payload of a tool call for display.
 *
 * @param {*} argsRaw - JSON string, object, or anything else.
 * @returns {string} Indented JSON when the value is (or parses to) JSON, the
 *     raw string when it does not parse, and "" for any other type.
 */
function formatToolCallArguments(argsRaw) {
    if (typeof argsRaw === 'string') {
        try {
            return JSON.stringify(JSON.parse(argsRaw), null, 2);
        } catch {
            // Not valid JSON: show the string exactly as received.
            return argsRaw;
        }
    }
    if (argsRaw && typeof argsRaw === 'object') {
        try {
            return JSON.stringify(argsRaw, null, 2);
        } catch {
            return String(argsRaw);
        }
    }
    return '';
}

/**
 * Load the artifacts of the request at `state.index` (reasoning.md,
 * tool_call.json, screenshot) and render them into the viewer overlay.
 *
 * @param {Object} state - Viewer state created by the run viewer; reads
 *     basePath, vendor, model, runId, index and overlay, and stores a
 *     `renderToken` counter used to discard outdated renders.
 * @returns {Promise<void>}
 */
async function loadAndRenderRequest(state) {
    const {
        basePath,
        vendor,
        model,
        runId,
        index,
        overlay
    } = state;
    const reqId = formatRequestId(index);
    const runBase = `${basePath}/${vendor}/${model}/${runId}/request-${reqId}`;

    // Each call takes a new token; after the fetches resolve, only the most
    // recently started call is allowed to paint. Without this, a slow
    // response for an older request could overwrite a newer one.
    const token = (state.renderToken || 0) + 1;
    state.renderToken = token;

    overlay.querySelector('#run-title').textContent =
        `${vendor}/${model}/${runId} • request-${reqId}`;

    const [reasoning, toolcall] = await Promise.all([
        fetchTextSafe(`${runBase}/reasoning.md`),
        fetchJsonSafe(`${runBase}/tool_call.json`)
    ]);

    if (state.renderToken !== token) return;

    const imgEl = overlay.querySelector('#run-screenshot');
    // Attach the fallback before setting src: try AVIF first, then fall back
    // to PNG exactly once (the handler removes itself to avoid a retry loop).
    imgEl.onerror = () => {
        imgEl.onerror = null;
        imgEl.src = `${runBase}/screenshot.png`;
    };
    imgEl.src = `${runBase}/screenshot.avif`;

    overlay.querySelector('#run-reasoning').textContent = reasoning || '(No reasoning.md)';

    const toolDiv = overlay.querySelector('#run-tool');
    if (!toolcall) {
        toolDiv.textContent = '(No tool_call.json)';
        return;
    }

    // tool_call.json may hold a single call or an array; show the first one.
    const tc = Array.isArray(toolcall) ? toolcall[0] : toolcall;
    const name = tc && tc.function && tc.function.name ? tc.function.name : '(unknown)';
    const argsPretty = formatToolCallArguments(tc && tc.function ? tc.function.arguments : '');

    // Static skeleton via innerHTML; the fetched values are inserted with
    // textContent so they are never interpreted as markup.
    toolDiv.innerHTML = `
        <div class="space-y-2">
            <div><span class="font-semibold">Function:</span> <span id="fn-name" class="font-mono"></span></div>
            <div>
                <div class="font-semibold mb-1">Arguments:</div>
                <pre id="fn-args" class="whitespace-pre-wrap"></pre>
            </div>
        </div>`;
    toolDiv.querySelector('#fn-name').textContent = name;
    toolDiv.querySelector('#fn-args').textContent = argsPretty || '';
}
981+
982+
/**
 * Move the run viewer to another request of the same run.
 *
 * The target request is probed first (its tool_call.json must be loadable);
 * `state.index` is only updated, and the viewer re-rendered, when the probe
 * succeeds. Indices never go below 1.
 *
 * @param {Object} state - Viewer state (basePath, vendor, model, runId, index).
 * @param {number} delta - Step to apply to the current index (e.g. -1 or +1).
 * @returns {Promise<void>}
 */
async function navigateRun(state, delta) {
    const current = state.index;
    const target = Math.max(1, current + delta);
    // Already at the first request: nothing to probe or re-render.
    if (target === current) return;

    const reqId = formatRequestId(target);
    const probe =
        `${state.basePath}/${state.vendor}/${state.model}/${state.runId}/request-${reqId}/tool_call.json`;
    const ok = await fetchJsonSafe(probe);
    if (!ok) return;
    // Another navigation completed while this probe was in flight; keep its
    // result rather than overwriting it with a now-outdated target.
    if (state.index !== current) return;

    // Commit only after a successful probe so the index never points at a
    // request that is not known to exist.
    state.index = target;
    await loadAndRenderRequest(state);
}
995+
996+
/**
 * Close the run viewer: remove its keyboard listener, re-enable page
 * scrolling, and remove the overlay from the document.
 *
 * Safe to call more than once for the same state, and tolerant of a state
 * whose overlay/keyHandler have not been assigned yet (both start as null).
 *
 * @param {Object} state - Viewer state holding `overlay` and `keyHandler`.
 * @returns {void}
 */
function closeRunViewer(state) {
    if (state.closed) return;
    state.closed = true;

    if (state.keyHandler) {
        window.removeEventListener('keydown', state.keyHandler);
    }
    // Undo the scroll lock applied while the viewer was open.
    document.body.style.overflow = '';
    if (state.overlay) {
        state.overlay.remove();
    }
}
1001+
1002+
/**
 * Wire the benchmark version <select> (#version-select) to the leaderboard.
 *
 * Loads the leaderboard for the currently selected version immediately and
 * reloads it whenever the selection changes. When the selector is absent the
 * default version is loaded instead. Nothing is loaded on pages that have no
 * leaderboard table body (#leaderboard-body).
 *
 * @returns {void}
 */
function initBenchmarkVersionSelector() {
    const sel = document.getElementById('version-select');
    const tableEl = document.getElementById('leaderboard-body');
    // No table on this page: there is nothing to render into, with or
    // without a selector.
    if (!tableEl) return;

    const applyVersion = (version) => {
        // An empty selection falls back to the default version, for the
        // initial load and for later changes alike.
        const basePath = `data/benchmarks/${version || DEFAULT_BENCHMARK_VERSION}/default`;
        // Clear right away so rows of the previous version are not shown
        // while the new data is still loading.
        tableEl.innerHTML = '';
        loadLeaderboard(basePath, 'model', true);
    };

    if (!sel) {
        // Fallback to default if selector missing
        applyVersion(DEFAULT_BENCHMARK_VERSION);
        return;
    }

    // Initial load from current selection
    applyVersion(sel.value);

    // Reload on change
    sel.addEventListener('change', () => applyVersion(sel.value));
}

0 commit comments

Comments
 (0)