@@ -32,8 +32,12 @@ function getCurrentTheme() {
3232 }
3333}
3434
35+ // Global state for main leaderboard chart: the Chart instance assigned in createPerformanceBarChart (null until one exists), kept so the previous chart can be destroyed before a new one is created
36+ let performanceChart = null ;
37+ const DEFAULT_BENCHMARK_VERSION = 'v0.10.0' ; // version loaded by initBenchmarkVersionSelector when the version selector is missing or has no value
38+
3539// Load details for a specific model
36- async function loadDetails ( vendor , model , basePath = 'data/benchmarks/v0.8.1 /default' ) {
40+ async function loadDetails ( vendor , model , basePath = 'data/benchmarks/v0.10.0 /default' ) {
3741 try {
3842 const response = await fetch ( `${ basePath } /${ vendor } /${ model } .json` ) ;
3943 const data = await response . json ( ) ;
@@ -162,7 +166,10 @@ function createPerformanceBarChart(entries) {
162166 const maxWithError = Math . max ( ...avgRounds . map ( ( avg , i ) => avg + stdDevs [ i ] ) ) ;
163167 const yAxisMax = Math . ceil ( maxWithError + 0.5 ) ; // Add padding
164168
165- new Chart ( ctx , {
169+ if ( performanceChart ) {
170+ performanceChart . destroy ( ) ;
171+ }
172+ performanceChart = new Chart ( ctx , {
166173 type : 'bar' ,
167174 data : {
168175 labels : models ,
@@ -337,7 +344,7 @@ function createProviderPieChart(data, canvasId) {
337344}
338345
339346// Create inline detail row after clicked row
340- function createDetailRow ( stats , modelName , data ) {
347+ function createDetailRow ( stats , modelName , data , vendor , model , basePath ) {
341348 const detailRow = document . createElement ( 'tr' ) ;
342349 detailRow . className = 'detail-row bg-zinc-50 dark:bg-zinc-800' ;
343350
@@ -626,17 +633,55 @@ function createDetailRow(stats, modelName, data) {
626633 createProviderPieChart ( data , pieChartCanvasId ) ;
627634 } , 0 ) ;
628635
636+ // Make each per-run row clickable to open Run Viewer (if runs mapping exists)
637+ const perRunTable = detailRow . querySelector ( 'table.table-auto' ) ;
638+ const tbody = perRunTable ? perRunTable . querySelector ( 'tbody' ) : null ;
639+ if ( tbody ) {
640+ const rows = Array . from ( tbody . querySelectorAll ( 'tr' ) ) ;
641+ const runs = Array . isArray ( data . runs ) ? data . runs : null ;
642+ if ( runs && runs . length > 0 && rows . length > 0 ) {
643+ const count = Math . min ( runs . length , rows . length ) ;
644+ for ( let i = 0 ; i < count ; i ++ ) {
645+ const tr = rows [ i ] ;
646+ tr . classList . add ( 'cursor-pointer' ) ;
647+ tr . title = 'Open run viewer' ;
648+ tr . setAttribute ( 'role' , 'button' ) ;
649+ tr . addEventListener ( 'click' , async ( e ) => {
650+ e . stopPropagation ( ) ;
651+ const runId = runs [ i ] ;
652+ if ( ! runId ) return ;
653+ // Simple availability check before opening the viewer
654+ const reqId = '00001' ;
655+ const probeUrl =
656+ `${ basePath } /${ vendor } /${ model } /${ runId } /request-${ reqId } /tool_call.json` ;
657+ const exists = await fetchJsonSafe ( probeUrl ) ;
658+ if ( ! exists ) return ; // Data missing: do not open the card
659+
660+ openRunViewer ( {
661+ basePath,
662+ vendor,
663+ model,
664+ runId,
665+ startIndex : 1
666+ } ) ;
667+ } ) ;
668+ }
669+ }
670+ }
671+
629672 return detailRow ;
630673}
631674
632675// Load and display leaderboard data
633- async function loadLeaderboard ( basePath = 'data/benchmarks/v0.8.1 /default' , displayMode = 'model' ,
676+ async function loadLeaderboard ( basePath = 'data/benchmarks/v0.10.0 /default' , displayMode = 'model' ,
634677 showChart = true ) {
635678 try {
636679 const response = await fetch ( `${ basePath } /leaderboard.json` ) ;
637680 const data = await response . json ( ) ;
638681
639682 const tableBody = document . getElementById ( 'leaderboard-body' ) ;
683+ // Clear previous rows if reloading
684+ tableBody . innerHTML = '' ;
640685
641686 // Create the performance bar chart (only on main leaderboard page)
642687 if ( showChart ) {
@@ -685,7 +730,10 @@ async function loadLeaderboard(basePath = 'data/benchmarks/v0.8.1/default', disp
685730 const detailRow = createDetailRow (
686731 data . stats ,
687732 displayMode === 'community' ? primaryValue : model ,
688- data
733+ data ,
734+ vendor ,
735+ model ,
736+ basePath
689737 ) ;
690738 row . insertAdjacentElement ( 'afterend' , detailRow ) ;
691739 }
@@ -777,6 +825,201 @@ document.addEventListener('DOMContentLoaded', () => {
777825 if ( isCommunityPage ) {
778826 loadLeaderboard ( 'data/community/v0.8.1/default' , 'community' , false ) ;
779827 } else {
780- loadLeaderboard ( ) ;
828+ initBenchmarkVersionSelector ( ) ;
781829 }
782830} ) ;
831+
832+ // ===== Run Viewer (modal) =====
/**
 * Build the zero-padded request identifier used in `request-<id>` paths.
 *
 * @param {number|string} n - Request index.
 * @returns {string} `n` converted to a string and left-padded with "0" to a
 *   minimum of five characters (longer values are returned unpadded).
 */
function formatRequestId(n) {
  const minimumWidth = 5;
  const digits = String(n);
  return digits.padStart(minimumWidth, '0');
}
/**
 * Fetch a URL and return its body as text without ever throwing.
 *
 * @param {string} url - Resource to request.
 * @returns {Promise<string|null>} The response text, or `null` when the
 *   response status is not OK or the request / body read fails.
 */
async function fetchTextSafe(url) {
  try {
    const response = await fetch(url);
    return response.ok ? await response.text() : null;
  } catch {
    // Best-effort helper: any failure is reported to the caller as "no data".
    return null;
  }
}
/**
 * Fetch a URL and return its body parsed as JSON without ever throwing.
 *
 * @param {string} url - Resource to request.
 * @returns {Promise<*>} The parsed JSON value, or `null` when the response
 *   status is not OK, the request fails, or the body is not valid JSON.
 */
async function fetchJsonSafe(url) {
  try {
    const response = await fetch(url);
    return response.ok ? await response.json() : null;
  } catch {
    // Best-effort helper: any failure is reported to the caller as "no data".
    return null;
  }
}
854+
/**
 * Open the run-viewer modal for one run and show its first request.
 *
 * Builds a full-screen overlay (screenshot, reasoning and tool-call panes plus
 * previous/next buttons), appends it to `document.body`, disables page
 * scrolling, wires the close / navigation controls and keyboard shortcuts, and
 * then starts loading the request at `startIndex`.
 *
 * @param {object} options
 * @param {string} options.basePath - Base path of the benchmark data.
 * @param {string} options.vendor - Vendor path segment.
 * @param {string} options.model - Model path segment.
 * @param {string} options.runId - Run path segment.
 * @param {number} [options.startIndex=1] - Request index shown first.
 * @returns {void}
 */
function openRunViewer({
  basePath,
  vendor,
  model,
  runId,
  startIndex = 1
}) {
  const viewerMarkup = `
    <div class="relative w-full max-w-7xl max-h-[95vh] bg-white dark:bg-zinc-800 rounded-lg shadow-2xl ring-1 ring-white/10 overflow-hidden">
      <div class="flex items-center justify-between px-4 py-2 border-b border-zinc-200 dark:border-zinc-700">
        <div class="text-sm text-zinc-600 dark:text-zinc-300 font-mono truncate" id="run-title"></div>
        <button id="run-close" class="p-1 rounded hover:bg-zinc-100 dark:hover:bg-zinc-700" aria-label="Close">✕</button>
      </div>
      <div class="p-3 space-y-3">
        <div class="flex flex-col lg:flex-row gap-3">
          <div class="lg:w-1/2 w-full bg-zinc-50 dark:bg-zinc-900 rounded-md overflow-hidden flex items-center justify-center h-[45vh] lg:h-[45vh] p-2">
            <img id="run-screenshot" class="max-h-full max-w-full object-contain" alt="Screenshot" />
          </div>
          <div class="lg:w-1/2 w-full flex flex-col">
            <pre id="run-reasoning" class="h-[45vh] lg:h-[45vh] bg-zinc-50 dark:bg-zinc-900 rounded-md p-3 text-xs text-zinc-800 dark:text-zinc-200 whitespace-pre-wrap overflow-auto"></pre>
          </div>
        </div>
        <div>
          <div id="run-tool" class="bg-zinc-50 dark:bg-zinc-900 rounded-md p-3 text-xs text-zinc-800 dark:text-zinc-200 overflow-auto h-[25vh] lg:h-[25vh]"></div>
        </div>
        <div class="flex items-center justify-center gap-4 py-1">
          <button id="run-prev" class="px-3 py-1.5 rounded bg-white/80 dark:bg-zinc-700/80 hover:bg-white dark:hover:bg-zinc-700 border border-zinc-200 dark:border-zinc-600" title="Previous (← or h)" aria-label="Previous">◀</button>
          <button id="run-next" class="px-3 py-1.5 rounded bg-white/80 dark:bg-zinc-700/80 hover:bg-white dark:hover:bg-zinc-700 border border-zinc-200 dark:border-zinc-600" title="Next (→ or l)" aria-label="Next">▶</button>
        </div>
      </div>
    </div>`;

  // Mutable viewer state shared with navigateRun / loadAndRenderRequest / closeRunViewer.
  const state = {
    basePath,
    vendor,
    model,
    runId,
    index: startIndex,
    overlay: null,
    keyHandler: null
  };

  const backdrop = document.createElement('div');
  backdrop.className = 'fixed inset-0 z-50 bg-black/70 flex items-center justify-center p-2 sm:p-4';
  backdrop.innerHTML = viewerMarkup;

  document.body.appendChild(backdrop);
  // Lock page scrolling while the modal is open (closeRunViewer resets it).
  document.body.style.overflow = 'hidden';
  state.overlay = backdrop;

  const close = () => closeRunViewer(state);
  const showPrevious = () => navigateRun(state, -1);
  const showNext = () => navigateRun(state, +1);

  backdrop.querySelector('#run-close').addEventListener('click', close);
  backdrop.addEventListener('click', (event) => {
    // Only a click on the dimmed backdrop itself closes the viewer,
    // not clicks that originate inside the dialog.
    if (event.target === backdrop) {
      close();
    }
  });
  backdrop.querySelector('#run-prev').addEventListener('click', showPrevious);
  backdrop.querySelector('#run-next').addEventListener('click', showNext);

  // Keyboard shortcuts: Escape closes, arrows or h / l step between requests.
  state.keyHandler = (event) => {
    switch (event.key) {
      case 'Escape':
        close();
        break;
      case 'ArrowLeft':
      case 'h':
        showPrevious();
        break;
      case 'ArrowRight':
      case 'l':
        showNext();
        break;
      default:
        break;
    }
  };
  window.addEventListener('keydown', state.keyHandler);

  loadAndRenderRequest(state);
}
918+
/**
 * Fetch and display one request of a run inside the run-viewer overlay.
 *
 * Reads `reasoning.md` and `tool_call.json` from
 * `<basePath>/<vendor>/<model>/<runId>/request-<id>/`, points the screenshot
 * at `screenshot.avif` (falling back once to `screenshot.png` if the image
 * fails to load) and fills the title, reasoning and tool-call panes.
 *
 * Calls may overlap when the user navigates while a previous request is still
 * loading. Each call takes a ticket in `state.renderToken`; a call that is no
 * longer the most recent one discards its result instead of overwriting the
 * panes with stale content.
 *
 * @param {object} state - Viewer state. Reads `basePath`, `vendor`, `model`,
 *   `runId`, `index` and `overlay`; writes `renderToken`.
 * @returns {Promise<void>}
 */
async function loadAndRenderRequest(state) {
  const {
    basePath,
    vendor,
    model,
    runId,
    index,
    overlay
  } = state;
  const reqId = formatRequestId(index);
  const runBase = `${basePath}/${vendor}/${model}/${runId}/request-${reqId}`;

  // Ticket identifying this call; compared again after the await below.
  const token = (state.renderToken || 0) + 1;
  state.renderToken = token;

  overlay.querySelector('#run-title').textContent =
    `${vendor}/${model} • ${runId} • request-${reqId}`;

  const [reasoning, toolcall] = await Promise.all([
    fetchTextSafe(`${runBase}/reasoning.md`),
    fetchJsonSafe(`${runBase}/tool_call.json`)
  ]);

  // A newer call started while this one was waiting: drop the stale result.
  if (state.renderToken !== token) return;

  const imgEl = overlay.querySelector('#run-screenshot');
  // Install the fallback before assigning src so an error can never be missed;
  // it clears itself so a missing PNG does not retry forever.
  imgEl.onerror = () => {
    imgEl.onerror = null;
    imgEl.src = `${runBase}/screenshot.png`;
  };
  imgEl.src = `${runBase}/screenshot.avif`;

  overlay.querySelector('#run-reasoning').textContent = reasoning || '(No reasoning.md)';

  const toolDiv = overlay.querySelector('#run-tool');
  if (!toolcall) {
    toolDiv.textContent = '(No tool_call.json)';
    return;
  }

  // tool_call.json may hold a single call or an array; only the first is shown.
  const tc = Array.isArray(toolcall) ? toolcall[0] : toolcall;
  const name = tc && tc.function && tc.function.name ? tc.function.name : '(unknown)';
  const argsRaw = tc && tc.function ? tc.function.arguments : '';

  // Arguments can arrive as a JSON-encoded string or as an object.
  let argsPretty = '';
  if (typeof argsRaw === 'string') {
    try {
      argsPretty = JSON.stringify(JSON.parse(argsRaw), null, 2);
    } catch {
      // Not valid JSON: show the raw string unchanged.
      argsPretty = argsRaw;
    }
  } else if (argsRaw && typeof argsRaw === 'object') {
    try {
      argsPretty = JSON.stringify(argsRaw, null, 2);
    } catch {
      argsPretty = String(argsRaw);
    }
  }

  // Static skeleton only; fetched values are inserted via textContent below.
  toolDiv.innerHTML = `
    <div class="space-y-2">
      <div><span class="font-semibold">Function:</span> <span id="fn-name" class="font-mono"></span></div>
      <div>
        <div class="font-semibold mb-1">Arguments:</div>
        <pre id="fn-args" class="whitespace-pre-wrap"></pre>
      </div>
    </div>`;
  toolDiv.querySelector('#fn-name').textContent = name;
  toolDiv.querySelector('#fn-args').textContent = argsPretty || '';
}
981+
/**
 * Move the run viewer `delta` requests forward or backward, if that request exists.
 *
 * The target index is clamped to a minimum of 1. Existence is checked by
 * probing the target's `tool_call.json`; `state.index` is only updated once
 * the probe succeeds, so a failed probe leaves the state untouched.
 *
 * While one navigation is in flight (`state.navigating`), further calls are
 * ignored. This keeps rapid key presses or clicks from starting from an
 * unconfirmed index and from having a failed probe roll back a later,
 * successful navigation.
 *
 * @param {object} state - Viewer state. Reads `basePath`, `vendor`, `model`,
 *   `runId` and `index`; writes `index` and `navigating`.
 * @param {number} delta - Offset to apply to the current request index.
 * @returns {Promise<void>} Settles after the target request has been rendered,
 *   or immediately when nothing was done.
 */
async function navigateRun(state, delta) {
  if (state.navigating) return;

  const current = state.index;
  const target = Math.max(1, current + delta);
  // Already at the first request (or zero offset): nothing to fetch or redraw.
  if (target === current) return;

  state.navigating = true;
  try {
    const reqId = formatRequestId(target);
    const probe =
      `${state.basePath}/${state.vendor}/${state.model}/${state.runId}/request-${reqId}/tool_call.json`;
    const found = await fetchJsonSafe(probe);
    if (!found) return; // Target request has no data: stay where we are.

    state.index = target;
    await loadAndRenderRequest(state);
  } finally {
    state.navigating = false;
  }
}
995+
/**
 * Tear down the run-viewer modal.
 *
 * Unregisters the viewer's `keydown` handler from `window`, clears the inline
 * `overflow` style on `document.body`, and removes the overlay element.
 *
 * @param {object} state - Viewer state holding `keyHandler` and `overlay`.
 * @returns {void}
 */
function closeRunViewer(state) {
  const { keyHandler, overlay } = state;

  window.removeEventListener('keydown', keyHandler);
  // Drop the inline scroll lock set while the modal was open.
  document.body.style.overflow = '';
  overlay.remove();
}
1001+
/**
 * Load the main leaderboard and keep it in sync with the version selector.
 *
 * When a `#version-select` element exists, the leaderboard for its current
 * value (or DEFAULT_BENCHMARK_VERSION if the value is empty) is loaded right
 * away and reloaded on every `change` event. Without the selector, the default
 * version is loaded, but only if the `#leaderboard-body` element is present.
 *
 * @returns {void}
 */
function initBenchmarkVersionSelector() {
  const versionSelect = document.getElementById('version-select');
  const leaderboardBody = document.getElementById('leaderboard-body');

  // Clear any existing rows, then load the leaderboard for the given version.
  const showVersion = (version) => {
    const rows = document.getElementById('leaderboard-body');
    if (rows) {
      rows.innerHTML = '';
    }
    loadLeaderboard(`data/benchmarks/${version}/default`, 'model', true);
  };

  if (versionSelect) {
    // Initial load from the current selection, then reload whenever it changes.
    showVersion(versionSelect.value || DEFAULT_BENCHMARK_VERSION);
    versionSelect.addEventListener('change', () => showVersion(versionSelect.value));
    return;
  }

  // No selector on this page: fall back to the default version if a table exists.
  if (leaderboardBody) {
    loadLeaderboard(`data/benchmarks/${DEFAULT_BENCHMARK_VERSION}/default`, 'model', true);
  }
}
0 commit comments