Skip to content

Commit ad993aa

Browse files
committed
refactor: update balatrobench to new data format
1 parent ea92200 commit ad993aa

2 files changed

Lines changed: 115 additions & 42 deletions

File tree

community.html

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,11 @@
3434
<th
3535
class="px-4 py-3 text-left text-sm font-semibold text-zinc-700 dark:text-zinc-300 border-r-2 border-zinc-300 dark:border-zinc-600">
3636
#</th>
37-
<!-- Author Section -->
38-
<th class="px-4 py-3 text-center text-sm font-semibold text-zinc-700 dark:text-zinc-300">Author</th>
37+
<!-- Strategy Section -->
38+
<th class="px-4 py-3 text-center text-sm font-semibold text-zinc-700 dark:text-zinc-300">Strategy</th>
3939
<th
4040
class="px-4 py-3 text-center text-sm font-semibold text-zinc-700 dark:text-zinc-300 hidden lg:table-cell">
41-
Strategy</th>
41+
Author</th>
4242
<!-- Stats Section -->
4343
<th
4444
class="px-4 py-3 text-center text-sm font-semibold text-zinc-700 dark:text-zinc-300 border-l-2 border-zinc-300 dark:border-zinc-600">
@@ -200,6 +200,13 @@
200200
</svg>
201201
</a>
202202
</div>
203+
204+
<!-- Version selector: autogenerated from manifest -->
205+
<div class="flex justify-center mt-8">
206+
<select id="version-select"
207+
class="text-sm bg-transparent text-zinc-400 dark:text-zinc-500 border border-zinc-300 dark:border-zinc-600 rounded-md px-3 py-2 focus:outline-none focus:ring-2 focus:ring-blue-500 focus:text-zinc-700 dark:focus:text-zinc-200 appearance-none text-center font-mono">
208+
</select>
209+
</div>
203210
</div>
204211

205212
<footer class="mt-12 py-6">

script.js

Lines changed: 105 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -39,11 +39,46 @@ function getCurrentTheme() {
3939
// Global state for main leaderboard chart
4040
let performanceChart = null;
4141
let DEFAULT_BENCHMARK_VERSION = null; // Must be set from manifest
42+
let PAGE_TYPE = null; // 'main' or 'community'
4243

43-
// Load details for a specific model
44-
async function loadDetails(vendor, model, basePath = 'data/benchmarks/v0.12.0/default') {
44+
// Detect which page we're on
45+
function detectPageType() {
46+
const pageTitle = document.title;
47+
if (pageTitle.includes('Community')) {
48+
return 'community';
49+
}
50+
return 'main';
51+
}
52+
53+
// Get data paths based on page type and version
54+
function getDataPaths(version) {
55+
if (PAGE_TYPE === 'community') {
56+
return {
57+
manifestPath: 'data/benchmarks/strategies/manifest.json',
58+
leaderboardPath: `data/benchmarks/strategies/${version}/openai/gpt-oss-20b/leaderboard.json`,
59+
detailBasePath: `data/benchmarks/strategies/${version}/openai/gpt-oss-20b/default`
60+
};
61+
} else {
62+
return {
63+
manifestPath: 'data/benchmarks/models/manifest.json',
64+
leaderboardPath: `data/benchmarks/models/${version}/default/leaderboard.json`,
65+
detailBasePath: `data/benchmarks/models/${version}/default`
66+
};
67+
}
68+
}
69+
70+
// Load details for a specific model/strategy
71+
async function loadDetails(vendor, model, basePath, strategy = null) {
4572
try {
46-
const response = await fetch(`${basePath}/${vendor}/${model}.json`);
73+
let detailPath;
74+
if (PAGE_TYPE === 'community' && strategy) {
75+
// For strategies: load from strategy/stats.json
76+
detailPath = `${basePath}/${strategy}/stats.json`;
77+
} else {
78+
// For models: load from vendor/model.json
79+
detailPath = `${basePath}/${vendor}/${model}.json`;
80+
}
81+
const response = await fetch(detailPath);
4782
const data = await response.json();
4883
return data;
4984
} catch (error) {
@@ -382,7 +417,7 @@ function createProviderPieChart(data, canvasId) {
382417
}
383418

384419
// Create inline detail row after clicked row
385-
function createDetailRow(stats, modelName, data, vendor, model, basePath) {
420+
function createDetailRow(stats, modelName, data, vendor, model, basePath, strategy = null) {
386421
const detailRow = document.createElement('tr');
387422
detailRow.className = 'detail-row bg-zinc-50 dark:bg-zinc-800';
388423

@@ -692,16 +727,28 @@ function createDetailRow(stats, modelName, data, vendor, model, basePath) {
692727
if (!runId) return;
693728
// Simple availability check before opening the viewer
694729
const reqId = '00001';
695-
const probeUrl =
696-
`${basePath}/${vendor}/${model}/${runId}/request-${reqId}/tool_call.json`;
730+
let probeUrl;
731+
if (PAGE_TYPE === 'community' && strategy) {
732+
// For strategies: probe at basePath/model/runId/request-*.json
733+
probeUrl =
734+
`${basePath}/${model}/${runId}/request-${reqId}/tool_call.json`;
735+
} else {
736+
// For models: probe at basePath/vendor/model/runId/request-*.json
737+
probeUrl =
738+
`${basePath}/${vendor}/${model}/${runId}/request-${reqId}/tool_call.json`;
739+
}
697740
const exists = await fetchJsonSafe(probeUrl);
698-
if (!exists) return; // Data missing: do not open the card
741+
if (!exists) {
742+
console.warn('Run data not found at:', probeUrl);
743+
return; // Data missing: do not open the card
744+
}
699745

700746
openRunViewer({
701-
basePath,
747+
basePath: basePath,
702748
vendor,
703749
model,
704750
runId,
751+
strategy,
705752
startIndex: 1
706753
});
707754
});
@@ -713,10 +760,10 @@ function createDetailRow(stats, modelName, data, vendor, model, basePath) {
713760
}
714761

715762
// Load and display leaderboard data
716-
async function loadLeaderboard(basePath = 'data/benchmarks/v0.12.0/default', displayMode = 'model',
763+
async function loadLeaderboard(leaderboardPath, detailBasePath, displayMode = 'model',
717764
showChart = true) {
718765
try {
719-
const response = await fetch(`${basePath}/leaderboard.json`);
766+
const response = await fetch(leaderboardPath);
720767
const data = await response.json();
721768

722769
const tableBody = document.getElementById('leaderboard-body');
@@ -736,18 +783,17 @@ async function loadLeaderboard(basePath = 'data/benchmarks/v0.12.0/default', dis
736783
// Parse data based on display mode
737784
let primaryValue, secondaryValue, vendor, model;
738785

786+
// Parse model and vendor from config.model (format: "vendor/model")
787+
const modelParts = entry.config.model.split('/');
788+
vendor = modelParts[0];
789+
model = modelParts[1];
790+
739791
if (displayMode === 'community') {
740-
primaryValue = entry.config.author || 'Unknown Author';
741-
secondaryValue = entry.config.strategy || 'Unknown Strategy';
742-
// For detail loading, we still need vendor/model from config.model
743-
const modelParts = entry.config.model.split('/');
744-
vendor = modelParts[0];
745-
model = modelParts[1];
792+
// For strategies: show strategy name as primary, author as secondary
793+
primaryValue = entry.strategy.name;
794+
secondaryValue = entry.strategy.author;
746795
} else {
747-
// Parse model and vendor from config.model (format: "vendor/model")
748-
const modelParts = entry.config.model.split('/');
749-
vendor = modelParts[0];
750-
model = modelParts[1];
796+
// For models: show model name as primary, vendor as secondary
751797
primaryValue = model;
752798
secondaryValue = vendor;
753799
}
@@ -766,14 +812,16 @@ async function loadLeaderboard(basePath = 'data/benchmarks/v0.12.0/default', dis
766812
document.querySelectorAll('.detail-row').forEach(dr => dr.remove());
767813

768814
// Load and show details
769-
const data = await loadDetails(vendor, model, basePath);
815+
const strategy = displayMode === 'community' ? entry.config.strategy : null;
816+
const data = await loadDetails(vendor, model, detailBasePath, strategy);
770817
const detailRow = createDetailRow(
771818
data.stats,
772819
displayMode === 'community' ? primaryValue : model,
773820
data,
774821
vendor,
775822
model,
776-
basePath
823+
detailBasePath,
824+
strategy
777825
);
778826
row.insertAdjacentElement('afterend', detailRow);
779827
}
@@ -859,14 +907,11 @@ async function loadLeaderboard(basePath = 'data/benchmarks/v0.12.0/default', dis
859907

860908
// Load data when page loads
861909
document.addEventListener('DOMContentLoaded', () => {
862-
// Detect if this is the community page
863-
const isCommunityPage = document.title.includes('Community');
910+
// Detect page type
911+
PAGE_TYPE = detectPageType();
864912

865-
if (isCommunityPage) {
866-
loadLeaderboard('data/community/v0.8.1/default', 'community', false);
867-
} else {
868-
initBenchmarkVersionSelector();
869-
}
913+
// Initialize version selector for both pages
914+
initBenchmarkVersionSelector();
870915
});
871916

872917
// ===== Run Viewer (modal) =====
@@ -897,13 +942,15 @@ function openRunViewer({
897942
vendor,
898943
model,
899944
runId,
945+
strategy = null,
900946
startIndex = 1
901947
}) {
902948
const state = {
903949
basePath,
904950
vendor,
905951
model,
906952
runId,
953+
strategy,
907954
index: startIndex,
908955
overlay: null,
909956
keyHandler: null
@@ -963,11 +1010,19 @@ async function loadAndRenderRequest(state) {
9631010
vendor,
9641011
model,
9651012
runId,
1013+
strategy,
9661014
index,
9671015
overlay
9681016
} = state;
9691017
const reqId = formatRequestId(index);
970-
const runBase = `${basePath}/${vendor}/${model}/${runId}/request-${reqId}`;
1018+
let runBase;
1019+
if (PAGE_TYPE === 'community' && strategy) {
1020+
// For strategies: construct path as basePath/model/runId/request-*
1021+
runBase = `${basePath}/${model}/${runId}/request-${reqId}`;
1022+
} else {
1023+
// For models: construct path as basePath/vendor/model/runId/request-*
1024+
runBase = `${basePath}/${vendor}/${model}/${runId}/request-${reqId}`;
1025+
}
9711026

9721027
overlay.querySelector('#run-title').textContent =
9731028
`${vendor}/${model}${runId} • request-${reqId}`;
@@ -1018,8 +1073,16 @@ async function navigateRun(state, delta) {
10181073
const old = state.index;
10191074
state.index = Math.max(1, old + delta);
10201075
const reqId = formatRequestId(state.index);
1021-
const probe =
1022-
`${state.basePath}/${state.vendor}/${state.model}/${state.runId}/request-${reqId}/tool_call.json`;
1076+
let probe;
1077+
if (PAGE_TYPE === 'community' && state.strategy) {
1078+
// For strategies: construct path as basePath/model/runId/request-*
1079+
probe =
1080+
`${state.basePath}/${state.model}/${state.runId}/request-${reqId}/tool_call.json`;
1081+
} else {
1082+
// For models: construct path as basePath/vendor/model/runId/request-*
1083+
probe =
1084+
`${state.basePath}/${state.vendor}/${state.model}/${state.runId}/request-${reqId}/tool_call.json`;
1085+
}
10231086
const ok = await fetchJsonSafe(probe);
10241087
if (!ok) {
10251088
state.index = old;
@@ -1035,8 +1098,8 @@ function closeRunViewer(state) {
10351098
}
10361099

10371100
// Load version manifest and populate version selector
1038-
async function loadVersionManifest() {
1039-
const response = await fetch('data/benchmarks/manifest.json');
1101+
async function loadVersionManifest(manifestPath) {
1102+
const response = await fetch(manifestPath);
10401103
if (!response.ok) {
10411104
throw new Error(`Failed to load version manifest: ${response.status} ${response.statusText}`);
10421105
}
@@ -1057,30 +1120,33 @@ async function loadVersionManifest() {
10571120
// Populate version selector with options from manifest
10581121
async function initBenchmarkVersionSelector() {
10591122
const sel = document.getElementById('version-select');
1060-
const tableEl = document.getElementById('leaderboard-body');
10611123
if (!sel) {
10621124
throw new Error('Version selector element not found');
10631125
}
10641126

1127+
const paths = getDataPaths(DEFAULT_BENCHMARK_VERSION);
1128+
10651129
// Load versions from manifest (will throw if latest not found or manifest missing)
1066-
const versions = await loadVersionManifest();
1130+
const versions = await loadVersionManifest(paths.manifestPath);
10671131

10681132
// Populate select options
10691133
versions.forEach(versionObj => {
10701134
const option = document.createElement('option');
10711135
option.value = versionObj.version;
1072-
option.textContent = versionObj.label;
1136+
option.textContent = versionObj.label || versionObj.version;
10731137
if (versionObj.latest) {
10741138
option.selected = true;
10751139
}
10761140
sel.appendChild(option);
10771141
});
10781142

10791143
const applyVersion = (version) => {
1080-
const basePath = `data/benchmarks/${version}/default`;
1144+
const paths = getDataPaths(version);
10811145
const tbody = document.getElementById('leaderboard-body');
10821146
if (tbody) tbody.innerHTML = '';
1083-
loadLeaderboard(basePath, 'model', true);
1147+
const displayMode = PAGE_TYPE === 'community' ? 'community' : 'model';
1148+
const showChart = PAGE_TYPE === 'main';
1149+
loadLeaderboard(paths.leaderboardPath, paths.detailBasePath, displayMode, showChart);
10841150
};
10851151

10861152
// Initial load from current selection

0 commit comments

Comments
 (0)