Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/polite-tigers-drop.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"trackio": minor
---

feat:Reduce Spaces 429s: slower polling, get_logs_batch, read cache
11 changes: 10 additions & 1 deletion trackio/frontend/src/App.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@
isStaticMode,
setMediaDir,
} from "./lib/api.js";
import {
getAppPollIntervalMs,
isRateLimitCooldownActive,
isTabHidden,
} from "./lib/hostPolling.js";
import { setColorPalette } from "./lib/stores.js";
import { getPageFromPath, navigateTo, getQueryParam } from "./lib/router.js";
import Settings from "./pages/Settings.svelte";
Expand Down Expand Up @@ -159,9 +164,11 @@
if (pollTimer) clearInterval(pollTimer);
pollTimer = setInterval(async () => {
if (!realtimeEnabled) return;
if (isTabHidden()) return;
if (isRateLimitCooldownActive()) return;
await refreshRuns();
await refreshAlerts();
}, 1000);
}, getAppPollIntervalMs());
}

function applyUrlTokens() {
Expand Down Expand Up @@ -382,6 +389,7 @@
{showHeaders}
{appBootstrapReady}
{plotOrder}
{realtimeEnabled}
bind:metricColumns
/>
{:else if currentPage === "system"}
Expand All @@ -390,6 +398,7 @@
selectedRuns={selectedRunRecords}
{smoothing}
{appBootstrapReady}
{realtimeEnabled}
bind:availableDevices={availableSystemDevices}
bind:selectedDevices={selectedSystemDevices}
/>
Expand Down
49 changes: 49 additions & 0 deletions trackio/frontend/src/lib/api.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import * as staticApi from "./staticApi.js";
import { registerRateLimitHit } from "./hostPolling.js";

const BASE = window.__trackio_base || "";

Expand Down Expand Up @@ -47,6 +48,9 @@ export async function callApi(apiName, params = {}) {
headers: { "Content-Type": "application/json", ...getOauthSessionHeader() },
body: JSON.stringify(params),
});
if (resp.status === 429) {
registerRateLimitHit();
}
if (!resp.ok) {
throw new Error(`API call ${apiName} failed: ${resp.status}`);
}
Expand Down Expand Up @@ -85,6 +89,29 @@ export async function getLogs(project, run) {
return await callApi("/get_logs", params);
}

/**
 * Fetch metric logs for several runs of one project with a single call.
 *
 * In static mode each run is read through `staticApi.getLogs`, one run at a
 * time, and the results are assembled locally. Otherwise a single POST to
 * `/get_logs_batch` carries every run reference.
 *
 * @param {string} project Project the runs belong to.
 * @param {Array<{name?: string, id?: string}>} runs Runs to fetch; a missing
 *   name or id is sent as `null`.
 * @returns {Promise<Array<{run: string|null, run_id: string|null, logs: any}>>}
 *   One entry per run in static mode. The server response is returned as-is
 *   in the non-static case (presumably the same shape; callers read
 *   `run`, `run_id` and `logs` from each entry).
 */
export async function getLogsBatch(project, runs) {
  // Reduce a run record to the name/id pair used to identify it.
  const describeRun = (entry) => ({
    run: entry?.name ?? null,
    run_id: entry?.id ?? null,
  });

  const staticMode = await isStaticMode();
  if (!staticMode) {
    const runRefs = runs.map((entry) => describeRun(entry));
    return await callApi("/get_logs_batch", { project, runs: runRefs });
  }

  // Static mode: reads stay sequential, one run after another.
  const results = [];
  for (const entry of runs) {
    const logs = await staticApi.getLogs(project, entry);
    results.push({ ...describeRun(entry), logs });
  }
  return results;
}

export async function getProjectSummary(project) {
if (await isStaticMode()) return staticApi.getProjectSummary(project);
return await callApi("/get_project_summary", { project });
Expand Down Expand Up @@ -114,6 +141,28 @@ export async function getSystemLogs(project, run) {
return await callApi("/get_system_logs", params);
}

/**
 * Fetch system-metric logs for several runs of one project with a single call.
 *
 * In static mode every run is read through `staticApi.getSystemLogs`, one
 * after another; otherwise all run references are posted together to
 * `/get_system_logs_batch`.
 *
 * @param {string} project Project the runs belong to.
 * @param {Array<{name?: string, id?: string}>} runs Runs to fetch; a missing
 *   name or id is sent as `null`.
 * @returns {Promise<Array<{run: string|null, run_id: string|null, logs: any}>>}
 *   One entry per run in static mode. The server response is returned as-is
 *   in the non-static case (presumably the same shape; callers read
 *   `run`, `run_id` and `logs` from each entry).
 */
export async function getSystemLogsBatch(project, runs) {
  if (await isStaticMode()) {
    const collected = [];
    for (const item of runs) {
      // Await the logs first, then tag them with the run's name and id.
      const logs = await staticApi.getSystemLogs(project, item);
      const run = item?.name ?? null;
      const run_id = item?.id ?? null;
      collected.push({ run, run_id, logs });
    }
    return collected;
  }

  const runRefs = runs.map((item) => {
    const run = item?.name ?? null;
    const run_id = item?.id ?? null;
    return { run, run_id };
  });
  const body = { project, runs: runRefs };
  return await callApi("/get_system_logs_batch", body);
}

export async function getSnapshot(project, run, step) {
const params = { project, ...normalizeRun(run) };
if (await isStaticMode()) return staticApi.getSnapshot(project, run, step);
Expand Down
29 changes: 29 additions & 0 deletions trackio/frontend/src/lib/hostPolling.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// Default pause, in milliseconds, applied after a rate-limit (HTTP 429) hit.
const DEFAULT_RATE_LIMIT_COOLDOWN_MS = 12000;

// Hostname suffix used to recognise a Hugging Face Space deployment.
const HF_SPACE_HOST_SUFFIX = ".hf.space";

// Poll intervals in milliseconds: slower on Spaces, faster everywhere else.
const APP_POLL_MS_ON_SPACE = 2500;
const METRICS_POLL_MS_ON_SPACE = 3500;
const DEFAULT_POLL_MS = 1000;

// Epoch-millisecond deadline before which pollers should stay idle.
// 0 means no cooldown has been registered yet.
let rateLimitCooldownUntil = 0;

/**
 * Report whether the page is served from a `*.hf.space` hostname.
 * The comparison is case-insensitive. Returns false when `window` is not
 * defined (e.g. outside a browser).
 */
export function isHfSpaceHost() {
  if (typeof window === "undefined") return false;
  return (window.location.hostname || "")
    .toLowerCase()
    .endsWith(HF_SPACE_HOST_SUFFIX);
}

/**
 * Record a rate-limit hit and start (or extend) the polling cooldown.
 *
 * @param {number} [cooldownMs=12000] How long to pause from now, in
 *   milliseconds. Callers that know a server-provided delay (for example a
 *   parsed Retry-After value) can pass it here. Non-finite or non-positive
 *   values fall back to the default so the stored deadline never becomes NaN.
 *
 * A new hit never shortens a cooldown that is already in effect.
 */
export function registerRateLimitHit(
  cooldownMs = DEFAULT_RATE_LIMIT_COOLDOWN_MS,
) {
  const duration =
    Number.isFinite(cooldownMs) && cooldownMs > 0
      ? cooldownMs
      : DEFAULT_RATE_LIMIT_COOLDOWN_MS;
  const until = Date.now() + duration;
  rateLimitCooldownUntil = Math.max(rateLimitCooldownUntil, until);
}

/** Report whether the cooldown started by `registerRateLimitHit` is still running. */
export function isRateLimitCooldownActive() {
  return Date.now() < rateLimitCooldownUntil;
}

/** Poll interval in milliseconds for the app-level timer: 2500 on a Space host, 1000 otherwise. */
export function getAppPollIntervalMs() {
  return isHfSpaceHost() ? APP_POLL_MS_ON_SPACE : DEFAULT_POLL_MS;
}

/** Poll interval in milliseconds for the metrics pages: 3500 on a Space host, 1000 otherwise. */
export function getMetricsPollIntervalMs() {
  return isHfSpaceHost() ? METRICS_POLL_MS_ON_SPACE : DEFAULT_POLL_MS;
}

/**
 * Report whether the document is currently hidden (`document.hidden`).
 * Returns false when `document` is not defined.
 */
export function isTabHidden() {
  return typeof document !== "undefined" && document.hidden;
}
37 changes: 27 additions & 10 deletions trackio/frontend/src/pages/Metrics.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,12 @@
import BarPlot from "../components/BarPlot.svelte";
import Accordion from "../components/Accordion.svelte";
import LoadingTrackio from "../components/LoadingTrackio.svelte";
import { getLogs } from "../lib/api.js";
import { getLogsBatch } from "../lib/api.js";
import {
getMetricsPollIntervalMs,
isRateLimitCooldownActive,
isTabHidden,
} from "../lib/hostPolling.js";
import {
processRunData,
getMetricColumns,
Expand All @@ -27,6 +32,7 @@
showHeaders = true,
appBootstrapReady = false,
plotOrder = [],
realtimeEnabled = true,
// eslint-disable-next-line no-useless-assignment -- bindable out-prop to parent
metricColumns = $bindable([]),
} = $props();
Expand Down Expand Up @@ -168,12 +174,16 @@
return;
}

let fetched = false;
for (const run of selectedRuns) {
const needFetch = selectedRuns.filter((run) => {
const runKey = run.id ?? run.name;
if (!rawDataCache.has(runKey)) {
const logs = await getLogs(project, run);
rawDataCache.set(runKey, logs);
return !rawDataCache.has(runKey);
});
let fetched = false;
if (needFetch.length > 0) {
const batch = await getLogsBatch(project, needFetch);
for (const entry of batch) {
const runKey = entry.run_id ?? entry.run;
rawDataCache.set(runKey, entry.logs);
fetched = true;
}
}
Expand All @@ -185,12 +195,16 @@
}

async function refreshCachedRuns() {
if (!realtimeEnabled) return;
if (!project || selectedRuns.length === 0) return;
if (isTabHidden()) return;
if (isRateLimitCooldownActive()) return;

const batch = await getLogsBatch(project, selectedRuns);
let changed = false;
for (const run of selectedRuns) {
const logs = await getLogs(project, run);
const runKey = run.id ?? run.name;
for (const entry of batch) {
const runKey = entry.run_id ?? entry.run;
const logs = entry.logs;
const prev = rawDataCache.get(runKey);
if (!prev || logs.length !== prev.length) {
rawDataCache.set(runKey, logs);
Expand Down Expand Up @@ -230,7 +244,10 @@
xLim = [lo, hi];
}
}
refreshTimer = setInterval(refreshCachedRuns, 1000);
refreshTimer = setInterval(
refreshCachedRuns,
getMetricsPollIntervalMs(),
);
return () => {
if (refreshTimer) clearInterval(refreshTimer);
};
Expand Down
37 changes: 27 additions & 10 deletions trackio/frontend/src/pages/SystemMetrics.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@
import LinePlot from "../components/LinePlot.svelte";
import Accordion from "../components/Accordion.svelte";
import LoadingTrackio from "../components/LoadingTrackio.svelte";
import { getSystemLogs } from "../lib/api.js";
import { getSystemLogsBatch } from "../lib/api.js";
import {
getMetricsPollIntervalMs,
isRateLimitCooldownActive,
isTabHidden,
} from "../lib/hostPolling.js";
import {
groupMetricsByPrefix,
computeMetricPlotData,
Expand All @@ -16,6 +21,7 @@
selectedRuns = [],
smoothing = 5,
appBootstrapReady = false,
realtimeEnabled = true,
availableDevices = $bindable([]),
selectedDevices = $bindable([]),
} = $props();
Expand Down Expand Up @@ -183,12 +189,16 @@
return;
}

let fetched = false;
for (const run of selectedRuns) {
const needFetch = selectedRuns.filter((run) => {
const runKey = run.id ?? run.name;
if (!rawDataCache.has(runKey)) {
const logs = await getSystemLogs(project, run);
rawDataCache.set(runKey, logs);
return !rawDataCache.has(runKey);
});
let fetched = false;
if (needFetch.length > 0) {
const batch = await getSystemLogsBatch(project, needFetch);
for (const entry of batch) {
const runKey = entry.run_id ?? entry.run;
rawDataCache.set(runKey, entry.logs);
fetched = true;
}
}
Expand All @@ -200,12 +210,16 @@
}

async function refreshCachedRuns() {
if (!realtimeEnabled) return;
if (!project || selectedRuns.length === 0) return;
if (isTabHidden()) return;
if (isRateLimitCooldownActive()) return;

const batch = await getSystemLogsBatch(project, selectedRuns);
let changed = false;
for (const run of selectedRuns) {
const logs = await getSystemLogs(project, run);
const runKey = run.id ?? run.name;
for (const entry of batch) {
const runKey = entry.run_id ?? entry.run;
const logs = entry.logs;
const prev = rawDataCache.get(runKey);
if (!prev || logs.length !== prev.length) {
rawDataCache.set(runKey, logs);
Expand Down Expand Up @@ -264,7 +278,10 @@
});

onMount(() => {
refreshTimer = setInterval(refreshCachedRuns, 1000);
refreshTimer = setInterval(
refreshCachedRuns,
getMetricsPollIntervalMs(),
);
return () => {
if (refreshTimer) clearInterval(refreshTimer);
};
Expand Down
17 changes: 17 additions & 0 deletions trackio/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -687,6 +687,13 @@ def get_system_logs(
return SQLiteStorage.get_system_logs(project, run, run_id=run_id)


def get_system_logs_batch(
    project: str,
    runs: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """Return system logs for several runs of ``project`` in one call.

    Thin wrapper that delegates to ``SQLiteStorage.get_system_logs_batch``.

    Args:
        project: Name of the project the runs belong to.
        runs: Run descriptors; the web client sends dicts carrying ``run``
            and ``run_id`` keys.

    Returns:
        The storage layer's result, unchanged.

    NOTE(review): ``runs`` is forwarded as received, with no size or type
    checks at this layer -- confirm the storage method validates it.
    """
    batch = SQLiteStorage.get_system_logs_batch(project, runs)
    return batch
Copy link

Copilot AI Apr 17, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

get_system_logs_batch similarly accepts an arbitrary runs list from the request with no size/type validation. Consider capping the number of runs per request and validating each element is a dict containing run/run_id to avoid 500s (e.g., when a client sends non-dict items) and to reduce DoS risk.

Copilot uses AI. Check for mistakes.


def get_snapshot(
project: str,
run: str | None = None,
Expand All @@ -713,6 +720,14 @@ def get_logs(
return SQLiteStorage.get_logs(project, run, max_points=1500, run_id=run_id)


def get_logs_batch(
    project: str,
    runs: list[dict[str, Any]],
    max_points: int | None = 1500,
) -> list[dict[str, Any]]:
    """Return metric logs for several runs of ``project`` in one call.

    Thin wrapper that delegates to ``SQLiteStorage.get_logs_batch``.

    Args:
        project: Name of the project the runs belong to.
        runs: Run descriptors; the web client sends dicts carrying ``run``
            and ``run_id`` keys.
        max_points: Passed through to the storage layer as ``max_points``;
            defaults to 1500.

    Returns:
        The storage layer's result, unchanged.

    NOTE(review): ``runs`` and ``max_points`` are forwarded as received,
    with no range or size checks at this layer -- confirm the storage
    method validates them.
    """
    batch = SQLiteStorage.get_logs_batch(
        project,
        runs,
        max_points=max_points,
    )
    return batch
Copy link

Copilot AI Apr 17, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

get_logs_batch exposes max_points and runs directly from the HTTP request body, but there’s no validation/capping. A client can send max_points=0/negative (triggering server errors) or an extremely large runs list (DoS / very expensive DB reads). Add input validation here (e.g., require max_points to be a positive int within a sane upper bound, and cap len(runs)).

Copilot uses AI. Check for mistakes.


def query_project(project: str, query: str) -> dict[str, Any]:
    """Run ``query`` against ``project`` via ``SQLiteStorage.query_project``.

    Args:
        project: Name of the project to query.
        query: Query text, handed to the storage layer unchanged.

    Returns:
        The storage layer's result, unchanged.
    """
    result = SQLiteStorage.query_project(project, query)
    return result

Expand Down Expand Up @@ -806,8 +821,10 @@ def _api_registry() -> dict[str, Any]:
"get_run_summary": get_run_summary,
"get_system_metrics_for_run": get_system_metrics_for_run,
"get_system_logs": get_system_logs,
"get_system_logs_batch": get_system_logs_batch,
"get_snapshot": get_snapshot,
"get_logs": get_logs,
"get_logs_batch": get_logs_batch,
"query_project": query_project,
"get_settings": get_settings,
"get_project_files": get_project_files,
Expand Down
Loading
Loading