Skip to content

Commit 949b823

Browse files
authored
Merge branch 'main' into fix-metric-xaxis
2 parents 0b1e28b + 3e11174 commit 949b823

13 files changed

Lines changed: 548 additions & 79 deletions

.changeset/shiny-rockets-bathe.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"trackio": minor
3+
---
4+
5+
feat:Use HF buckets as backend

tests/e2e-spaces/conftest.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,10 @@ def test_space_id():
1717

1818
@pytest.fixture(scope="session", autouse=True)
1919
def _ensure_space_ready(test_space_id):
20-
space_id, dataset_id = utils.preprocess_space_and_dataset_ids(test_space_id, None)
21-
deploy.create_space_if_not_exists(space_id, None, dataset_id, None)
20+
space_id, dataset_id, bucket_id = utils.preprocess_space_and_dataset_ids(
21+
test_space_id, None
22+
)
23+
deploy.create_space_if_not_exists(space_id, None, dataset_id, bucket_id, None)
2224

2325
deadline = time.time() + 300
2426
while time.time() < deadline:

tests/e2e-spaces/test_metrics_on_spaces.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,11 @@
22
import time
33

44
import huggingface_hub
5+
import pytest
56
from gradio_client import Client
67

78
import trackio
9+
from trackio import utils
810

911

1012
def test_basic_logging(test_space_id):
@@ -92,3 +94,60 @@ def test_runs_data_persisted_after_restart(test_space_id):
9294
lr = cfg.get("learning_rate")
9395
assert lr is not None and abs(float(lr) - 0.001) < 1e-6
9496
assert cfg.get("epochs") == 10
97+
98+
99+
def test_bucket_space_preserves_logged_metrics_after_restart(test_space_id):
    """Check that metrics logged to a bucket-backed Space survive a restart.

    The test logs a single step with two metrics, forces a sync, triggers a
    Space restart by setting a Space variable to a fresh random value, waits
    for the Space to accept client connections again, and then verifies the
    run summary and the individual metric values through the Space API.

    Skipped when the resolved IDs include a dataset or no bucket.
    """
    _, resolved_dataset, resolved_bucket = utils.preprocess_space_and_dataset_ids(
        test_space_id, None, None
    )
    bucket_backed = resolved_dataset is None and resolved_bucket is not None
    if not bucket_backed:
        pytest.skip("Requires a Space deployed with bucket backend (no dataset_id).")

    run_name = "metrics_run"
    project_name = f"test_bucket_persist_{secrets.token_urlsafe(8)}"
    logged = {"loss": 0.42, "acc": 0.88}

    trackio.init(project=project_name, name=run_name, space_id=test_space_id)
    trackio.log(metrics=dict(logged))
    trackio.finish()

    Client(test_space_id).predict(api_name="/force_sync")

    # Setting a Space variable to a new value is what triggers the restart.
    restart_marker = secrets.token_urlsafe(8)
    huggingface_hub.add_space_variable(
        test_space_id, "TRACKIO_TEST_RESTART", restart_marker
    )

    # Give the restart a head start, then poll until a client can connect.
    time.sleep(10)
    give_up_at = time.time() + 300
    client = None
    while client is None and time.time() < give_up_at:
        try:
            client = Client(test_space_id, verbose=False)
        except Exception:
            time.sleep(10)
    assert client is not None, "Space did not come back up after restart"

    summary = client.predict(
        project=project_name, run=run_name, api_name="/get_run_summary"
    )
    assert summary["num_logs"] == 1
    assert "loss" in summary["metrics"] and "acc" in summary["metrics"]

    # Each metric was logged exactly once, so exactly one value must come back.
    for metric_name, expected in logged.items():
        values = client.predict(
            project=project_name,
            run=run_name,
            metric_name=metric_name,
            api_name="/get_metric_values",
        )
        assert len(values) == 1
        assert abs(float(values[0]["value"]) - expected) < 1e-6

trackio/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ sdk_version: {GRADIO_VERSION}
44
app_file: {APP_FILE}
55
tags:
66
- trackio
7-
hf_oauth: true
7+
{LINKED_HUB_METADATA}hf_oauth: true
88
hf_oauth_scopes:
99
- write-repos
1010
---

trackio/__init__.py

Lines changed: 43 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@
8585
config = {}
8686

8787
_atexit_registered = False
88+
_projects_notified_auto_log_hw: set[str] = set()
8889

8990

9091
def _cleanup_current_run():
@@ -103,6 +104,7 @@ def init(
103104
space_id: str | None = None,
104105
space_storage: SpaceStorage | None = None,
105106
dataset_id: str | None = None,
107+
bucket_id: str | None = None,
106108
config: dict | None = None,
107109
resume: str = "never",
108110
settings: Any = None,
@@ -137,13 +139,18 @@ def init(
137139
space_storage ([`~huggingface_hub.SpaceStorage`], *optional*):
138140
Choice of persistent storage tier.
139141
dataset_id (`str`, *optional*):
140-
If a `space_id` is provided, a persistent Hugging Face Dataset will be
141-
created and the metrics will be synced to it every 5 minutes. Specify a
142-
Dataset with name like `"username/datasetname"` or `"orgname/datasetname"`,
143-
or `"datasetname"` (uses currently-logged-in Hugging Face user's namespace),
144-
or `None` (uses the same name as the Space but with the `"_dataset"`
145-
suffix). If the Dataset does not exist, it will be created. If the Dataset
146-
already exists, the project will be appended to it.
142+
If provided, uses the legacy Hugging Face Dataset backend for metric
143+
persistence (metrics are exported to Parquet and committed every 5 minutes).
144+
Specify a Dataset with name like `"username/datasetname"` or
145+
`"orgname/datasetname"`, or `"datasetname"` (uses currently-logged-in
146+
Hugging Face user's namespace). Cannot be used together with `bucket_id`.
147+
bucket_id (`str`, *optional*):
148+
The ID of the Hugging Face Bucket to use for metric persistence. By default,
149+
when a `space_id` is provided and neither `dataset_id` nor `bucket_id` is
150+
explicitly set, a bucket is auto-generated from the space_id. Buckets provide
151+
S3-like storage without git overhead - the SQLite database is stored directly
152+
via `hf-mount` in the Space. Specify a Bucket with name like
153+
`"username/bucketname"` or just `"bucketname"`.
147154
config (`dict`, *optional*):
148155
A dictionary of configuration options. Provided for compatibility with
149156
`wandb.init()`.
@@ -194,11 +201,14 @@ def init(
194201
)
195202

196203
space_id = space_id or os.environ.get("TRACKIO_SPACE_ID")
204+
bucket_id = bucket_id or os.environ.get("TRACKIO_BUCKET_ID")
197205
if space_id is None and dataset_id is not None:
198206
raise ValueError("Must provide a `space_id` when `dataset_id` is provided.")
207+
if dataset_id is not None and bucket_id is not None:
208+
raise ValueError("Cannot provide both `dataset_id` and `bucket_id`.")
199209
try:
200-
space_id, dataset_id = utils.preprocess_space_and_dataset_ids(
201-
space_id, dataset_id
210+
space_id, dataset_id, bucket_id = utils.preprocess_space_and_dataset_ids(
211+
space_id, dataset_id, bucket_id
202212
)
203213
except LocalTokenNotFoundError as e:
204214
raise LocalTokenNotFoundError(
@@ -221,7 +231,13 @@ def init(
221231
):
222232
print(f"* Trackio project initialized: {project}")
223233

224-
if dataset_id is not None:
234+
if bucket_id is not None:
235+
os.environ["TRACKIO_BUCKET_ID"] = bucket_id
236+
bucket_url = f"https://huggingface.co/buckets/{bucket_id}"
237+
print(
238+
f"* Trackio metrics will be synced to Hugging Face Bucket: {bucket_url}"
239+
)
240+
elif dataset_id is not None:
225241
os.environ["TRACKIO_DATASET_ID"] = dataset_id
226242
print(
227243
f"* Trackio metrics will be synced to Hugging Face Dataset: {dataset_id}"
@@ -233,13 +249,19 @@ def init(
233249
utils.print_dashboard_instructions(project)
234250
else:
235251
deploy.create_space_if_not_exists(
236-
space_id, space_storage, dataset_id, private
252+
space_id,
253+
space_storage,
254+
dataset_id,
255+
bucket_id,
256+
private,
237257
)
238258
user_name, space_name = space_id.split("/")
239259
space_url = deploy.SPACE_HOST_URL.format(
240260
user_name=user_name, space_name=space_name
241261
)
242-
print(f"* View dashboard by going to: {space_url}")
262+
print(
263+
f"* View dashboard by going to: {deploy._BOLD_ORANGE}{space_url}{deploy._RESET}"
264+
)
243265
if utils.is_in_notebook() and embed:
244266
utils.embed_url_in_notebook(space_url)
245267
context_vars.current_project.set(project)
@@ -268,10 +290,15 @@ def init(
268290
nvidia_available = gpu_available()
269291
apple_available = apple_gpu_available()
270292
auto_log_gpu = nvidia_available or apple_available
271-
if nvidia_available:
272-
print("* NVIDIA GPU detected, enabling automatic GPU metrics logging")
273-
elif apple_available:
274-
print("* Apple Silicon detected, enabling automatic system metrics logging")
293+
if project not in _projects_notified_auto_log_hw:
294+
if nvidia_available:
295+
print("* NVIDIA GPU detected, enabling automatic GPU metrics logging")
296+
elif apple_available:
297+
print(
298+
"* Apple Silicon detected, enabling automatic system metrics logging"
299+
)
300+
if nvidia_available or apple_available:
301+
_projects_notified_auto_log_hw.add(project)
275302

276303
run = Run(
277304
url=url,

trackio/bucket_storage.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
import sqlite3
2+
3+
import huggingface_hub
4+
from huggingface_hub import sync_bucket
5+
6+
from trackio.sqlite_storage import SQLiteStorage
7+
from trackio.utils import MEDIA_DIR, TRACKIO_DIR
8+
9+
10+
def create_bucket_if_not_exists(bucket_id: str, private: bool | None = None) -> None:
    """Create the Hugging Face Bucket `bucket_id`, tolerating an existing one.

    Args:
        bucket_id: ID of the bucket to create.
        private: Requested visibility; `None` is passed on as `False`.
    """
    visibility = private or False
    huggingface_hub.create_bucket(bucket_id, private=visibility, exist_ok=True)
12+
13+
14+
def download_bucket_to_trackio_dir(bucket_id: str) -> None:
    """Sync the contents of bucket `bucket_id` into the local `TRACKIO_DIR`.

    The directory is created first (including parents) if it is missing.

    Args:
        bucket_id: ID of the bucket to sync from.
    """
    TRACKIO_DIR.mkdir(parents=True, exist_ok=True)
    bucket_uri = f"hf://buckets/{bucket_id}"
    local_dir = str(TRACKIO_DIR)
    sync_bucket(source=bucket_uri, dest=local_dir, quiet=True)
21+
22+
23+
def upload_project_to_bucket(project: str, bucket_id: str) -> None:
    """Upload a project's SQLite database and media files to a bucket.

    The database's write-ahead log is checkpointed (TRUNCATE mode) before the
    upload so the database file is up to date. The database is added under its
    file name; every regular file below `MEDIA_DIR / project` is added under
    its path relative to `TRACKIO_DIR`.

    Args:
        project: Name of the project whose data should be uploaded.
        bucket_id: ID of the destination bucket.

    Raises:
        FileNotFoundError: If the project has no database file on disk.
    """
    from contextlib import closing

    db_path = SQLiteStorage.get_project_db_path(project)
    if not db_path.exists():
        raise FileNotFoundError(f"No database found for project '{project}'")

    # A sqlite3 connection used directly as a context manager only scopes a
    # transaction and leaves the connection open; closing() releases the
    # handle before the file is uploaded.
    with closing(sqlite3.connect(str(db_path), timeout=30.0)) as conn:
        conn.execute("PRAGMA wal_checkpoint(TRUNCATE)")

    files_to_add = [(str(db_path), db_path.name)]

    media_dir = MEDIA_DIR / project
    if media_dir.exists():
        # as_posix() keeps the in-bucket path "/"-separated on every platform;
        # str() would produce backslashes on Windows.
        files_to_add.extend(
            (str(media_file), media_file.relative_to(TRACKIO_DIR).as_posix())
            for media_file in media_dir.rglob("*")
            if media_file.is_file()
        )

    huggingface_hub.batch_bucket_files(bucket_id, add=files_to_add)

0 commit comments

Comments
 (0)