Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/shiny-rockets-bathe.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"trackio": minor
---

feat: Use HF buckets as backend
6 changes: 4 additions & 2 deletions tests/e2e-spaces/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@ def test_space_id():

@pytest.fixture(scope="session", autouse=True)
def _ensure_space_ready(test_space_id):
space_id, dataset_id = utils.preprocess_space_and_dataset_ids(test_space_id, None)
deploy.create_space_if_not_exists(space_id, None, dataset_id, None)
space_id, dataset_id, bucket_id = utils.preprocess_space_and_dataset_ids(
test_space_id, None
)
deploy.create_space_if_not_exists(space_id, None, dataset_id, bucket_id, None)

deadline = time.time() + 300
while time.time() < deadline:
Expand Down
59 changes: 59 additions & 0 deletions tests/e2e-spaces/test_metrics_on_spaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@
import time

import huggingface_hub
import pytest
from gradio_client import Client

import trackio
from trackio import utils


def test_basic_logging(test_space_id):
Expand Down Expand Up @@ -92,3 +94,60 @@ def test_runs_data_persisted_after_restart(test_space_id):
lr = cfg.get("learning_rate")
assert lr is not None and abs(float(lr) - 0.001) < 1e-6
assert cfg.get("epochs") == 10


def test_bucket_space_preserves_logged_metrics_after_restart(test_space_id):
    """Metrics logged to a bucket-backed Space must survive a Space restart."""
    _, dataset_id, bucket_id = utils.preprocess_space_and_dataset_ids(
        test_space_id, None, None
    )
    if dataset_id is not None or bucket_id is None:
        pytest.skip("Requires a Space deployed with bucket backend (no dataset_id).")

    project = f"test_bucket_persist_{secrets.token_urlsafe(8)}"
    run = "metrics_run"

    # Log a single step of metrics, then force a sync so the data reaches
    # the bucket before we restart the Space.
    trackio.init(project=project, name=run, space_id=test_space_id)
    trackio.log(metrics={"loss": 0.42, "acc": 0.88})
    trackio.finish()

    Client(test_space_id).predict(api_name="/force_sync")

    # Setting/changing a Space variable triggers a restart of the Space.
    huggingface_hub.add_space_variable(
        test_space_id, "TRACKIO_TEST_RESTART", secrets.token_urlsafe(8)
    )

    # Give the restart a moment to begin, then poll (up to 5 minutes) until
    # the Space accepts connections again.
    time.sleep(10)
    deadline = time.time() + 300
    client = None
    while client is None and time.time() < deadline:
        try:
            client = Client(test_space_id, verbose=False)
        except Exception:
            time.sleep(10)
    assert client is not None, "Space did not come back up after restart"

    summary = client.predict(project=project, run=run, api_name="/get_run_summary")
    assert summary["num_logs"] == 1
    assert "loss" in summary["metrics"]
    assert "acc" in summary["metrics"]

    # Each metric should have exactly the one value we logged pre-restart.
    for metric, expected in (("loss", 0.42), ("acc", 0.88)):
        values = client.predict(
            project=project,
            run=run,
            metric_name=metric,
            api_name="/get_metric_values",
        )
        assert len(values) == 1
        assert abs(float(values[0]["value"]) - expected) < 1e-6
2 changes: 1 addition & 1 deletion trackio/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ sdk_version: {GRADIO_VERSION}
app_file: {APP_FILE}
tags:
- trackio
hf_oauth: true
{LINKED_HUB_METADATA}hf_oauth: true
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For #457

hf_oauth_scopes:
- write-repos
---
59 changes: 43 additions & 16 deletions trackio/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@
config = {}

_atexit_registered = False
_projects_notified_auto_log_hw: set[str] = set()


def _cleanup_current_run():
Expand All @@ -103,6 +104,7 @@ def init(
space_id: str | None = None,
space_storage: SpaceStorage | None = None,
dataset_id: str | None = None,
bucket_id: str | None = None,
config: dict | None = None,
resume: str = "never",
settings: Any = None,
Expand Down Expand Up @@ -137,13 +139,18 @@ def init(
space_storage ([`~huggingface_hub.SpaceStorage`], *optional*):
Choice of persistent storage tier.
dataset_id (`str`, *optional*):
If a `space_id` is provided, a persistent Hugging Face Dataset will be
created and the metrics will be synced to it every 5 minutes. Specify a
Dataset with name like `"username/datasetname"` or `"orgname/datasetname"`,
or `"datasetname"` (uses currently-logged-in Hugging Face user's namespace),
or `None` (uses the same name as the Space but with the `"_dataset"`
suffix). If the Dataset does not exist, it will be created. If the Dataset
already exists, the project will be appended to it.
If provided, uses the legacy Hugging Face Dataset backend for metric
persistence (metrics are exported to Parquet and committed every 5 minutes).
Specify a Dataset with name like `"username/datasetname"` or
`"orgname/datasetname"`, or `"datasetname"` (uses currently-logged-in
Hugging Face user's namespace). Cannot be used together with `bucket_id`.
bucket_id (`str`, *optional*):
The ID of the Hugging Face Bucket to use for metric persistence. By default,
when a `space_id` is provided and neither `dataset_id` nor `bucket_id` is
explicitly set, a bucket is auto-generated from the space_id. Buckets provide
S3-like storage without git overhead - the SQLite database is stored directly
via `hf-mount` in the Space. Specify a Bucket with name like
`"username/bucketname"` or just `"bucketname"`.
config (`dict`, *optional*):
A dictionary of configuration options. Provided for compatibility with
`wandb.init()`.
Expand Down Expand Up @@ -194,11 +201,14 @@ def init(
)

space_id = space_id or os.environ.get("TRACKIO_SPACE_ID")
bucket_id = bucket_id or os.environ.get("TRACKIO_BUCKET_ID")
if space_id is None and dataset_id is not None:
raise ValueError("Must provide a `space_id` when `dataset_id` is provided.")
if dataset_id is not None and bucket_id is not None:
raise ValueError("Cannot provide both `dataset_id` and `bucket_id`.")
try:
space_id, dataset_id = utils.preprocess_space_and_dataset_ids(
space_id, dataset_id
space_id, dataset_id, bucket_id = utils.preprocess_space_and_dataset_ids(
space_id, dataset_id, bucket_id
)
except LocalTokenNotFoundError as e:
raise LocalTokenNotFoundError(
Expand All @@ -221,7 +231,13 @@ def init(
):
print(f"* Trackio project initialized: {project}")

if dataset_id is not None:
if bucket_id is not None:
os.environ["TRACKIO_BUCKET_ID"] = bucket_id
bucket_url = f"https://huggingface.co/buckets/{bucket_id}"
print(
f"* Trackio metrics will be synced to Hugging Face Bucket: {bucket_url}"
)
elif dataset_id is not None:
os.environ["TRACKIO_DATASET_ID"] = dataset_id
print(
f"* Trackio metrics will be synced to Hugging Face Dataset: {dataset_id}"
Expand All @@ -233,13 +249,19 @@ def init(
utils.print_dashboard_instructions(project)
else:
deploy.create_space_if_not_exists(
space_id, space_storage, dataset_id, private
space_id,
space_storage,
dataset_id,
bucket_id,
private,
)
user_name, space_name = space_id.split("/")
space_url = deploy.SPACE_HOST_URL.format(
user_name=user_name, space_name=space_name
)
print(f"* View dashboard by going to: {space_url}")
print(
f"* View dashboard by going to: {deploy._BOLD_ORANGE}{space_url}{deploy._RESET}"
)
if utils.is_in_notebook() and embed:
utils.embed_url_in_notebook(space_url)
context_vars.current_project.set(project)
Expand Down Expand Up @@ -268,10 +290,15 @@ def init(
nvidia_available = gpu_available()
apple_available = apple_gpu_available()
auto_log_gpu = nvidia_available or apple_available
if nvidia_available:
print("* NVIDIA GPU detected, enabling automatic GPU metrics logging")
elif apple_available:
print("* Apple Silicon detected, enabling automatic system metrics logging")
if project not in _projects_notified_auto_log_hw:
if nvidia_available:
print("* NVIDIA GPU detected, enabling automatic GPU metrics logging")
elif apple_available:
print(
"* Apple Silicon detected, enabling automatic system metrics logging"
)
if nvidia_available or apple_available:
_projects_notified_auto_log_hw.add(project)

run = Run(
url=url,
Expand Down
40 changes: 40 additions & 0 deletions trackio/bucket_storage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import sqlite3

import huggingface_hub
from huggingface_hub import sync_bucket

from trackio.sqlite_storage import SQLiteStorage
from trackio.utils import MEDIA_DIR, TRACKIO_DIR


def create_bucket_if_not_exists(bucket_id: str, private: bool | None = None) -> None:
    """Create the Hugging Face bucket ``bucket_id`` if it does not already exist.

    A ``private`` value of ``None`` is treated as public (``False``).
    """
    is_private = bool(private)
    huggingface_hub.create_bucket(bucket_id, private=is_private, exist_ok=True)


def download_bucket_to_trackio_dir(bucket_id: str) -> None:
    """Mirror the contents of the bucket into the local Trackio directory."""
    TRACKIO_DIR.mkdir(parents=True, exist_ok=True)
    source_uri = f"hf://buckets/{bucket_id}"
    sync_bucket(source=source_uri, dest=str(TRACKIO_DIR), quiet=True)


def upload_project_to_bucket(project: str, bucket_id: str) -> None:
    """Upload a project's SQLite database and media files to an HF bucket.

    Args:
        project: Name of the local Trackio project to upload.
        bucket_id: Target Hugging Face bucket (e.g. ``"username/bucketname"``).

    Raises:
        FileNotFoundError: If the project has no local database file.
    """
    db_path = SQLiteStorage.get_project_db_path(project)
    if not db_path.exists():
        raise FileNotFoundError(f"No database found for project '{project}'")

    # Flush the WAL into the main database file so the uploaded .db is
    # complete on its own. NOTE: sqlite3's `with connect(...)` context
    # manager only manages transactions — it does NOT close the connection —
    # so close explicitly to avoid leaking the handle (and holding the db
    # file open while it is being uploaded).
    conn = sqlite3.connect(str(db_path), timeout=30.0)
    try:
        conn.execute("PRAGMA wal_checkpoint(TRUNCATE)")
    finally:
        conn.close()

    # (local_path, path_in_bucket) pairs; the db goes at the bucket root.
    files_to_add = [(str(db_path), db_path.name)]

    # Media files keep their path relative to TRACKIO_DIR inside the bucket.
    media_dir = MEDIA_DIR / project
    if media_dir.exists():
        files_to_add.extend(
            (str(media_file), str(media_file.relative_to(TRACKIO_DIR)))
            for media_file in media_dir.rglob("*")
            if media_file.is_file()
        )

    huggingface_hub.batch_bucket_files(bucket_id, add=files_to_add)
Loading
Loading