Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/shiny-rockets-bathe.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"trackio": minor
---

feat: Use HF buckets as backend
6 changes: 4 additions & 2 deletions tests/e2e-spaces/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@ def test_space_id():

@pytest.fixture(scope="session", autouse=True)
def _ensure_space_ready(test_space_id):
space_id, dataset_id = utils.preprocess_space_and_dataset_ids(test_space_id, None)
deploy.create_space_if_not_exists(space_id, None, dataset_id, None)
space_id, dataset_id, bucket_id = utils.preprocess_space_and_dataset_ids(
test_space_id, None
)
deploy.create_space_if_not_exists(space_id, None, dataset_id, bucket_id, None)

deadline = time.time() + 300
while time.time() < deadline:
Expand Down
59 changes: 59 additions & 0 deletions tests/e2e-spaces/test_metrics_on_spaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@
import time

import huggingface_hub
import pytest
from gradio_client import Client

import trackio
from trackio import utils


def test_basic_logging(test_space_id):
Expand Down Expand Up @@ -92,3 +94,60 @@ def test_runs_data_persisted_after_restart(test_space_id):
lr = cfg.get("learning_rate")
assert lr is not None and abs(float(lr) - 0.001) < 1e-6
assert cfg.get("epochs") == 10


def test_bucket_space_preserves_logged_metrics_after_restart(test_space_id):
    """Metrics logged to a bucket-backed Space must survive a Space restart."""
    _, dataset_id, bucket_id = utils.preprocess_space_and_dataset_ids(
        test_space_id, None, None
    )
    if dataset_id is not None or bucket_id is None:
        pytest.skip("Requires a Space deployed with bucket backend (no dataset_id).")

    project = f"test_bucket_persist_{secrets.token_urlsafe(8)}"
    run = "metrics_run"

    # Log a single step of metrics, then force a sync so the data reaches
    # the bucket before we restart the Space.
    trackio.init(project=project, name=run, space_id=test_space_id)
    trackio.log(metrics={"loss": 0.42, "acc": 0.88})
    trackio.finish()

    Client(test_space_id).predict(api_name="/force_sync")

    # Setting/changing a Space variable triggers a restart of the Space.
    huggingface_hub.add_space_variable(
        test_space_id, "TRACKIO_TEST_RESTART", secrets.token_urlsafe(8)
    )

    # Give the restart a moment to begin, then poll (up to 5 minutes) until
    # the Space accepts connections again.
    time.sleep(10)
    deadline = time.time() + 300
    client = None
    while client is None and time.time() < deadline:
        try:
            client = Client(test_space_id, verbose=False)
        except Exception:
            time.sleep(10)
    assert client is not None, "Space did not come back up after restart"

    summary = client.predict(project=project, run=run, api_name="/get_run_summary")
    assert summary["num_logs"] == 1
    assert "loss" in summary["metrics"]
    assert "acc" in summary["metrics"]

    # Each metric should have exactly the one value we logged pre-restart.
    for metric, expected in (("loss", 0.42), ("acc", 0.88)):
        values = client.predict(
            project=project,
            run=run,
            metric_name=metric,
            api_name="/get_metric_values",
        )
        assert len(values) == 1
        assert abs(float(values[0]["value"]) - expected) < 1e-6
2 changes: 1 addition & 1 deletion trackio/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ sdk_version: {GRADIO_VERSION}
app_file: {APP_FILE}
tags:
- trackio
hf_oauth: true
{LINKED_HUB_METADATA}hf_oauth: true
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For #457

hf_oauth_scopes:
- write-repos
---
59 changes: 43 additions & 16 deletions trackio/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@
config = {}

_atexit_registered = False
_projects_notified_auto_log_hw: set[str] = set()


def _cleanup_current_run():
Expand All @@ -103,6 +104,7 @@ def init(
space_id: str | None = None,
space_storage: SpaceStorage | None = None,
dataset_id: str | None = None,
bucket_id: str | None = None,
config: dict | None = None,
resume: str = "never",
settings: Any = None,
Expand Down Expand Up @@ -137,13 +139,18 @@ def init(
space_storage ([`~huggingface_hub.SpaceStorage`], *optional*):
Choice of persistent storage tier.
dataset_id (`str`, *optional*):
If a `space_id` is provided, a persistent Hugging Face Dataset will be
created and the metrics will be synced to it every 5 minutes. Specify a
Dataset with name like `"username/datasetname"` or `"orgname/datasetname"`,
or `"datasetname"` (uses currently-logged-in Hugging Face user's namespace),
or `None` (uses the same name as the Space but with the `"_dataset"`
suffix). If the Dataset does not exist, it will be created. If the Dataset
already exists, the project will be appended to it.
If provided, uses the legacy Hugging Face Dataset backend for metric
persistence (metrics are exported to Parquet and committed every 5 minutes).
Specify a Dataset with name like `"username/datasetname"` or
`"orgname/datasetname"`, or `"datasetname"` (uses currently-logged-in
Hugging Face user's namespace). Cannot be used together with `bucket_id`.
bucket_id (`str`, *optional*):
The ID of the Hugging Face Bucket to use for metric persistence. By default,
when a `space_id` is provided and neither `dataset_id` nor `bucket_id` is
explicitly set, a bucket is auto-generated from the space_id. Buckets provide
S3-like storage without git overhead - the SQLite database is stored directly
via `hf-mount` in the Space. Specify a Bucket with name like
`"username/bucketname"` or just `"bucketname"`.
config (`dict`, *optional*):
A dictionary of configuration options. Provided for compatibility with
`wandb.init()`.
Expand Down Expand Up @@ -194,11 +201,14 @@ def init(
)

space_id = space_id or os.environ.get("TRACKIO_SPACE_ID")
bucket_id = bucket_id or os.environ.get("TRACKIO_BUCKET_ID")
if space_id is None and dataset_id is not None:
raise ValueError("Must provide a `space_id` when `dataset_id` is provided.")
if dataset_id is not None and bucket_id is not None:
raise ValueError("Cannot provide both `dataset_id` and `bucket_id`.")
try:
space_id, dataset_id = utils.preprocess_space_and_dataset_ids(
space_id, dataset_id
space_id, dataset_id, bucket_id = utils.preprocess_space_and_dataset_ids(
space_id, dataset_id, bucket_id
)
except LocalTokenNotFoundError as e:
raise LocalTokenNotFoundError(
Expand All @@ -221,7 +231,13 @@ def init(
):
print(f"* Trackio project initialized: {project}")

if dataset_id is not None:
if bucket_id is not None:
os.environ["TRACKIO_BUCKET_ID"] = bucket_id
bucket_url = f"https://huggingface.co/buckets/{bucket_id}"
print(
f"* Trackio metrics will be synced to Hugging Face Bucket: {bucket_url}"
)
elif dataset_id is not None:
os.environ["TRACKIO_DATASET_ID"] = dataset_id
print(
f"* Trackio metrics will be synced to Hugging Face Dataset: {dataset_id}"
Expand All @@ -233,13 +249,19 @@ def init(
utils.print_dashboard_instructions(project)
else:
deploy.create_space_if_not_exists(
space_id, space_storage, dataset_id, private
space_id,
space_storage,
dataset_id,
bucket_id,
private,
)
user_name, space_name = space_id.split("/")
space_url = deploy.SPACE_HOST_URL.format(
user_name=user_name, space_name=space_name
)
print(f"* View dashboard by going to: {space_url}")
print(
f"* View dashboard by going to: {deploy._BOLD_ORANGE}{space_url}{deploy._RESET}"
)
if utils.is_in_notebook() and embed:
utils.embed_url_in_notebook(space_url)
context_vars.current_project.set(project)
Expand Down Expand Up @@ -268,10 +290,15 @@ def init(
nvidia_available = gpu_available()
apple_available = apple_gpu_available()
auto_log_gpu = nvidia_available or apple_available
if nvidia_available:
print("* NVIDIA GPU detected, enabling automatic GPU metrics logging")
elif apple_available:
print("* Apple Silicon detected, enabling automatic system metrics logging")
if project not in _projects_notified_auto_log_hw:
if nvidia_available:
print("* NVIDIA GPU detected, enabling automatic GPU metrics logging")
elif apple_available:
print(
"* Apple Silicon detected, enabling automatic system metrics logging"
)
if nvidia_available or apple_available:
_projects_notified_auto_log_hw.add(project)

run = Run(
url=url,
Expand Down
40 changes: 40 additions & 0 deletions trackio/bucket_storage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import sqlite3

import huggingface_hub
from huggingface_hub import sync_bucket

from trackio.sqlite_storage import SQLiteStorage
from trackio.utils import MEDIA_DIR, TRACKIO_DIR


def create_bucket_if_not_exists(bucket_id: str, private: bool | None = None) -> None:
    """Create the Hugging Face bucket ``bucket_id`` if it does not already exist.

    A ``private`` value of ``None`` is treated as public (``False``).
    """
    is_private = bool(private)
    huggingface_hub.create_bucket(bucket_id, private=is_private, exist_ok=True)


def download_bucket_to_trackio_dir(bucket_id: str) -> None:
    """Mirror the contents of the bucket into the local Trackio directory."""
    TRACKIO_DIR.mkdir(parents=True, exist_ok=True)
    source_uri = f"hf://buckets/{bucket_id}"
    sync_bucket(source=source_uri, dest=str(TRACKIO_DIR), quiet=True)


def upload_project_to_bucket(project: str, bucket_id: str) -> None:
    """Upload a project's SQLite database and media files to an HF bucket.

    Args:
        project: Name of the local Trackio project to upload.
        bucket_id: Target Hugging Face bucket (e.g. ``"username/bucketname"``).

    Raises:
        FileNotFoundError: If the project has no local database file.
    """
    db_path = SQLiteStorage.get_project_db_path(project)
    if not db_path.exists():
        raise FileNotFoundError(f"No database found for project '{project}'")

    # Flush the WAL into the main database file so the uploaded .db is
    # complete on its own. NOTE: sqlite3's `with connect(...)` context
    # manager only manages transactions — it does NOT close the connection —
    # so close explicitly to avoid leaking the handle (and holding the db
    # file open while it is being uploaded).
    conn = sqlite3.connect(str(db_path), timeout=30.0)
    try:
        conn.execute("PRAGMA wal_checkpoint(TRUNCATE)")
    finally:
        conn.close()

    # (local_path, path_in_bucket) pairs; the db goes at the bucket root.
    files_to_add = [(str(db_path), db_path.name)]

    # Media files keep their path relative to TRACKIO_DIR inside the bucket.
    media_dir = MEDIA_DIR / project
    if media_dir.exists():
        files_to_add.extend(
            (str(media_file), str(media_file.relative_to(TRACKIO_DIR)))
            for media_file in media_dir.rglob("*")
            if media_file.is_file()
        )

    huggingface_hub.batch_bucket_files(bucket_id, add=files_to_add)
Loading
Loading