8585config = {}
8686
8787_atexit_registered = False
88+ _projects_notified_auto_log_hw : set [str ] = set ()
8889
8990
9091def _cleanup_current_run ():
@@ -103,6 +104,7 @@ def init(
103104 space_id : str | None = None ,
104105 space_storage : SpaceStorage | None = None ,
105106 dataset_id : str | None = None ,
107+ bucket_id : str | None = None ,
106108 config : dict | None = None ,
107109 resume : str = "never" ,
108110 settings : Any = None ,
@@ -137,13 +139,18 @@ def init(
137139 space_storage ([`~huggingface_hub.SpaceStorage`], *optional*):
138140 Choice of persistent storage tier.
139141 dataset_id (`str`, *optional*):
140- If a `space_id` is provided, a persistent Hugging Face Dataset will be
141- created and the metrics will be synced to it every 5 minutes. Specify a
142- Dataset with name like `"username/datasetname"` or `"orgname/datasetname"`,
143- or `"datasetname"` (uses currently-logged-in Hugging Face user's namespace),
144- or `None` (uses the same name as the Space but with the `"_dataset"`
145- suffix). If the Dataset does not exist, it will be created. If the Dataset
146- already exists, the project will be appended to it.
142+ If provided, uses the legacy Hugging Face Dataset backend for metric
143+ persistence (metrics are exported to Parquet and committed every 5 minutes).
144+ Specify a Dataset with name like `"username/datasetname"` or
145+ `"orgname/datasetname"`, or `"datasetname"` (uses currently-logged-in
146+ Hugging Face user's namespace). Cannot be used together with `bucket_id`.
147+ bucket_id (`str`, *optional*):
148+ The ID of the Hugging Face Bucket to use for metric persistence. By default,
149+ when a `space_id` is provided and neither `dataset_id` nor `bucket_id` is
150+ explicitly set, a bucket is auto-generated from the `space_id`. Buckets provide
151+ S3-like storage without git overhead - the SQLite database is stored directly
152+ via `hf-mount` in the Space. Specify a Bucket with name like
153+ `"username/bucketname"` or just `"bucketname"`.
147154 config (`dict`, *optional*):
148155 A dictionary of configuration options. Provided for compatibility with
149156 `wandb.init()`.
@@ -194,11 +201,14 @@ def init(
194201 )
195202
196203 space_id = space_id or os .environ .get ("TRACKIO_SPACE_ID" )
204+ bucket_id = bucket_id or os .environ .get ("TRACKIO_BUCKET_ID" )
197205 if space_id is None and dataset_id is not None :
198206 raise ValueError ("Must provide a `space_id` when `dataset_id` is provided." )
207+ if dataset_id is not None and bucket_id is not None :
208+ raise ValueError ("Cannot provide both `dataset_id` and `bucket_id`." )
199209 try :
200- space_id , dataset_id = utils .preprocess_space_and_dataset_ids (
201- space_id , dataset_id
210+ space_id , dataset_id , bucket_id = utils .preprocess_space_and_dataset_ids (
211+ space_id , dataset_id , bucket_id
202212 )
203213 except LocalTokenNotFoundError as e :
204214 raise LocalTokenNotFoundError (
@@ -221,7 +231,13 @@ def init(
221231 ):
222232 print (f"* Trackio project initialized: { project } " )
223233
224- if dataset_id is not None :
234+ if bucket_id is not None :
235+ os .environ ["TRACKIO_BUCKET_ID" ] = bucket_id
236+ bucket_url = f"https://huggingface.co/buckets/{ bucket_id } "
237+ print (
238+ f"* Trackio metrics will be synced to Hugging Face Bucket: { bucket_url } "
239+ )
240+ elif dataset_id is not None :
225241 os .environ ["TRACKIO_DATASET_ID" ] = dataset_id
226242 print (
227243 f"* Trackio metrics will be synced to Hugging Face Dataset: { dataset_id } "
@@ -233,13 +249,19 @@ def init(
233249 utils .print_dashboard_instructions (project )
234250 else :
235251 deploy .create_space_if_not_exists (
236- space_id , space_storage , dataset_id , private
252+ space_id ,
253+ space_storage ,
254+ dataset_id ,
255+ bucket_id ,
256+ private ,
237257 )
238258 user_name , space_name = space_id .split ("/" )
239259 space_url = deploy .SPACE_HOST_URL .format (
240260 user_name = user_name , space_name = space_name
241261 )
242- print (f"* View dashboard by going to: { space_url } " )
262+ print (
263+ f"* View dashboard by going to: { deploy ._BOLD_ORANGE } { space_url } { deploy ._RESET } "
264+ )
243265 if utils .is_in_notebook () and embed :
244266 utils .embed_url_in_notebook (space_url )
245267 context_vars .current_project .set (project )
@@ -268,10 +290,15 @@ def init(
268290 nvidia_available = gpu_available ()
269291 apple_available = apple_gpu_available ()
270292 auto_log_gpu = nvidia_available or apple_available
271- if nvidia_available :
272- print ("* NVIDIA GPU detected, enabling automatic GPU metrics logging" )
273- elif apple_available :
274- print ("* Apple Silicon detected, enabling automatic system metrics logging" )
293+ if project not in _projects_notified_auto_log_hw :
294+ if nvidia_available :
295+ print ("* NVIDIA GPU detected, enabling automatic GPU metrics logging" )
296+ elif apple_available :
297+ print (
298+ "* Apple Silicon detected, enabling automatic system metrics logging"
299+ )
300+ if nvidia_available or apple_available :
301+ _projects_notified_auto_log_hw .add (project )
275302
276303 run = Run (
277304 url = url ,
0 commit comments