@@ -97,7 +97,29 @@ def _cleanup_current_run():
9797 pass
9898
9999
100- def _safe_get_runs_for_init (project : str ) -> list [str ]:
100+ def _safe_get_runs_for_init (
101+ project : str ,
102+ space_id : str | None ,
103+ resume : str ,
104+ remote_client : RemoteClient | None = None ,
105+ check_existing_for_never : bool = False ,
106+ ) -> list [str ]:
107+ if space_id is not None :
108+ if resume == "never" and not check_existing_for_never :
109+ return []
110+ try :
111+ client = remote_client or RemoteClient (
112+ space_id ,
113+ hf_token = huggingface_hub .utils .get_token (),
114+ verbose = False ,
115+ )
116+ runs = client .predict (project = project , api_name = "/get_runs_for_project" )
117+ return runs if isinstance (runs , list ) else []
118+ except Exception as e :
119+ _emit_nonfatal_warning (
120+ f"trackio.init() could not inspect existing runs for project '{ project } ' on Space '{ space_id } ': { e } . Continuing without resume metadata."
121+ )
122+ return []
101123 try :
102124 return SQLiteStorage .get_runs (project )
103125 except Exception as e :
@@ -107,6 +129,43 @@ def _safe_get_runs_for_init(project: str) -> list[str]:
107129 return []
108130
109131
def _safe_get_last_step_for_init(
    project: str,
    run_name: str,
    space_id: str | None,
    resumed: bool,
    remote_client: RemoteClient | None = None,
) -> int | None:
    """Best-effort lookup of the last recorded step of a run being resumed.

    The lookup never raises: any failure is reported through
    ``_emit_nonfatal_warning`` and ``None`` is returned instead.

    Args:
        project: Project the run belongs to.
        run_name: Name of the run whose last step is wanted.
        space_id: Space to query. When ``None`` the step is read from
            ``SQLiteStorage`` instead.
        resumed: Whether the run is being resumed. When false no lookup is
            performed at all.
        remote_client: Client to reuse for the Space query. When not given,
            a ``RemoteClient`` is created for ``space_id`` using the token
            from ``huggingface_hub.utils.get_token()``.

    Returns:
        The last step as an ``int`` when it could be determined, else ``None``
        (not resumed, lookup failed, or the Space returned a summary without
        an integer ``"last_step"``).
    """
    if not resumed:
        return None

    if space_id is None:
        try:
            return SQLiteStorage.get_max_step_for_run(project, run_name)
        except Exception as e:
            _emit_nonfatal_warning(
                f"trackio.init() could not recover the previous step for run '{run_name}': {e}. Continuing from step 0."
            )
            return None

    # Only the client construction and the remote call can fail here; the
    # parsing of the response below is pure and stays outside the try.
    try:
        client = remote_client or RemoteClient(
            space_id,
            hf_token=huggingface_hub.utils.get_token(),
            verbose=False,
        )
        summary = client.predict(
            project=project, run=run_name, api_name="/get_run_summary"
        )
    except Exception as e:
        _emit_nonfatal_warning(
            f"trackio.init() could not recover the previous step for run '{run_name}' on Space '{space_id}': {e}. Continuing from step 0."
        )
        return None

    if not isinstance(summary, dict):
        return None

    last_step = summary.get("last_step")
    # bool is a subclass of int, so it has to be excluded explicitly;
    # otherwise a malformed {"last_step": True} would be treated as step 1.
    if isinstance(last_step, bool) or not isinstance(last_step, int):
        return None
    return last_step
167+
168+
110169def init (
111170 project : str ,
112171 name : str | None = None ,
@@ -288,7 +347,26 @@ def init(
288347 )
289348 context_vars .current_project .set (project )
290349
291- existing_runs = _safe_get_runs_for_init (project )
350+ remote_client = None
351+ if space_id is not None :
352+ try :
353+ remote_client = RemoteClient (
354+ space_id ,
355+ hf_token = huggingface_hub .utils .get_token (),
356+ verbose = False ,
357+ )
358+ except Exception as e :
359+ _emit_nonfatal_warning (
360+ f"trackio.init() could not create a Space client for '{ space_id } ': { e } . Continuing with local fallback metadata lookups."
361+ )
362+
363+ existing_runs = _safe_get_runs_for_init (
364+ project ,
365+ space_id ,
366+ resume ,
367+ remote_client = remote_client ,
368+ check_existing_for_never = name is not None ,
369+ )
292370
293371 if resume == "must" :
294372 if name is None :
@@ -310,6 +388,18 @@ def init(
310388 else :
311389 raise ValueError ("resume must be one of: 'must', 'allow', or 'never'" )
312390
391+ initial_last_step = (
392+ _safe_get_last_step_for_init (
393+ project ,
394+ name ,
395+ space_id ,
396+ resumed ,
397+ remote_client = remote_client ,
398+ )
399+ if name is not None
400+ else None
401+ )
402+
313403 if auto_log_gpu is None :
314404 nvidia_available = gpu_available ()
315405 apple_available = apple_gpu_available ()
@@ -332,6 +422,8 @@ def init(
332422 group = group ,
333423 config = config ,
334424 space_id = space_id ,
425+ existing_runs = existing_runs ,
426+ initial_last_step = initial_last_step ,
335427 auto_log_gpu = auto_log_gpu ,
336428 gpu_log_interval = gpu_log_interval ,
337429 webhook_url = webhook_url ,
0 commit comments