66import subprocess
77import sys
88import tempfile
9+ import threading
910import time
1011from concurrent .futures import ThreadPoolExecutor , as_completed
1112from dataclasses import dataclass , field
1617
1718import trackio
1819
# Switch stdout to line buffering so each progress line is emitted as soon as
# it is complete. This is best-effort only: a replaced stdout object may expose
# ``reconfigure`` yet reject the option, and a failure here must never stop the
# health check from running.
if hasattr(sys.stdout, "reconfigure"):
    try:
        sys.stdout.reconfigure(line_buffering=True)
    except Exception:
        pass
25+
# Guards stdout so lines printed from several threads are not interleaved.
_print_lock = threading.Lock()


def _progress(message: str) -> None:
    """Print ``message`` to stdout and flush it immediately.

    The module-level ``_print_lock`` is held for the duration of the call, so
    concurrent callers each emit their message as one uninterrupted write.

    Args:
        message: Text to print; ``print`` appends the trailing newline.
    """
    with _print_lock:
        print(message, flush=True)
32+
33+
1934SPACE_HEAVY_KEYWORDS = (
2035 "deploy" ,
2136 "sync-static-space" ,
3146 "fractal-evolution" ,
3247)
3348
# File-name fragments identifying examples that need secret environment
# variables to run.
REQUIRES_SECRET_ENV_KEYWORDS = ("slack-webhook",)

# Substrings of browser console errors that are treated as harmless noise.
BENIGN_CONSOLE_ERROR_SNIPPETS = (
    "favicon.ico",
    "Failed to load resource: the server responded with a status of 404",
)
3855
# Matches ANSI SGR (colour/style) escape sequences such as "\x1b[31m".
ANSI_ESCAPE = re.compile(r"\x1b\[[0-9;]*m")

# Matches a traceback frame line whose file path lies inside the trackio
# package itself: either a ".../trackio/trackio/..." checkout layout or an
# installed ".../site-packages/trackio/..." layout. Both "/" and "\" are
# accepted as path separators, and matching is case-insensitive.
TRACKIO_LIB_FRAME = re.compile(
    r'File "[^"]*(?:[/\\]trackio[/\\]trackio[/\\]|site-packages[/\\]trackio[/\\])',
    re.IGNORECASE,
)
4062
4163
4264@dataclass
@@ -85,20 +107,23 @@ def scan_logs_for_trackio_issues(
85107) -> list [TrackioIssue ]:
86108 issues : list [TrackioIssue ] = []
87109 combined = f"{ stdout } \n { stderr } "
88- if "Traceback (most recent call last):" in combined and (
89- "trackio" in combined . lower () or TRACKIO_PATH_IN_TRACEBACK . search ( combined )
110+ if "Traceback (most recent call last):" in combined and TRACKIO_LIB_FRAME . search (
111+ combined
90112 ):
91113 issues .append (
92114 TrackioIssue (
93115 kind = "python_traceback" ,
94- detail = "Traceback involving Trackio appears in example output" ,
116+ detail = "Traceback with frames under the trackio package appears in output" ,
95117 example = example_name ,
96118 artifact_path = str (stderr_path if stderr .strip () else stdout_path ),
97119 )
98120 )
99121 for line in combined .splitlines ():
100- lower = line .lower ()
101- if "trackio" not in lower and "/trackio/" not in line :
122+ plain = ANSI_ESCAPE .sub ("" , line )
123+ lower = plain .lower ()
124+ if re .search (r"\[trackio\s+(info|warn|error)\]" , plain , re .IGNORECASE ):
125+ continue
126+ if "trackio" not in lower and "/trackio/" not in plain .lower ():
102127 continue
103128 if any (
104129 w in lower
@@ -153,7 +178,12 @@ def _normalize_list_response(value: object, key: str) -> list[str]:
153178 return []
154179
155180
def _example_candidates(
    examples_dir: Path,
    include_spaces: bool,
    include_extra_deps: bool,
    include_secret_env: bool,
) -> list[Path]:
    """Return the example scripts eligible to run, in sorted path order.

    Every ``*.py`` file directly inside ``examples_dir`` is a candidate. A
    candidate is dropped when its lower-cased file name contains a keyword
    from any category that has not been opted into.

    Each category is filtered independently: opting into Space examples does
    not implicitly opt into the extra-dependency or secret-env examples.

    Args:
        examples_dir: Directory that is globbed for ``*.py`` files.
        include_spaces: Keep names matching ``SPACE_HEAVY_KEYWORDS``.
        include_extra_deps: Keep names matching ``EXTRA_DEPS_KEYWORDS``.
        include_secret_env: Keep names matching
            ``REQUIRES_SECRET_ENV_KEYWORDS``.

    Returns:
        The remaining paths, sorted.
    """
    # Collect the keywords of every category the caller did NOT opt into.
    excluded_keywords: list[str] = []
    if not include_spaces:
        excluded_keywords.extend(SPACE_HEAVY_KEYWORDS)
    if not include_extra_deps:
        excluded_keywords.extend(EXTRA_DEPS_KEYWORDS)
    if not include_secret_env:
        excluded_keywords.extend(REQUIRES_SECRET_ENV_KEYWORDS)

    filtered: list[Path] = []
    for path in sorted(examples_dir.glob("*.py")):
        name = path.name.lower()
        if any(keyword in name for keyword in excluded_keywords):
            continue
        filtered.append(path)
    return filtered
@@ -181,6 +217,8 @@ def run_example_and_collect_projects(
181217 artifacts_dir : Path ,
182218 space : str | None ,
183219) -> ExampleOutcome :
220+ env = {** env }
221+ env .setdefault ("PYTHONUNBUFFERED" , "1" )
184222 list_args = ["list" , "projects" ]
185223 if space :
186224 list_args .extend (["--space" , space ])
@@ -199,7 +237,7 @@ def run_example_and_collect_projects(
199237 [sys .executable , str (example_path )],
200238 env = env ,
201239 timeout_s = timeout_s ,
202- cwd = repo_root ,
240+ cwd = example_path . parent ,
203241 )
204242 except subprocess .TimeoutExpired as e :
205243 return ExampleOutcome (
@@ -504,7 +542,12 @@ def execute_examples(
504542 shared .mkdir (parents = True , exist_ok = True )
505543 env = dict (os .environ )
506544 env ["TRACKIO_DIR" ] = str (shared )
545+ env .setdefault ("PYTHONUNBUFFERED" , "1" )
546+ n = len (selected )
507547 for idx , example in enumerate (selected ):
548+ _progress (
549+ f" Example [{ idx + 1 } /{ n } ]: starting subprocess { example .name } ..."
550+ )
508551 sublog = logs_root / f"{ idx :02d} _{ _ensure_serializable_name (example .stem )} "
509552 sublog .mkdir (parents = True , exist_ok = True )
510553 out = run_example_and_collect_projects (
@@ -517,19 +560,30 @@ def execute_examples(
517560 space = space ,
518561 )
519562 outcomes .append (out )
563+ _progress (
564+ f" Example [{ idx + 1 } /{ n } ]: finished { example .name } (ok={ out .ok } )"
565+ )
520566 return outcomes
521567
522568 sandboxes_root = artifacts_root / "sandboxes"
523569 sandboxes_root .mkdir (exist_ok = True )
524570
571+ max_workers = min (jobs , len (selected ))
572+ _progress (
573+ f"Running { len (selected )} example subprocess(es) in parallel "
574+ f"(up to { max_workers } at a time; each may run for minutes) ..."
575+ )
576+
525577 def run_idx (idx : int , example : Path ) -> ExampleOutcome :
578+ _progress (f" Subprocess started: [{ idx } ] { example .name } " )
526579 try :
527580 trackio_dir = (
528581 sandboxes_root / f"{ idx :02d} _{ _ensure_serializable_name (example .stem )} "
529582 )
530583 trackio_dir .mkdir (parents = True , exist_ok = True )
531584 env = dict (os .environ )
532585 env ["TRACKIO_DIR" ] = str (trackio_dir )
586+ env .setdefault ("PYTHONUNBUFFERED" , "1" )
533587 sublog = logs_root / f"{ idx :02d} _{ _ensure_serializable_name (example .stem )} "
534588 sublog .mkdir (parents = True , exist_ok = True )
535589 return run_example_and_collect_projects (
@@ -555,13 +609,16 @@ def run_idx(idx: int, example: Path) -> ExampleOutcome:
555609 error_message = repr (e ),
556610 )
557611
558- max_workers = min (jobs , len (selected ))
559612 slot_outcomes : list [ExampleOutcome | None ] = [None ] * len (selected )
560613 with ThreadPoolExecutor (max_workers = max_workers ) as pool :
561614 futures = {pool .submit (run_idx , i , ex ): i for i , ex in enumerate (selected )}
562615 for fut in as_completed (futures ):
563616 slot = futures [fut ]
564- slot_outcomes [slot ] = fut .result ()
617+ done = fut .result ()
618+ slot_outcomes [slot ] = done
619+ _progress (
620+ f" Subprocess finished: [{ slot } ] { selected [slot ].name } (ok={ done .ok } )"
621+ )
565622 return [o for o in slot_outcomes if o is not None ]
566623
567624
@@ -578,19 +635,19 @@ def dedupe_issues(issues: list[TrackioIssue]) -> list[TrackioIssue]:
578635
579636
def print_trackio_library_report(issues: list[TrackioIssue]) -> None:
    """Print the collected Trackio issues as a numbered report.

    All output goes through ``_progress``. A header and a short statement of
    the report's focus are always printed. When ``issues`` is empty a single
    "no problems" line follows; otherwise each issue is listed with its kind,
    the example it came from (when set), its detail text, and the path of the
    related log artifact (when set).

    Args:
        issues: Issues to report; each item is read for ``kind``, ``example``,
            ``detail`` and ``artifact_path``.
    """
    _progress("\n=== Trackio library health report ===")
    _progress(
        "Focus: signals that suggest Trackio (library, CLI, or dashboard) regressions, "
        "not whether a particular example script is perfect."
    )
    if not issues:
        _progress("No Trackio-related problems were collected in this run.")
        return
    _progress(f"Collected {len(issues)} item(s) to review:\n")
    for i, issue in enumerate(issues, 1):
        # Optional parts collapse to an empty string when the field is unset.
        ex = f" [{issue.example}]" if issue.example else ""
        art = f"\n   log: {issue.artifact_path}" if issue.artifact_path else ""
        _progress(f"{i}. ({issue.kind}){ex}\n   {issue.detail}{art}\n")
594651
595652
596653def main () -> int :
@@ -611,6 +668,23 @@ def main() -> int:
611668 action = "store_true" ,
612669 help = "Include examples likely to require Spaces credentials/network." ,
613670 )
671+ parser .add_argument (
672+ "--include-extra-deps-examples" ,
673+ action = "store_true" ,
674+ help = (
675+ "Include examples that need optional third-party packages "
676+ "(e.g. transformers, datasets). Excluded by default so local health checks "
677+ "do not fail on missing optional deps."
678+ ),
679+ )
680+ parser .add_argument (
681+ "--include-secret-env-examples" ,
682+ action = "store_true" ,
683+ help = (
684+ "Include examples that require secret env vars (e.g. SLACK_WEBHOOK_URL). "
685+ "Excluded by default."
686+ ),
687+ )
614688 parser .add_argument (
615689 "--space" ,
616690 default = None ,
@@ -643,7 +717,12 @@ def main() -> int:
643717
644718 repo_root = Path (__file__ ).resolve ().parent .parent
645719 examples_dir = repo_root / "examples"
646- candidates = _example_candidates (examples_dir , args .include_space_examples )
720+ candidates = _example_candidates (
721+ examples_dir ,
722+ args .include_space_examples ,
723+ args .include_extra_deps_examples ,
724+ args .include_secret_env_examples ,
725+ )
647726 if not candidates :
648727 raise RuntimeError ("No example candidates available with current filters." )
649728
@@ -662,14 +741,18 @@ def main() -> int:
662741 screenshots_dir = artifacts_root / "screenshots"
663742 screenshots_dir .mkdir (exist_ok = True )
664743
665- print (f"Artifacts: { artifacts_root } " )
666- print (
744+ _progress (f"Artifacts: { artifacts_root } " )
745+ _progress (
667746 f"Execution: { 'parallel' if args .jobs > 1 else 'sequential' } (--jobs { args .jobs } )"
668747 )
669- print (f"Selected examples ({ sample_count } ):" )
748+ _progress (f"Selected examples ({ sample_count } ):" )
670749 for example in selected :
671- print (f" - { example .name } " )
750+ _progress (f" - { example .name } " )
672751
752+ _progress (
753+ "\n Running example scripts (no further output until a subprocess finishes; "
754+ "slow scripts with training loops can take several minutes) ..."
755+ )
673756 start = time .time ()
674757 outcomes = execute_examples (
675758 selected ,
@@ -689,7 +772,7 @@ def main() -> int:
689772
690773 if failed :
691774 for o in failed :
692- print (f"\n Example run failed: { o .error_message or 'unknown' } " )
775+ _progress (f"\n Example run failed: { o .error_message or 'unknown' } " )
693776 if not args .continue_on_failure :
694777 collected_issues = dedupe_issues (collected_issues )
695778 print_trackio_library_report (collected_issues )
@@ -718,27 +801,31 @@ def main() -> int:
718801 print_trackio_library_report (collected_issues )
719802 return 1
720803
721- print ("\n Validating with Trackio CLI..." )
804+ _progress ("\n Validating with Trackio CLI ..." )
722805 cli_issues = validate_cli_for_results (ok_results , repo_root , args .space )
723806 collected_issues .extend (cli_issues )
724807 if cli_issues :
725- print (
808+ _progress (
726809 f" CLI reported { len (cli_issues )} problem(s); see Trackio library report."
727810 )
728811 else :
729- print (" CLI checks passed" )
812+ _progress (" CLI checks passed" )
730813
731- print ("\n Validating UI with Playwright..." )
814+ _progress (
815+ "\n Validating UI with Playwright (Chromium + local dashboard; often 30–120s) ..."
816+ )
732817 ui_issues = validate_ui (
733818 ok_results ,
734819 screenshots_dir = screenshots_dir ,
735820 browser_timeout_ms = args .browser_timeout_ms ,
736821 )
737822 collected_issues .extend (ui_issues )
738823 if ui_issues :
739- print (f" UI reported { len (ui_issues )} problem(s); see Trackio library report." )
824+ _progress (
825+ f" UI reported { len (ui_issues )} problem(s); see Trackio library report."
826+ )
740827 else :
741- print (" UI checks passed" )
828+ _progress (" UI checks passed" )
742829
743830 collected_issues = dedupe_issues (collected_issues )
744831 print_trackio_library_report (collected_issues )
@@ -765,12 +852,12 @@ def main() -> int:
765852 summary_path = artifacts_root / "summary.json"
766853 summary_path .write_text (json .dumps (summary , indent = 2 ))
767854
768- print (f"\n Summary written to: { summary_path } " )
769- print (f"Screenshots: { screenshots_dir } " )
855+ _progress (f"\n Summary written to: { summary_path } " )
856+ _progress (f"Screenshots: { screenshots_dir } " )
770857
771858 if failed or collected_issues :
772859 return 1
773- print ("\n Trackio library check completed with no collected issues." )
860+ _progress ("\n Trackio library check completed with no collected issues." )
774861 return 0
775862
776863
0 commit comments