Format: wrap long lines (no logic changes)

Saba9 · Saba9 · commit 34791b413aae · 2026-05-04T11:14:00.000-07:00
diff --git a/autonomous-experiments/test_harness/agent_runner.py b/autonomous-experiments/test_harness/agent_runner.py
@@ -87,18 +87,35 @@ def experiment_failure_recovery(project):
         error_alerts = [a for a in alerts if a.get("level") == "error"]
 
         if returncode != 0 or error_alerts:
-            error_msg = error_alerts[0]["title"] if error_alerts else "non-zero exit code"
+            error_msg = (
+                error_alerts[0]["title"] if error_alerts else "non-zero exit code"
+            )
             print(f"  [AGENT] Attempt {attempt} failed: {error_msg}")
             lr *= 0.1
             print(f"  [AGENT] Reducing LR to {lr}")
             attempts.append({"run": run_name, "status": "failed", "lr": lr * 10})
         else:
             result = run_cli(
-                ["get", "metric", "--project", project, "--run", run_name, "--metric", "val/loss"]
+                [
+                    "get",
+                    "metric",
+                    "--project",
+                    project,
+                    "--run",
+                    run_name,
+                    "--metric",
+                    "val/loss",
+                ]
+            )
+            val_loss = (
+                result["values"][-1]["value"]
+                if result and result.get("values")
+                else None
             )
-            val_loss = result["values"][-1]["value"] if result and result.get("values") else None
             print(f"  [AGENT] Attempt {attempt} succeeded! val_loss={val_loss}")
-            attempts.append({"run": run_name, "status": "success", "val_loss": val_loss})
+            attempts.append(
+                {"run": run_name, "status": "success", "val_loss": val_loss}
+            )
             break
 
     print("\n[AGENT] Recovery history:")
@@ -136,7 +153,9 @@ def experiment_long_monitoring(project):
     ]
 
     print("  [AGENT] Starting long training run in background...")
-    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+    proc = subprocess.Popen(
+        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
+    )
 
     all_alerts = []
 
@@ -147,7 +166,9 @@ def experiment_long_monitoring(project):
         new_alerts = [a for a in alerts if a not in all_alerts]
         if new_alerts:
             for alert in new_alerts:
-                print(f"  [AGENT] New alert: [{alert.get('level', '?')}] {alert.get('title', '?')}")
+                print(
+                    f"  [AGENT] New alert: [{alert.get('level', '?')}] {alert.get('title', '?')}"
+                )
                 all_alerts.append(alert)
             since = datetime.now(timezone.utc).isoformat()
 
diff --git a/tests/e2e-local/test_cli_agent_commands.py b/tests/e2e-local/test_cli_agent_commands.py
@@ -70,7 +70,16 @@ def test_best(temp_dir):
         assert {"value", "step", "config", "run"} <= entry.keys()
 
     r2 = _cli(
-        ["best", "--project", PROJECT, "--metric", "accuracy", "--direction", "max", "--json"],
+        [
+            "best",
+            "--project",
+            PROJECT,
+            "--metric",
+            "accuracy",
+            "--direction",
+            "max",
+            "--json",
+        ],
         temp_dir,
     )
     assert r2.returncode == 0
@@ -80,15 +89,24 @@ def test_best(temp_dir):
 def test_best_finished_filter(temp_dir):
     _seed(temp_dir)
     r = _cli(
-        ["best", "--project", FILTER_PROJECT, "--metric", "val/loss", "--json"], temp_dir
+        ["best", "--project", FILTER_PROJECT, "--metric", "val/loss", "--json"],
+        temp_dir,
     )
     assert r.returncode == 0
     run_names = [e["run"] for e in json.loads(r.stdout)["ranking"]]
     assert "still-running" not in run_names
     assert len(run_names) == 2
 
     r2 = _cli(
-        ["best", "--project", FILTER_PROJECT, "--metric", "val/loss", "--include-all", "--json"],
+        [
+            "best",
+            "--project",
+            FILTER_PROJECT,
+            "--metric",
+            "val/loss",
+            "--include-all",
+            "--json",
+        ],
         temp_dir,
     )
     assert r2.returncode == 0
@@ -110,7 +128,16 @@ def test_compare(temp_dir):
         assert {"val/loss", "accuracy"} <= run_entry["metrics"].keys()
 
     r2 = _cli(
-        ["compare", "--project", PROJECT, "--runs", "run-lr0.01,run-lr0.1", "--metrics", "val/loss", "--json"],
+        [
+            "compare",
+            "--project",
+            PROJECT,
+            "--runs",
+            "run-lr0.01,run-lr0.1",
+            "--metrics",
+            "val/loss",
+            "--json",
+        ],
         temp_dir,
     )
     assert r2.returncode == 0
@@ -120,15 +147,24 @@ def test_compare(temp_dir):
 def test_compare_finished_filter(temp_dir):
     _seed(temp_dir)
     r = _cli(
-        ["compare", "--project", FILTER_PROJECT, "--metrics", "val/loss", "--json"], temp_dir
+        ["compare", "--project", FILTER_PROJECT, "--metrics", "val/loss", "--json"],
+        temp_dir,
     )
     assert r.returncode == 0
     run_names = [e["run"] for e in json.loads(r.stdout)["runs"]]
     assert "still-running" not in run_names
     assert len(run_names) == 2
 
     r2 = _cli(
-        ["compare", "--project", FILTER_PROJECT, "--metrics", "val/loss", "--include-all", "--json"],
+        [
+            "compare",
+            "--project",
+            FILTER_PROJECT,
+            "--metrics",
+            "val/loss",
+            "--include-all",
+            "--json",
+        ],
         temp_dir,
     )
     assert r2.returncode == 0
@@ -147,10 +183,28 @@ def test_summary(temp_dir):
     assert data["num_runs"] == 3
     assert data["total_alerts"] >= 1
     for run_entry in data["runs"]:
-        assert {"run", "status", "last_step", "num_logs", "config", "metric_value"} <= run_entry.keys()
+        assert {
+            "run",
+            "status",
+            "last_step",
+            "num_logs",
+            "config",
+            "metric_value",
+        } <= run_entry.keys()
 
 
 def test_best_error_cases(temp_dir):
     _seed(temp_dir)
-    assert _cli(["best", "--project", "nope", "--metric", "loss", "--json"], temp_dir).returncode != 0
-    assert _cli(["best", "--project", PROJECT, "--metric", "nonexistent", "--json"], temp_dir).returncode != 0
+    assert (
+        _cli(
+            ["best", "--project", "nope", "--metric", "loss", "--json"], temp_dir
+        ).returncode
+        != 0
+    )
+    assert (
+        _cli(
+            ["best", "--project", PROJECT, "--metric", "nonexistent", "--json"],
+            temp_dir,
+        ).returncode
+        != 0
+    )