Fix deployment to Spaces (#146)

abidlabs · web-flow · commit 8d98b3fd7c60 · 2025-08-19T14:38:52.000-04:00
* Fix deployment to Spaces

* revert changes

* revert more

* revert

* simplify

* final
diff --git a/.gitignore b/.gitignore
@@ -4,3 +4,4 @@ tests/__pycache__/
 .trackio/
 trackio.db
 *.pyc
+.venv/
diff --git a/examples/deploy-on-spaces.py b/examples/deploy-on-spaces.py
@@ -1,58 +1,87 @@
+import math
 import random
 import time
 
-from tqdm import tqdm
-
 import trackio as wandb
 
-project_id = random.randint(10000, 99999)
-
-wandb.init(
-    project=f"fake-training-{project_id}",
-    name="test-run",
-    config=dict(
-        epochs=5,
-        learning_rate=0.001,
-        batch_size=32,
-    ),
-    space_id=f"trackio-{project_id}",
-)
-
-EPOCHS = 5
-NUM_TRAIN_BATCHES = 100
-NUM_VAL_BATCHES = 20
-
-for epoch in range(EPOCHS):
-    train_loss = 0
-    train_accuracy = 0
-    val_loss = 0
-    val_accuracy = 0
-
-    for _ in tqdm(range(NUM_TRAIN_BATCHES), desc=f"Epoch {epoch + 1} - Training"):
-        loss = random.uniform(0.2, 1.0)
-        accuracy = random.uniform(0.6, 0.95)
-        train_loss += loss
-        train_accuracy += accuracy
-
-    for _ in tqdm(range(NUM_VAL_BATCHES), desc=f"Epoch {epoch + 1} - Validation"):
-        loss = random.uniform(0.2, 0.9)
-        accuracy = random.uniform(0.65, 0.98)
-        val_loss += loss
-        val_accuracy += accuracy
-
-    train_loss /= NUM_TRAIN_BATCHES
-    train_accuracy /= NUM_TRAIN_BATCHES
-    val_loss /= NUM_VAL_BATCHES
-    val_accuracy /= NUM_VAL_BATCHES
-
-    wandb.log(
-        {
-            "train_loss": train_loss,
-            "train_accuracy": train_accuracy,
-            "val_loss": val_loss,
-            "val_accuracy": val_accuracy,
-        }
+EPOCHS = 20
+PROJECT_ID = random.randint(100000, 999999)
+
+
+def generate_loss_curve(epoch, max_epochs, base_loss=2.5, min_loss=0.1):
+    """Generate a realistic loss curve that decreases over time with noise"""
+    progress = epoch / max_epochs
+    base_curve = base_loss * math.exp(-3 * progress) + min_loss
+
+    noise_scale = 0.3 * (1 - progress * 0.7)
+    noise = random.gauss(0, noise_scale)
+
+    return max(min_loss * 0.5, base_curve + noise)
+
+
+def generate_accuracy_curve(epoch, max_epochs, max_acc=0.95, min_acc=0.1):
+    """Generate a realistic accuracy curve that increases over time with noise"""
+    progress = epoch / max_epochs
+    base_curve = max_acc / (1 + math.exp(-6 * (progress - 0.5))) + min_acc
+
+    noise_scale = 0.08 * (1 - progress * 0.5)
+    noise = random.gauss(0, noise_scale)
+
+    return max(0, min(max_acc, base_curve + noise))
+
+
+for run in range(3):
+    wandb.init(
+        project=f"deploy-on-spaces-{PROJECT_ID}",
+        name=f"test-run-{run}",
+        config=dict(
+            epochs=EPOCHS,
+            learning_rate=0.001,
+            batch_size=32,
+        ),
+        space_id=f"trackio-on-spaces-{PROJECT_ID}",
     )
-    time.sleep(1)
+
+    for epoch in range(EPOCHS):
+        train_loss = generate_loss_curve(
+            epoch,
+            EPOCHS,
+            base_loss=random.uniform(2.5, 3.5),
+            min_loss=random.uniform(0.05, 0.15),
+        )
+        val_loss = generate_loss_curve(
+            epoch,
+            EPOCHS,
+            base_loss=random.uniform(2.5, 3.5),
+            min_loss=random.uniform(0.05, 0.15),
+        )
+
+        train_accuracy = generate_accuracy_curve(
+            epoch,
+            EPOCHS,
+            max_acc=random.uniform(0.7, 0.9),
+            min_acc=random.uniform(0.1, 0.3),
+        )
+        val_accuracy = generate_accuracy_curve(
+            epoch,
+            EPOCHS,
+            max_acc=random.uniform(0.7, 0.9),
+            min_acc=random.uniform(0.1, 0.3),
+        )
+
+        if epoch > 2 and random.random() < 0.3:
+            val_loss *= 1.1
+            val_accuracy *= 0.95
+
+        wandb.log(
+            {
+                "train_loss": round(train_loss, 4),
+                "train_accuracy": round(train_accuracy, 4),
+                "val_loss": round(val_loss, 4),
+                "val_accuracy": round(val_accuracy, 4),
+            }
+        )
+
+        time.sleep(0.2)
 
 wandb.finish()
diff --git a/examples/fake-training.py b/examples/fake-training.py
@@ -81,6 +81,6 @@ def generate_accuracy_curve(epoch, max_epochs, max_acc=0.95, min_acc=0.1):
             }
         )
 
-        time.sleep(0.5)
+        time.sleep(0.2)
 
-    wandb.finish()
+wandb.finish()
diff --git a/trackio/run.py b/trackio/run.py
@@ -8,6 +8,8 @@
 from trackio.typehints import LogEntry
 from trackio.utils import RESERVED_KEYS, fibo, generate_readable_name
 
+BATCH_SEND_INTERVAL = 0.5
+
 
 class Run:
     def __init__(
@@ -33,15 +35,17 @@ def __init__(
         self._client_thread.start()
 
     def _batch_sender(self):
-        """Send batched logs every 500ms."""
-        while not self._stop_flag.is_set():
-            time.sleep(0.5)
+        """Send batched logs every BATCH_SEND_INTERVAL."""
+        while not self._stop_flag.is_set() or len(self._queued_logs) > 0:
+            # If the stop flag has been set, then just quickly send all
+            # the logs and exit.
+            if not self._stop_flag.is_set():
+                time.sleep(BATCH_SEND_INTERVAL)
 
             with self._client_lock:
                 if self._queued_logs and self._client is not None:
                     logs_to_send = self._queued_logs.copy()
                     self._queued_logs.clear()
-
                     self._client.predict(
                         api_name="/bulk_log",
                         logs=logs_to_send,
@@ -54,6 +58,7 @@ def _init_client_background(self):
             for sleep_coefficient in fib:
                 try:
                     client = Client(self.url, verbose=False)
+
                     with self._client_lock:
                         self._client = client
                     break
@@ -85,16 +90,9 @@ def finish(self):
         """Cleanup when run is finished."""
         self._stop_flag.set()
 
-        with self._client_lock:
-            if self._queued_logs and self._client is not None:
-                logs_to_send = self._queued_logs.copy()
-                self._queued_logs.clear()
-                self._client.predict(
-                    api_name="/bulk_log",
-                    logs=logs_to_send,
-                    hf_token=huggingface_hub.utils.get_token(),
-                )
+        # Wait for the batch sender to finish before joining the client thread.
+        time.sleep(2 * BATCH_SEND_INTERVAL)
 
         if self._client_thread is not None:
             print(f"* Uploading logs to Trackio Space: {self.url} (please wait...)")
-            self._client_thread.join(timeout=30)
+            self._client_thread.join()
diff --git a/trackio/version.txt b/trackio/version.txt
@@ -1 +1 @@
-0.2.5
+0.2.6

Original file line number	Diff line number	Diff line change
`@@ -81,6 +81,6 @@ def generate_accuracy_curve(epoch, max_epochs, max_acc=0.95, min_acc=0.1):`
`81`	`81`	`            }`
`82`	`82`	`        )`
`83`	`83`
`84`		`-        time.sleep(0.5)`
	`84`	`+        time.sleep(0.2)`
`85`	`85`
`86`		`-    wandb.finish()`
	`86`	`+wandb.finish()`