gradio-app · abidlabs · Apr 22, 2026 · Apr 18, 2026 · Apr 18, 2026 · Apr 18, 2026
diff --git a/.changeset/easy-apes-hammer.md b/.changeset/easy-apes-hammer.md
@@ -0,0 +1,5 @@
+---
+"trackio": minor
+---
+
+feat:Traces in Trackio
diff --git a/examples/traces/basic-trace.py b/examples/traces/basic-trace.py
@@ -0,0 +1,40 @@
+import random
+
+import trackio
+
+PROJECT_ID = random.randint(100000, 999999)
+PROJECT_NAME = f"trace-demo-basic-{PROJECT_ID}"
+
+examples = [
+    ("What is 2 + 2?", "2 + 2 = 4."),
+    ("What is the capital of Australia?", "The capital of Australia is Canberra."),
+    (
+        "Give me a one-sentence summary of Trackio.",
+        "Trackio is a lightweight experiment tracking dashboard for ML and agent workflows.",
+    ),
+    ("Translate 'hello' to Spanish.", "Hola."),
+]
+
+for run_idx in range(2):
+    trackio.init(project=PROJECT_NAME, name=f"basic-run-{run_idx}")
+
+    for step, (prompt, completion) in enumerate(examples):
+        trackio.log(
+            {
+                "trace": trackio.Trace(
+                    messages=[
+                        {"role": "system", "content": "You are a concise assistant."},
+                        {"role": "user", "content": prompt},
+                        {"role": "assistant", "content": completion},
+                    ],
+                    metadata={
+                        "label": f"basic-demo-{run_idx + 1}",
+                        "category": "basic-example",
+                        "index": step,
+                    },
+                )
+            },
+            step=step,
+        )
+
+    trackio.finish()
diff --git a/examples/traces/complex-trace.py b/examples/traces/complex-trace.py
@@ -0,0 +1,74 @@
+import random
+
+import numpy as np
+
+import trackio
+
+PROJECT_ID = random.randint(100000, 999999)
+PROJECT_NAME = f"trace-demo-complex-{PROJECT_ID}"
+
+
+def make_screenshot(seed: int):
+    rng = np.random.default_rng(seed)
+    return rng.integers(0, 255, size=(240, 320, 3), dtype=np.uint8)
+
+
+for run_idx in range(2):
+    trackio.init(project=PROJECT_NAME, name=f"complex-run-{run_idx}")
+
+    for step in range(4):
+        screenshot = make_screenshot(run_idx * 10 + step)
+        trackio.log(
+            {
+                "agent_trace": trackio.Trace(
+                    messages=[
+                        {"role": "system", "content": "You are a browser agent."},
+                        {
+                            "role": "user",
+                            "content": [
+                                {
+                                    "type": "text",
+                                    "text": f"Inspect page variant {step} and summarize it.",
+                                },
+                                trackio.Image(
+                                    screenshot,
+                                    caption=f"browser screenshot run={run_idx} step={step}",
+                                ),
+                            ],
+                        },
+                        {
+                            "role": "assistant",
+                            "content": "I will inspect the page and call a tool if needed.",
+                            "tool_calls": [
+                                {
+                                    "id": f"call_{run_idx}_{step}",
+                                    "type": "function",
+                                    "function": {
+                                        "name": "extract_title",
+                                        "arguments": '{"selector": "title"}',
+                                    },
+                                }
+                            ],
+                        },
+                        {
+                            "role": "tool",
+                            "content": f'{{"title": "Trackio Demo {run_idx}-{step}"}}',
+                            "tool_call_id": f"call_{run_idx}_{step}",
+                        },
+                        {
+                            "role": "assistant",
+                            "content": f"The page variant {step} appears to be a Trackio demo with a visible screenshot and an extracted title.",
+                        },
+                    ],
+                    metadata={
+                        "label": f"complex-demo-{run_idx}",
+                        "environment": "browser",
+                        "category": "complex-example",
+                        "variant": step,
+                    },
+                )
+            },
+            step=step,
+        )
+
+    trackio.finish()
diff --git a/examples/traces/trl-trace-integration.py b/examples/traces/trl-trace-integration.py
@@ -0,0 +1,162 @@
+# /// script
+# dependencies = [
+#   "trackio",
+#   "trl",
+#   "datasets",
+#   "transformers",
+#   "torch",
+# ]
+# ///
+
+import random
+
+import torch
+from datasets import Dataset
+from transformers import AutoModelForCausalLM, AutoTokenizer, TrainerCallback
+from trl import SFTConfig, SFTTrainer
+
+import trackio
+
+PROJECT_ID = random.randint(100000, 999999)
+PROJECT_NAME = f"trace-demo-trl-{PROJECT_ID}"
+MODEL_NAME = "sshleifer/tiny-gpt2"
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+if tokenizer.pad_token is None:
+    tokenizer.pad_token = tokenizer.eos_token
+
+examples = [
+    {"prompt": "What is 2 + 2?", "reference_completion": "2 + 2 = 4."},
+    {
+        "prompt": "What color is the sky on a clear day?",
+        "reference_completion": "The sky is typically blue on a clear day.",
+    },
+    {
+        "prompt": "Translate 'good morning' to French.",
+        "reference_completion": "Bonjour.",
+    },
+    {
+        "prompt": "Name the capital of Japan.",
+        "reference_completion": "Tokyo is the capital of Japan.",
+    },
+    {
+        "prompt": "Give one use of Trackio.",
+        "reference_completion": "Trackio can be used to inspect training logs and traces.",
+    },
+]
+
+
+def format_example(example):
+    return {
+        "text": (
+            "### Instruction:\n"
+            f"{example['prompt']}\n\n"
+            "### Response:\n"
+            f"{example['reference_completion']}"
+        )
+    }
+
+
+dataset = Dataset.from_list([format_example(example) for example in examples * 2])
+
+
+class TraceLoggingCallback(TrainerCallback):
+    def __init__(self, prompt_examples, run_label, tokenizer):
+        self.prompt_examples = prompt_examples
+        self.run_label = run_label
+        self.tokenizer = tokenizer
+
+    def _generate_completion(self, model, prompt):
+        encoded = self.tokenizer(
+            prompt,
+            return_tensors="pt",
+            truncation=True,
+            max_length=64,
+        )
+        encoded = {key: value.to(model.device) for key, value in encoded.items()}
+
+        was_training = model.training
+        model.eval()
+        with torch.no_grad():
+            generated = model.generate(
+                **encoded,
+                max_new_tokens=24,
+                do_sample=False,
+                pad_token_id=self.tokenizer.pad_token_id,
+                eos_token_id=self.tokenizer.eos_token_id,
+            )
+        if was_training:
+            model.train()
+
+        prompt_length = encoded["input_ids"].shape[1]
+        completion_ids = generated[0][prompt_length:]
+        completion = self.tokenizer.decode(completion_ids, skip_special_tokens=True)
+        completion = completion.strip()
+        return completion or "(empty generation)"
+
+    def on_log(self, args, state, control, logs=None, **kwargs):
+        if not logs or state.global_step <= 0:
+            return
+
+        model = kwargs.get("model")
+        if model is None:
+            return
+
+        sample = self.prompt_examples[
+            (state.global_step - 1) % len(self.prompt_examples)
+        ]
+        trackio.log(
+            {
+                "trace": trackio.Trace(
+                    messages=[
+                        {
+                            "role": "system",
+                            "content": "You are a supervised fine-tuning demo model.",
+                        },
+                        {"role": "user", "content": sample["prompt"]},
+                        {
+                            "role": "assistant",
+                            "content": self._generate_completion(
+                                model, sample["prompt"]
+                            ),
+                        },
+                    ],
+                    metadata={
+                        "label": self.run_label,
+                        "trainer": "trl-sft",
+                        "loss": float(logs.get("loss", 0.0)),
+                        "global_step": int(state.global_step),
+                        "reference_completion": sample["reference_completion"],
+                    },
+                )
+            },
+            step=int(state.global_step),
+        )
+
+
+for run_idx in range(2):
+    run_name = f"trl-run-{run_idx}"
+    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
+
+    trainer = SFTTrainer(
+        model=model,
+        args=SFTConfig(
+            output_dir=f"./trl_trace_output_{PROJECT_ID}_{run_idx}",
+            per_device_train_batch_size=2,
+            max_steps=5,
+            logging_steps=1,
+            save_strategy="no",
+            report_to="trackio",
+            project=PROJECT_NAME,
+            run_name=run_name,
+            trackio_space_id=None,
+            learning_rate=5e-5,
+            dataset_text_field="text",
+            max_length=64,
+        ),
+        train_dataset=dataset,
+        processing_class=tokenizer,
+        callbacks=[TraceLoggingCallback(examples, run_name, tokenizer)],
+    )
+
+    trainer.train()
diff --git a/tests/README.md b/tests/README.md
@@ -2,13 +2,10 @@
 
 This directory contains Python unit tests which can be run by running `pytest test` in the root directory.
 
-This directory consists of 4 kinds of tests, all run with `pytest`:
+This directory consists of 3 kinds of tests, all run with `pytest`:
 
-1. Python unit tests in the `unit` subdirectory: each of the `test_` files in this folder contain unit tests for the corresponding module (e.g. `test_run.py` contains unit tests for `run.py`)
+1. Python tests in the `unit` subdirectory: most are classic unit tests for the corresponding module (e.g. `test_run.py` contains tests for `run.py`), and a few are lightweight local integration tests that still live under `unit`
 
 2. UI tests run via Playright in the `ui` folder: each of the files in this folder contain UI tests that involve launching Trackio in the browser and confirming that the UI elements are present and interact as expected.
 
-3. End-to-end local tests in the `e2e-local` subdirectory: which are also local tests, but test behaviors that include the end-to-end user workflow: `User API  → Gradio UI → SQLite Storage`
-
-4. Finally directory also includes the `e2e-spaces` subdirectory, which deploy Trackio onto Spaces and then confirm that that the data is logged as expected. 
-
+3. Finally directory also includes the `e2e-spaces` subdirectory, which deploy Trackio onto Spaces and then confirm that that the data is logged as expected.
diff --git a/tests/e2e-local/logs.csv b/tests/e2e-local/logs.csv
diff --git a/tests/e2e-local/test_bulk_logging.py b/tests/e2e-local/test_bulk_logging.py