Skip to content

Commit 0a24638

Browse files
committed
fix(observability): cockpit was silent — schema rejected real events
User report: "uptime ticks but nothing happens, doesn't even listen to the MCP server." Two compounding bugs: 1. **Permissive schema for unknown types.** docs/event-schema.json's genericVariant pinned `type` to a 28-element enum drawn from the Rust event.rs allowlist. The actual orchestrator + plugins emit names that AREN'T in that list — `lifecycle.anchor`, `mcp.tool.call.requested`, `crow.trust.scored`, `pech.ledger.appended`, `hydra.veto.fired`, etc. Schema validator dropped every line silently. Fix: type is `{ type: "string" }` with no enum constraint; well-typed variants in oneOf still strict. 2. **Rust parse_line falls back on unknown discriminator.** event.rs was a strict serde-tagged enum — any `type` value not in the variant list returned a parse error. Added Event::Unknown(GenericPayload) marked #[serde(skip)] so serde never tries to dispatch it directly, and parse_line tries the strict variant first, falls through to GenericPayload-wrapped Unknown on failure. The cockpit now surfaces every event in the events table, regardless of whether we have a typed variant for it. Verified: 7728 bytes of real events flow through stdout from a 30s live.ts run, where before it was 0 bytes.
1 parent a400e2e commit 0a24638

3 files changed

Lines changed: 38 additions & 38 deletions

File tree

docs/event-schema.json

Lines changed: 2 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -25,45 +25,11 @@
2525
"minimum": 0
2626
},
2727
"genericVariant": {
28-
"description": "Loose-shape variants flowing through Rust GenericPayload. Required: type, time. Optional common fields are recognized; unknown fields round-trip via additionalProperties:true.",
28+
"description": "Permissive fallback for any event whose type isn't well-typed in the oneOf below. Required: type (any string), time. Both producer (TS) and consumer (Rust) accept any type string here so wire-format extensions don't require a schema bump in lockstep. The Rust GenericPayload mirrors this — unknown discriminators round-trip via the flattened extras map.",
2929
"type": "object",
3030
"required": ["type", "time"],
3131
"properties": {
32-
"type": {
33-
"type": "string",
34-
"enum": [
35-
"session.started",
36-
"session.opened",
37-
"session.closed",
38-
"session.ended",
39-
"phase.entered",
40-
"phase.completed",
41-
"plugin.loaded",
42-
"plugin.health",
43-
"tool.result",
44-
"tool.error",
45-
"sylph.veto",
46-
"crow.trust",
47-
"djinn.anchor",
48-
"djinn.drift",
49-
"gorgon.hotspot",
50-
"naga.spec_check",
51-
"lich.review",
52-
"emu.context_update",
53-
"task.created",
54-
"task.started",
55-
"task.blocked",
56-
"task.completed",
57-
"task.failed",
58-
"code.generated",
59-
"file.created",
60-
"file.modified",
61-
"test.run",
62-
"test.passed",
63-
"test.failed",
64-
"pr.created"
65-
]
66-
},
32+
"type": { "type": "string" },
6733
"time": { "$ref": "#/definitions/time" },
6834
"session_id": { "type": "string" },
6935
"task_id": { "type": "string" },

inspector/src/event.rs

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,15 @@ pub enum Event {
267267
#[serde(default, skip_serializing_if = "Option::is_none")]
268268
payload: Option<serde_json::Value>,
269269
},
270+
/// Catch-all for events whose `type` discriminator isn't in the explicit
271+
/// list above. The producer (TS bridge) emits many `lifecycle.*`,
272+
/// `mcp.tool.*`, `*.appended`, `*.fired`, `*.scored` event names that the
273+
/// Rust enum hasn't enumerated. Without this, `parse_line` would error and
274+
/// the transport's malformed-line counter would tick on every emit.
275+
/// Populated by `parse_line` via fallback parse to `GenericPayload` —
276+
/// never by direct serde dispatch (hence `#[serde(skip)]`).
277+
#[serde(skip)]
278+
Unknown(GenericPayload),
270279
}
271280

272281
impl Event {
@@ -281,6 +290,8 @@ impl Event {
281290
| Event::CodeModified { time, .. }
282291
| Event::RequestApproval { time, .. } => *time,
283292

293+
Event::Unknown(p) => p.time,
294+
284295
Event::SessionStarted(p)
285296
| Event::SessionOpened(p)
286297
| Event::SessionClosed(p)
@@ -354,6 +365,7 @@ impl Event {
354365
Event::TestFailed(_) => "test.failed",
355366
Event::PrCreated(_) => "pr.created",
356367
Event::RequestApproval { .. } => "request.approval",
368+
Event::Unknown(_) => "unknown",
357369
}
358370
}
359371

@@ -400,6 +412,8 @@ impl Event {
400412
| Event::TestPassed(p)
401413
| Event::TestFailed(p)
402414
| Event::PrCreated(p) => p.plugin.as_deref(),
415+
416+
Event::Unknown(p) => p.plugin.as_deref(),
403417
}
404418
}
405419

@@ -446,6 +460,8 @@ impl Event {
446460
| Event::TestPassed(p)
447461
| Event::TestFailed(p)
448462
| Event::PrCreated(p) => p.session_id.as_deref(),
463+
464+
Event::Unknown(p) => p.session_id.as_deref(),
449465
}
450466
}
451467

@@ -492,6 +508,8 @@ impl Event {
492508
| Event::TestPassed(p)
493509
| Event::TestFailed(p)
494510
| Event::PrCreated(p) => p.task_id.as_deref(),
511+
512+
Event::Unknown(p) => p.task_id.as_deref(),
495513
}
496514
}
497515

@@ -537,15 +555,29 @@ impl Event {
537555
| Event::TestPassed(p)
538556
| Event::TestFailed(p)
539557
| Event::PrCreated(p) => p.severity,
558+
559+
Event::Unknown(p) => p.severity,
540560
}
541561
}
542562
}
543563

544564
/// Tolerant single-line parser. Bubbles up errors (including unknown
545565
/// discriminants) so the caller can log and skip, never panic.
546566
pub fn parse_line(line: &str) -> anyhow::Result<Event> {
547-
let event = serde_json::from_str::<Event>(line)?;
548-
Ok(event)
567+
// Try the strict tagged-enum parse first.
568+
match serde_json::from_str::<Event>(line) {
569+
Ok(event) => Ok(event),
570+
Err(strict_err) => {
571+
// Fall back to GenericPayload for events whose `type` discriminator
572+
// isn't in our explicit variant list. The producer emits many event
573+
// names (`lifecycle.*`, `mcp.tool.*`, `*.appended`, `*.fired`) that
574+
// we don't model individually but still want to surface in the UI.
575+
match serde_json::from_str::<GenericPayload>(line) {
576+
Ok(payload) => Ok(Event::Unknown(payload)),
577+
Err(_) => Err(anyhow::Error::from(strict_err).context("event parse failed")),
578+
}
579+
}
580+
}
549581
}
550582

551583
#[cfg(test)]

inspector/src/views/overview.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -929,6 +929,8 @@ fn event_phase(ev: &Event) -> Option<String> {
929929
| Event::TestFailed(p)
930930
| Event::PrCreated(p) => p.phase.clone(),
931931

932+
Event::Unknown(p) => p.phase.clone(),
933+
932934
Event::RuntimeMetrics { .. } | Event::CodeModified { .. } => None,
933935
}
934936
}

0 commit comments

Comments
 (0)