@@ -18,12 +18,20 @@ export type SSEEventType = (typeof SSE_EVENT_TYPES)[number];
/**
 * Debug/diagnostic fields this module parks on ``window``. All of them are
 * optional because nothing is written until the corresponding code path has
 * run at least once.
 */
interface SSEDebugGlobals {
    /** Set to ``true`` when the shared stream opens and back to ``false`` on error or close. */
    __galaxy_sse_connected?: boolean;
    /** Numeric timestamp of the most recent event — presumably written by the dispatch path; confirm against the writer. */
    __galaxy_sse_last_event_ts?: number;
    /** Running count of managed reconnects scheduled by this module. */
    __galaxy_sse_reconnect_attempts?: number;
}
2223
/**
 * View the global ``window`` object through the ``SSEDebugGlobals`` shape so
 * the debug fields can be read and written with type checking.
 *
 * @returns the very same ``window`` object, merely re-typed (no copy is made)
 */
function sseGlobals(): SSEDebugGlobals {
    // Widen to ``unknown`` first: ``Window`` and ``SSEDebugGlobals`` are
    // unrelated types, so a direct assertion is not attempted.
    const host: unknown = window;
    return host as SSEDebugGlobals;
}
2627
// Bounds for the jittered exponential backoff used by the managed reconnect.
// The shape mirrors the retry budget of the polling paths (see
// ``isRetryableApiError`` in ``client/src/utils/simple-error.ts``). The 30 s
// ceiling keeps the wait from drifting into minutes during sustained
// 429/5xx responses.
const RECONNECT_BASE_MS = 1_000;
const RECONNECT_CAP_MS = 30 * 1_000;
34+
2735// ---------------------------------------------------------------------------
2836// Module-level shared EventSource.
2937//
@@ -48,6 +56,15 @@ const subscribers: Map<SSEEventType, Set<Handler>> = new Map();
4856// exact same listeners (``addEventListener`` matches by reference).
const dispatchers = new Map<SSEEventType, Handler>();
5058
// State for the managed reconnect. The browser's native auto-retry stops once
// the source reports ``readyState === CLOSED`` — e.g. when a response lacks
// the ``text/event-stream`` content type (a 429 / 5xx page, an HTML error
// page from a load balancer, ...). From that point this module drives the
// reconnect itself so the client isn't stranded on the polling fallback.
//
// ``reconnectAttempts`` feeds the backoff formula and is zeroed on every
// successful ``onopen``. ``reconnectTimer`` holds the pending reopen timer,
// or ``null`` when none is armed.
let reconnectAttempts = 0;
let reconnectTimer: null | ReturnType<typeof setTimeout> = null;
67+
5168function openSourceIfNeeded ( ) {
5269 if ( sharedSource ) {
5370 return ;
@@ -78,6 +95,14 @@ function openSourceIfNeeded() {
7895 // Global readiness flag so Selenium tests can distinguish a working
7996 // SSE pipeline from the polling fallback.
8097 sseGlobals ( ) . __galaxy_sse_connected = true ;
98+ // The connection is healthy again — drop any pending managed reopen
99+ // and zero the backoff so the next failure starts at the base delay
100+ // rather than wherever the previous outage left off.
101+ reconnectAttempts = 0 ;
102+ if ( reconnectTimer !== null ) {
103+ clearTimeout ( reconnectTimer ) ;
104+ reconnectTimer = null ;
105+ }
81106 // Re-assert any viewer subscriptions the user accumulated. The server
82107 // doesn't carry app-level subscription state across reconnects (it
83108 // only knows the user from the cookie), so the client owns the source
@@ -86,12 +111,17 @@ function openSourceIfNeeded() {
86111 } ;
87112
88113 sharedSource . onerror = ( ) => {
89- // EventSource auto-reconnects natively; SSE-vs-polling is a
90- // config-level decision (see historyStore / notificationsStore), so
91- // we must not give up on transient errors here — doing so would leave
92- // the client with no updates at all.
93114 sharedConnected . value = false ;
94115 sseGlobals ( ) . __galaxy_sse_connected = false ;
116+ // The browser auto-retries while ``readyState === CONNECTING``; let
117+ // it. Once it flips to ``CLOSED`` (response missing
118+ // ``text/event-stream``, repeated network failure giving up, etc.)
119+ // the native loop is done and we own the reconnect — otherwise the
120+ // client silently drops to polling-only updates for the rest of the
121+ // session.
122+ if ( sharedSource ?. readyState === EventSource . CLOSED ) {
123+ scheduleReconnect ( ) ;
124+ }
95125 } ;
96126
97127 // Browser EventSource teardown during a full-page navigation
@@ -118,11 +148,70 @@ function closeSource() {
118148 sharedSource = null ;
119149 sharedConnected . value = false ;
120150 sseGlobals ( ) . __galaxy_sse_connected = false ;
151+ // Cancel any pending managed reopen — without this, ``pagehide``-driven
152+ // teardown could be followed by ``setTimeout`` re-opening a stream we
153+ // just deliberately closed.
154+ if ( reconnectTimer !== null ) {
155+ clearTimeout ( reconnectTimer ) ;
156+ reconnectTimer = null ;
157+ }
158+ reconnectAttempts = 0 ;
121159 if ( typeof window !== "undefined" ) {
122160 window . removeEventListener ( "pagehide" , closeSource ) ;
123161 }
124162}
125163
/**
 * Cycle only the underlying EventSource ahead of a managed reconnect.
 *
 * Detaches every registered dispatcher from the current source, empties the
 * ``dispatchers`` map, closes the source and marks the connection as down.
 * The ``subscribers`` map is deliberately left untouched so the scheduled
 * reopen ends up wired to the same handler set. Use ``closeSource`` when no
 * listener wants further events; use this when listeners remain and only the
 * socket needs replacing. Does nothing when no source is currently open.
 */
function closeSourceForReconnect() {
    const source = sharedSource;
    if (!source) {
        return;
    }
    dispatchers.forEach((dispatcher, eventType) => {
        source.removeEventListener(eventType, dispatcher);
    });
    dispatchers.clear();
    source.close();
    sharedSource = null;
    // Mirror the disconnected state into both the reactive flag and the
    // debug global on ``window``.
    sharedConnected.value = false;
    sseGlobals().__galaxy_sse_connected = false;
}
183+
/**
 * Arm one managed reconnect of the shared EventSource.
 *
 * No-op while a reconnect timer is already pending. Otherwise it computes a
 * jittered exponential delay that never exceeds ``RECONNECT_CAP_MS``, bumps
 * the attempt counters, tears down the current source (keeping subscribers)
 * and schedules a reopen. When the timer fires, the source is reopened only
 * if at least one subscriber is still registered.
 */
function scheduleReconnect() {
    if (reconnectTimer !== null) {
        // Already armed; the active timer will handle the next attempt.
        return;
    }
    // Exponential ceiling, clamped to the cap. ``2 ** n`` eventually
    // overflows to ``Infinity``; ``Math.min`` still yields the cap then.
    const ceiling = Math.min(RECONNECT_CAP_MS, RECONNECT_BASE_MS * 2 ** reconnectAttempts);
    // Jitter: pick uniformly from [ceiling / 2, ceiling). This smears
    // retries from clients hitting the same outage so a recovering server
    // isn't met with a synchronized stampede, while guaranteeing the delay
    // stays at or below the cap (a [0.5, 1.5) factor would overshoot it by
    // up to 50 %).
    const delay = Math.floor(ceiling * (0.5 + 0.5 * Math.random()));
    reconnectAttempts += 1;
    const globals = sseGlobals();
    globals.__galaxy_sse_reconnect_attempts = (globals.__galaxy_sse_reconnect_attempts ?? 0) + 1;
    closeSourceForReconnect();
    reconnectTimer = setTimeout(() => {
        reconnectTimer = null;
        // Subscribers may have all unsubscribed during the outage; if so, the
        // shared source should stay closed.
        const hasSubscribers = [...subscribers.values()].some((subs) => subs.size > 0);
        if (hasSubscribers) {
            openSourceIfNeeded();
        }
    }, delay);
}
214+
126215function addSubscriber ( onEvent : Handler , eventTypes : readonly SSEEventType [ ] ) {
127216 for ( const eventType of eventTypes ) {
128217 let subs = subscribers . get ( eventType ) ;
@@ -157,10 +246,15 @@ function removeSubscriber(onEvent: Handler, eventTypes: readonly SSEEventType[])
157246/**
158247 * Composable for subscribing to events on the shared SSE stream.
159248 *
160- * The browser's EventSource handles reconnection automatically and sends the
161- * ``Last-Event-ID`` header so the server can catch up on missed events. Only
162- * one EventSource is opened per tab regardless of how many callers invoke
163- * this composable; the composable multiplexes dispatch per event type.
249+ * Reconnection: the browser's native auto-retry handles the cheap path
250+ * (transient network blips while ``readyState === CONNECTING``); once the
251+ * source flips to ``CLOSED`` — typically a 4xx/5xx response with no
252+ * ``text/event-stream`` body, which most browsers treat as fatal — this
253+ * composable takes over with full-jitter exponential backoff capped at 30 s.
254+ * The server emits ``id:`` per event so the ``Last-Event-ID`` header on
255+ * reconnect lets the server catch up on missed events. Only one EventSource
256+ * is opened per tab regardless of how many callers invoke this composable;
257+ * the composable multiplexes dispatch per event type.
164258 *
165259 * @param onEvent - callback invoked for every matching SSE event
166260 * @param eventTypes - subset of event types to listen to (defaults to all)
@@ -291,3 +385,27 @@ export function removeHistoryViewerSubscription(historyId: string): void {
export function _resetHistoryViewerSubscriptionsForTest(): void {
    // Drop every entry currently held in ``viewerSubscriptions``.
    viewerSubscriptions.clear();
}
388+
/**
 * Test-only: return the module to a pristine state.
 *
 * Cancels any pending managed reconnect, zeroes the backoff counter, detaches
 * and closes the shared source if one is open, forgets all subscribers,
 * resets the connection refs and removes the debug fields from ``window``.
 */
export function _resetSSESharedSourceForTest(): void {
    // Reconnect bookkeeping first, so no timer can reopen the source later.
    if (reconnectTimer !== null) {
        clearTimeout(reconnectTimer);
        reconnectTimer = null;
    }
    reconnectAttempts = 0;

    const source = sharedSource;
    if (source) {
        dispatchers.forEach((dispatcher, eventType) => {
            source.removeEventListener(eventType, dispatcher);
        });
        dispatchers.clear();
        source.close();
        sharedSource = null;
    }

    subscribers.clear();
    sharedConnected.value = false;
    sseEverConnected.value = false;

    // Remove (rather than reset) the debug fields so they read as
    // ``undefined`` again, exactly as before the module first wrote them.
    const debugGlobals = sseGlobals();
    const debugKeys = [
        "__galaxy_sse_connected",
        "__galaxy_sse_last_event_ts",
        "__galaxy_sse_reconnect_attempts",
    ] as const;
    for (const key of debugKeys) {
        delete debugGlobals[key];
    }
}
0 commit comments