@@ -389,37 +389,61 @@ func (s *MCPService) handleStderr(server *MCPServer) {
389389 close (server .stderrDone )
390390}
391391
392- // monitorServer monitors the server process and restarts it if it crashes
392+ // monitorServer monitors the server process and restarts it if it crashes.
393+ // When the process exits the loop terminates and monitorDone is closed, so a
394+ // subsequent restart can safely wait on it. The restart itself runs in a
395+ // separate goroutine to avoid a self-deadlock on monitorDone.
393396func (s * MCPService ) monitorServer (server * MCPServer ) {
394- for server .monitorActive {
395- err := server .Process .Wait ()
396- if ! server .monitorActive {
397- break
398- }
397+ defer close (server .monitorDone )
399398
400- if err != nil {
401- log .Printf ("ERROR: MCP server '%s' crashed: %v" , server .Name , err )
402- } else {
403- log .Printf ("ERROR: MCP server '%s' exited unexpectedly" , server .Name )
404- }
399+ if ! server .monitorActive {
400+ return
401+ }
402+
403+ err := server .Process .Wait ()
404+ if ! server .monitorActive || s .isShuttingDown () {
405+ return
406+ }
405407
406- time .Sleep (1 * time .Second )
408+ if err != nil {
409+ log .Printf ("ERROR: MCP server '%s' crashed: %v" , server .Name , err )
410+ } else {
411+ log .Printf ("ERROR: MCP server '%s' exited unexpectedly" , server .Name )
412+ }
407413
408- log .Printf ("Restarting MCP server '%s'..." , server .Name )
409- if restartErr := s .restartServer (server ); restartErr != nil {
410- log .Printf ("ERROR: Failed to restart MCP server '%s': %v" , server .Name , restartErr )
411- } else {
412- log .Printf ("Successfully restarted MCP server '%s'" , server .Name )
413- }
414+ go s .scheduleRestart (server )
415+ }
416+
417+ // scheduleRestart is run on a fresh goroutine after monitorServer has exited
418+ // so restartServer can wait on monitorDone without deadlocking on itself.
419+ func (s * MCPService ) scheduleRestart (server * MCPServer ) {
420+ time .Sleep (1 * time .Second )
421+ if s .isShuttingDown () || ! server .monitorActive {
422+ return
414423 }
415- close (server .monitorDone )
424+ log .Printf ("Restarting MCP server '%s'..." , server .Name )
425+ if err := s .restartServer (server ); err != nil {
426+ log .Printf ("ERROR: Failed to restart MCP server '%s': %v" , server .Name , err )
427+ } else {
428+ log .Printf ("Successfully restarted MCP server '%s'" , server .Name )
429+ }
430+ }
431+
432+ func (s * MCPService ) isShuttingDown () bool {
433+ s .Mutex .RLock ()
434+ defer s .Mutex .RUnlock ()
435+ return s .shuttingDown
416436}
417437
418438// restartServer restarts a crashed MCP server
419439func (s * MCPService ) restartServer (server * MCPServer ) error {
420440 s .Mutex .Lock ()
421441 defer s .Mutex .Unlock ()
422442
443+ if s .shuttingDown {
444+ return fmt .Errorf ("service is shutting down" )
445+ }
446+
423447 server .stderrActive = false
424448 server .monitorActive = false
425449
@@ -433,8 +457,8 @@ func (s *MCPService) restartServer(server *MCPServer) error {
433457 server .Stderr .Close ()
434458 }
435459
436- <- server .stderrDone
437- <- server .monitorDone
460+ waitClosed ( server .stderrDone , 2 * time . Second )
461+ waitClosed ( server .monitorDone , 2 * time . Second )
438462
439463 parts := strings .Fields (server .Command )
440464 if len (parts ) == 0 {
@@ -496,15 +520,17 @@ func (s *MCPService) restartServer(server *MCPServer) error {
496520 return nil
497521}
498522
499- // stopServer stops an MCP server
523+ // stopServer stops an MCP server. The monitor goroutine owns Process.Wait(),
524+ // so we kill the process, close the pipes, and wait for the monitor/stderr
525+ // goroutines to drain — we do not Wait() ourselves here to avoid racing with
526+ // the monitor's own call.
500527func (s * MCPService ) stopServer (server * MCPServer ) {
501528 server .stderrActive = false
502529 server .monitorActive = false
503530
504531 if ! server .IsHTTP {
505- if server .Process != nil {
506- server .Process .Process .Kill ()
507- server .Process .Wait ()
532+ if server .Process != nil && server .Process .Process != nil {
533+ _ = server .Process .Process .Kill ()
508534 }
509535 if server .Stdin != nil {
510536 server .Stdin .Close ()
@@ -516,14 +542,28 @@ func (s *MCPService) stopServer(server *MCPServer) {
516542 server .Stderr .Close ()
517543 }
518544
519- <- server .stderrDone
520- <- server .monitorDone
545+ // Drain the monitor/stderr goroutines. Use a generous timeout so a
546+ // misbehaving server can't pin the REPL on shutdown.
547+ waitClosed (server .stderrDone , 2 * time .Second )
548+ waitClosed (server .monitorDone , 2 * time .Second )
549+ }
550+ }
551+
552+ // waitClosed blocks until ch is closed or the timeout fires.
553+ func waitClosed (ch chan struct {}, timeout time.Duration ) {
554+ if ch == nil {
555+ return
556+ }
557+ select {
558+ case <- ch :
559+ case <- time .After (timeout ):
521560 }
522561}
523562
524563// StopAllServers stops all MCP servers
525564func (s * MCPService ) StopAllServers () {
526565 s .Mutex .Lock ()
566+ s .shuttingDown = true
527567 defer s .Mutex .Unlock ()
528568
529569 for name , server := range s .Servers {
0 commit comments