Skip to content

Commit 8a01fa8

Browse files
authored
Merge pull request #1183 from Yeachan-Heo/issue-1182-too-many-mcp-instances
Fix stale detached MCP servers piling up across launches
2 parents 30e622c + ec38857 commit 8a01fa8

File tree

8 files changed

+321
-16
lines changed

8 files changed

+321
-16
lines changed

src/cli/__tests__/autoresearch.test.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,12 @@ async function initRepo(): Promise<string> {
5454
return cwd;
5555
}
5656

57+
async function installFakePs(fakeBin: string): Promise<void> {
58+
const fakePsPath = join(fakeBin, 'ps');
59+
await writeFile(fakePsPath, '#!/bin/sh\nexit 0\n', 'utf-8');
60+
execFileSync('chmod', ['+x', fakePsPath], { stdio: 'ignore' });
61+
}
62+
5763
describe('normalizeAutoresearchCodexArgs', () => {
5864
it('adds sandbox bypass by default for autoresearch workers', () => {
5965
assert.deepEqual(normalizeAutoresearchCodexArgs(['--model', 'gpt-5']), ['--model', 'gpt-5', '--dangerously-bypass-approvals-and-sandbox']);
@@ -242,6 +248,7 @@ touch -t 202603180000 "$OMX_TEST_REPO_ROOT/.omx/specs/autoresearch-test-launch/r
242248
'utf-8',
243249
);
244250
execFileSync('chmod', ['+x', fakeCodexPath], { stdio: 'ignore' });
251+
await installFakePs(fakeBin);
245252

246253
const result = runOmx(repo, ['autoresearch', '--topic', 'Investigate flaky onboarding behavior', '--evaluator', 'node scripts/eval.js', '--slug', 'test-launch'], {
247254
PATH: `${fakeBin}:${process.env.PATH || ''}`,
@@ -351,6 +358,7 @@ EOF
351358
'utf-8',
352359
);
353360
execFileSync('chmod', ['+x', fakeCodexPath], { stdio: 'ignore' });
361+
await installFakePs(fakeBin);
354362

355363
const fakeTmuxPath = join(fakeBin, 'tmux');
356364
await writeFile(
@@ -459,6 +467,7 @@ perl -0pi -e "s/HEAD_PLACEHOLDER/$head_commit/g" "$candidate_file"
459467
'utf-8',
460468
);
461469
execFileSync('chmod', ['+x', fakeCodexPath], { stdio: 'ignore' });
470+
await installFakePs(fakeBin);
462471

463472
const fakeTmuxPath = join(fakeBin, 'tmux');
464473
await writeFile(
@@ -564,6 +573,7 @@ perl -0pi -e "s/HEAD_PLACEHOLDER/$head_commit/g" "$candidate_file"
564573
'utf-8',
565574
);
566575
execFileSync('chmod', ['+x', fakeCodexPath], { stdio: 'ignore' });
576+
await installFakePs(fakeBin);
567577

568578
const fakeTmuxPath = join(fakeBin, 'tmux');
569579
await writeFile(
@@ -761,6 +771,7 @@ printf '{\\n "status": "abort",\\n "candidate_commit": null,\\n "base_commit"
761771
'utf-8',
762772
);
763773
execFileSync('chmod', ['+x', fakeCodexPath], { stdio: 'ignore' });
774+
await installFakePs(fakeBin);
764775

765776
const result = runOmx(
766777
repo,
@@ -814,6 +825,7 @@ EOF
814825
'utf-8',
815826
);
816827
execFileSync('chmod', ['+x', fakeCodexPath], { stdio: 'ignore' });
828+
await installFakePs(fakeBin);
817829

818830
const result = runOmx(
819831
repo,

src/cli/__tests__/cleanup.test.ts

Lines changed: 126 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import {
55
cleanupOmxMcpProcesses,
66
cleanupStaleTmpDirectories,
77
findCleanupCandidates,
8+
findLaunchSafeCleanupCandidates,
89
type ProcessEntry,
910
} from '../cleanup.js';
1011

@@ -31,6 +32,26 @@ const CURRENT_SESSION_PROCESSES: ProcessEntry[] = [
3132
ppid: 810,
3233
command: 'node /tmp/worktree/dist/mcp/team-server.js',
3334
},
35+
{
36+
pid: 820,
37+
ppid: 50,
38+
command: 'codex --model gpt-5',
39+
},
40+
{
41+
pid: 821,
42+
ppid: 820,
43+
command: 'node /tmp/other-session/dist/mcp/state-server.js',
44+
},
45+
{
46+
pid: 830,
47+
ppid: 50,
48+
command: 'node /repo/bin/omx.js autoresearch --topic launch',
49+
},
50+
{
51+
pid: 831,
52+
ppid: 830,
53+
command: 'node /tmp/parallel-session/dist/mcp/memory-server.js',
54+
},
3455
{
3556
pid: 900,
3657
ppid: 1,
@@ -61,9 +82,65 @@ describe('findCleanupCandidates', () => {
6182
command: 'node /tmp/worktree/dist/mcp/team-server.js',
6283
reason: 'outside-current-session',
6384
},
85+
{
86+
pid: 821,
87+
ppid: 820,
88+
command: 'node /tmp/other-session/dist/mcp/state-server.js',
89+
reason: 'outside-current-session',
90+
},
91+
{
92+
pid: 831,
93+
ppid: 830,
94+
command: 'node /tmp/parallel-session/dist/mcp/memory-server.js',
95+
reason: 'outside-current-session',
96+
},
6497
],
6598
);
6699
});
100+
101+
it('limits launch-safe cleanup to OMX MCP processes with no live Codex or OMX launch ancestor', () => {
102+
assert.deepEqual(
103+
findLaunchSafeCleanupCandidates(CURRENT_SESSION_PROCESSES, 701),
104+
[
105+
{
106+
pid: 800,
107+
ppid: 1,
108+
command: 'node /tmp/oh-my-codex/dist/mcp/memory-server.js',
109+
reason: 'ppid=1',
110+
},
111+
{
112+
pid: 810,
113+
ppid: 42,
114+
command: 'node /tmp/worktree/dist/mcp/trace-server.js',
115+
reason: 'outside-current-session',
116+
},
117+
{
118+
pid: 811,
119+
ppid: 810,
120+
command: 'node /tmp/worktree/dist/mcp/team-server.js',
121+
reason: 'outside-current-session',
122+
},
123+
],
124+
);
125+
});
126+
127+
it('keeps detached MCP candidates whose ancestor chain is live but unrelated to Codex or OMX launchers', () => {
128+
const unrelatedAncestorProcesses: ProcessEntry[] = [
129+
{ pid: 701, ppid: 700, command: 'node /repo/bin/omx.js' },
130+
{ pid: 840, ppid: 841, command: 'node /tmp/unrelated/dist/mcp/state-server.js' },
131+
{ pid: 841, ppid: 842, command: 'node worker.js' },
132+
{ pid: 842, ppid: 1, command: 'bash' },
133+
];
134+
135+
assert.deepEqual(findLaunchSafeCleanupCandidates(unrelatedAncestorProcesses, 701), [
136+
{
137+
pid: 840,
138+
ppid: 841,
139+
command: 'node /tmp/unrelated/dist/mcp/state-server.js',
140+
reason: 'outside-current-session',
141+
},
142+
]);
143+
});
67144
});
68145

69146
describe('cleanupOmxMcpProcesses', () => {
@@ -81,9 +158,9 @@ describe('cleanupOmxMcpProcesses', () => {
81158
});
82159

83160
assert.equal(result.dryRun, true);
84-
assert.equal(result.candidates.length, 3);
161+
assert.equal(result.candidates.length, 5);
85162
assert.equal(signalCount, 0);
86-
assert.match(lines.join('\n'), /Dry run: would terminate 3 orphaned OMX MCP server process/);
163+
assert.match(lines.join('\n'), /Dry run: would terminate 5 orphaned OMX MCP server process/);
87164
assert.match(lines.join('\n'), /PID 800/);
88165
assert.match(lines.join('\n'), /PID 810/);
89166
});
@@ -97,7 +174,7 @@ describe('cleanupOmxMcpProcesses', () => {
97174
const result = await cleanupOmxMcpProcesses([], {
98175
currentPid: 701,
99176
listProcesses: () => [
100-
...CURRENT_SESSION_PROCESSES.filter((processEntry) => processEntry.pid !== 811),
177+
...CURRENT_SESSION_PROCESSES.filter((processEntry) => processEntry.pid !== 811 && processEntry.pid !== 821 && processEntry.pid !== 831),
101178
],
102179
isPidAlive: (pid) => alive.has(pid),
103180
sendSignal: (pid, signal) => {
@@ -123,6 +200,52 @@ describe('cleanupOmxMcpProcesses', () => {
123200
assert.match(lines.join('\n'), /Escalating to SIGKILL for 1 process/);
124201
assert.match(lines.join('\n'), /Killed 2 orphaned OMX MCP server process\(es\) \(1 required SIGKILL\)\./);
125202
});
203+
204+
it('supports launch-safe candidate selection for automatic cleanup', async () => {
205+
const lines: string[] = [];
206+
const signals: Array<{ pid: number; signal: NodeJS.Signals }> = [];
207+
208+
const result = await cleanupOmxMcpProcesses([], {
209+
currentPid: 701,
210+
listProcesses: () => CURRENT_SESSION_PROCESSES,
211+
selectCandidates: findLaunchSafeCleanupCandidates,
212+
isPidAlive: () => false,
213+
sendSignal: (pid, signal) => {
214+
signals.push({ pid, signal });
215+
},
216+
writeLine: (line) => lines.push(line),
217+
});
218+
219+
assert.equal(result.terminatedCount, 3);
220+
assert.deepEqual(result.candidates, [
221+
{
222+
pid: 800,
223+
ppid: 1,
224+
command: 'node /tmp/oh-my-codex/dist/mcp/memory-server.js',
225+
reason: 'ppid=1',
226+
},
227+
{
228+
pid: 810,
229+
ppid: 42,
230+
command: 'node /tmp/worktree/dist/mcp/trace-server.js',
231+
reason: 'outside-current-session',
232+
},
233+
{
234+
pid: 811,
235+
ppid: 810,
236+
command: 'node /tmp/worktree/dist/mcp/team-server.js',
237+
reason: 'outside-current-session',
238+
},
239+
]);
240+
assert.deepEqual(signals, [
241+
{ pid: 800, signal: 'SIGTERM' },
242+
{ pid: 810, signal: 'SIGTERM' },
243+
{ pid: 811, signal: 'SIGTERM' },
244+
]);
245+
assert.match(lines.join('\n'), /Found 3 orphaned OMX MCP server process/);
246+
assert.doesNotMatch(lines.join('\n'), /PID 821/);
247+
assert.doesNotMatch(lines.join('\n'), /PID 831/);
248+
});
126249
});
127250

128251
describe('cleanupStaleTmpDirectories', () => {

src/cli/__tests__/exec.test.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ describe('omx exec', () => {
4242
const home = join(wd, 'home');
4343
const fakeBin = join(wd, 'bin');
4444
const fakeCodexPath = join(fakeBin, 'codex');
45+
const fakePsPath = join(fakeBin, 'ps');
4546

4647
await mkdir(join(home, '.codex'), { recursive: true });
4748
await mkdir(fakeBin, { recursive: true });
@@ -66,6 +67,8 @@ describe('omx exec', () => {
6667
].join('\n'),
6768
);
6869
await chmod(fakeCodexPath, 0o755);
70+
await writeFile(fakePsPath, '#!/bin/sh\nexit 0\n');
71+
await chmod(fakePsPath, 0o755);
6972

7073
const result = runOmx(wd, ['exec', '--model', 'gpt-5', 'say hi'], {
7174
HOME: home,
@@ -100,11 +103,14 @@ describe('omx exec', () => {
100103
const home = join(wd, 'home');
101104
const fakeBin = join(wd, 'bin');
102105
const fakeCodexPath = join(fakeBin, 'codex');
106+
const fakePsPath = join(fakeBin, 'ps');
103107

104108
await mkdir(home, { recursive: true });
105109
await mkdir(fakeBin, { recursive: true });
106110
await writeFile(fakeCodexPath, '#!/bin/sh\nprintf \'fake-codex:%s\\n\' \"$*\"\n');
107111
await chmod(fakeCodexPath, 0o755);
112+
await writeFile(fakePsPath, '#!/bin/sh\nexit 0\n');
113+
await chmod(fakePsPath, 0o755);
108114

109115
const result = runOmx(wd, ['exec', '--help'], {
110116
HOME: home,

src/cli/__tests__/index.test.ts

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ import {
3434
resolveNotifyTempContract,
3535
buildNotifyTempStartupMessages,
3636
buildNotifyFallbackWatcherEnv,
37+
cleanupLaunchOrphanedMcpProcesses,
3738
resolveBackgroundHelperLaunchMode,
3839
shouldDetachBackgroundHelper,
3940
resolveNotifyFallbackWatcherScript,
@@ -45,6 +46,7 @@ import {
4546
DEFAULT_FRONTIER_MODEL,
4647
getTeamLowComplexityModel,
4748
} from "../../config/models.js";
49+
import type { ProcessEntry } from "../cleanup.js";
4850

4951
function expectedLowComplexityModel(codexHomeOverride?: string): string {
5052
return getTeamLowComplexityModel(codexHomeOverride);
@@ -240,6 +242,82 @@ describe("resolveNotifyTempContract", () => {
240242
});
241243
});
242244

245+
describe("cleanupLaunchOrphanedMcpProcesses", () => {
246+
it("reaps only detached OMX MCP processes without a live Codex ancestor", async () => {
247+
const processes: ProcessEntry[] = [
248+
{ pid: 700, ppid: 500, command: "codex" },
249+
{ pid: 701, ppid: 700, command: "node /repo/bin/omx.js" },
250+
{
251+
pid: 710,
252+
ppid: 700,
253+
command: "node /repo/oh-my-codex/dist/mcp/state-server.js",
254+
},
255+
{
256+
pid: 800,
257+
ppid: 1,
258+
command: "node /tmp/oh-my-codex/dist/mcp/memory-server.js",
259+
},
260+
{
261+
pid: 810,
262+
ppid: 42,
263+
command: "node /tmp/oh-my-codex/dist/mcp/trace-server.js",
264+
},
265+
{
266+
pid: 820,
267+
ppid: 50,
268+
command: "codex --model gpt-5",
269+
},
270+
{
271+
pid: 821,
272+
ppid: 820,
273+
command: "node /tmp/other-session/dist/mcp/state-server.js",
274+
},
275+
{
276+
pid: 830,
277+
ppid: 50,
278+
command: "node /repo/bin/omx.js autoresearch --topic launch",
279+
},
280+
{
281+
pid: 831,
282+
ppid: 830,
283+
command: "node /tmp/parallel-session/dist/mcp/memory-server.js",
284+
},
285+
];
286+
const signals: Array<{ pid: number; signal: NodeJS.Signals }> = [];
287+
const alive = new Set([800, 810]);
288+
289+
const result = await cleanupLaunchOrphanedMcpProcesses({
290+
currentPid: 701,
291+
listProcesses: () => processes,
292+
isPidAlive: (pid) => alive.has(pid),
293+
sendSignal: (pid, signal) => {
294+
signals.push({ pid, signal });
295+
alive.delete(pid);
296+
},
297+
sleep: async () => {},
298+
now: () => 0,
299+
});
300+
301+
assert.equal(result.terminatedCount, 2);
302+
assert.equal(result.forceKilledCount, 0);
303+
assert.deepEqual(result.failedPids, []);
304+
assert.deepEqual(signals, [
305+
{ pid: 800, signal: "SIGTERM" },
306+
{ pid: 810, signal: "SIGTERM" },
307+
]);
308+
assert.equal(
309+
signals.some(({ pid }) => pid === 821),
310+
false,
311+
"launch-safe cleanup must preserve OMX MCP processes still attached to another live Codex tree",
312+
);
313+
assert.equal(
314+
signals.some(({ pid }) => pid === 831),
315+
false,
316+
"launch-safe cleanup must preserve OMX MCP processes still attached to another live OMX launch tree",
317+
);
318+
});
319+
});
320+
243321
describe("watcher script path resolution", () => {
244322
it("resolves packaged watcher entrypoints from dist/scripts", () => {
245323
assert.equal(

src/cli/__tests__/launch-fallback.test.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ describe('omx launch fallback when tmux is unavailable', () => {
4141
const home = join(wd, 'home');
4242
const fakeBin = join(wd, 'bin');
4343
const fakeCodexPath = join(fakeBin, 'codex');
44+
const fakePsPath = join(fakeBin, 'ps');
4445

4546
await mkdir(home, { recursive: true });
4647
await mkdir(fakeBin, { recursive: true });
@@ -49,6 +50,8 @@ describe('omx launch fallback when tmux is unavailable', () => {
4950
'#!/bin/sh\nprintf \'fake-codex:%s\\n\' \"$*\"\n',
5051
);
5152
await chmod(fakeCodexPath, 0o755);
53+
await writeFile(fakePsPath, '#!/bin/sh\nexit 0\n');
54+
await chmod(fakePsPath, 0o755);
5255

5356
const result = runOmx(
5457
wd,

0 commit comments

Comments
 (0)