Skip to content

Commit 3525565

Browse files
dsarnoclaude
andcommitted
Add real-subprocess reaper test (Linux/macOS CI coverage)
The reaper had no CI coverage at all (ci-start-server never passes an owner pid, so the plugin-armed path never runs in CI). Close that — and the "armed reaper only ever ran on macOS" gap — with an integration test that spawns a real `python -m godot_ai --owner-pid` server, kills a throwaway owner process, and asserts the server self-terminates and frees its port. This exercises the genuine POSIX primitives (os.kill(pid,0) liveness + SIGTERM self-shutdown → uvicorn graceful drain) on every OS pytest runs on — crucially the ubuntu runners (Python 3.11 + 3.13), giving real Linux coverage of the armed reaper. Skipped on Windows, where the reaper is disabled. Adds GODOT_AI_REAPER_POLL_SECONDS so the test drives a sub-second reap instead of waiting the production 5s; malformed/absent falls back to the default. Passes locally on macOS (Python 3.13) in ~2s. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
1 parent cfa0846 commit 3525565

3 files changed

Lines changed: 132 additions & 2 deletions

File tree

src/godot_ai/orphan_reaper.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,24 @@
3333
logger = logging.getLogger(__name__)
3434

3535
DEFAULT_POLL_SECONDS = 5.0
36+
POLL_SECONDS_ENV = "GODOT_AI_REAPER_POLL_SECONDS"
37+
38+
39+
def poll_seconds_from_env() -> float:
40+
"""Reaper poll interval, overridable via ``GODOT_AI_REAPER_POLL_SECONDS``.
41+
42+
Defaults to :data:`DEFAULT_POLL_SECONDS`. The override exists so the
43+
subprocess integration test can drive a fast (<1s) reap instead of waiting
44+
the production 5s; a malformed value falls back to the default.
45+
"""
46+
raw = os.environ.get(POLL_SECONDS_ENV, "").strip()
47+
if not raw:
48+
return DEFAULT_POLL_SECONDS
49+
try:
50+
value = float(raw)
51+
except ValueError:
52+
return DEFAULT_POLL_SECONDS
53+
return value if value > 0 else DEFAULT_POLL_SECONDS
3654

3755

3856
def should_arm_reaper(owner_pid: int | None) -> bool:

src/godot_ai/server.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
PreserveGodotCommandErrorData,
2626
StripClientWrapperKwargs,
2727
)
28-
from godot_ai.orphan_reaper import should_arm_reaper, watch_owner
28+
from godot_ai.orphan_reaper import poll_seconds_from_env, should_arm_reaper, watch_owner
2929
from godot_ai.resources.editor import register_editor_resources
3030
from godot_ai.resources.library import register_library_resources
3131
from godot_ai.resources.nodes import register_node_resources
@@ -126,7 +126,11 @@ async def _lifespan(server: FastMCP) -> AsyncIterator[AppContext]:
126126
reaper_task: asyncio.Task | None = None
127127
if should_arm_reaper(owner_pid):
128128
reaper_task = asyncio.create_task(
129-
watch_owner(owner_pid, lambda: len(registry.list_all()))
129+
watch_owner(
130+
owner_pid,
131+
lambda: len(registry.list_all()),
132+
poll_seconds=poll_seconds_from_env(),
133+
)
130134
)
131135
logger.info("Orphan reaper armed for owner editor pid %d", owner_pid)
132136
elif owner_pid and owner_pid > 0:
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
"""End-to-end orphan-reaper test against a REAL server subprocess.
2+
3+
Unlike the unit tests (which inject fakes), this spawns an actual
4+
``python -m godot_ai`` HTTP server with ``--owner-pid`` pointed at a throwaway
5+
owner process, kills the owner, and asserts the server self-terminates. It
6+
exercises the genuine POSIX primitives — ``os.kill(pid, 0)`` liveness and the
7+
``SIGTERM``-to-self → uvicorn graceful shutdown — on whatever OS CI runs it on
8+
(notably the Linux runners, which the in-CI plugin path never arms the reaper
9+
on). Skipped on Windows, where the reaper is intentionally disabled.
10+
"""
11+
12+
from __future__ import annotations
13+
14+
import os
15+
import socket
16+
import subprocess
17+
import sys
18+
import time
19+
20+
import pytest
21+
22+
pytestmark = pytest.mark.skipif(
23+
sys.platform.startswith("win"),
24+
reason="orphan reaper is disabled on Windows (see should_arm_reaper)",
25+
)
26+
27+
28+
def _free_port() -> int:
29+
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
30+
s.bind(("127.0.0.1", 0))
31+
return s.getsockname()[1]
32+
33+
34+
def _port_open(port: int) -> bool:
35+
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
36+
s.settimeout(0.25)
37+
return s.connect_ex(("127.0.0.1", port)) == 0
38+
39+
40+
def _terminate(proc: subprocess.Popen | None) -> None:
41+
if proc is None or proc.poll() is not None:
42+
return
43+
proc.kill()
44+
try:
45+
proc.wait(timeout=5)
46+
except subprocess.TimeoutExpired:
47+
pass
48+
49+
50+
def test_server_subprocess_reaps_when_owner_dies():
51+
http_port = _free_port()
52+
ws_port = _free_port()
53+
54+
env = dict(os.environ)
55+
env["GODOT_AI_DISABLE_TELEMETRY"] = "true"
56+
env["GODOT_AI_REAPER_POLL_SECONDS"] = "0.3" # fast reap for the test
57+
58+
owner = subprocess.Popen([sys.executable, "-c", "import time; time.sleep(300)"])
59+
server = None
60+
try:
61+
server = subprocess.Popen(
62+
[
63+
sys.executable,
64+
"-m",
65+
"godot_ai",
66+
"--transport",
67+
"streamable-http",
68+
"--port",
69+
str(http_port),
70+
"--ws-port",
71+
str(ws_port),
72+
"--owner-pid",
73+
str(owner.pid),
74+
],
75+
env=env,
76+
stdout=subprocess.DEVNULL,
77+
stderr=subprocess.DEVNULL,
78+
)
79+
80+
# Wait for the HTTP listener to come up (server fully started).
81+
deadline = time.monotonic() + 20
82+
while time.monotonic() < deadline:
83+
if server.poll() is not None:
84+
pytest.fail(f"server exited early with code {server.returncode}")
85+
if _port_open(http_port):
86+
break
87+
time.sleep(0.2)
88+
else:
89+
pytest.fail("server did not start listening within 20s")
90+
91+
# Owner alive + no editor sessions → reaper must NOT fire yet.
92+
time.sleep(1.0)
93+
assert server.poll() is None, "server reaped while owner still alive"
94+
95+
# Kill the owner → reaper should see it gone, 0 sessions, and shut down.
96+
owner.kill()
97+
owner.wait(timeout=5)
98+
99+
try:
100+
rc = server.wait(timeout=15)
101+
except subprocess.TimeoutExpired:
102+
pytest.fail("server did NOT self-terminate after owner died (reaper failed)")
103+
assert rc is not None
104+
# Port released after the process exits.
105+
assert not _port_open(http_port)
106+
finally:
107+
_terminate(server)
108+
_terminate(owner)

0 commit comments

Comments
 (0)