Skip to content

Commit 84a2926

Browse files
dsarnoclaude
andauthored
Harden WebSocket transport: pending-Future leak + duplicate-ID handshake reject (#374)
* Harden WebSocket transport: pending-Future leak + duplicate-ID handshake reject Two small audit-v2 fixes in src/godot_ai/transport/websocket.py covering #343 sub-findings #2 and #5. (Sub-finding #4 — errno.EADDRINUSE portability — landed separately via PR #373.) - #5 (P2): wrap `send_command`'s `ws.send` + `wait_for` in a try/finally that always pops `_pending`. Pre-fix, a `ws.send` raise (ConnectionClosed mid-send, transport error) leaked the Future entry forever; under churn the dict grew unbounded. Happy path still pops via the response receiver, so the finally pop is a no-op there. - #2 (P1): reject a second handshake whose session_id is already registered (close code 4001), instead of silently overwriting the routing map. session_id format is `<slug>@<4hex>` — 16 bits of suffix is locally guessable, so without this any local peer could hijack an active session by impersonating its ID. Legitimate plugin reconnect after `editor_reload_plugin` first triggers ConnectionClosed -> unregister, so the new connect still lands cleanly. Tests: - tests/integration/test_websocket.py — TestDuplicateHandshake pins the reject + reconnect-after-clean-disconnect paths; TestPendingFutureCleanup pins the timeout-pop and send-failure-pop behaviors. Live-smoked against a real Godot 4.6 editor: full GDScript suite 1037/1040 passed, duplicate-handshake rejected with close 4001, original session unaffected. https://claude.ai/code/session_016ijmCD5S6QfwJGJcc5Wirp * Apply pending ruff format to test_mcp_tools and test_gdscript_no_adjacent_string_concat Drive-by: `ruff format --check` was failing on these two test files on beta. Reformatting them keeps the format-check CI step green for the audit-v2 transport-hardening PR. No behavioral change — purely whitespace / line-wrapping deltas produced by `ruff format`. https://claude.ai/code/session_016ijmCD5S6QfwJGJcc5Wirp --------- Co-authored-by: Claude <noreply@anthropic.com>
1 parent 3295ded commit 84a2926

4 files changed

Lines changed: 153 additions & 11 deletions

File tree

src/godot_ai/transport/websocket.py

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,11 @@
2020

2121
DEFAULT_PORT = 9500
2222

23+
## RFC 6455 reserves 4000-4999 for application-defined close codes; we use
24+
## 4001 to flag a handshake rejected for duplicate session_id so a debugging
25+
## peer can distinguish it from a normal close.
26+
_CLOSE_CODE_DUPLICATE_SESSION = 4001
27+
2328

2429
class GodotWebSocketServer:
2530
"""Accepts connections from Godot editor plugins and routes commands."""
@@ -62,6 +67,23 @@ async def _handle_connection(self, ws: ServerConnection):
6267
data = json.loads(raw)
6368
handshake = HandshakeMessage.model_validate(data)
6469

70+
## Reject duplicate session_id while the first peer is live —
71+
## otherwise the second handshake silently overwrites the
72+
## routing map (duplicate-ID hijack).
73+
existing = self.registry.get(handshake.session_id)
74+
if existing is not None:
75+
logger.warning(
76+
"Rejecting duplicate handshake for session %s (existing pid=%s, project=%s)",
77+
handshake.session_id,
78+
existing.editor_pid,
79+
existing.project_path,
80+
)
81+
await ws.close(
82+
code=_CLOSE_CODE_DUPLICATE_SESSION,
83+
reason="session id already registered",
84+
)
85+
return
86+
6587
session_id = handshake.session_id
6688
session = Session(
6789
session_id=handshake.session_id,
@@ -159,12 +181,16 @@ async def send_command(
159181
future: asyncio.Future[CommandResponse] = asyncio.get_running_loop().create_future()
160182
self._pending[request.request_id] = future
161183

162-
await ws.send(request.model_dump_json())
163-
184+
## Always pop on exit — the response receiver in _handle_connection
185+
## pops on the happy path, so this is a no-op there; on `ws.send`
186+
## raise / TimeoutError / cancellation it prevents Futures leaking
187+
## into _pending forever.
164188
try:
189+
await ws.send(request.model_dump_json())
165190
return await asyncio.wait_for(future, timeout=timeout)
166191
except asyncio.TimeoutError:
167-
self._pending.pop(request.request_id, None)
168192
raise TimeoutError(
169193
f"Command {command} timed out after {timeout}s on session {session_id}"
170194
)
195+
finally:
196+
self._pending.pop(request.request_id, None)

tests/integration/test_mcp_tools.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2067,9 +2067,7 @@ async def respond():
20672067
assert result.data["shape_created"] is True
20682068
assert result.data["size"]["x"] == 2.0
20692069

2070-
async def test_ambiguous_visual_candidates_preserved_in_structured_error(
2071-
self, mcp_stack
2072-
):
2070+
async def test_ambiguous_visual_candidates_preserved_in_structured_error(self, mcp_stack):
20732071
client, plugin = mcp_stack
20742072
candidates = ["/Main/VisualA", "/Main/VisualB"]
20752073

@@ -3328,6 +3326,7 @@ async def test_concurrent_read_and_write_route_to_target_sessions(self, mcp_stac
33283326
client, plugin_a = mcp_stack
33293327
plugin_b = await self._connect_second_plugin("proj-b@0002")
33303328
try:
3329+
33313330
async def respond_a():
33323331
cmd = await plugin_a.recv_command()
33333332
assert cmd["command"] == "get_open_scenes"

tests/integration/test_websocket.py

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -561,6 +561,127 @@ async def test_rejected_request_does_not_register_session(self, harness):
561561
assert len(harness.registry) == before
562562

563563

564+
# ---------------------------------------------------------------------------
565+
# Duplicate-ID handshake hardening (#343 finding #2)
566+
# ---------------------------------------------------------------------------
567+
568+
569+
class TestDuplicateHandshake:
570+
async def test_duplicate_session_id_handshake_is_rejected(self, harness):
571+
## Without rejection, a second handshake with the same session_id
572+
## silently overwrites both `_connections[session_id]` and the
573+
## registry entry — routing every subsequent command to the
574+
## attacker. session_id is `<slug>@<4hex>` so 16 bits of suffix is
575+
## locally guessable. Reject keeps the first peer authoritative.
576+
first = await harness.connect_plugin(session_id="dup-target")
577+
original_session = harness.registry.get("dup-target")
578+
assert original_session is not None
579+
original_pid = original_session.editor_pid
580+
581+
## Hand-roll the second handshake so we observe the close on the
582+
## wire — `connect_plugin()` would assert on the missing ack.
583+
ws2 = await websockets.connect(f"ws://127.0.0.1:{harness.port}")
584+
await ws2.send(
585+
json.dumps(
586+
{
587+
"type": "handshake",
588+
"session_id": "dup-target",
589+
"godot_version": "4.4.1",
590+
"project_path": "/tmp/attacker",
591+
"plugin_version": "0.0.1",
592+
"protocol_version": 1,
593+
"readiness": "ready",
594+
"editor_pid": 9999,
595+
}
596+
)
597+
)
598+
599+
## Server should close us before sending an ack. Drain until the WS
600+
## reports closed; recv() will raise ConnectionClosed.
601+
with pytest.raises(websockets.ConnectionClosed):
602+
await asyncio.wait_for(ws2.recv(), timeout=2.0)
603+
604+
## Original session must still be live and unaffected.
605+
live = harness.registry.get("dup-target")
606+
assert live is original_session, "registry entry was overwritten by duplicate"
607+
assert live.editor_pid == original_pid
608+
assert live.project_path != "/tmp/attacker"
609+
610+
## Round-trip a command through the original to prove its WS is
611+
## still wired to the routing map (regression: silent overwrite
612+
## also hijacks `_connections[session_id]`).
613+
client = GodotClient(harness.server, harness.registry)
614+
615+
async def mock_handler():
616+
cmd = await first.recv_command()
617+
await first.send_response(cmd["request_id"], {"alive": True})
618+
619+
handler = asyncio.create_task(mock_handler())
620+
result = await client.send("ping", session_id="dup-target")
621+
await handler
622+
assert result == {"alive": True}
623+
624+
await first.close()
625+
626+
async def test_reconnect_after_clean_disconnect_succeeds(self, harness):
627+
## The reject must not break the legitimate plugin reconnect path
628+
## (e.g. after `editor_reload_plugin`): close → unregister →
629+
## fresh connect with the same session_id should succeed because
630+
## the registry entry has already been removed.
631+
first = await harness.connect_plugin(session_id="reconnect-1")
632+
await first.close()
633+
await asyncio.sleep(0.1) # let server process disconnect
634+
assert harness.registry.get("reconnect-1") is None
635+
636+
second = await harness.connect_plugin(session_id="reconnect-1")
637+
assert harness.registry.get("reconnect-1") is not None
638+
await second.close()
639+
640+
641+
# ---------------------------------------------------------------------------
642+
# send_command pending-Future leak (#343 finding #5)
643+
# ---------------------------------------------------------------------------
644+
645+
646+
class TestPendingFutureCleanup:
647+
async def test_timeout_pops_pending_entry(self, harness):
648+
## TimeoutError path always cleared the pending dict; this test
649+
## pins that behavior so a future refactor doesn't regress it.
650+
plugin = await harness.connect_plugin(session_id="leak-timeout")
651+
client = GodotClient(harness.server, harness.registry)
652+
653+
with pytest.raises(TimeoutError):
654+
await client.send("never_responded", timeout=0.1)
655+
656+
assert harness.server._pending == {}, "TimeoutError should not leave entries in _pending"
657+
await plugin.close()
658+
659+
async def test_send_failure_pops_pending_entry(self, harness):
660+
## If `ws.send` raises (e.g. ConnectionClosed mid-send), the
661+
## pending Future was previously leaked into `_pending` forever.
662+
## Force the failure by replacing the connection's send with one
663+
## that raises after the pending entry has been registered.
664+
plugin = await harness.connect_plugin(session_id="leak-send")
665+
666+
ws = harness.server._connections["leak-send"]
667+
boom = ConnectionError("simulated mid-send transport error")
668+
669+
async def raising_send(_payload: str) -> None:
670+
raise boom
671+
672+
ws.send = raising_send # type: ignore[assignment]
673+
674+
with pytest.raises(ConnectionError):
675+
await harness.server.send_command(
676+
session_id="leak-send",
677+
command="will_fail",
678+
timeout=1.0,
679+
)
680+
681+
assert harness.server._pending == {}, "send-time exception must not leak _pending entries"
682+
await plugin.close()
683+
684+
564685
# --- Issue #262: editor_state self-heals a stale "playing" cache ---
565686

566687

tests/unit/test_gdscript_no_adjacent_string_concat.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -151,11 +151,7 @@ def test_no_python_style_adjacent_string_concat_in_gdscript() -> None:
151151

152152
def test_detector_flags_canonical_multiline_bug_pattern() -> None:
153153
"""The exact shape from PR #236 must be detected."""
154-
src = (
155-
"assert_false(cond,\n"
156-
' "first half - "\n'
157-
' "second half")\n'
158-
)
154+
src = 'assert_false(cond,\n "first half - "\n "second half")\n'
159155
hits = _find_adjacent_string_pairs(src)
160156
assert len(hits) == 1, f"expected 1 hit, got {hits}"
161157
(prev_line, _), (cur_line, _) = hits[0]

0 commit comments

Comments
 (0)