Skip to content

Commit 0baeb91

Browse files
committed
Thread live-status through the spawn path
The spawn path queried `_probe_live_server_status_for_port` from up to five independent decision points (compatibility check, strong-recovery proof, post-kill incompatible-server message, recovery proof again, recovery name-only fallback). On the occupied-port path each probe costs a ~500 ms localhost HTTPClient poll loop, so a user-reported trace measured five back-to-back probes totalling ~2.6 s — dominating the dock's first-paint window. Replace the redundant probes with explicit data flow: probe once at the top of `_start_server` and thread the snapshot down through the proof and incompatible-server helpers. Each helper now takes an optional `live: Dictionary = {}` and falls back to its historical fresh probe when the caller doesn't have one (preserves behavior for the dock's UI buttons and the version-handshake mismatch path). Also fixes a latent staleness bug in the no-drift + legacy_pidfile kill path: the original code reused the pre-kill `live` for the incompatible-server diagnostic even when a kill had been attempted, so the diagnostic could describe the killed occupant instead of the current one. The unified failure arm now re-probes after every recovery failure — costs one extra probe in the rare no-targets sub- case, fixes the bug structurally. Tests: five new unit tests in test_plugin_lifecycle.gd assert that each helper threads `live` correctly and that omitting `live` still triggers the historical probe. Counter on `_ProofPlugin` (probe_calls) backs the assertions.
1 parent 5405414 commit 0baeb91

2 files changed

Lines changed: 146 additions & 29 deletions

File tree

plugin/addons/godot_ai/plugin.gd

Lines changed: 52 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -581,24 +581,24 @@ func _start_server() -> void:
581581
## kill the process currently holding the port.
582582
print("MCP | managed server v%s does not match plugin v%s, restarting"
583583
% [record_version, current_version])
584-
if not _recover_strong_port_occupant(port, 3.0):
585-
_server_started_this_session = true
586-
_set_incompatible_server(_probe_live_server_status_for_port(port), current_version, port)
587-
push_warning(_server_status_message)
588-
return
589-
## Fall through to spawn.
590-
else:
591-
if not _recover_strong_port_occupant(port, 3.0):
592-
## No recorded version drift and the live status probe did not
593-
## verify an exact/current-compatible godot-ai server. A stale
594-
## matching record alone is not enough ownership proof to kill
595-
## the current port owner, and opening the WebSocket could route
596-
## clients to an incompatible HTTP/MCP tool surface.
597-
_server_started_this_session = true
598-
_set_incompatible_server(live, current_version, port)
599-
push_warning(_server_status_message)
600-
return
601-
## Fall through to spawn.
584+
## No recorded drift case: a stale matching record alone is not
585+
## enough ownership proof to kill the current port owner, and
586+
## opening the WebSocket could route clients to an incompatible
587+
## HTTP/MCP tool surface — `_recover_strong_port_occupant` only
588+
## acts on strong proof (managed_record / pidfile_listener /
589+
## status_matches_record) so this branch is structurally safe.
590+
##
591+
## `live` is forwarded so the recovery path's proof helpers reuse
592+
## the snapshot we already paid for. The kill itself invalidates
593+
## that snapshot, so the failure-mode arm below re-probes before
594+
## handing data to `_set_incompatible_server`.
595+
if not _recover_strong_port_occupant(port, 3.0, live):
596+
_server_started_this_session = true
597+
var post_recovery_live := _probe_live_server_status_for_port(port)
598+
_set_incompatible_server(post_recovery_live, current_version, port)
599+
push_warning(_server_status_message)
600+
return
601+
## Fall through to spawn.
602602

603603
_set_resolved_ws_port(_resolve_ws_port())
604604
ws_port = _resolved_ws_port
@@ -668,7 +668,12 @@ func _set_incompatible_server(live: Dictionary, expected_version: String, port:
668668
_server_actual_version = _live_version_for_message(live)
669669
_server_dev_version_mismatch_allowed = false
670670
_server_status_message = _incompatible_server_message(live, expected_version, port, _resolved_ws_port)
671-
var proof := _evaluate_recovery_port_occupant_proof(port)
671+
## `live` is the caller's most-current snapshot — pass it through to
672+
## the recovery proof helper so it doesn't fire another probe of the
673+
## same port. The `_set_incompatible_server` contract is "use exactly
674+
## this live", so threading it down keeps the proof determination
675+
## consistent with the diagnostic message we just built.
676+
var proof := _evaluate_recovery_port_occupant_proof(port, live)
672677
var proof_name := str(proof.get("proof", ""))
673678
_can_recover_incompatible = not proof_name.is_empty()
674679
print("MCP | proof: %s" % (proof_name if _can_recover_incompatible else "(none)"))
@@ -1320,7 +1325,16 @@ func _find_managed_pid(port: int) -> int:
13201325
return _find_pid_on_port(port)
13211326

13221327

1323-
func _evaluate_strong_port_occupant_proof(port: int) -> Dictionary:
1328+
## `live` is the result of a prior `_probe_live_server_status_for_port`
1329+
## call that the caller already has on hand. When non-empty it short-
1330+
## circuits the internal probe at the bottom of this helper, so a single
1331+
## `_start_server` invocation that probes once at the top can thread the
1332+
## same snapshot through compatibility check + recovery without paying
1333+
## for a second ~500 ms localhost HTTPClient poll loop. Default `{}`
1334+
## preserves the historical behavior for callers outside the spawn flow
1335+
## (`can_recover_incompatible_server`, the dock's UI buttons), where a
1336+
## fresh probe is the right thing.
1337+
func _evaluate_strong_port_occupant_proof(port: int, live: Dictionary = {}) -> Dictionary:
13241338
var result := {"proof": "", "pids": []}
13251339
var listener_pids := _find_all_pids_on_port(port)
13261340
if listener_pids.is_empty():
@@ -1338,31 +1352,40 @@ func _evaluate_strong_port_occupant_proof(port: int) -> Dictionary:
13381352
if not legacy_targets.is_empty():
13391353
return {"proof": "pidfile_listener", "pids": legacy_targets}
13401354

1341-
var live := _probe_live_server_status_for_port(port)
1355+
var current_live: Dictionary = live if not live.is_empty() else _probe_live_server_status_for_port(port)
13421356
if (
1343-
_live_status_identifies_godot_ai(live)
1357+
_live_status_identifies_godot_ai(current_live)
13441358
and not record_version.is_empty()
1345-
and str(live.get("version", "")) == record_version
1359+
and str(current_live.get("version", "")) == record_version
13461360
):
13471361
return {"proof": "status_matches_record", "pids": listener_pids}
13481362

13491363
return result
13501364

13511365

1352-
func _evaluate_recovery_port_occupant_proof(port: int) -> Dictionary:
1353-
var proof := _evaluate_strong_port_occupant_proof(port)
1366+
## See `_evaluate_strong_port_occupant_proof` for the `live` contract.
1367+
## Threads `live` through the strong-proof delegate so neither helper
1368+
## probes when the caller already knows the port-owner status.
1369+
func _evaluate_recovery_port_occupant_proof(port: int, live: Dictionary = {}) -> Dictionary:
1370+
var proof := _evaluate_strong_port_occupant_proof(port, live)
13541371
if not str(proof.get("proof", "")).is_empty():
13551372
return proof
13561373

1357-
var live := _probe_live_server_status_for_port(port)
1358-
if _live_status_identifies_godot_ai(live):
1374+
var current_live: Dictionary = live if not live.is_empty() else _probe_live_server_status_for_port(port)
1375+
if _live_status_identifies_godot_ai(current_live):
13591376
return {"proof": "status_name", "pids": _find_all_pids_on_port(port)}
13601377

13611378
return {"proof": "", "pids": []}
13621379

13631380

1364-
func _recover_strong_port_occupant(port: int, wait_s: float) -> bool:
1365-
var proof := _evaluate_strong_port_occupant_proof(port)
1381+
## `pre_kill_live` is forwarded to `_evaluate_strong_port_occupant_proof`
1382+
## so the proof helper doesn't re-probe a port the caller already
1383+
## probed. The kill itself invalidates that snapshot — by the time this
1384+
## function returns, the caller must re-probe before consuming any
1385+
## live-status data. Default `{}` lets non-startup callers (none today)
1386+
## still use the historical fresh-probe behavior.
1387+
func _recover_strong_port_occupant(port: int, wait_s: float, pre_kill_live: Dictionary = {}) -> bool:
1388+
var proof := _evaluate_strong_port_occupant_proof(port, pre_kill_live)
13661389
var targets: Array[int] = []
13671390
targets.assign(proof.get("pids", []))
13681391
if targets.is_empty():

test_project/tests/test_plugin_lifecycle.gd

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ class _ProofPlugin extends GodotAiPlugin:
2727
var killed_targets: Array[int] = []
2828
var cleared_record_calls := 0
2929
var waited_calls := 0
30+
var probe_calls := 0
3031

3132
func _find_all_pids_on_port(_port: int) -> Array[int]:
3233
var pids: Array[int] = []
@@ -46,6 +47,7 @@ class _ProofPlugin extends GodotAiPlugin:
4647
return branded_pids.has(pid)
4748

4849
func _probe_live_server_status_for_port(_port: int) -> Dictionary:
50+
probe_calls += 1
4951
return live_status.duplicate()
5052

5153
func _is_port_in_use(_port: int) -> bool:
@@ -1134,6 +1136,98 @@ func test_parse_lsof_pids_ignores_non_numeric_lines() -> void:
11341136
assert_eq(pids[0], 32696)
11351137

11361138

1139+
## --- Live-status threading ------------------------------------------
1140+
##
1141+
## `_start_server` probes once at the top of the spawn body and threads
1142+
## that snapshot through `_recover_strong_port_occupant`,
1143+
## `_evaluate_*_port_occupant_proof`, and `_set_incompatible_server` so
1144+
## downstream decision points reuse the result instead of re-probing.
1145+
## Each probe costs a ~500 ms localhost HTTPClient poll loop, so a user-
1146+
## reported occupied-port trace measured five back-to-back probes
1147+
## dominating the dock's first-paint window.
1148+
1149+
func test_strong_proof_uses_provided_live_without_probing() -> void:
1150+
var plugin := _ProofPlugin.new()
1151+
plugin.listener_pids = [13579] as Array[int]
1152+
plugin.managed_record = {"pid": 0, "version": "2.1.0", "ws_port": 9500}
1153+
var caller_live := {"name": "godot-ai", "version": "2.1.0", "ws_port": 9500, "status_code": 200}
1154+
1155+
var proof := plugin._evaluate_strong_port_occupant_proof(TEST_PORT, caller_live)
1156+
var probe_calls := plugin.probe_calls
1157+
plugin.free()
1158+
1159+
assert_eq(str(proof.get("proof", "")), "status_matches_record")
1160+
assert_eq(probe_calls, 0, "passing live must skip the internal probe")
1161+
1162+
1163+
func test_strong_proof_probes_when_live_omitted() -> void:
1164+
var plugin := _ProofPlugin.new()
1165+
plugin.listener_pids = [13579] as Array[int]
1166+
plugin.managed_record = {"pid": 0, "version": "2.1.0", "ws_port": 9500}
1167+
plugin.live_status = {"name": "godot-ai", "version": "2.1.0", "ws_port": 9500, "status_code": 200}
1168+
1169+
var proof := plugin._evaluate_strong_port_occupant_proof(TEST_PORT)
1170+
var probe_calls := plugin.probe_calls
1171+
plugin.free()
1172+
1173+
assert_eq(str(proof.get("proof", "")), "status_matches_record")
1174+
assert_eq(probe_calls, 1, "omitting live must trigger the internal probe (preserves historical behavior)")
1175+
1176+
1177+
func test_recovery_proof_threads_live_through_strong_call() -> void:
1178+
## `_evaluate_recovery_port_occupant_proof` first delegates to
1179+
## `_evaluate_strong_port_occupant_proof` (one probe site) and on
1180+
## empty proof probes again itself (second probe site). Passing
1181+
## `live` must skip both.
1182+
var plugin := _ProofPlugin.new()
1183+
plugin.listener_pids = [13579] as Array[int]
1184+
plugin.managed_record = {"pid": 0, "version": "", "ws_port": 0}
1185+
var caller_live := {"name": "godot-ai", "version": "2.1.0", "ws_port": 9500, "status_code": 200}
1186+
1187+
var proof := plugin._evaluate_recovery_port_occupant_proof(TEST_PORT, caller_live)
1188+
var probe_calls := plugin.probe_calls
1189+
plugin.free()
1190+
1191+
assert_eq(str(proof.get("proof", "")), "status_name", "fall-through path uses status name proof")
1192+
assert_eq(probe_calls, 0, "threading live must skip both internal probes")
1193+
1194+
1195+
func test_recover_strong_port_occupant_threads_live_to_proof() -> void:
1196+
## `_recover_strong_port_occupant` uses `pre_kill_live` for the
1197+
## ownership-proof determination only. Anything after the kill must
1198+
## re-probe at the caller; the function itself never reuses
1199+
## `pre_kill_live` past `_kill_processes_and_windows_spawn_children`.
1200+
var plugin := _ProofPlugin.new()
1201+
plugin.listener_pids = [13579] as Array[int]
1202+
plugin.managed_record = {"pid": 13579, "version": "2.1.0", "ws_port": 9500}
1203+
plugin.alive_pids = [13579] as Array[int]
1204+
plugin.port_in_use_sequence = [false] as Array[bool]
1205+
var caller_live := {"name": "godot-ai", "version": "2.1.0", "ws_port": 9500, "status_code": 200}
1206+
1207+
var ok := plugin._recover_strong_port_occupant(TEST_PORT, 0.1, caller_live)
1208+
var probe_calls := plugin.probe_calls
1209+
plugin.free()
1210+
1211+
assert_true(ok, "managed-record proof should recover when the port frees")
1212+
assert_eq(probe_calls, 0, "_recover_strong_port_occupant must thread live to its proof helper")
1213+
1214+
1215+
func test_set_incompatible_server_threads_live_to_recovery_proof() -> void:
1216+
## `_set_incompatible_server` already accepts `live`; with the
1217+
## refactor it forwards that snapshot to its internal recovery-proof
1218+
## call so the proof helper doesn't re-probe.
1219+
var plugin := _ProofPlugin.new()
1220+
plugin.listener_pids = [13579] as Array[int]
1221+
plugin.managed_record = {"pid": 0, "version": "", "ws_port": 0}
1222+
var caller_live := {"name": "godot-ai", "version": "2.1.0", "ws_port": 9500, "status_code": 200}
1223+
1224+
plugin._set_incompatible_server(caller_live, "2.3.1", TEST_PORT)
1225+
var probe_calls := plugin.probe_calls
1226+
plugin.free()
1227+
1228+
assert_eq(probe_calls, 0, "_set_incompatible_server must reuse the caller's live for its proof determination")
1229+
1230+
11371231
func _seed_managed_record(pid: int, version: String) -> void:
11381232
var es := EditorInterface.get_editor_settings()
11391233
if es == null:

0 commit comments

Comments
 (0)