Enrich EDITOR_NOT_READY with editor_state + actionable hint (#454)

dsarno · web-flow · commit 1d505eade199 · 2026-05-17T14:12:39.000-07:00
* Enrich EDITOR_NOT_READY with editor_state + actionable hint: Telemetry on plugin v2.5.6 surfaced an EDITOR_NOT_READY-loop pattern: 89% of EDITOR_NOT_READY errors over 24h came from two users alone, each retrying node_set_property dozens of times during play mode before eventually calling project_manage(op="stop") and seeing the write succeed immediately. The editor was correctly refusing, but the caller had no signal telling it why the editor was unwritable or how to unstick it, so the LLM looped the failing call. Mirror the F-001/PROPERTY_NOT_ON_CLASS approach (PR #436): give the AI caller an explicit, action-oriented recovery hint in the error payload. Now every EDITOR_NOT_READY response raised for a recoverable editor state carries: data: { editor_state: "playing" | "importing" | "no_scene", retryable: bool, hint: "<one-line recovery instruction naming the exact tool to call>" } Both halves of the system contribute: the Python require_writable_async gate attaches the payload for "playing" / "importing", and the GDScript scene_path.require_edited_scene helper attaches it for "no_scene". The "playing" hint names project_manage(op="stop") so an LLM can chain directly to the recovery call instead of guessing. The data-key name changed from "state" to "editor_state" so it reads unambiguously in the GodotCommandError serialized form (which LLMs often see in tool error responses). * Scope EDITOR_NOT_READY-hint claim to recoverable editor states only: Copilot review on #454 caught an overclaim in the contract test's docstring (and the parallel comment in scene_path.gd): they read as if *every* EDITOR_NOT_READY response now carries the structured data block. It doesn't — only the three recoverable editor *states* (playing / importing / no_scene) do, because those are the paths an AI caller can act on. Other EDITOR_NOT_READY callsites in handlers ("EditorFileSystem not available", "AnimationHandler not available", "No 3D viewport available", etc.)
describe internal-state failures with no caller-actionable recovery and intentionally stay unenriched. Tighten the docstring and the GDScript comment to say exactly that. No behavior change.
diff --git a/plugin/addons/godot_ai/utils/scene_path.gd b/plugin/addons/godot_ai/utils/scene_path.gd
@@ -74,7 +74,23 @@ static func resolve(scene_path: String, scene_root: Node) -> Node:
 static func require_edited_scene(expected_scene_file: String) -> Dictionary:
 	var root := EditorInterface.get_edited_scene_root()
 	if root == null:
-		return ErrorCodes.make(ErrorCodes.EDITOR_NOT_READY, "No scene open")
+		# Mirrors the structured payload that the Python-side require_writable
+		# gate attaches for `playing` / `importing`. Together these cover the
+		# three recoverable editor *states* (playing / importing / no_scene)
+		# — the EDITOR_NOT_READY paths an AI caller can act on. Other
+		# EDITOR_NOT_READY callsites describing internal-state failures
+		# ("EditorFileSystem not available" etc.) intentionally don't carry
+		# this payload because there's no useful caller hint to give.
+		var err := ErrorCodes.make(ErrorCodes.EDITOR_NOT_READY, "No scene open")
+		err["error"]["data"] = {
+			"editor_state": "no_scene",
+			"retryable": false,
+			"hint": (
+				"No scene is open. Call scene_open with a scene path "
+				+ "(e.g. \"res://main.tscn\") before issuing scene-mutating tools."
+			),
+		}
+		return err
 	if not expected_scene_file.is_empty() and root.scene_file_path != expected_scene_file:
 		var actual := root.scene_file_path if not root.scene_file_path.is_empty() else "<unsaved>"
 		return ErrorCodes.make(
diff --git a/src/godot_ai/handlers/_readiness.py b/src/godot_ai/handlers/_readiness.py
@@ -10,12 +10,32 @@
 if TYPE_CHECKING:
     from godot_ai.runtime.direct import DirectRuntime
 
-# (message, retryable). Retryable means the condition clears on its own
+# (message, retryable, hint). Retryable means the condition clears on its own
 # (Godot finishes reimporting); non-retryable requires the caller to change
-# state (stop the game).
-_READINESS_INFO: dict[str, tuple[str, bool]] = {
-    "importing": ("Editor is importing resources — try again shortly", True),
-    "playing": ("Editor is in play mode — call project_stop to stop the game, then retry", False),
+# state (stop the game). The ``hint`` is a one-line, action-oriented sentence
+# surfaced to AI callers via the error ``data`` payload — its job is to tell
+# an LLM exactly which tool call (or wait) breaks the stall, so it stops
+# looping the failing write. See PR #454 for the F-EDITOR-NOT-READY-LOOP fix
+# (telemetry showed two users alone producing 89% of EDITOR_NOT_READY
+# errors on plugin v2.5.6, all from caller-side retry loops during
+# ``playing``).
+_READINESS_INFO: dict[str, tuple[str, bool, str]] = {
+    "importing": (
+        "Editor is importing resources — try again shortly",
+        True,
+        (
+            "Editor is importing assets. Wait briefly and retry — "
+            "readiness will update via the response envelope."
+        ),
+    ),
+    "playing": (
+        'Editor is in play mode — call project_manage(op="stop") to stop the game, then retry',
+        False,
+        (
+            'Editor is playing the scene. Call project_manage(op="stop") '
+            "(or wait for the user to stop the game) before retrying writes."
+        ),
+    ),
 }
 
 # Every readiness value the plugin can emit. Derived from the blocking-state
@@ -87,9 +107,12 @@ async def require_writable_async(runtime: "DirectRuntime") -> None:
     is open.  If no session exists, this is a no-op; the downstream
     ``send_command`` will raise on its own.
 
-    The raised error carries ``data={"retryable": bool, "state": str}`` so
-    callers can distinguish a transient ``importing`` window (retry with
-    backoff) from a terminal ``playing`` state (stop the game first).
+    The raised error carries ``data={"editor_state": str, "retryable": bool,
+    "hint": str}`` so callers can distinguish a transient ``importing`` window
+    (retry with backoff) from a terminal ``playing`` state (stop the game
+    first) AND get an explicit one-line recovery instruction. The hint is
+    what stops the EDITOR_NOT_READY-loop pattern: without it, AI callers
+    just retry the failing write until the user notices.
     """
     session = runtime.get_active_session()
     if session is None:
@@ -121,9 +144,13 @@ def _enforce_blocking_state(session: "Session | None") -> None:
     ## Hoisting the import to module top would re-establish the cycle.
     from godot_ai.godot_client.client import GodotCommandError
 
-    message, retryable = info
+    message, retryable, hint = info
     raise GodotCommandError(
         code=ErrorCode.EDITOR_NOT_READY,
         message=message,
-        data={"retryable": retryable, "state": session.readiness},
+        data={
+            "editor_state": session.readiness,
+            "retryable": retryable,
+            "hint": hint,
+        },
     )
diff --git a/tests/unit/test_editor_not_ready_hint_contract.py b/tests/unit/test_editor_not_ready_hint_contract.py
@@ -0,0 +1,63 @@
+"""Source-level contract for the structured EDITOR_NOT_READY payload.
+
+Telemetry on plugin v2.5.6 showed 89% of EDITOR_NOT_READY errors came from
+two users in retry loops during ``playing`` — the LLM saw a bare error and
+kept guessing. The fix attaches a ``data`` block with
+``editor_state``/``retryable``/``hint`` to the EDITOR_NOT_READY paths that
+correspond to recoverable editor *states* — the Python ``require_writable``
+gate (``playing``/``importing``) and the GDScript ``require_edited_scene``
+helper (``no_scene``). These are the paths AI callers loop on because
+there's an obvious recovery action.
+
+Other EDITOR_NOT_READY callsites in handlers (e.g. "EditorFileSystem not
+available", "AnimationHandler not available", "No 3D viewport available")
+describe internal-state failures with no caller-actionable recovery and
+are intentionally left unenriched — adding a fabricated hint there would
+mislead more than it would help.
+
+The Python gate is covered behaviorally by ``test_readiness.py``. This
+file locks the GDScript-side ``no_scene`` branch in ``utils/scene_path.gd``
+— that path can't be exercised from the live GDScript test runner because
+the test harness always has a scene open, so we verify the source attaches
+the structured payload.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+PLUGIN_ROOT = Path(__file__).resolve().parents[2] / "plugin" / "addons" / "godot_ai"
+
+
+def test_scene_path_no_scene_error_carries_editor_state_payload() -> None:
+    """Check that the ``no_scene`` branch of scene_path.gd attaches the hint payload."""
+    source = (PLUGIN_ROOT / "utils" / "scene_path.gd").read_text(encoding="utf-8")
+    # Anchor on the branch's own message so the EDITED_SCENE_MISMATCH path
+    # further down cannot satisfy these checks by accident.
+    no_scene_marker = '"No scene open"'
+    assert no_scene_marker in source
+    start = source.index(no_scene_marker)
+    window = source[start : start + 600]  # payload must directly follow the message
+    assert '"editor_state": "no_scene"' in window, (
+        "no_scene error must carry editor_state for the AI-caller hint payload"
+    )
+    assert '"retryable": false' in window, "no_scene is terminal until scene_open is called"
+    # A hint must be present and must name the exact recovery tool (no guessing).
+    assert '"hint":' in window
+    assert "scene_open" in window
+
+
+def test_python_gate_payload_uses_editor_state_key_not_legacy_state() -> None:
+    """The gate's data block uses ``editor_state`` (not the legacy ``state``),
+    matching the GDScript-side payload. The body is located via its ``def``
+    line: splitting on the bare name could stop at an earlier mention."""
+    source = (
+        Path(__file__).resolve().parents[2] / "src" / "godot_ai" / "handlers" / "_readiness.py"
+    ).read_text(encoding="utf-8")
+    assert '"editor_state": session.readiness' in source
+    assert '"hint": hint' in source
+    _, found, tail = source.partition("def _enforce_blocking_state(")
+    assert found, "_enforce_blocking_state definition not found in _readiness.py"
+    assert '"state":' not in tail.split("\ndef ")[0].split("\nasync def ")[0], (
+        "the data block must use 'editor_state' (mirrors GDScript no_scene payload), not 'state'"
+    )
diff --git a/tests/unit/test_readiness.py b/tests/unit/test_readiness.py
@@ -202,11 +202,17 @@ async def test_require_writable_async_rejects_importing_after_probe_confirms():
     with pytest.raises(GodotCommandError) as exc_info:
         await require_writable_async(runtime)
     assert exc_info.value.code == ErrorCode.EDITOR_NOT_READY
-    assert exc_info.value.data == {"retryable": True, "state": "importing"}
+    data = exc_info.value.data
+    assert data["editor_state"] == "importing"
+    assert data["retryable"] is True
+    # The hint must be an explicit, action-oriented instruction so AI callers
+    # don't loop the failing write. See F-EDITOR-NOT-READY-LOOP fix.
+    assert "Wait" in data["hint"] or "wait" in data["hint"]
+    assert "importing" in data["hint"].lower()
     # Structured hints are also embedded in the serialized form so MCP
     # clients that only see str(exc) can still distinguish retryable cases.
     assert "retryable=True" in str(exc_info.value)
-    assert "state=importing" in str(exc_info.value)
+    assert "editor_state=importing" in str(exc_info.value)
     assert client.probe_calls == 1
 
 
@@ -216,11 +222,19 @@ async def test_require_writable_async_rejects_playing_after_probe_confirms():
         await require_writable_async(runtime)
     assert exc_info.value.code == ErrorCode.EDITOR_NOT_READY
     assert "play mode" in exc_info.value.message
-    # The message names the recovery tool so MCP clients don't have to
-    # infer "how do I unstick this" from the state string alone.
-    assert "project_stop" in exc_info.value.message
-    assert exc_info.value.data == {"retryable": False, "state": "playing"}
+    # The message names the recovery tool (the rolled-up form) so MCP
+    # clients don't have to infer "how do I unstick this" from the state
+    # string alone.
+    assert 'project_manage(op="stop")' in exc_info.value.message
+    data = exc_info.value.data
+    assert data["editor_state"] == "playing"
+    assert data["retryable"] is False
+    # Hint must name the exact recovery call — this is the F-EDITOR-NOT-READY-
+    # LOOP fix: cure the 89%-of-EDITOR_NOT_READYs-from-2-users retry pattern
+    # by telling the LLM exactly which tool stops the stall.
+    assert 'project_manage(op="stop")' in data["hint"]
     assert "retryable=False" in str(exc_info.value)
+    assert "editor_state=playing" in str(exc_info.value)
     assert client.probe_calls == 1
 
 
@@ -235,7 +249,10 @@ async def test_require_writable_async_probe_failure_falls_back_to_cached_value()
     with pytest.raises(GodotCommandError) as exc_info:
         await require_writable_async(runtime)
     assert exc_info.value.code == ErrorCode.EDITOR_NOT_READY
-    assert exc_info.value.data == {"retryable": False, "state": "playing"}
+    data = exc_info.value.data
+    assert data["editor_state"] == "playing"
+    assert data["retryable"] is False
+    assert 'project_manage(op="stop")' in data["hint"]
     assert client.probe_calls == 1