Skip to content

Commit ff2623e

Browse files
dsarnoclaude
andcommitted
game_eval: per-request eval tracking + token-correlated runtime errors (#490 review)
Addresses two regressions in the runtime-error detection from review: - False-fail on unrelated errors (P2a): the detector used a process-wide script-error counter, so any background GDScript error during an in-flight (awaiting) eval would fail that eval with someone else's error text. Now each eval's wrapper has a uniquely named inner function (`_mcp_run_<n>`), the game logger records each script error's backtrace function names, and an error is only attributed to an eval when that eval's function appears in the error's stack (past the eval's pre-run baseline). Unrelated errors lack the token and are ignored. - Overlapping evals (P2b): the single `_eval_request_id`/`_eval_node` slot was clobbered when a second deferred game_eval arrived while the first awaited, causing misattribution or lost detection. Replaced with a per-request `_inflight_evals` dict, preserving #488's concurrent-eval support; the token match keeps siblings from cross-attributing. Also drops the per-frame `_process` runtime-error hook (the editor probe and #488's in-flight poll loop already cover detection focused + backgrounded), so nothing extra runs on the game's main thread per frame. GDScript tests extended: logger token lookup (find_script_error_since), and game_helper coverage for unrelated-error isolation (P2a) and overlapping-eval isolation (P2b). Full GDScript suite green (1362). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
1 parent 00503e1 commit ff2623e

3 files changed

Lines changed: 218 additions & 173 deletions

File tree

plugin/addons/godot_ai/runtime/game_helper.gd

Lines changed: 71 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -36,13 +36,16 @@ var _logger_attached := false
3636
## frames at FLUSH_BATCH_LIMIT per frame rather than blasting the whole
3737
## queue in a single _process tick.
3838
var _pending_outbound: Array = []
39-
## #490: the in-flight eval. The editor's eval_check probe (and, when the game
40-
## is focused, the _process fast path) read these to report a runtime error
41-
## that aborted execute() before _handle_eval could reply. _eval_request_id
42-
## is "" when no eval is running.
43-
var _eval_request_id := ""
44-
var _eval_node: Node = null
45-
var _eval_script_err_baseline := 0
39+
## #490: in-flight evals, keyed by request_id (multiple deferred game_evals
40+
## can run at once). Each entry: {node:Node, token:String, baseline:int}.
41+
## `token` names this eval's unique wrapper function so a runtime error is
42+
## attributed only to the eval that actually raised it — not an unrelated
43+
## background game error, and not a sibling overlapping eval. `baseline` is the
44+
## logger's script-error seq just before this eval ran. The editor's eval_check
45+
## probe (and #488's in-flight poll loop, when the game is focused) consult
46+
## these to report a runtime error that aborted execute() before the reply.
47+
var _inflight_evals: Dictionary = {}
48+
var _eval_token_counter: int = 0
4649

4750

4851
func _ready() -> void:
@@ -89,12 +92,6 @@ func _process(_delta: float) -> void:
8992
## print loop can't stall the game by shoving thousands of entries
9093
## through the debugger packet path in a single tick. Surplus stays in
9194
## `_pending_outbound` and bleeds out across subsequent frames.
92-
## #490: best-effort fast path — when the game is focused and its idle
93-
## loop ticks, report an eval-aborting runtime error a frame after it
94-
## happens. When the game is backgrounded this never runs (the idle loop
95-
## freezes), so the editor's eval_check probe → _handle_eval_check is the
96-
## reliable path. Cheap no-op when no eval is in flight.
97-
_try_report_eval_runtime_error()
9895
if not _logger_attached or _logger == null:
9996
return
10097
if not EngineDebugger.is_active():
@@ -569,31 +566,30 @@ func _handle_eval(data: Array) -> void:
569566
_reply_eval_error(request_id, "No code provided")
570567
return
571568

572-
## Wrap user code so we can capture a return value and a completion flag.
573-
## Uses await so user code can use `await` internally. __mcp_finished marks
574-
## a clean finish so a runtime error logged afterwards can't be mis-reported
575-
## against this request. (#490)
569+
## Wrap user code in an execute() coroutine (so it can `await` internally)
570+
## whose inner function is uniquely named per eval. A runtime error's
571+
## backtrace then carries `_mcp_run_<token>`, letting us attribute it to
572+
## THIS eval — not an unrelated background game error, and not a sibling
573+
## overlapping eval. (#490)
574+
_eval_token_counter += 1
575+
var token := str(_eval_token_counter)
576+
var run_fn := "_mcp_run_%s" % token
576577
var script_source := (
577578
"extends Node\n"
578-
+ "var __mcp_finished := false\n"
579579
+ "func execute():\n"
580-
+ "\tvar __result = await _run()\n"
581-
+ "\t__mcp_finished = true\n"
582-
+ "\treturn __result\n\n"
583-
+ "func _run():\n"
580+
+ "\treturn await %s()\n\n" % run_fn
581+
+ "func %s():\n" % run_fn
584582
+ _indent_eval_code(code)
585583
)
586584

587-
## #490: mark this eval in-flight and snapshot the script-error counter
588-
## BEFORE reload()/execute(). In a debug build a parse error aborts
589-
## reload() and a runtime error aborts execute() — either way this
590-
## function may never reach its reply. The editor infers a compile error
591-
## from the missing mcp:eval_compiled beacon, and reports a runtime error
592-
## (via its eval_check probe / the _process fast path) when the
593-
## script-error counter advances past this baseline.
594-
_eval_request_id = request_id
595-
_eval_node = null
596-
_eval_script_err_baseline = _logger.script_error_seq() if _logger != null else 0
585+
## Snapshot the logger's script-error seq BEFORE running so we only attribute
586+
## errors raised by this eval. In a debug build a parse error aborts reload()
587+
## and a runtime error aborts execute() — either way this function may never
588+
## reach its reply: the editor infers a compile error from the missing
589+
## mcp:eval_compiled beacon, and a runtime error is reported (via the
590+
## eval_check probe / the in-flight poll loop) once a logged error past this
591+
## baseline carries this eval's token.
592+
var baseline: int = _logger.script_error_seq() if _logger != null else 0
597593

598594
var script: GDScript = GDScript.new()
599595
script.source_code = script_source
@@ -604,57 +600,57 @@ func _handle_eval(data: Array) -> void:
604600
## editor process (handler unit tests), where reload() does return.
605601
var err: int = script.reload()
606602
if err != OK:
607-
_eval_request_id = ""
608603
_reply_eval_error(request_id,
609604
"Failed to compile GDScript (error %d). Check syntax." % err)
610605
return
611606

612-
## Compiled OK — tell the editor so it doesn't flag a compile error.
607+
## Compiled OK — tell the editor so its grace timer doesn't flag a compile
608+
## error and so it begins probing for a runtime error.
613609
EngineDebugger.send_message("mcp:eval_compiled", [request_id])
614610

615611
var temp_node := Node.new()
616612
temp_node.set_script(script)
617613
temp_node.process_mode = Node.PROCESS_MODE_ALWAYS
618-
_eval_node = temp_node
619614
add_child(temp_node)
620615

621616
if not temp_node.has_method("execute"):
622-
_eval_request_id = ""
623-
_eval_node = null
624617
temp_node.queue_free()
625618
_reply_eval_error(request_id, "Internal error: eval wrapper is missing execute().")
626619
return
627620

628-
## Drive execute() as a fire-and-forget coroutine that records its
629-
## outcome into `holder`, then poll frames until it finishes or the
630-
## deadline passes. A plain `await temp_node.execute()` has no escape
631-
## hatch: if user code never returns, we never reach the reply/cleanup
621+
## Register in-flight BEFORE running: a runtime error aborts execute() (and
622+
## may unwind this function) before we could record it afterward, and the
623+
## editor probe / poll loop need the entry to attribute and report the error.
624+
_inflight_evals[request_id] = {"node": temp_node, "token": token, "baseline": baseline}
625+
626+
## Drive execute() as a fire-and-forget coroutine that records its outcome
627+
## into `holder`, then poll frames until it finishes or the deadline passes
628+
## (#488's hung-await guard). A plain `await temp_node.execute()` has no
629+
## escape hatch: if user code never returns, we never reach the reply/cleanup
632630
## below and the request hangs with the node leaked.
633631
var holder := {"done": false, "value": null, "abandoned": false}
634632
_drive_eval(temp_node, holder)
635633

636634
var tree := get_tree()
637635
var deadline_ms := int(EVAL_TIMEOUT_SEC * 1000.0)
638636
var start_ms := Time.get_ticks_msec()
639-
## process_frame fires every idle frame regardless of tree pause, so this
640-
## deadline still elapses while the game is paused.
641637
while not holder["done"] and (Time.get_ticks_msec() - start_ms) < deadline_ms:
638+
## #490 focused fast path: a runtime error aborts _drive_eval (holder
639+
## never completes), so check each frame whether THIS eval's token now
640+
## appears in a logged error and report it immediately. (Backgrounded,
641+
## this loop is frozen and the editor probe does the same job.)
642+
if _try_report_eval_runtime_error(request_id):
643+
holder["abandoned"] = true
644+
return
642645
await tree.process_frame
643646

644647
if not holder["done"]:
645-
## Still running past the deadline. Mark abandoned so a late
646-
## completion in _drive_eval drops its result and frees the node.
648+
## Past the 8s deadline. Disambiguate a runtime error (its token is in a
649+
## logged error) from a genuine hung await before the generic timeout.
647650
holder["abandoned"] = true
648-
## #490: a runtime error aborts _drive_eval the same way a hung await
649-
## does — holder never completes. Disambiguate via the script-error
650-
## counter: if it advanced, report the real runtime error (with text +
651-
## line) instead of the generic timeout. (Usually the editor probe or
652-
## the _process fast path already reported it well before this 8s
653-
## deadline; this is the last-resort path.)
654651
if _try_report_eval_runtime_error(request_id):
655652
return
656-
_eval_request_id = ""
657-
_eval_node = null
653+
_inflight_evals.erase(request_id)
658654
if is_instance_valid(temp_node):
659655
remove_child(temp_node)
660656
_reply_eval_error(request_id,
@@ -664,12 +660,8 @@ func _handle_eval(data: Array) -> void:
664660
% int(EVAL_TIMEOUT_SEC))
665661
return
666662

667-
## Reached only if execute() did NOT abort. Clear the in-flight marker
668-
## first so the _process fast path / editor probe can't double-report,
669-
## then reply.
670-
if _eval_request_id == request_id:
671-
_eval_request_id = ""
672-
_eval_node = null
663+
## Clean finish.
664+
_inflight_evals.erase(request_id)
673665
temp_node.queue_free()
674666
_reply_eval_response(request_id, holder["value"])
675667

@@ -706,34 +698,30 @@ func _reply_eval_response(request_id: String, value: Variant) -> void:
706698
[request_id, JSON.stringify(_variant_to_json(value))])
707699

708700

709-
## #490: report a runtime error that aborted the in-flight eval before it
710-
## could reply, using the logger's captured text + resolved line. Called two
711-
## ways: every frame from _process (the fast path, only effective when the
712-
## game is focused and its idle loop ticks), and on demand from
713-
## _handle_eval_check when the editor probes — the ONLY reliable path when a
714-
## backgrounded game's idle loop is frozen, because the debugger capture
715-
## callback still runs. Gated on the logger's ERROR_TYPE_SCRIPT counter so
716-
## push_error()/push_warning() (types 0/1) can't trip it, and skipped once the
717-
## eval node reports __mcp_finished so an error logged after a clean finish
718-
## can't misfire. `request_id_filter` (when non-empty) restricts reporting to
719-
## that request. Returns true if it reported.
720-
func _try_report_eval_runtime_error(request_id_filter := "") -> bool:
721-
if _eval_request_id == "" or _logger == null:
722-
return false
723-
if request_id_filter != "" and _eval_request_id != request_id_filter:
701+
## #490: if a logged script error past THIS eval's baseline carries its unique
702+
## wrapper-function token, a runtime error aborted it before it could reply —
703+
## report it with the real text + line. Returns true if it reported. Called
704+
## from the editor's eval_check probe (the reliable path when a backgrounded
705+
## game's idle loop is frozen — the debugger capture callback still runs) and
706+
## from _handle_eval's poll loop (the focused fast path). Token + baseline
707+
## matching means an unrelated background error, or a sibling overlapping
708+
## eval's error, can never fail this request.
709+
func _try_report_eval_runtime_error(request_id: String) -> bool:
710+
if _logger == null:
724711
return false
725-
if _eval_node != null and is_instance_valid(_eval_node) and _eval_node.get("__mcp_finished"):
712+
var entry = _inflight_evals.get(request_id)
713+
if entry == null:
726714
return false
727-
if _logger.script_error_seq() <= _eval_script_err_baseline:
715+
var text: String = _logger.find_script_error_since(
716+
int(entry["baseline"]), "_mcp_run_%s" % str(entry["token"]))
717+
if text.is_empty():
728718
return false
729-
var rid := _eval_request_id
730-
var text: String = _logger.last_script_error_text()
731-
_eval_request_id = ""
732-
if _eval_node != null and is_instance_valid(_eval_node):
733-
_eval_node.queue_free()
734-
_eval_node = null
719+
_inflight_evals.erase(request_id)
720+
var node: Node = entry["node"]
721+
if node != null and is_instance_valid(node):
722+
node.queue_free()
735723
if EngineDebugger.is_active():
736-
EngineDebugger.send_message("mcp:eval_runtime_error", [rid, text])
724+
EngineDebugger.send_message("mcp:eval_runtime_error", [request_id, text])
737725
return true
738726

739727

plugin/addons/godot_ai/runtime/loggers/game_logger.gd

Lines changed: 45 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -32,16 +32,20 @@ const _LogBacktrace := preload("res://addons/godot_ai/utils/log_backtrace.gd")
3232

3333
var _pending: Array = []
3434
var _mutex := Mutex.new()
35-
## #490: monotonic count of GDScript runtime (script-type) errors seen this
36-
## run, plus the text of the most recent one. game_helper snapshots the
37-
## count before running eval code and compares each frame to detect a
38-
## runtime error that aborted the eval before it could reply. Gated on
39-
## ERROR_TYPE_SCRIPT (2) so push_error()/push_warning() (types 0/1) don't
40-
## count — otherwise a benign push_error in eval code would be misreported
41-
## as a fatal error. Mutex-guarded: _log_error can fire from any thread.
35+
## #490: a monotonic sequence + a small ring of recent GDScript runtime
36+
## (script-type) errors, each with its text AND the function names in its
37+
## backtrace. game_helper uses this to attribute a runtime error to the
38+
## *specific* eval that raised it: each eval's wrapper has a uniquely named
39+
## inner function, and game_helper asks find_script_error_since() whether any
40+
## error past its pre-eval baseline carries that function in its stack. This
41+
## avoids failing an eval on an unrelated background game error that merely
42+
## advanced a global counter, and keeps overlapping evals from cross-
43+
## attributing. Gated on ERROR_TYPE_SCRIPT (2) so push_error()/push_warning()
44+
## (types 0/1) never count. Mutex-guarded: _log_error can fire from any thread.
4245
const _ERROR_TYPE_SCRIPT := 2
46+
const _MAX_RECENT_SCRIPT_ERRORS := 64
4347
var _script_error_seq: int = 0
44-
var _last_script_error_text: String = ""
48+
var _recent_script_errors: Array = []
4549

4650

4751
func _log_message(message: String, error: bool) -> void:
@@ -73,9 +77,19 @@ func _log_error(
7377
var text: String = "%s (%s)" % [resolved.message, loc] if not loc.is_empty() else resolved.message
7478
_append(resolved.level, text)
7579
if error_type == _ERROR_TYPE_SCRIPT:
80+
## Collect every function name in the first non-empty backtrace so
81+
## game_helper can match its eval's uniquely named wrapper function.
82+
var funcs := PackedStringArray()
83+
for bt in script_backtraces:
84+
if bt != null and bt.get_frame_count() > 0:
85+
for i in bt.get_frame_count():
86+
funcs.append(bt.get_frame_function(i))
87+
break
7688
_mutex.lock()
7789
_script_error_seq += 1
78-
_last_script_error_text = text
90+
_recent_script_errors.append({"seq": _script_error_seq, "text": text, "funcs": funcs})
91+
if _recent_script_errors.size() > _MAX_RECENT_SCRIPT_ERRORS:
92+
_recent_script_errors.remove_at(0)
7993
_mutex.unlock()
8094

8195

@@ -103,8 +117,8 @@ func has_pending() -> bool:
103117

104118

105119
## #490: monotonic count of script-type runtime errors seen this run.
106-
## game_helper snapshots this before eval and compares after to detect a
107-
## runtime error that aborted execute(). Mutex-guarded.
120+
## game_helper snapshots this before an eval to use as the `since_seq`
121+
## baseline for find_script_error_since(). Mutex-guarded.
108122
func script_error_seq() -> int:
109123
_mutex.lock()
110124
var v := _script_error_seq
@@ -116,6 +130,25 @@ func script_error_seq() -> int:
116130
## script-type runtime error, or "" if none seen this run.
117131
func last_script_error_text() -> String:
118132
_mutex.lock()
119-
var v := _last_script_error_text
133+
var v: String = _recent_script_errors[-1]["text"] if not _recent_script_errors.is_empty() else ""
120134
_mutex.unlock()
121135
return v
136+
137+
138+
## #490: text of the most recent script error with seq > since_seq whose
139+
## backtrace includes `function_name`, or "" if none. Lets game_helper
140+
## attribute a runtime error to the exact eval whose uniquely named wrapper
141+
## function appears in the stack — ignoring unrelated game errors and errors
142+
## from before the eval started. Mutex-guarded.
143+
func find_script_error_since(since_seq: int, function_name: String) -> String:
144+
_mutex.lock()
145+
var found := ""
146+
for i in range(_recent_script_errors.size() - 1, -1, -1):
147+
var rec: Dictionary = _recent_script_errors[i]
148+
if int(rec["seq"]) <= since_seq:
149+
break
150+
if (rec["funcs"] as PackedStringArray).has(function_name):
151+
found = rec["text"]
152+
break
153+
_mutex.unlock()
154+
return found

0 commit comments

Comments
 (0)