Gate on MCP load; use actual model name

angelovstanton · angelovstanton · commit 397401e3f92a · 2026-04-12T01:59:47.000+03:00
Wait up to 30s for MCP servers to finish loading before sending each prompt, so that the SDK's tool catalog includes MCP-advertised tools. The wait is best-effort: on timeout the prompt is sent anyway. Remove the previous diagnostic tool-list snapshot logic and its ListSessionToolsAsync helper. For grounding/metadata and user-described generator records, use the concrete model name (e.g. "claude-sonnet-4.5") of the first enabled provider in config.Ai.Providers instead of the wrapper agent.ProviderName, falling back to agent.ProviderName when no enabled provider (or no model name) is configured.
diff --git a/src/Spectra.CLI/Agent/Copilot/CopilotService.cs b/src/Spectra.CLI/Agent/Copilot/CopilotService.cs
@@ -109,20 +109,6 @@ public async Task<CopilotSession> CreateCriticSessionAsync(
         return await _client.CreateSessionAsync(config, ct);
     }
 
-    /// <summary>
-    /// Lists every tool the Copilot SDK currently exposes for the given model,
-    /// including local AIFunctions, built-in skills, and tools advertised by
-    /// attached MCP servers. Used by GenerationAgent to write a one-shot
-    /// snapshot to .spectra-debug.log so we can verify whether testimize tools
-    /// actually reached the agent's tool list at session start.
-    /// </summary>
-    public async Task<GitHub.Copilot.SDK.Rpc.ToolsListResult> ListSessionToolsAsync(
-        string model,
-        CancellationToken ct = default)
-    {
-        return await _client.Rpc.Tools.ListAsync(model, ct);
-    }
-
     /// <summary>
     /// Checks if the Copilot CLI binary is available.
     /// </summary>
diff --git a/src/Spectra.CLI/Agent/Copilot/GenerationAgent.cs b/src/Spectra.CLI/Agent/Copilot/GenerationAgent.cs
@@ -277,47 +277,6 @@ public async Task<GenerationResult> GenerateTestsAsync(
             // the actual error message. Generation proceeds immediately
             // because the SDK doesn't block session creation on MCP load.
 
-            // Diagnostic snapshot: once MCP servers finish loading, ask the
-            // SDK for the merged tool list and write each tool name to the
-            // debug log. Lets us answer "did testimize tools actually reach
-            // the agent's tool list?" by looking at .spectra-debug.log
-            // instead of guessing from tool-call traces. Best-effort only —
-            // failures here must not block generation.
-            if (mcpServers is not null)
-            {
-                try
-                {
-                    var loaded = await Task.WhenAny(
-                        mcpServersLoadedTcs.Task,
-                        Task.Delay(TimeSpan.FromSeconds(10), ct));
-                    var loadedInTime = loaded == mcpServersLoadedTcs.Task;
-                    DebugLog($"TOOL LIST waiting_for_mcp_loaded={loadedInTime}");
-
-                    var modelName = ProviderMapping.GetModelName(_provider);
-                    var toolList = await service.ListSessionToolsAsync(modelName, ct);
-                    var toolCount = toolList?.Tools?.Count ?? 0;
-                    DebugLog($"TOOL LIST count={toolCount} model={modelName}");
-                    if (toolList?.Tools is { } toolItems)
-                    {
-                        var testimizeHits = 0;
-                        foreach (var tool in toolItems)
-                        {
-                            var displayName = !string.IsNullOrEmpty(tool.NamespacedName)
-                                ? tool.NamespacedName
-                                : tool.Name ?? "?";
-                            DebugLog($"TOOL LIST item={displayName}");
-                            if (displayName.IndexOf("testimize", StringComparison.OrdinalIgnoreCase) >= 0)
-                                testimizeHits++;
-                        }
-                        DebugLog($"TOOL LIST testimize_tools_visible={testimizeHits}");
-                    }
-                }
-                catch (Exception ex)
-                {
-                    DebugLog($"TOOL LIST error={ex.GetType().Name}: {ex.Message}");
-                }
-            }
-
             // Build the combined prompt with system instructions and user request.
             // The profile format (JSON schema sent to the AI) is resolved from
             // profiles/_default.yaml on disk if present, else the embedded default.
@@ -342,6 +301,25 @@ public async Task<GenerationResult> GenerateTestsAsync(
             // Slower / reasoning models may need 10–20+ minutes per batch.
             var timeoutMinutes = Math.Max(1, _config.Ai.GenerationTimeoutMinutes);
             var batchTimeout = TimeSpan.FromMinutes(timeoutMinutes);
+            // v1.48.2: gate the prompt send on MCP servers being fully loaded.
+            // Without this, the SDK builds the model's tool catalog at session
+            // start before testimize finishes its initialize handshake (~600ms
+            // typical), so testimize tools are absent from the catalog the
+            // model sees. The .spectra-debug.log under 1.48.1 showed BATCH
+            // START firing ~600–700ms before TESTIMIZE LOADED on every batch.
+            // Best-effort with a 30s ceiling — if loading is slow, we still
+            // send the prompt rather than blocking generation indefinitely.
+            if (mcpServers is not null)
+            {
+                var gateSw = System.Diagnostics.Stopwatch.StartNew();
+                var winner = await Task.WhenAny(
+                    mcpServersLoadedTcs.Task,
+                    Task.Delay(TimeSpan.FromSeconds(30), ct));
+                gateSw.Stop();
+                var result = winner == mcpServersLoadedTcs.Task ? "loaded" : "timeout";
+                DebugLog($"GATE mcp_loaded waited={gateSw.Elapsed.TotalSeconds:F2}s result={result}");
+            }
+
             var sw = System.Diagnostics.Stopwatch.StartNew();
             DebugLogAi($"BATCH START requested={requestedCount} timeout={timeoutMinutes}min", null, null, estimated: false);
             _onStatus?.Invoke($"Starting AI generation ({requestedCount} tests, timeout {timeoutMinutes} min)...");
diff --git a/src/Spectra.CLI/Commands/Generate/GenerateHandler.cs b/src/Spectra.CLI/Commands/Generate/GenerateHandler.cs
@@ -577,7 +577,11 @@ private async Task<int> ExecuteDirectModeAsync(
         var criteriaContext = await LoadCriteriaContextAsync(currentDir, suite, config, ct);
 
         // --- Batch generation loop ---
-        var generatorModel = agent.ProviderName;
+        // v1.48.2: write the actual model name (e.g. "claude-sonnet-4.5") to
+        // the grounding frontmatter, not the agent's ProviderName which is
+        // always "copilot-sdk (github-models)" since the unification under
+        // the Copilot SDK runtime. The critic field already does this.
+        var generatorModel = config.Ai.Providers?.FirstOrDefault(p => p.Enabled)?.Model ?? agent.ProviderName;
         var writer = new TestFileWriter();
         var allWrittenTests = new List<TestCase>();
         var allFilesCreated = new List<string>();
@@ -1259,7 +1263,9 @@ await _progress.StatusAsync("Generating tests...", async () =>
                 // Verify tests against documentation if critic is configured
                 var verificationResults = new List<(TestCase Test, VerificationResult Result)>();
                 var testsToWrite = result.Tests.ToList();
-                var generatorModel = agent.ProviderName;
+                // v1.48.2: see the matching comment in ExecuteDirectModeAsync —
+                // write the actual model name, not the agent's wrapper ProviderName.
+                var generatorModel = config.Ai.Providers?.FirstOrDefault(p => p.Enabled)?.Model ?? agent.ProviderName;
 
                 if (ShouldVerify(config.Ai.Critic))
                 {
diff --git a/src/Spectra.CLI/Commands/Generate/UserDescribedGenerator.cs b/src/Spectra.CLI/Commands/Generate/UserDescribedGenerator.cs
@@ -147,7 +147,7 @@ criteria IDs in the test's `criteria` frontmatter field.
             {
                 Verdict = VerificationVerdict.Manual,
                 Score = 1.0,
-                Generator = agent.ProviderName,
+                Generator = config.Ai.Providers?.FirstOrDefault(p => p.Enabled)?.Model ?? agent.ProviderName,
                 Critic = "user-described",
                 VerifiedAt = DateTimeOffset.UtcNow
             }

Original file line number	Diff line number	Diff line change
@@ -147,7 +147,7 @@ criteria IDs in the test's `criteria` frontmatter field.
`147`	`147`	`{`
`148`	`148`	`Verdict = VerificationVerdict.Manual,`
`149`	`149`	`Score = 1.0,`
`150`		`- Generator = agent.ProviderName,`
	`150`	`+ Generator = config.Ai.Providers?.FirstOrDefault(p => p.Enabled)?.Model ?? agent.ProviderName,`
`151`	`151`	`Critic = "user-described",`
`152`	`152`	`VerifiedAt = DateTimeOffset.UtcNow`
`153`	`153`	`}`