Skip to content

Commit 397401e

Browse files
Gate on MCP load; use actual model name
Wait up to 30s for MCP servers to finish loading before sending prompts so the SDK's tool catalog includes MCP-advertised tools (best-effort, falls back on timeout). Remove the previous diagnostic tool-list snapshot logic and its ListSessionToolsAsync helper. Use the concrete provider model name (e.g. "claude-sonnet-4.5") from config.Ai.Providers for grounding/metadata and user-described generator records instead of the wrapper agent.ProviderName.
1 parent fca0497 commit 397401e

File tree

4 files changed

+28
-58
lines changed

4 files changed

+28
-58
lines changed

src/Spectra.CLI/Agent/Copilot/CopilotService.cs

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -109,20 +109,6 @@ public async Task<CopilotSession> CreateCriticSessionAsync(
109109
return await _client.CreateSessionAsync(config, ct);
110110
}
111111

112-
/// <summary>
113-
/// Lists every tool the Copilot SDK currently exposes for the given model,
114-
/// including local AIFunctions, built-in skills, and tools advertised by
115-
/// attached MCP servers. Used by GenerationAgent to write a one-shot
116-
/// snapshot to .spectra-debug.log so we can verify whether testimize tools
117-
/// actually reached the agent's tool list at session start.
118-
/// </summary>
119-
public async Task<GitHub.Copilot.SDK.Rpc.ToolsListResult> ListSessionToolsAsync(
120-
string model,
121-
CancellationToken ct = default)
122-
{
123-
return await _client.Rpc.Tools.ListAsync(model, ct);
124-
}
125-
126112
/// <summary>
127113
/// Checks if the Copilot CLI binary is available.
128114
/// </summary>

src/Spectra.CLI/Agent/Copilot/GenerationAgent.cs

Lines changed: 19 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -277,47 +277,6 @@ public async Task<GenerationResult> GenerateTestsAsync(
277277
// the actual error message. Session creation returns immediately
278278
// because the SDK doesn't block on MCP load; the prompt send below is gated on it.
279279

280-
// Diagnostic snapshot: once MCP servers finish loading, ask the
281-
// SDK for the merged tool list and write each tool name to the
282-
// debug log. Lets us answer "did testimize tools actually reach
283-
// the agent's tool list?" by looking at .spectra-debug.log
284-
// instead of guessing from tool-call traces. Best-effort only —
285-
// failures here must not block generation.
286-
if (mcpServers is not null)
287-
{
288-
try
289-
{
290-
var loaded = await Task.WhenAny(
291-
mcpServersLoadedTcs.Task,
292-
Task.Delay(TimeSpan.FromSeconds(10), ct));
293-
var loadedInTime = loaded == mcpServersLoadedTcs.Task;
294-
DebugLog($"TOOL LIST waiting_for_mcp_loaded={loadedInTime}");
295-
296-
var modelName = ProviderMapping.GetModelName(_provider);
297-
var toolList = await service.ListSessionToolsAsync(modelName, ct);
298-
var toolCount = toolList?.Tools?.Count ?? 0;
299-
DebugLog($"TOOL LIST count={toolCount} model={modelName}");
300-
if (toolList?.Tools is { } toolItems)
301-
{
302-
var testimizeHits = 0;
303-
foreach (var tool in toolItems)
304-
{
305-
var displayName = !string.IsNullOrEmpty(tool.NamespacedName)
306-
? tool.NamespacedName
307-
: tool.Name ?? "?";
308-
DebugLog($"TOOL LIST item={displayName}");
309-
if (displayName.IndexOf("testimize", StringComparison.OrdinalIgnoreCase) >= 0)
310-
testimizeHits++;
311-
}
312-
DebugLog($"TOOL LIST testimize_tools_visible={testimizeHits}");
313-
}
314-
}
315-
catch (Exception ex)
316-
{
317-
DebugLog($"TOOL LIST error={ex.GetType().Name}: {ex.Message}");
318-
}
319-
}
320-
321280
// Build the combined prompt with system instructions and user request.
322281
// The profile format (JSON schema sent to the AI) is resolved from
323282
// profiles/_default.yaml on disk if present, else the embedded default.
@@ -342,6 +301,25 @@ public async Task<GenerationResult> GenerateTestsAsync(
342301
// Slower / reasoning models may need 10–20+ minutes per batch.
343302
var timeoutMinutes = Math.Max(1, _config.Ai.GenerationTimeoutMinutes);
344303
var batchTimeout = TimeSpan.FromMinutes(timeoutMinutes);
304+
// v1.48.2: gate the prompt send on MCP servers being fully loaded.
305+
// Without this, the SDK builds the model's tool catalog at session
306+
// start before testimize finishes its initialize handshake (~600ms
307+
// typical), so testimize tools are absent from the catalog the
308+
// model sees. The .spectra-debug.log under 1.48.1 showed BATCH
309+
// START firing ~600–700ms before TESTIMIZE LOADED on every batch.
310+
// Best-effort with a 30s ceiling — if loading is slow, we still
311+
// send the prompt rather than blocking generation indefinitely.
312+
if (mcpServers is not null)
313+
{
314+
var gateSw = System.Diagnostics.Stopwatch.StartNew();
315+
var winner = await Task.WhenAny(
316+
mcpServersLoadedTcs.Task,
317+
Task.Delay(TimeSpan.FromSeconds(30), ct));
318+
gateSw.Stop();
319+
var result = winner == mcpServersLoadedTcs.Task ? "loaded" : "timeout";
320+
DebugLog($"GATE mcp_loaded waited={gateSw.Elapsed.TotalSeconds:F2}s result={result}");
321+
}
322+
345323
var sw = System.Diagnostics.Stopwatch.StartNew();
346324
DebugLogAi($"BATCH START requested={requestedCount} timeout={timeoutMinutes}min", null, null, estimated: false);
347325
_onStatus?.Invoke($"Starting AI generation ({requestedCount} tests, timeout {timeoutMinutes} min)...");

src/Spectra.CLI/Commands/Generate/GenerateHandler.cs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -577,7 +577,11 @@ private async Task<int> ExecuteDirectModeAsync(
577577
var criteriaContext = await LoadCriteriaContextAsync(currentDir, suite, config, ct);
578578

579579
// --- Batch generation loop ---
580-
var generatorModel = agent.ProviderName;
580+
// v1.48.2: write the actual model name (e.g. "claude-sonnet-4.5") to
581+
// the grounding frontmatter, not the agent's ProviderName which is
582+
// always "copilot-sdk (github-models)" since the unification under
583+
// the Copilot SDK runtime. The critic field already does this.
584+
var generatorModel = config.Ai.Providers?.FirstOrDefault(p => p.Enabled)?.Model ?? agent.ProviderName;
581585
var writer = new TestFileWriter();
582586
var allWrittenTests = new List<TestCase>();
583587
var allFilesCreated = new List<string>();
@@ -1259,7 +1263,9 @@ await _progress.StatusAsync("Generating tests...", async () =>
12591263
// Verify tests against documentation if critic is configured
12601264
var verificationResults = new List<(TestCase Test, VerificationResult Result)>();
12611265
var testsToWrite = result.Tests.ToList();
1262-
var generatorModel = agent.ProviderName;
1266+
// v1.48.2: see comment in ExecuteDirectModeAsync — write the
1267+
// actual model name, not the agent's wrapper ProviderName.
1268+
var generatorModel = config.Ai.Providers?.FirstOrDefault(p => p.Enabled)?.Model ?? agent.ProviderName;
12631269

12641270
if (ShouldVerify(config.Ai.Critic))
12651271
{

src/Spectra.CLI/Commands/Generate/UserDescribedGenerator.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ criteria IDs in the test's `criteria` frontmatter field.
147147
{
148148
Verdict = VerificationVerdict.Manual,
149149
Score = 1.0,
150-
Generator = agent.ProviderName,
150+
Generator = config.Ai.Providers?.FirstOrDefault(p => p.Enabled)?.Model ?? agent.ProviderName,
151151
Critic = "user-described",
152152
VerifiedAt = DateTimeOffset.UtcNow
153153
}

0 commit comments

Comments
 (0)