fix(generate): configurable per-batch timeout + batch size + .spectra-debug.log

angelovstanton · angelovstanton · commit 60e6348414d0 · 2026-04-11T01:27:01.000+03:00
The 5-minute SDK timeout in CopilotGenerationAgent has been hardcoded since
spec 009. Fast generators (gpt-4o, gpt-4o-mini) fit 30 tests per batch in
under 5 minutes; slower / reasoning models or large Azure deployments do
not. Symptom: --count 100 splits into 4 batches of up to 30, the first batch hits
the 5-minute ceiling, generation fails. --count 1 works because one test
fits in any budget.

AiConfig (new optional fields):
- generation_timeout_minutes (default 5) — per-batch SDK call timeout
- generation_batch_size (default 30) — number of tests per AI call
- debug_log_enabled (default true) — gates the per-batch diagnostic file

GenerationAgent.GenerateTestsAsync:
- Reads timeout from config; minimum 1 minute
- Logs each batch START / OK / TIMEOUT to .spectra-debug.log. Every entry
  carries the requested count; START adds the model name, provider, and
  configured timeout; OK adds the elapsed seconds; TIMEOUT adds the model
  name and configured timeout (best-effort, never throws)
- Surfaces the configured timeout and batch size in the live status message
- Timeout error message now lists the actual configured minutes, the model
  name, the batch size, and three concrete remediation options with
  copy-paste-ready spectra.config.json snippets

GenerateHandler:
- Replaces hardcoded BatchSize const with DefaultBatchSize fallback
- Reads ai.generation_batch_size from config when > 0
- Renders "Batch N/M" in progress messages (was "Batch N")
- Loop math unchanged

For users hitting the timeout with a slow model, the workaround is now in
config — no code change required:

  "ai": {
    "generation_timeout_minutes": 15,
    "generation_batch_size": 10
  }

The .spectra-debug.log file shows exactly which batch was slow and why.
diff --git a/src/Spectra.CLI/Agent/Copilot/GenerationAgent.cs b/src/Spectra.CLI/Agent/Copilot/GenerationAgent.cs
@@ -170,15 +170,23 @@ public async Task<GenerationResult> GenerateTestsAsync(
             var promptLoader = new PromptTemplateLoader(_basePath);
             var fullPrompt = BuildFullPrompt(prompt, requestedCount, criteriaContext, templateLoader: promptLoader, profileFormat: profileFormat, testimizeEnabled: _config.Testimize.Enabled && testimizeClient is not null);
 
-            // Send and wait for the complete response
-            _onStatus?.Invoke("Starting AI generation...");
+            // Send and wait for the complete response.
+            // The per-batch timeout is configurable via ai.generation_timeout_minutes.
+            // Slower / reasoning models may need 10–20+ minutes per batch.
+            var timeoutMinutes = Math.Max(1, _config.Ai.GenerationTimeoutMinutes);
+            var batchTimeout = TimeSpan.FromMinutes(timeoutMinutes);
+            var sw = System.Diagnostics.Stopwatch.StartNew();
+            DebugLog($"BATCH START requested={requestedCount} model={_provider?.Model ?? "?"} provider={_provider?.Name ?? "?"} timeout={timeoutMinutes}min");
+            _onStatus?.Invoke($"Starting AI generation ({requestedCount} tests, timeout {timeoutMinutes} min)...");
             var response = await session.SendAndWaitAsync(
                 new MessageOptions { Prompt = fullPrompt },
-                timeout: TimeSpan.FromMinutes(5),
+                timeout: batchTimeout,
                 cancellationToken: ct);
 
             // Cancel delayed timers so they don't overwrite the final result
             await timerCts.CancelAsync();
+            sw.Stop();
+            DebugLog($"BATCH OK   requested={requestedCount} elapsed={sw.Elapsed.TotalSeconds:F1}s");
 
             var responseText = response?.Data?.Content ?? "";
 
@@ -219,10 +227,21 @@ public async Task<GenerationResult> GenerateTestsAsync(
         }
         catch (TimeoutException)
         {
+            var configuredMinutes = Math.Max(1, _config.Ai.GenerationTimeoutMinutes);
+            DebugLog($"BATCH TIMEOUT requested={requestedCount} model={_provider?.Model ?? "?"} configured_timeout={configuredMinutes}min");
             return new GenerationResult
             {
                 Tests = [],
-                Errors = ["Generation timed out after 5 minutes. Try reducing --count."]
+                Errors = [
+                    $"Generation timed out after {configuredMinutes} minutes (model: {_provider?.Model ?? "?"}, batch size: {requestedCount}).",
+                    "Options to fix this:",
+                    "  1. Increase the per-batch timeout in spectra.config.json:",
+                    "       \"ai\": { \"generation_timeout_minutes\": 15 }",
+                    "  2. Reduce the batch size:",
+                    "       \"ai\": { \"generation_batch_size\": 10 }",
+                    "  3. Reduce --count or use a faster model.",
+                    "See .spectra-debug.log for per-batch timing details."
+                ]
             };
         }
         catch (Exception ex) when (ex.Message.Contains("copilot", StringComparison.OrdinalIgnoreCase)
@@ -269,6 +288,26 @@ public async Task<GenerationResult> GenerateTestsAsync(
         }
     }
 
+    /// <summary>
+    /// Appends one UTC-timestamped <c>[generate]</c> line to <c>.spectra-debug.log</c>
+    /// under <c>_basePath</c>. Best-effort: every exception is swallowed. Does
+    /// nothing when <c>ai.debug_log_enabled</c> is false (default true).
+    /// </summary>
+    private void DebugLog(string message)
+    {
+        try
+        {
+            if (!_config.Ai.DebugLogEnabled) return;
+            var path = Path.Combine(_basePath, ".spectra-debug.log");
+            var line = $"{DateTime.UtcNow:yyyy-MM-ddTHH:mm:ss.fffZ} [generate] {message}{Environment.NewLine}";
+            File.AppendAllText(path, line);
+        }
+        catch
+        {
+            // Deliberately empty: a failed log write must never block generation.
+        }
+    }
+
     private void SaveDebugResponse(string responseText)
     {
         try
diff --git a/src/Spectra.CLI/Commands/Generate/GenerateHandler.cs b/src/Spectra.CLI/Commands/Generate/GenerateHandler.cs
@@ -32,7 +32,7 @@ namespace Spectra.CLI.Commands.Generate;
 /// </summary>
 public sealed class GenerateHandler
 {
-    private const int BatchSize = 30;
+    private const int DefaultBatchSize = 30;
 
     private readonly VerbosityLevel _verbosity;
     private readonly bool _dryRun;
@@ -522,14 +522,22 @@ private async Task<int> ExecuteDirectModeAsync(
         var batchErrors = new List<string>();
         var batchesCompleted = 0;
 
+        // Spec post-038 fix: batch size is configurable via ai.generation_batch_size.
+        // Slower / reasoning models benefit from smaller batches paired with a
+        // larger ai.generation_timeout_minutes setting.
+        var configuredBatchSize = config.Ai.GenerationBatchSize > 0
+            ? config.Ai.GenerationBatchSize
+            : DefaultBatchSize;
+        var totalBatches = (int)Math.Ceiling(effectiveCount / (double)configuredBatchSize);
+
         for (var batchNum = 1; totalGenerated < effectiveCount; batchNum++)
         {
             var remaining = effectiveCount - totalGenerated;
-            var batchRequestCount = Math.Min(remaining, BatchSize);
+            var batchRequestCount = Math.Min(remaining, configuredBatchSize);
 
             UpdateProgress(suite, "generating",
-                $"Generating batch {batchNum}: {allWrittenTests.Count}/{effectiveCount} tests complete...");
-            _progress.Info($"Batch {batchNum}: requesting {batchRequestCount} tests ({allWrittenTests.Count}/{effectiveCount} complete)");
+                $"Generating batch {batchNum}/{totalBatches}: {allWrittenTests.Count}/{effectiveCount} tests complete...");
+            _progress.Info($"Batch {batchNum}/{totalBatches}: requesting {batchRequestCount} tests ({allWrittenTests.Count}/{effectiveCount} complete)");
 
             var prompt = BuildPrompt(suite, batchRequestCount, mutableExistingIds, effectiveProfile, focus);
             GenerationResult batchResult = null!;
diff --git a/src/Spectra.Core/Models/Config/AiConfig.cs b/src/Spectra.Core/Models/Config/AiConfig.cs
@@ -19,4 +19,29 @@ public sealed class AiConfig
     /// </summary>
     [JsonPropertyName("critic")]
     public CriticConfig? Critic { get; init; }
+
+    /// <summary>
+    /// Per-batch timeout for the AI generation SDK call, in minutes. Default 5.
+    /// Slower / larger models (e.g. reasoning models, large Azure deployments)
+    /// may need to bump this to 10–20+ minutes. The timer measures the entire
+    /// batch round-trip including all tool calls the AI makes.
+    /// </summary>
+    [JsonPropertyName("generation_timeout_minutes")]
+    public int GenerationTimeoutMinutes { get; init; } = 5;
+
+    /// <summary>
+    /// Number of tests requested per AI call. Default 30. Smaller batches
+    /// reduce per-batch latency on slow models at the cost of more total
+    /// round-trips. Pair with <see cref="GenerationTimeoutMinutes"/>.
+    /// </summary>
+    [JsonPropertyName("generation_batch_size")]
+    public int GenerationBatchSize { get; init; } = 30;
+
+    /// <summary>
+    /// Append per-batch diagnostics to <c>.spectra-debug.log</c> in the
+    /// project root. Default true. Useful for diagnosing slow models and
+    /// timeout issues. Set false to silence.
+    /// </summary>
+    [JsonPropertyName("debug_log_enabled")]
+    public bool DebugLogEnabled { get; init; } = true;
 }