Skip to content

Commit 4ce3d00

Browse files
fix: criteria generation, polling, progress caching & CI build
- Fix criteria not written to test files: CreateTestWithGrounding was rebuilding TestCase without copying Criteria field. Added Criteria = test.Criteria to all 3 code paths.
- Fix suite-criteria matching: changed exact-match to contains-match so suite "reporting" finds criteria with component "reporting-analytics".
- Strengthen AI prompt: criteria instructions now MANDATORY with explicit mapping requirement instead of optional "if provided" wording.
- Remove polling tools (getTerminalOutput, terminalLastCommand) from generate/readonly tool lists so GPT-4o cannot poll terminal output.
- Fix progress page stale on open: auto-refresh now always runs (even on terminal pages), Reset() writes placeholder HTML instead of deleting, all SKILLs use ?nocache=1 on initial open.
- Fix CI build: add #pragma warning disable CS0618 to RequirementsWriterTests for obsolete RequirementDefinition usage.
- Add CLI commands to agent delegation tables for GPT-4o reliability.
- Strengthen help routing in both agents.
- Version 1.34.4.
1 parent b120c10 commit 4ce3d00

File tree

29 files changed

+230
-182
lines changed

29 files changed

+230
-182
lines changed

src/Spectra.CLI/Agent/Copilot/GenerationAgent.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -305,13 +305,13 @@ Do NOT include any explanatory text before or after the JSON. Output ONLY the JS
305305
306306
{userPrompt}
307307
308-
{(string.IsNullOrEmpty(criteriaContext) ? "" : $"\n## ACCEPTANCE CRITERIA\n\n{criteriaContext}\n")}
308+
{(string.IsNullOrEmpty(criteriaContext) ? "" : $"\n## ACCEPTANCE CRITERIA — MANDATORY\n\nYou MUST map each test case to matching acceptance criteria below. Every test MUST have at least one criterion ID in its \"criteria\" array. If a test doesn't match any criterion, use the closest related one.\n\n{criteriaContext}\n")}
309309
IMPORTANT:
310310
1. Use the tools to read documentation and check for duplicates first
311311
2. Only generate tests that are grounded in the documentation
312312
3. Ensure unique test IDs using GetNextTestIds
313313
4. Your FINAL response must be ONLY the JSON array — no other text
314-
5. If acceptance criteria are provided, include matching criterion IDs in the "criteria" array field
314+
5. MANDATORY: For each test, populate the "criteria" array with IDs of acceptance criteria it verifies (e.g. ["AC-REPORTING-001", "AC-REPORTING-003"]). Never leave criteria empty when acceptance criteria are provided above.
315315
""";
316316
}
317317

src/Spectra.CLI/Commands/Generate/GenerateHandler.cs

Lines changed: 33 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1445,6 +1445,7 @@ private async Task<int> ExecuteFromDescriptionAsync(
14451445
Steps = test.Steps,
14461446
ExpectedResult = test.ExpectedResult,
14471447
TestData = test.TestData,
1448+
Criteria = test.Criteria,
14481449
FilePath = Path.GetRelativePath(testsPath, filePath),
14491450
Grounding = test.Grounding
14501451
};
@@ -1729,6 +1730,7 @@ internal static TestCase CreateTestWithGrounding(
17291730
Steps = test.Steps,
17301731
ExpectedResult = test.ExpectedResult,
17311732
TestData = test.TestData,
1733+
Criteria = test.Criteria,
17321734
FilePath = Path.GetRelativePath(testsPath, filePath),
17331735
Grounding = test.Grounding
17341736
};
@@ -1758,6 +1760,7 @@ internal static TestCase CreateTestWithGrounding(
17581760
Steps = test.Steps,
17591761
ExpectedResult = test.ExpectedResult,
17601762
TestData = test.TestData,
1763+
Criteria = test.Criteria,
17611764
FilePath = Path.GetRelativePath(testsPath, filePath),
17621765
Grounding = grounding
17631766
};
@@ -1961,20 +1964,40 @@ private void ShowCountMismatchReason(
19611964
if (allCriteria.Count == 0)
19621965
return null;
19631966

1964-
// Filter: criteria from documents matching suite name, or criteria with matching component
1967+
// Filter: criteria matching suite name (exact or partial match on component, source doc, or file name)
19651968
var relevant = allCriteria.Where(c =>
1966-
// Match by source document name (e.g., suite "checkout" matches "checkout.criteria.yaml")
1967-
(c.SourceDoc != null && Path.GetFileNameWithoutExtension(c.SourceDoc)
1968-
.Equals(suiteName, StringComparison.OrdinalIgnoreCase)) ||
1969-
// Match by component field
1969+
// Exact match by component
19701970
(c.Component != null && c.Component.Equals(suiteName, StringComparison.OrdinalIgnoreCase)) ||
1971-
// Match by criteria file name (e.g., "checkout.criteria.yaml" for suite "checkout")
1972-
criteriaFiles.Any(f => Path.GetFileName(f)
1973-
.StartsWith(suiteName + ".", StringComparison.OrdinalIgnoreCase) &&
1974-
allCriteria.IndexOf(c) >= 0)
1971+
// Component contains suite name (e.g., suite "reporting" matches component "reporting-analytics")
1972+
(c.Component != null && c.Component.Contains(suiteName, StringComparison.OrdinalIgnoreCase)) ||
1973+
// Suite contains component (e.g., suite "reporting-analytics" matches component "reporting")
1974+
(c.Component != null && suiteName.Contains(c.Component, StringComparison.OrdinalIgnoreCase)) ||
1975+
// Source doc file name contains suite name
1976+
(c.SourceDoc != null && Path.GetFileNameWithoutExtension(c.SourceDoc)
1977+
.Contains(suiteName, StringComparison.OrdinalIgnoreCase))
19751978
).ToList();
19761979

1977-
// If no suite-specific criteria found, use all criteria (better than none)
1980+
// Fallback: if nothing matched above, use criteria from files whose name contains the suite name
1981+
if (relevant.Count == 0)
1982+
{
1983+
var matchingFiles = criteriaFiles.Where(f =>
1984+
Path.GetFileNameWithoutExtension(f).Replace(".criteria", "")
1985+
.Contains(suiteName, StringComparison.OrdinalIgnoreCase)).ToList();
1986+
1987+
if (matchingFiles.Count > 0)
1988+
{
1989+
var matchingFileSet = new HashSet<string>(matchingFiles, StringComparer.OrdinalIgnoreCase);
1990+
// Reload only from matching files
1991+
relevant = new List<AcceptanceCriterion>();
1992+
foreach (var file in matchingFiles)
1993+
{
1994+
var fileCriteria = await reader.ReadAsync(file, ct);
1995+
relevant.AddRange(fileCriteria);
1996+
}
1997+
}
1998+
}
1999+
2000+
// Last resort: use all criteria (better than none, but may be noisy)
19782001
if (relevant.Count == 0)
19792002
relevant = allCriteria;
19802003

src/Spectra.CLI/Progress/ProgressManager.cs

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,15 +46,26 @@ internal ProgressManager(string command, string[] phases, string resultPath, str
4646
public string ResultPath => _resultPath;
4747
public string ProgressPath => _progressPath;
4848

49-
/// <summary>Delete stale files from previous runs.</summary>
49+
/// <summary>
50+
/// Reset stale files from previous runs.
51+
/// Writes a "starting" placeholder HTML so the browser shows a loading state
52+
/// (with auto-refresh enabled) instead of the previous run's completed state.
53+
/// </summary>
5054
public void Reset()
5155
{
5256
try
5357
{
5458
if (File.Exists(_resultPath))
5559
File.Delete(_resultPath);
56-
if (File.Exists(_progressPath))
57-
File.Delete(_progressPath);
60+
61+
var placeholder = new CommandResult
62+
{
63+
Command = _command,
64+
Status = "starting",
65+
Message = "Initializing..."
66+
};
67+
var json = JsonSerializer.Serialize(placeholder, ResultFileOptions);
68+
ProgressPageWriter.WriteProgressPage(_progressPath, json, isTerminal: false, _title);
5869
}
5970
catch
6071
{

src/Spectra.CLI/Progress/ProgressPageWriter.cs

Lines changed: 17 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -471,15 +471,15 @@ .breakdown h3 {
471471
{{BuildBody(jsonData, isTerminal, workspaceRoot)}}
472472
</div>
473473
<script>
474-
// Auto-refresh: reload page every 1.5s while status is not terminal.
475-
// Uses JavaScript instead of <meta refresh> for reliable file:// support.
474+
// Auto-refresh: always reload with cache-busting query parameter.
475+
// Runs even on terminal pages so that when a new run starts and
476+
// overwrites the file, a stale "Completed" page picks up the change.
477+
// Uses cache-busting query param because Chromium caches file:// URLs.
476478
(function() {
477-
var isTerminal = {{(isTerminal ? "true" : "false")}};
478-
if (!isTerminal) {
479-
setInterval(function() {
480-
window.location.reload();
481-
}, 1500);
482-
}
479+
setInterval(function() {
480+
var base = window.location.pathname;
481+
window.location.replace(base + '?_=' + Date.now());
482+
}, 1500);
483483
})();
484484
485485
// File links: open vscode:// URIs via JavaScript click handler
@@ -647,22 +647,15 @@ private static string BuildBody(string jsonData, bool isTerminal, string workspa
647647
sb.Append(BuildFilesSection(files, workspaceRoot));
648648
}
649649

650-
// Footer
651-
if (!isTerminal)
652-
{
653-
sb.Append("""
654-
<div class="footer">
655-
<span class="refresh-indicator">
656-
<span class="refresh-dot"></span>
657-
Auto-refreshing every 1.5 seconds
658-
</span>
659-
</div>
660-
""");
661-
}
662-
else
663-
{
664-
sb.Append("""<div class="footer">Generated by SPECTRA</div>""");
665-
}
650+
// Footer — always show refresh indicator since auto-refresh is always on
651+
sb.Append("""
652+
<div class="footer">
653+
<span class="refresh-indicator">
654+
<span class="refresh-dot"></span>
655+
Auto-refreshing every 1.5 seconds
656+
</span>
657+
</div>
658+
""");
666659

667660
return sb.ToString();
668661
}

src/Spectra.CLI/Skills/Content/Agents/spectra-execution.agent.md

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,10 @@ You are a QA Test Execution Assistant. You execute manual test suites interactiv
2020

2121
## IMPORTANT RULES
2222

23+
- **HELP**: If user asks "help", "what can I do", or "what commands": follow the **`spectra-help`** SKILL (NOT `spectra-execution`). Read `spectra-help` and reply with its content.
2324
- **NEVER use `askQuestion`, `askForConfirmation`, or ANY dialog/popup tool.** Always use plain text responses so users can paste screenshots.
2425
- **NEVER fabricate failure notes.** Ask the user and wait for their exact words.
25-
- For non-execution tasks, follow the corresponding `spectra-*` SKILL (see delegation table at end). Do NOT use MCP tools or createFile for those.
26-
27-
## If user asks for help: Follow the `spectra-help` SKILL.
26+
- For non-execution CLI tasks, see the **CLI Tasks** delegation table at end. Read the named SKILL, follow its steps exactly. Do NOT invent CLI commands.
2827

2928
## Execution Workflow
3029

@@ -107,14 +106,18 @@ When user doesn't specify a suite:
107106

108107
## CLI Tasks (delegation)
109108

110-
For these tasks, follow the named SKILL via `runInTerminal`. Do NOT use MCP tools.
111-
112-
| Task | SKILL to follow |
113-
|------|----------------|
114-
| Dashboard | `spectra-dashboard` |
115-
| Coverage analysis | `spectra-coverage` |
116-
| Acceptance criteria | `spectra-criteria` |
117-
| Validate tests | `spectra-validate` |
118-
| List / show tests | `spectra-list` |
119-
| Docs index | `spectra-docs` |
120-
| Test generation | `spectra-generate` (or switch to Generation agent) |
109+
For these tasks, read the named SKILL first, then follow its steps exactly via `runInTerminal`. Do NOT use MCP tools. Do NOT invent CLI commands — the commands below are the ONLY valid forms.
110+
111+
| Task | SKILL | CLI command |
112+
|------|-------|-------------|
113+
| Coverage analysis | `spectra-coverage` | `spectra ai analyze --coverage --auto-link --no-interaction --output-format json --verbosity quiet` |
114+
| Dashboard | `spectra-dashboard` | `spectra ai analyze --coverage --auto-link --no-interaction --output-format json --verbosity quiet && spectra dashboard --output ./site --no-interaction --output-format json --verbosity quiet` |
115+
| Extract criteria | `spectra-criteria` | `spectra ai analyze --extract-criteria --no-interaction --output-format json --verbosity quiet` |
116+
| Validate tests | `spectra-validate` | `spectra validate --no-interaction --output-format json --verbosity quiet` |
117+
| List suites | `spectra-list` | `spectra list --no-interaction --output-format json --verbosity quiet` |
118+
| Show test | `spectra-list` | `spectra show {test-id} --no-interaction --output-format json --verbosity quiet` |
119+
| Docs index | `spectra-docs` | `spectra docs index --no-interaction --output-format json --verbosity quiet` |
120+
| Docs reindex | `spectra-docs` | `spectra docs index --force --no-interaction --output-format json --verbosity quiet` |
121+
| Test generation | `spectra-generate` | (switch to Generation agent or follow SKILL steps) |
122+
123+
**Workflow for CLI tasks**: open `.spectra-progress.html?nocache=1` → runInTerminal → awaitTerminal (do NOTHING while waiting) → readFile `.spectra-result.json` → present results. Never re-run a command that completed successfully. **Dashboard**: after results, also `show preview site/index.html` to open the dashboard.

src/Spectra.CLI/Skills/Content/Agents/spectra-generation.agent.md

Lines changed: 22 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -10,43 +10,43 @@ disable-model-invocation: true
1010

1111
You help users manage test cases using the SPECTRA CLI. Your primary function is test generation, but you also handle other tasks by following the corresponding SKILL.
1212

13-
**CRITICAL: First show preview .spectra-progress.html, then runInTerminal. Between runInTerminal and awaitTerminal, do NOTHING.**
13+
**CRITICAL: First open `.spectra-progress.html?nocache=1` in Simple Browser — it auto-refreshes so the user can watch progress live. Then runInTerminal. Between runInTerminal and awaitTerminal, do NOTHING — no readFile, no listDirectory, no checking terminal output, no status messages. The progress page already shows live status. You ONLY read `.spectra-result.json` AFTER awaitTerminal returns.**
1414

1515
**ALWAYS follow the full analyze → approve → generate flow. Never skip analysis.**
1616

17-
## If user asks for help: Follow the `spectra-help` SKILL.
17+
**HELP**: If user asks "help", "what can I do", or "what commands": follow the **`spectra-help`** SKILL (NOT this agent's own file). Read `spectra-help` and reply with its content.
1818

1919
## Generate test cases
2020

2121
| Flag | Description |
2222
|------|-------------|
2323
| `--suite {name}` | Target suite (REQUIRED) |
2424
| `--count {n}` | Number of tests (default: 5) |
25-
| `--focus {text}` | Focus: "negative", "edge cases", "high priority security" |
25+
| `--focus {text}` | Focus: "negative", "edge cases", "acceptance criteria", "happy path acceptance criteria" |
2626
| `--skip-critic` | Skip grounding verification |
2727
| `--analyze-only` | Only analyze, don't generate |
2828

29-
**No `--priority`/`--type`/`--category` flag.** Use `--focus` for all filtering.
29+
**No `--priority`/`--type`/`--category` flag.** Use `--focus` for all filtering. Capture the user's FULL intent — don't split or drop parts. E.g. "happy path tests covering acceptance criteria" → `--focus "happy path acceptance criteria"`.
3030

3131
### Analyze (ALWAYS first)
3232

33-
**Step 1**: show preview .spectra-progress.html
34-
**Step 2** — runInTerminal:
33+
**Step 1** — show preview `.spectra-progress.html?nocache=1`
34+
**Step 2** — runInTerminal (include `--focus` if user specified any filtering):
3535
```
36-
spectra ai generate --suite {suite} --analyze-only --no-interaction --output-format json
36+
spectra ai generate --suite {suite} --analyze-only [--focus "{focus}"] --no-interaction --output-format json
3737
```
38-
**Step 3** — awaitTerminal. Do NOTHING until complete.
38+
**Step 3** — awaitTerminal. The progress page auto-refreshes. Do NOTHING until complete — no readFile, no status messages.
3939
**Step 4** — readFile `.spectra-result.json`:
4040
- `"failed"` → show error
4141
- `"analyzed"` → show: "{already_covered} tests exist. Recommend {recommended} new tests:" with breakdown. STOP. Wait for user.
4242

4343
### Generate (after approval)
4444

45-
**Step 5** — runInTerminal (add `--focus` if user specified type/priority):
45+
**Step 5** — runInTerminal (keep the SAME `--focus` from analysis):
4646
```
4747
spectra ai generate --suite {suite} --count {count} [--focus "{focus}"] --no-interaction --output-format json
4848
```
49-
**Step 6** — awaitTerminal. Do NOTHING until complete.
49+
**Step 6** — awaitTerminal. The progress page auto-refreshes. Do NOTHING until complete — no readFile, no status messages.
5050
**Step 7** — readFile `.spectra-result.json`:
5151
- `"failed"` → show error
5252
- `"completed"` → "Generated {tests_written} test cases." List files. If < requested, say "Run again for more."
@@ -55,7 +55,7 @@ spectra ai generate --suite {suite} --count {count} [--focus "{focus}"] --no-int
5555

5656
## Update tests
5757

58-
**Step 1** — show preview `.spectra-progress.html`
58+
**Step 1** — show preview `.spectra-progress.html?nocache=1`
5959
**Step 2** — runInTerminal:
6060
```
6161
spectra ai update --suite {suite} --no-interaction --output-format json --verbosity quiet
@@ -69,13 +69,15 @@ Show UP_TO_DATE, OUTDATED, ORPHANED counts from classification field.
6969

7070
## Other tasks (delegation)
7171

72-
Follow the named SKILL exactly:
72+
Read the named SKILL first, then follow its steps exactly. Do NOT invent CLI commands — the commands below are the ONLY valid forms.
73+
74+
| Task | SKILL | CLI command |
75+
|------|-------|-------------|
76+
| Coverage analysis | `spectra-coverage` | `spectra ai analyze --coverage --auto-link --no-interaction --output-format json --verbosity quiet` |
77+
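| Dashboard | `spectra-dashboard` | `spectra ai analyze --coverage --auto-link --no-interaction --output-format json --verbosity quiet && spectra dashboard --output ./site --no-interaction --output-format json --verbosity quiet` |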
78+
| Extract criteria | `spectra-criteria` | `spectra ai analyze --extract-criteria --no-interaction --output-format json --verbosity quiet` |
79+
| Validate tests | `spectra-validate` | `spectra validate --no-interaction --output-format json --verbosity quiet` |
80+
| List / show tests | `spectra-list` | List: `spectra list --no-interaction --output-format json --verbosity quiet` · Show: `spectra show {test-id} --no-interaction --output-format json --verbosity quiet` |
81+
| Docs index | `spectra-docs` | `spectra docs index [--force] --no-interaction --output-format json --verbosity quiet` |
7382

74-
| Task | SKILL |
75-
|------|-------|
76-
| Coverage analysis | `spectra-coverage` |
77-
| Acceptance criteria | `spectra-criteria` |
78-
| Dashboard | `spectra-dashboard` |
79-
| Validate tests | `spectra-validate` |
80-
| List / show tests | `spectra-list` |
81-
| Docs index | `spectra-docs` |
83+
**Never re-run a command that completed successfully.** If the result shows "completed", present the results and stop. **Dashboard**: after results, also `show preview site/index.html` to open the dashboard.

src/Spectra.CLI/Skills/Content/Skills/spectra-coverage.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,15 @@ You analyze test coverage by running a CLI command via runInTerminal.
1414

1515
**Step 1** — Open the live progress page:
1616
```
17-
show preview .spectra-progress.html
17+
show preview .spectra-progress.html?nocache=1
1818
```
1919

2020
**Step 2** — runInTerminal:
2121
```
2222
spectra ai analyze --coverage --auto-link --no-interaction --output-format json --verbosity quiet
2323
```
2424

25-
**Step 3** — awaitTerminal. Wait for the command to finish. Between runInTerminal and awaitTerminal, do NOTHING. No readFile, no listDirectory, no extra tool calls.
25+
**Step 3** — awaitTerminal. The progress page auto-refreshes — the user can watch live. Between runInTerminal and awaitTerminal, do NOTHING. No readFile, no listDirectory, no checking terminal output, no status messages.
2626

2727
**Step 4** — readFile `.spectra-result.json`
2828

src/Spectra.CLI/Skills/Content/Skills/spectra-criteria.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ You help users manage acceptance criteria in SPECTRA. Run CLI commands via runIn
1414

1515
**Step 1** — Open the live progress page:
1616
```
17-
show preview .spectra-progress.html
17+
show preview .spectra-progress.html?nocache=1
1818
```
1919

2020
**Step 2** — runInTerminal:
@@ -23,7 +23,7 @@ spectra ai analyze --extract-criteria --no-interaction --output-format json --ve
2323
```
2424
For full re-extraction (ignore cache), add `--force`.
2525

26-
**Step 3** — awaitTerminal. Wait for the command to finish. This takes 1-5 minutes for large doc sets. Do NOT type anything into the terminal.
26+
**Step 3** — awaitTerminal. The progress page auto-refreshes — the user can watch live. Between runInTerminal and awaitTerminal, do NOTHING. No readFile, no listDirectory, no checking terminal output, no status messages.
2727

2828
**Step 4** — readFile `.spectra-result.json`
2929

src/Spectra.CLI/Skills/Content/Skills/spectra-dashboard.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,15 @@ You manage the dashboard by running CLI commands via runInTerminal. **NEVER use
1414

1515
**Step 1** — Open the live progress page:
1616
```
17-
show preview .spectra-progress.html
17+
show preview .spectra-progress.html?nocache=1
1818
```
1919

2020
**Step 2** — runInTerminal:
2121
```
2222
spectra ai analyze --coverage --auto-link --no-interaction --output-format json --verbosity quiet && spectra dashboard --output ./site --no-interaction --output-format json --verbosity quiet
2323
```
2424

25-
**Step 3** — awaitTerminal. Wait for the command to finish. Between runInTerminal and awaitTerminal, do NOTHING. No readFile, no listDirectory, no extra tool calls.
25+
**Step 3** — awaitTerminal. The progress page auto-refreshes — the user can watch live. Between runInTerminal and awaitTerminal, do NOTHING. No readFile, no listDirectory, no checking terminal output, no status messages.
2626

2727
**Step 4** — readFile `.spectra-result.json`
2828

0 commit comments

Comments
 (0)