Skip to content

Commit 609866b

Browse files
authored
Merge pull request #80 from agent-ecosystem/fix/http-status-codes-false-positives
fix: classify HTTP 202/5xx as indeterminate in http-status-codes
2 parents 8a8ac3b + 8fd3250 commit 609866b

8 files changed

Lines changed: 152 additions & 12 deletions

File tree

SCORING.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ Not all warnings represent the same degree of degradation. A warning on `llms-tx
150150
| **0.50** | Genuine functional degradation | `llms-txt-exists`, `llms-txt-size`, `rendering-strategy`, `markdown-url-support`, `page-size-markdown`, `page-size-html`, `content-start-position`, `tabbed-content-serialization`, `section-header-quality`, `cache-header-hygiene`, `auth-gate-detection`, `auth-alternative-access` |
151151
| **0.25** | Actively steering agents to a worse path | `llms-txt-links-markdown` (markdown exists but llms.txt links to HTML; agents don't discover .md variants on their own) |
152152

153-
Checks that only have pass/fail (no warn state): `http-status-codes`, `markdown-code-fence-validity`.
153+
`markdown-code-fence-validity` only has pass/fail (no warn state). `http-status-codes` is normally pass/fail but warns when every sampled response is indeterminate (HTTP 202 from a CDN cache-miss/build, or 5xx), because the check then cannot measure bad-URL handling.
154154

155155
## Score caps
156156

docs/agent-score-calculation.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ A warning is not a binary "half credit." Different warnings represent different
141141
| **0.50** | Genuine functional degradation | `llms-txt-exists`, `llms-txt-size`, `rendering-strategy`, `markdown-url-support`, `page-size-markdown`, `page-size-html`, `content-start-position`, `tabbed-content-serialization`, `section-header-quality`, `cache-header-hygiene`, `auth-gate-detection`, `auth-alternative-access` |
142142
| **0.25** | Actively steering agents to a worse path | `llms-txt-links-markdown` (markdown exists but llms.txt links to HTML) |
143143

144-
Two checks have no warn state and are strictly pass/fail: `http-status-codes` and `markdown-code-fence-validity`.
144+
`markdown-code-fence-validity` is strictly pass/fail. `http-status-codes` is normally pass/fail but emits a warn when every sampled response is indeterminate (HTTP 202 during a CDN cache-miss/build, or 5xx), because bad-URL handling then cannot be measured.
145145

146146
## Score caps
147147

scoring-reference.md

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -131,9 +131,12 @@ Each check has a specific warn coefficient rather than a uniform default.
131131
| `auth-gate-detection` | 0.50 | Partial gating. Some docs accessible, some invisible to agents. |
132132
| `auth-alternative-access` | 0.50 | Partial alternative access. Covers some gated content but not all. |
133133

134-
Checks without a warn state (`http-status-codes`,
135-
`markdown-code-fence-validity`) don't appear in this table. Their spec
136-
definitions only have pass and fail levels.
134+
`markdown-code-fence-validity` doesn't appear in this table because its
135+
spec definition only has pass and fail levels. `http-status-codes` is
136+
normally pass/fail too, but emits a warn when every sampled response is
137+
indeterminate (HTTP 202 during CDN cache-miss/build, or 5xx); in that
138+
case scoring falls back to the default warn coefficient of 0.5 because
139+
the check couldn't measure bad-URL handling.
137140

138141
This replaces the worst-case aggregation for scoring purposes. A site where
139142
3/50 pages exceed the size limit scores ~94% of the check's weight, not 0%.

src/checks/url-stability/http-status-codes.ts

Lines changed: 42 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,11 @@ interface StatusCodeResult {
77
url: string;
88
testUrl: string;
99
status: number | null;
10-
classification: 'correct-error' | 'soft-404' | 'fetch-error';
10+
classification: 'correct-error' | 'soft-404' | 'indeterminate' | 'fetch-error';
1111
redirected?: boolean;
1212
finalUrl?: string;
1313
bodyHint?: string;
14+
indeterminateReason?: string;
1415
error?: string;
1516
}
1617

@@ -44,10 +45,31 @@ async function check(ctx: CheckContext): Promise<CheckResult> {
4445
const redirected = response.redirected || response.url !== testUrl;
4546
const finalUrl = redirected ? response.url : undefined;
4647

47-
if (status >= 400) {
48+
if (status >= 400 && status < 500) {
4849
return { url, testUrl, status, classification: 'correct-error', redirected, finalUrl };
4950
}
5051

52+
// 202 Accepted: per RFC 9110 (which obsoletes RFC 7231), the request has been accepted but processing is not
53+
// complete. Vercel/Next.js ISR returns this during cache-miss/build
54+
// for fresh URLs — it's a CDN behavior, not site-level error handling.
55+
// 5xx: server failure tells us nothing about how the site handles
56+
// bad URLs. Both are excluded from the soft-404 tally.
57+
if (status === 202 || status >= 500) {
58+
const reason =
59+
status === 202
60+
? 'HTTP 202 (CDN still processing — not a site response)'
61+
: `HTTP ${status} (server error — bad-URL handling unknown)`;
62+
return {
63+
url,
64+
testUrl,
65+
status,
66+
classification: 'indeterminate',
67+
redirected,
68+
finalUrl,
69+
indeterminateReason: reason,
70+
};
71+
}
72+
5173
// Status 200 (or other 2xx/3xx) — possible soft 404
5274
let bodyHint: string | undefined;
5375
try {
@@ -86,6 +108,8 @@ async function check(ctx: CheckContext): Promise<CheckResult> {
86108
const fetchErrors = results.filter((r) => r.classification === 'fetch-error').length;
87109
const soft404s = results.filter((r) => r.classification === 'soft-404');
88110
const correctErrors = results.filter((r) => r.classification === 'correct-error');
111+
const indeterminate = results.filter((r) => r.classification === 'indeterminate');
112+
const determinate = correctErrors.length + soft404s.length;
89113

90114
if (tested.length === 0) {
91115
return {
@@ -104,15 +128,25 @@ async function check(ctx: CheckContext): Promise<CheckResult> {
104128
};
105129
}
106130

107-
const status = soft404s.length > 0 ? 'fail' : 'pass';
108131
const pageLabel = sampled ? 'sampled pages' : 'pages';
109-
const suffix = fetchErrors > 0 ? `; ${fetchErrors} failed to fetch` : '';
132+
const fetchSuffix = fetchErrors > 0 ? `; ${fetchErrors} failed to fetch` : '';
133+
const indetSuffix =
134+
indeterminate.length > 0 ? `; ${indeterminate.length} indeterminate (HTTP 202/5xx)` : '';
135+
const suffix = `${fetchSuffix}${indetSuffix}`;
110136

137+
let status: 'pass' | 'warn' | 'fail';
111138
let message: string;
112-
if (status === 'pass') {
113-
message = `All ${tested.length} ${pageLabel} return proper error codes for bad URLs${suffix}`;
139+
if (determinate === 0) {
140+
// Every response was indeterminate (e.g. all 202 or 5xx). We can't say
141+
// whether the site handles bad URLs correctly.
142+
status = 'warn';
143+
message = `Could not determine bad-URL handling: all ${indeterminate.length} ${pageLabel} returned indeterminate responses${fetchSuffix}`;
144+
} else if (soft404s.length > 0) {
145+
status = 'fail';
146+
message = `${soft404s.length} of ${determinate} ${pageLabel} return 200 for non-existent URLs (soft 404)${suffix}`;
114147
} else {
115-
message = `${soft404s.length} of ${tested.length} ${pageLabel} return 200 for non-existent URLs (soft 404)${suffix}`;
148+
status = 'pass';
149+
message = `All ${determinate} ${pageLabel} return proper error codes for bad URLs${suffix}`;
116150
}
117151

118152
return {
@@ -126,6 +160,7 @@ async function check(ctx: CheckContext): Promise<CheckResult> {
126160
sampled,
127161
soft404Count: soft404s.length,
128162
correctErrorCount: correctErrors.length,
163+
indeterminateCount: indeterminate.length,
129164
fetchErrors,
130165
pageResults: results,
131166
discoveryWarnings: warnings,

src/cli/formatters/text.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,7 @@ const DETAIL_FORMATTERS: Record<string, DetailFormatter> = {
272272
classification: string;
273273
status?: number | null;
274274
bodyHint?: string;
275+
indeterminateReason?: string;
275276
error?: string;
276277
}>
277278
| undefined;
@@ -280,6 +281,10 @@ const DETAIL_FORMATTERS: Record<string, DetailFormatter> = {
280281
.filter((p) => p.classification !== 'correct-error')
281282
.map((p) => {
282283
if (p.error) return formatDetailLine('fail', p.testUrl ?? p.url, p.error);
284+
if (p.classification === 'indeterminate') {
285+
const info = p.indeterminateReason ?? `HTTP ${p.status} (indeterminate)`;
286+
return formatDetailLine('warn', p.testUrl ?? p.url, info);
287+
}
283288
const info = p.bodyHint
284289
? `HTTP ${p.status} (${p.bodyHint})`
285290
: `HTTP ${p.status} instead of 404`;

src/scoring/proportions.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,10 @@ function httpStatusCodesExtractor(
292292
}
293293
});
294294

295+
// All responses indeterminate (or fetch errors): return no per-page proportion so scoring falls back to the check's top-level status.
296+
// We can't say whether the site handles bad URLs correctly.
297+
if (items.every((i) => i.status === 'skip')) return undefined;
298+
295299
return countByStatus(items, weight.warnCoefficient);
296300
}
297301

test/unit/checks/http-status-codes.test.ts

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,66 @@ describe('http-status-codes', () => {
204204
expect(result.message).toContain('1 failed to fetch');
205205
});
206206

207+
it('classifies HTTP 202 as indeterminate, not soft-404', async () => {
208+
server.use(
209+
http.get(
210+
'http://test.local/docs/page1-afdocs-nonexistent-8f3a',
211+
() => new HttpResponse(null, { status: 202 }),
212+
),
213+
http.get(
214+
'http://test.local/docs/page2-afdocs-nonexistent-8f3a',
215+
() => new HttpResponse('Not Found', { status: 404 }),
216+
),
217+
);
218+
219+
const content = `# Docs\n## Links\n- [Page 1](http://test.local/docs/page1): First\n- [Page 2](http://test.local/docs/page2): Second\n`;
220+
const result = await check.run(makeCtx(content));
221+
expect(result.status).toBe('pass');
222+
expect(result.details?.soft404Count).toBe(0);
223+
expect(result.details?.correctErrorCount).toBe(1);
224+
expect(result.details?.indeterminateCount).toBe(1);
225+
const pageResults = result.details?.pageResults as Array<{
226+
classification: string;
227+
indeterminateReason?: string;
228+
}>;
229+
const indet = pageResults.find((p) => p.classification === 'indeterminate');
230+
expect(indet?.indeterminateReason).toContain('202');
231+
});
232+
233+
it('classifies 5xx as indeterminate', async () => {
234+
server.use(
235+
http.get(
236+
'http://test.local/docs/page1-afdocs-nonexistent-8f3a',
237+
() => new HttpResponse('Server Error', { status: 503 }),
238+
),
239+
);
240+
241+
const content = `# Docs\n## Links\n- [Page 1](http://test.local/docs/page1): First\n`;
242+
const result = await check.run(makeCtx(content));
243+
const pageResults = result.details?.pageResults as Array<{ classification: string }>;
244+
expect(pageResults[0].classification).toBe('indeterminate');
245+
expect(result.details?.indeterminateCount).toBe(1);
246+
});
247+
248+
it('warns when all responses are indeterminate', async () => {
249+
server.use(
250+
http.get(
251+
'http://test.local/docs/page1-afdocs-nonexistent-8f3a',
252+
() => new HttpResponse(null, { status: 202 }),
253+
),
254+
http.get(
255+
'http://test.local/docs/page2-afdocs-nonexistent-8f3a',
256+
() => new HttpResponse('Server Error', { status: 503 }),
257+
),
258+
);
259+
260+
const content = `# Docs\n## Links\n- [Page 1](http://test.local/docs/page1): First\n- [Page 2](http://test.local/docs/page2): Second\n`;
261+
const result = await check.run(makeCtx(content));
262+
expect(result.status).toBe('warn');
263+
expect(result.message).toContain('Could not determine');
264+
expect(result.details?.indeterminateCount).toBe(2);
265+
});
266+
207267
it('strips fragments from test URLs', async () => {
208268
server.use(
209269
http.get(

test/unit/scoring/proportions.test.ts

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,39 @@ describe('proportions', () => {
120120
expect(result!.proportion).toBeCloseTo(0.667, 2);
121121
expect(result!.tested).toBe(3);
122122
});
123+
124+
it('skips indeterminate items from the proportion', () => {
125+
const result = getCheckProportion(
126+
makeResult('http-status-codes', 'fail', {
127+
pageResults: [
128+
{ url: '/a', classification: 'correct-error', status: 404 },
129+
{ url: '/b', classification: 'soft-404', status: 200 },
130+
{ url: '/c', classification: 'indeterminate', status: 202 },
131+
],
132+
}),
133+
makeWeight(7),
134+
);
135+
// 1 pass, 1 fail, 1 skipped = 1/2
136+
expect(result!.proportion).toBeCloseTo(0.5, 2);
137+
expect(result!.tested).toBe(2);
138+
});
139+
140+
it('falls back to top-level warn status when all items are indeterminate', () => {
141+
// Extractor returns undefined for all-indeterminate, so scoring falls
142+
// back to the top-level 'warn' status. The site gets warn-coefficient
143+
// credit for the check rather than a hard 0 (we couldn't measure).
144+
const result = getCheckProportion(
145+
makeResult('http-status-codes', 'warn', {
146+
pageResults: [
147+
{ url: '/a', classification: 'indeterminate', status: 202 },
148+
{ url: '/b', classification: 'indeterminate', status: 503 },
149+
],
150+
}),
151+
makeWeight(7),
152+
);
153+
expect(result!.proportion).toBe(0.5);
154+
expect(result!.tested).toBe(1);
155+
});
123156
});
124157

125158
describe('markdown-url-support', () => {

0 commit comments

Comments
 (0)