Skip to content

Commit 2537f0e

Browse files
committed
Refactor shared logic, handle sitemap, surface warnings
1 parent f30d06d commit 2537f0e

20 files changed

Lines changed: 1320 additions & 131 deletions

README.md

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ Test your documentation site against the [Agent-Friendly Documentation Spec](htt
77

88
Agents don't use docs the way humans do. They hit truncation limits, get walls of CSS instead of content, can't follow cross-host redirects, and don't know about quality-of-life improvements like `llms.txt` or `.md` docs pages that would make life swell. Maybe this is because the industry has lacked guidance — until now.
99

10-
afdocs runs 21 checks across 8 categories to evaluate how well your docs serve agent consumers.
10+
afdocs runs 21 checks across 8 categories to evaluate how well your docs serve agent consumers. Seven are fully implemented; the rest return `skip` until completed.
1111

1212
## Quick start
1313

@@ -27,8 +27,12 @@ llms-txt
2727
✓ llms-txt-links-resolve: All 50 tested links resolve (177 total links)
2828
✓ llms-txt-links-markdown: 50/50 links point to markdown content (100%)
2929
30+
Markdown Availability
31+
✗ content-negotiation: Server ignores Accept: text/markdown header (0/50 sampled pages return markdown)
32+
✗ markdown-url-support: No sampled pages support .md URLs (0/50 tested)
33+
3034
Summary
31-
5 passed, 16 skipped (21 total)
35+
5 passed, 2 failed, 14 skipped (21 total)
3236
```
3337

3438
## Install
@@ -128,7 +132,7 @@ describe('agent-friendliness', () => {
128132

129133
## Checks
130134

131-
21 checks across 8 categories. Checks marked with \* are not yet implemented and will return `skip`.
135+
21 checks across 8 categories. Checks marked with \* are stub implementations that return `skip`.
132136

133137
### Category 1: llms.txt
134138

@@ -142,10 +146,10 @@ describe('agent-friendliness', () => {
142146

143147
### Category 2: Markdown Availability
144148

145-
| Check | Description |
146-
| ------------------------- | ------------------------------------------------- |
147-
| `markdown-url-support` \* | Whether `.md` URL variants return markdown |
148-
| `content-negotiation` \* | Whether the server honors `Accept: text/markdown` |
149+
| Check | Description |
150+
| ---------------------- | ------------------------------------------------- |
151+
| `markdown-url-support` | Whether `.md` URL variants return markdown |
152+
| `content-negotiation` | Whether the server honors `Accept: text/markdown` |
149153

150154
### Category 3: Page Size and Truncation Risk
151155

src/checks/llms-txt/llms-txt-exists.ts

Lines changed: 91 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ async function checkLlmsTxtExists(ctx: CheckContext): Promise<CheckResult> {
3232
status: number;
3333
redirected: boolean;
3434
finalUrl?: string;
35+
error?: string;
3536
}> = [];
3637

3738
for (const url of candidates) {
@@ -50,8 +51,8 @@ async function checkLlmsTxtExists(ctx: CheckContext): Promise<CheckResult> {
5051
if (response.ok && isText) {
5152
const content = await response.text();
5253
// Check it's not an HTML error page
53-
const looksLikeHtml =
54-
content.trimStart().startsWith('<!') || content.trimStart().startsWith('<html');
54+
const trimmed = content.trimStart().toLowerCase();
55+
const looksLikeHtml = trimmed.startsWith('<!') || trimmed.startsWith('<html');
5556
if (!looksLikeHtml && content.trim().length > 0) {
5657
const crossHost = response.redirected && isCrossHostRedirect(url, response.url);
5758
discovered.push({
@@ -64,23 +65,106 @@ async function checkLlmsTxtExists(ctx: CheckContext): Promise<CheckResult> {
6465
});
6566
}
6667
}
67-
} catch {
68-
checkedUrls.push({ url, status: 0, redirected: false });
68+
} catch (err) {
69+
checkedUrls.push({
70+
url,
71+
status: 0,
72+
redirected: false,
73+
error: err instanceof Error ? err.message : String(err),
74+
});
6975
}
7076
}
7177

78+
// When no llms.txt is found, check whether any candidates redirected cross-host.
79+
// If so, try {redirected_origin}/llms.txt as a fallback.
80+
const redirectedOrigins: string[] = [];
81+
if (discovered.length === 0) {
82+
const checkedSet = new Set(checkedUrls.map((u) => u.url));
83+
const seenOrigins = new Set<string>();
84+
for (const checked of checkedUrls) {
85+
if (checked.finalUrl && isCrossHostRedirect(checked.url, checked.finalUrl)) {
86+
try {
87+
const redirectedOrigin = new URL(checked.finalUrl).origin;
88+
const fallbackUrl = `${redirectedOrigin}/llms.txt`;
89+
if (!checkedSet.has(fallbackUrl) && !seenOrigins.has(redirectedOrigin)) {
90+
seenOrigins.add(redirectedOrigin);
91+
checkedSet.add(fallbackUrl);
92+
redirectedOrigins.push(redirectedOrigin);
93+
}
94+
} catch {
95+
/* malformed URL */
96+
}
97+
}
98+
}
99+
100+
for (const redirectedOrigin of redirectedOrigins) {
101+
const fallbackUrl = `${redirectedOrigin}/llms.txt`;
102+
try {
103+
const response = await ctx.http.fetch(fallbackUrl);
104+
const contentType = response.headers.get('content-type') ?? '';
105+
const isText = contentType.includes('text/');
106+
107+
checkedUrls.push({
108+
url: fallbackUrl,
109+
status: response.status,
110+
redirected: response.redirected,
111+
finalUrl: response.redirected ? response.url : undefined,
112+
});
113+
114+
if (response.ok && isText) {
115+
const content = await response.text();
116+
const trimmed = content.trimStart().toLowerCase();
117+
const looksLikeHtml = trimmed.startsWith('<!') || trimmed.startsWith('<html');
118+
if (!looksLikeHtml && content.trim().length > 0) {
119+
discovered.push({
120+
url: fallbackUrl,
121+
content,
122+
status: response.status,
123+
redirected: false,
124+
crossHostRedirect: true,
125+
});
126+
}
127+
}
128+
} catch (err) {
129+
checkedUrls.push({
130+
url: fallbackUrl,
131+
status: 0,
132+
redirected: false,
133+
error: err instanceof Error ? err.message : String(err),
134+
});
135+
}
136+
}
137+
}
138+
139+
const fetchErrors = checkedUrls.filter((u) => u.error).length;
140+
const rateLimited = checkedUrls.filter((u) => u.status === 429).length;
141+
142+
const suffix =
143+
(fetchErrors > 0 ? `; ${fetchErrors} failed to fetch` : '') +
144+
(rateLimited > 0 ? `; ${rateLimited} rate-limited (HTTP 429)` : '');
145+
72146
// Store discovered files for downstream checks
73147
const details: Record<string, unknown> = {
74148
candidateUrls: checkedUrls,
75149
discoveredFiles: discovered,
150+
fetchErrors,
151+
rateLimited,
76152
};
77153

154+
if (redirectedOrigins.length > 0) {
155+
details.redirectedOrigins = redirectedOrigins;
156+
}
157+
78158
if (discovered.length === 0) {
159+
const redirectNote =
160+
redirectedOrigins.length > 0
161+
? `; candidates redirected cross-host to ${redirectedOrigins.join(', ')} (agents can't follow cross-host redirects)`
162+
: '';
79163
return {
80164
id: 'llms-txt-exists',
81165
category: 'llms-txt',
82166
status: 'fail',
83-
message: `No llms.txt found at any candidate location (${candidates.join(', ')})`,
167+
message: `No llms.txt found at any candidate location (${candidates.join(', ')})${redirectNote}${suffix}`,
84168
details,
85169
};
86170
}
@@ -92,8 +176,7 @@ async function checkLlmsTxtExists(ctx: CheckContext): Promise<CheckResult> {
92176
id: 'llms-txt-exists',
93177
category: 'llms-txt',
94178
status: 'warn',
95-
message:
96-
'llms.txt found but only reachable via cross-host redirect (agents may not follow it)',
179+
message: `llms.txt found but only reachable via cross-host redirect (agents may not follow it)${suffix}`,
97180
details,
98181
};
99182
}
@@ -110,7 +193,7 @@ async function checkLlmsTxtExists(ctx: CheckContext): Promise<CheckResult> {
110193
id: 'llms-txt-exists',
111194
category: 'llms-txt',
112195
status: 'pass',
113-
message: `llms.txt found at ${discovered.length} location(s)`,
196+
message: `llms.txt found at ${discovered.length} location(s)${suffix}`,
114197
details,
115198
};
116199
}

src/checks/llms-txt/llms-txt-links-markdown.ts

Lines changed: 49 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
11
import { registerCheck } from '../registry.js';
22
import { extractMarkdownLinks } from './llms-txt-valid.js';
3+
import { toMdUrls } from '../../helpers/to-md-urls.js';
34
import type { CheckContext, CheckResult, DiscoveredFile } from '../../types.js';
45

56
interface LinkMarkdownResult {
67
url: string;
78
hasMarkdownExtension: boolean;
89
servesMarkdown: boolean;
10+
status?: number;
911
mdVariantAvailable?: boolean;
12+
error?: string;
1013
}
1114

1215
function hasMarkdownExtension(url: string): boolean {
@@ -55,7 +58,8 @@ async function checkLlmsTxtLinksMarkdown(ctx: CheckContext): Promise<CheckResult
5558
// Sample if too many
5659
let linksToTest = Array.from(allLinks);
5760
const totalLinks = linksToTest.length;
58-
if (totalLinks > ctx.options.maxLinksToTest) {
61+
const wasSampled = totalLinks > ctx.options.maxLinksToTest;
62+
if (wasSampled) {
5963
for (let i = linksToTest.length - 1; i > 0; i--) {
6064
const j = Math.floor(Math.random() * (i + 1));
6165
[linksToTest[i], linksToTest[j]] = [linksToTest[j], linksToTest[i]];
@@ -84,33 +88,48 @@ async function checkLlmsTxtLinksMarkdown(ctx: CheckContext): Promise<CheckResult
8488
});
8589
const contentType = response.headers.get('content-type') ?? '';
8690
if (contentType.includes('text/markdown')) {
87-
return { url, hasMarkdownExtension: false, servesMarkdown: true };
91+
return {
92+
url,
93+
hasMarkdownExtension: false,
94+
servesMarkdown: true,
95+
status: response.status,
96+
};
8897
}
8998

90-
// Try .md variant
91-
const mdUrl = url.replace(/\/?$/, '.md');
92-
try {
93-
const mdResponse = await ctx.http.fetch(mdUrl, { method: 'HEAD' });
94-
if (mdResponse.ok) {
95-
return {
96-
url,
97-
hasMarkdownExtension: false,
98-
servesMarkdown: false,
99-
mdVariantAvailable: true,
100-
};
99+
// Try .md variant candidates
100+
const candidates = toMdUrls(url);
101+
for (const mdUrl of candidates) {
102+
try {
103+
const mdResponse = await ctx.http.fetch(mdUrl, { method: 'HEAD' });
104+
if (mdResponse.ok) {
105+
return {
106+
url,
107+
hasMarkdownExtension: false,
108+
servesMarkdown: false,
109+
status: response.status,
110+
mdVariantAvailable: true,
111+
};
112+
}
113+
} catch {
114+
// Try next candidate
101115
}
102-
} catch {
103-
// .md variant not available
104116
}
105117

106118
return {
107119
url,
108120
hasMarkdownExtension: false,
109121
servesMarkdown: false,
122+
status: response.status,
110123
mdVariantAvailable: false,
111124
};
112-
} catch {
113-
return { url, hasMarkdownExtension: false, servesMarkdown: false };
125+
} catch (err) {
126+
return {
127+
url,
128+
hasMarkdownExtension: false,
129+
servesMarkdown: false,
130+
status: 0,
131+
error: err instanceof Error ? err.message : String(err),
132+
};
114133
}
115134
}),
116135
);
@@ -120,22 +139,32 @@ async function checkLlmsTxtLinksMarkdown(ctx: CheckContext): Promise<CheckResult
120139
const markdownLinks = results.filter((r) => r.hasMarkdownExtension || r.servesMarkdown).length;
121140
const mdVariantsAvailable = results.filter((r) => r.mdVariantAvailable).length;
122141
const markdownRate = results.length > 0 ? markdownLinks / results.length : 0;
142+
const fetchErrors = results.filter((r) => r.error).length;
143+
const rateLimited = results.filter((r) => r.status === 429).length;
144+
145+
const linkLabel = wasSampled ? 'sampled links' : 'links';
146+
const suffix =
147+
(fetchErrors > 0 ? `; ${fetchErrors} failed to fetch` : '') +
148+
(rateLimited > 0 ? `; ${rateLimited} rate-limited (HTTP 429)` : '');
123149

124150
const details: Record<string, unknown> = {
125151
totalLinks,
126152
testedLinks: results.length,
153+
sampled: wasSampled,
127154
markdownLinks,
128155
htmlLinks: results.length - markdownLinks,
129156
mdVariantsAvailable,
130157
markdownRate: Math.round(markdownRate * 100),
158+
fetchErrors,
159+
rateLimited,
131160
};
132161

133162
if (markdownRate >= 0.9) {
134163
return {
135164
id: 'llms-txt-links-markdown',
136165
category: 'llms-txt',
137166
status: 'pass',
138-
message: `${markdownLinks}/${results.length} links point to markdown content (${Math.round(markdownRate * 100)}%)`,
167+
message: `${markdownLinks}/${results.length} ${linkLabel} point to markdown content (${Math.round(markdownRate * 100)}%)${suffix}`,
139168
details,
140169
};
141170
}
@@ -145,7 +174,7 @@ async function checkLlmsTxtLinksMarkdown(ctx: CheckContext): Promise<CheckResult
145174
id: 'llms-txt-links-markdown',
146175
category: 'llms-txt',
147176
status: 'warn',
148-
message: `Links point to HTML, but ${mdVariantsAvailable} have .md variants available`,
177+
message: `Links point to HTML, but ${mdVariantsAvailable} have .md variants available${suffix}`,
149178
details,
150179
};
151180
}
@@ -154,7 +183,7 @@ async function checkLlmsTxtLinksMarkdown(ctx: CheckContext): Promise<CheckResult
154183
id: 'llms-txt-links-markdown',
155184
category: 'llms-txt',
156185
status: 'fail',
157-
message: 'Links point to HTML and no markdown alternatives detected',
186+
message: `Links point to HTML and no markdown alternatives detected${suffix}`,
158187
details,
159188
};
160189
}

src/checks/llms-txt/llms-txt-links-resolve.ts

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,13 @@ async function checkLlmsTxtLinksResolve(ctx: CheckContext): Promise<CheckResult>
8989
const resolved = results.filter((r) => r.ok).length;
9090
const broken = results.filter((r) => !r.ok);
9191
const resolveRate = resolved / results.length;
92+
const fetchErrors = results.filter((r) => r.error).length;
93+
const rateLimited = results.filter((r) => r.status === 429).length;
94+
95+
const linkLabel = wasSampled ? 'sampled links' : 'links';
96+
const suffix =
97+
(fetchErrors > 0 ? `; ${fetchErrors} failed to fetch` : '') +
98+
(rateLimited > 0 ? `; ${rateLimited} rate-limited (HTTP 429)` : '');
9299

93100
const details: Record<string, unknown> = {
94101
totalLinks,
@@ -97,14 +104,16 @@ async function checkLlmsTxtLinksResolve(ctx: CheckContext): Promise<CheckResult>
97104
resolved,
98105
broken: broken.map((b) => ({ url: b.url, status: b.status, error: b.error })),
99106
resolveRate: Math.round(resolveRate * 100),
107+
fetchErrors,
108+
rateLimited,
100109
};
101110

102111
if (resolveRate === 1) {
103112
return {
104113
id: 'llms-txt-links-resolve',
105114
category: 'llms-txt',
106115
status: 'pass',
107-
message: `All ${results.length} tested links resolve (${totalLinks} total links)`,
116+
message: `All ${results.length} tested ${linkLabel} resolve (${totalLinks} total links)${suffix}`,
108117
details,
109118
};
110119
}
@@ -114,7 +123,7 @@ async function checkLlmsTxtLinksResolve(ctx: CheckContext): Promise<CheckResult>
114123
id: 'llms-txt-links-resolve',
115124
category: 'llms-txt',
116125
status: 'warn',
117-
message: `${resolved}/${results.length} links resolve (${Math.round(resolveRate * 100)}%); ${broken.length} broken`,
126+
message: `${resolved}/${results.length} ${linkLabel} resolve (${Math.round(resolveRate * 100)}%); ${broken.length} broken${suffix}`,
118127
details,
119128
};
120129
}
@@ -123,7 +132,7 @@ async function checkLlmsTxtLinksResolve(ctx: CheckContext): Promise<CheckResult>
123132
id: 'llms-txt-links-resolve',
124133
category: 'llms-txt',
125134
status: 'fail',
126-
message: `Only ${resolved}/${results.length} links resolve (${Math.round(resolveRate * 100)}%); ${broken.length} broken`,
135+
message: `Only ${resolved}/${results.length} ${linkLabel} resolve (${Math.round(resolveRate * 100)}%); ${broken.length} broken${suffix}`,
127136
details,
128137
};
129138
}

0 commit comments

Comments
 (0)