Skip to content

Commit 75cd0e0

Browse files
authored
Merge pull request #41 from philip/canonical-origin-rewrite
feat: add --canonical-origin flag for preview/staging testing
2 parents 7fb6ee1 + 38439ef commit 75cd0e0

7 files changed

Lines changed: 147 additions & 4 deletions

File tree

docs/reference/cli.md

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -137,10 +137,11 @@ afdocs check https://docs.example.com --doc-locale ja --doc-version 2.x
137137

138138
### Request behavior
139139

140-
| Flag | Default | Description |
141-
| ----------------------- | ------- | -------------------------------------- |
142-
| `--max-concurrency <n>` | `3` | Maximum concurrent HTTP requests |
143-
| `--request-delay <ms>` | `200` | Delay between requests in milliseconds |
140+
| Flag | Default | Description |
141+
| -------------------------- | ------- | ------------------------------------------- |
142+
| `--max-concurrency <n>` | `3` | Maximum concurrent HTTP requests |
143+
| `--request-delay <ms>` | `200` | Delay between requests in milliseconds |
144+
| `--canonical-origin <url>` | | Production origin your content links to; rewritten to the tested origin in fetched text content |
144145

145146
AFDocs enforces delays between requests and caps concurrent connections to avoid overloading your server. Adjust these if you need gentler or faster runs:
146147

@@ -152,6 +153,13 @@ afdocs check https://docs.example.com --request-delay 500 --max-concurrency 1
152153
afdocs check https://docs.example.com --request-delay 50 --max-concurrency 10
153154
```
154155

156+
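Use `--canonical-origin` when the URLs in your site's `sitemap.xml` and `llms.txt` point to a different origin than the one you're testing, as happens with preview deployments or localhost. AFDocs then rewrites every occurrence of the canonical origin in fetched text content to the origin of the URL being checked.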
157+
158+
```bash
159+
# Test a preview deployment
160+
afdocs check https://preview-xyz-example.app/docs --canonical-origin https://example.com
161+
```
162+
155163
### Size thresholds
156164

157165
| Flag | Default | Description |

docs/reference/config-file.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ options:
2727
requestDelay: 100
2828
preferredLocale: en
2929
preferredVersion: v3
30+
canonicalOrigin: https://example.com
3031
thresholds:
3132
pass: 50000
3233
fail: 100000
@@ -63,6 +64,7 @@ Override default runner options. All fields are optional:
6364
| `requestTimeout` | `30000` | Timeout for individual HTTP requests in milliseconds |
6465
| `preferredLocale` | auto-detect | Preferred locale for URL discovery (e.g. `en`, `fr`, `ja`) |
6566
| `preferredVersion` | auto-detect | Preferred version for URL discovery (e.g. `v3`, `2.x`) |
67+
| `canonicalOrigin` | | Production origin your content links to; rewritten to the tested origin in fetched text content |
6668
| `thresholds.pass` | `50000` | Page size pass threshold in characters |
6769
| `thresholds.fail` | `100000` | Page size fail threshold in characters |
6870

src/cli/commands/check.ts

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,10 @@ export function registerCheckCommand(program: Command): void {
3737
.option('-v, --verbose', 'Show per-page details for checks with issues')
3838
.option('--fixes', 'Show fix suggestions for warn/fail checks')
3939
.option('--score', 'Include scoring data in JSON output')
40+
.option(
41+
'--canonical-origin <url>',
42+
'Rewrite this origin to the target in fetched content (for preview/staging testing)',
43+
)
4044
.action(async (rawUrl: string | undefined, opts: Record<string, unknown>) => {
4145
// Load config: explicit path or auto-discover
4246
let config;
@@ -170,6 +174,19 @@ export function registerCheckCommand(program: Command): void {
170174
const preferredVersion =
171175
(opts.docVersion as string | undefined) ?? config?.options?.preferredVersion;
172176

177+
let canonicalOrigin: string | undefined;
178+
const rawCanonical =
179+
(opts.canonicalOrigin as string | undefined) ?? config?.options?.canonicalOrigin;
180+
if (rawCanonical) {
181+
try {
182+
canonicalOrigin = new URL(normalizeUrl(rawCanonical)).origin;
183+
} catch {
184+
process.stderr.write(`Error: Invalid --canonical-origin URL "${rawCanonical}".\n`);
185+
process.exitCode = 1;
186+
return;
187+
}
188+
}
189+
173190
const report = await runChecks(url, {
174191
checkIds,
175192
maxConcurrency,
@@ -183,6 +200,7 @@ export function registerCheckCommand(program: Command): void {
183200
},
184201
...(preferredLocale && { preferredLocale }),
185202
...(preferredVersion && { preferredVersion }),
203+
...(canonicalOrigin && { canonicalOrigin }),
186204
});
187205

188206
let output: string;

src/http.ts

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,23 @@ interface RateLimitedHttpClientOptions {
1111
requestDelay: number;
1212
requestTimeout: number;
1313
maxConcurrency: number;
14+
canonicalOrigin?: string;
15+
targetOrigin?: string;
1416
}
1517

1618
const MAX_RETRIES = 2;
1719

20+
function escapeRegExp(s: string): string {
21+
return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
22+
}
23+
1824
export function createHttpClient(options: RateLimitedHttpClientOptions): HttpClient {
1925
let lastRequestTime = 0;
2026
let activeRequests = 0;
27+
const originPattern =
28+
options.canonicalOrigin && options.targetOrigin
29+
? new RegExp(escapeRegExp(options.canonicalOrigin), 'g')
30+
: null;
2131

2232
async function waitForSlot(): Promise<void> {
2333
// Wait for concurrency slot
@@ -66,6 +76,24 @@ export function createHttpClient(options: RateLimitedHttpClientOptions): HttpCli
6676
}
6777
}
6878

79+
if (originPattern && options.targetOrigin) {
80+
const ct = response.headers.get('content-type') ?? '';
81+
if (/text|xml|json|markdown/.test(ct)) {
82+
const body = await response.text();
83+
originPattern.lastIndex = 0;
84+
const rewritten = body.replace(originPattern, options.targetOrigin);
85+
return {
86+
ok: response.ok,
87+
status: response.status,
88+
statusText: response.statusText,
89+
headers: response.headers,
90+
url: response.url,
91+
redirected: response.redirected,
92+
text: async () => rewritten,
93+
} as HttpResponse;
94+
}
95+
}
96+
6997
return response as HttpResponse;
7098
} finally {
7199
activeRequests--;

src/runner.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@ export function createContext(baseUrl: string, options?: Partial<RunnerOptions>)
5353
requestDelay: merged.requestDelay,
5454
requestTimeout: merged.requestTimeout,
5555
maxConcurrency: merged.maxConcurrency,
56+
canonicalOrigin: merged.canonicalOrigin,
57+
targetOrigin: merged.canonicalOrigin ? url.origin : undefined,
5658
}),
5759
options: merged,
5860
pageCache: new Map(),

src/types.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,8 @@ export interface CheckOptions {
8282
preferredLocale?: string;
8383
/** Preferred version for URL discovery (e.g. 'v3', '2.x', 'latest'). Overrides auto-detection from baseUrl. */
8484
preferredVersion?: string;
85+
/** Canonical origin to rewrite in fetched content (for preview/staging testing). */
86+
canonicalOrigin?: string;
8587
}
8688

8789
export interface SizeThresholds {

test/unit/helpers/http.test.ts

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,4 +89,87 @@ describe('createHttpClient', () => {
8989
expect(response.status).toBe(429);
9090
expect(callCount).toBe(1);
9191
});
92+
93+
describe('origin rewriting', () => {
94+
function makeTextResponse(
95+
body: string,
96+
opts: { status?: number; contentType?: string; url?: string; redirected?: boolean } = {},
97+
): Response {
98+
const status = opts.status ?? 200;
99+
return {
100+
ok: status >= 200 && status < 300,
101+
status,
102+
statusText: 'OK',
103+
headers: new Headers(opts.contentType ? { 'content-type': opts.contentType } : undefined),
104+
url: opts.url ?? 'http://preview.local/docs/llms.txt',
105+
redirected: opts.redirected ?? false,
106+
text: async () => body,
107+
} as unknown as Response;
108+
}
109+
110+
it('rewrites all occurrences of canonicalOrigin in text responses', async () => {
111+
const body = [
112+
'- [Guide](https://prod.example.com/docs/guide)',
113+
'- [API](https://prod.example.com/docs/api)',
114+
'All pages: https://prod.example.com/docs/llms.txt',
115+
].join('\n');
116+
globalThis.fetch = vi.fn(async () => makeTextResponse(body, { contentType: 'text/plain' }));
117+
118+
const client = createHttpClient({
119+
requestDelay: 0,
120+
requestTimeout: 5000,
121+
maxConcurrency: 10,
122+
canonicalOrigin: 'https://prod.example.com',
123+
targetOrigin: 'https://preview.local',
124+
});
125+
const response = await client.fetch('http://preview.local/docs/llms.txt');
126+
const text = await response.text();
127+
128+
expect(text).not.toContain('prod.example.com');
129+
expect(text).toContain('https://preview.local/docs/guide');
130+
expect(text).toContain('https://preview.local/docs/api');
131+
expect(text).toContain('https://preview.local/docs/llms.txt');
132+
});
133+
134+
it('preserves url and redirected from original response', async () => {
135+
globalThis.fetch = vi.fn(async () =>
136+
makeTextResponse('https://prod.example.com/page', {
137+
contentType: 'text/plain',
138+
url: 'http://preview.local/docs/llms.txt',
139+
redirected: true,
140+
}),
141+
);
142+
143+
const client = createHttpClient({
144+
requestDelay: 0,
145+
requestTimeout: 5000,
146+
maxConcurrency: 10,
147+
canonicalOrigin: 'https://prod.example.com',
148+
targetOrigin: 'https://preview.local',
149+
});
150+
const response = await client.fetch('http://preview.local/docs/llms.txt');
151+
152+
expect(response.url).toBe('http://preview.local/docs/llms.txt');
153+
expect(response.redirected).toBe(true);
154+
});
155+
156+
it('skips rewrite for non-text content types', async () => {
157+
const original = 'https://prod.example.com/binary-data';
158+
globalThis.fetch = vi.fn(async () =>
159+
makeTextResponse(original, { contentType: 'application/octet-stream' }),
160+
);
161+
162+
const client = createHttpClient({
163+
requestDelay: 0,
164+
requestTimeout: 5000,
165+
maxConcurrency: 10,
166+
canonicalOrigin: 'https://prod.example.com',
167+
targetOrigin: 'https://preview.local',
168+
});
169+
const response = await client.fetch('http://preview.local/file.bin');
170+
const text = await response.text();
171+
172+
expect(text).toBe(original);
173+
});
174+
});
92175
});

0 commit comments

Comments
 (0)