Skip to content

Commit 62cc3d8

Browse files
authored
Merge pull request #45 from mvvmm/feat/auto-detect-md-url-order
feat: auto-detect .md URL pattern to reduce wasted requests
2 parents f727c79 + 841a7e3 commit 62cc3d8

2 files changed

Lines changed: 116 additions & 2 deletions

File tree

src/checks/markdown-availability/markdown-url-support.ts

Lines changed: 46 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,41 @@ interface PageResult {
1414
error?: string;
1515
}
1616

17+
/**
 * Detect whether the site prefers `page.md` (direct) or `page/index.md` (index)
 * based on which candidate succeeded in previous results.
 *
 * Only results that are `supported` and carry a non-empty `mdUrl` are counted.
 * A URL counts as the index form when its path (query string and fragment
 * ignored) ends with `/index.md` or `/index.mdx`, case-insensitively; every
 * other successful URL counts as the direct form.
 *
 * @param results - Page results collected so far.
 * @returns 'index' if `page/index.md` accounts for at least 80% of the counted
 *   successes, 'direct' if `page.md` does, or null when fewer than two
 *   successes have been seen or neither form reaches 80%.
 */
function detectPreferredMdForm(results: PageResult[]): 'direct' | 'index' | null {
  // Minimum number of successful pages before any preference is reported.
  const minSamples = 2;
  // Share of successes one form needs to be considered the clear winner.
  const winShare = 0.8;
  const indexFormPattern = /\/index\.mdx?$/i;

  let directWins = 0;
  let indexWins = 0;
  for (const r of results) {
    if (!r.supported || !r.mdUrl) continue;
    // Drop any query string / fragment so `…/index.md?v=2` is still recognised.
    const path = r.mdUrl.replace(/[?#].*$/, '');
    if (indexFormPattern.test(path)) {
      indexWins++;
    } else {
      directWins++;
    }
  }

  const total = directWins + indexWins;
  if (total < minSamples) return null;
  if (indexWins / total >= winShare) return 'index';
  if (directWins / total >= winShare) return 'direct';
  return null;
}
40+
41+
/**
 * Reorder toMdUrls() candidates based on the detected site preference.
 *
 * Candidates are split by form: a URL whose path (query string and fragment
 * ignored) ends with `/index.md` or `/index.mdx` is the index form, anything
 * else is the direct form. The preferred form is moved to the front and the
 * relative order inside each group is preserved, so the result does not depend
 * on how many candidates there are or on the order they arrived in.
 *
 * @param candidates - Candidate markdown URLs for one page; never mutated.
 * @param preference - Detected site preference, or null when none is known.
 * @returns The same array when `preference` is null; otherwise a new array with
 *   the preferred form first. If every candidate has the same form, the
 *   original order is kept.
 */
function orderCandidates(candidates: string[], preference: 'direct' | 'index' | null): string[] {
  if (preference === null) {
    return candidates;
  }

  const isIndexForm = (candidate: string): boolean =>
    /\/index\.mdx?$/i.test(candidate.replace(/[?#].*$/, ''));

  const indexForms = candidates.filter(isIndexForm);
  const directForms = candidates.filter((candidate) => !isIndexForm(candidate));

  return preference === 'index'
    ? [...indexForms, ...directForms]
    : [...directForms, ...indexForms];
}
51+
1752
async function check(ctx: CheckContext): Promise<CheckResult> {
1853
const id = 'markdown-url-support';
1954
const category = 'markdown-availability';
@@ -27,6 +62,7 @@ async function check(ctx: CheckContext): Promise<CheckResult> {
2762

2863
const results: PageResult[] = [];
2964
const concurrency = ctx.options.maxConcurrency;
65+
let mdFormPreference: 'direct' | 'index' | null = null;
3066

3167
for (let i = 0; i < pageUrls.length; i += concurrency) {
3268
const batch = pageUrls.slice(i, i + concurrency);
@@ -38,8 +74,9 @@ async function check(ctx: CheckContext): Promise<CheckResult> {
3874
return { url, mdUrl: url, supported: false, skipped: true, status: 0 };
3975
}
4076
const alreadyMd = /\.mdx?$/i.test(new URL(url).pathname);
77+
const ordered = orderCandidates(candidates, mdFormPreference);
4178
let lastError: string | undefined;
42-
for (const mdUrl of candidates) {
79+
for (const mdUrl of ordered) {
4380
try {
4481
const response = await ctx.http.fetch(mdUrl);
4582
const body = await response.text();
@@ -62,7 +99,7 @@ async function check(ctx: CheckContext): Promise<CheckResult> {
6299
}
63100
return {
64101
url,
65-
mdUrl: candidates[0],
102+
mdUrl: ordered[0],
66103
supported: false,
67104
alreadyMd,
68105
status: 0,
@@ -71,6 +108,13 @@ async function check(ctx: CheckContext): Promise<CheckResult> {
71108
}),
72109
);
73110
results.push(...batchResults);
111+
112+
// After each batch, re-evaluate the preferred .md URL form.
113+
// Once a clear pattern emerges (80%+ one form), subsequent batches
114+
// try the preferred form first, saving one request per page.
115+
if (mdFormPreference === null) {
116+
mdFormPreference = detectPreferredMdForm(results);
117+
}
74118
}
75119

76120
const testedResults = results.filter((r) => !r.skipped);

test/unit/checks/markdown-url-support.test.ts

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -409,4 +409,74 @@ describe('markdown-url-support', () => {
409409
expect(cached?.markdown?.content).toBe(mdContent);
410410
expect(cached?.markdown?.source).toBe('md-url');
411411
});
412+
413+
it('auto-detects page/index.md preference and tries it first in later batches', async () => {
  // 3 pages, all served at page/index.md (not page.md). With concurrency=1,
  // each page is a separate batch, so after page 1+2 the check should
  // detect the page/index.md pattern and try it first for page 3.
  const md = '# Page\n\nContent here.';
  const requestLog: string[] = [];
  const pages = ['a', 'b', 'c'];

  server.use(
    ...pages.flatMap((page) => [
      // page.md form — always 404
      http.get(`http://test.local/docs/${page}.md`, () => {
        requestLog.push(`/docs/${page}.md`);
        return new HttpResponse('Not found', { status: 404 });
      }),
      // page/index.md form — always succeeds
      http.get(`http://test.local/docs/${page}/index.md`, () => {
        requestLog.push(`/docs/${page}/index.md`);
        return new HttpResponse(md, {
          status: 200,
          headers: { 'Content-Type': 'text/markdown' },
        });
      }),
    ]),
  );

  const content = `# Docs
> Summary
## Links
- [A](http://test.local/docs/a): A
- [B](http://test.local/docs/b): B
- [C](http://test.local/docs/c): C
`;
  const ctx = makeCtx({ content });
  // Force concurrency=1 so each page is its own batch
  ctx.options.maxConcurrency = 1;
  const result = await check.run(ctx);

  expect(result.status).toBe('pass');

  // Pages A and B: tried page.md first (default order), got 404, then page/index.md.
  // Page C: after detecting the page/index.md preference, page/index.md is tried
  // first and succeeds, so /docs/c.md is never requested.
  // Asserting the exact sequence also pins the order and that no URL is hit twice.
  expect(requestLog).toEqual([
    '/docs/a.md',
    '/docs/a/index.md',
    '/docs/b.md',
    '/docs/b/index.md',
    '/docs/c/index.md',
  ]);
});
412482
});

0 commit comments

Comments
 (0)