11import { registerCheck } from '../registry.js' ;
22import { looksLikeMarkdown , looksLikeHtml } from '../../helpers/detect-markdown.js' ;
3+ import { isSoft404Body } from '../../helpers/detect-soft-404.js' ;
34import { discoverAndSamplePages } from '../../helpers/get-page-urls.js' ;
4- import { isNonPageUrl } from '../../helpers/to-md-urls.js' ;
5+ import { isNonPageUrl , isMdUrl , toHtmlUrl } from '../../helpers/to-md-urls.js' ;
56import type { CheckContext , CheckResult } from '../../types.js' ;
67
// Outcome label stored on each page result.
// In the visible code, 'html' is also the fallback label for skipped non-page URLs,
// responses flagged as soft-404 bodies, and requests that threw during fetch.
// NOTE(review): the two 'markdown-with-*' variants presumably reflect whether the
// response Content-Type included 'text/markdown' — the assignment logic is not
// visible in this view; confirm against the full function.
78type Classification = 'markdown-with-correct-type' | 'markdown-with-wrong-type' | 'html' ;
89
910interface PageResult {
1011 url : string ;
12+ /** The URL actually fetched (may differ from url if .md was normalized). */
13+ testedUrl ?: string ;
1114 classification : Classification ;
1215 skipped ?: boolean ;
16+ softError ?: boolean ;
1317 contentType : string ;
1418 status : number ;
1519 error ?: string ;
@@ -37,12 +41,34 @@ async function check(ctx: CheckContext): Promise<CheckResult> {
3741 if ( isNonPageUrl ( url ) ) {
3842 return { url, classification : 'html' , skipped : true , contentType : '' , status : 0 } ;
3943 }
44+
45+ // Pre-request: normalize .md/.mdx URLs to their canonical HTML form (#33).
46+ // Testing content negotiation against a .md URL is meaningless because the
47+ // server already serves markdown at that path by definition.
48+ const fetchUrl = isMdUrl ( url ) ? toHtmlUrl ( url ) : url ;
49+ const testedUrl = fetchUrl !== url ? fetchUrl : undefined ;
50+
4051 try {
41- const response = await ctx . http . fetch ( url , {
52+ const response = await ctx . http . fetch ( fetchUrl , {
4253 headers : { Accept : 'text/markdown' } ,
4354 } ) ;
4455 const body = await response . text ( ) ;
4556 const contentType = response . headers . get ( 'content-type' ) ?? '' ;
57+
58+ // Post-response: reject soft-404 error pages (#29).
59+ // Some servers return 200 with text/markdown for error pages
60+ // (e.g. "# Page Not Found"), which would inflate scores.
61+ if ( isSoft404Body ( body ) ) {
62+ return {
63+ url,
64+ testedUrl,
65+ classification : 'html' ,
66+ softError : true ,
67+ contentType,
68+ status : response . status ,
69+ } ;
70+ }
71+
4672 const isMarkdownType = contentType . includes ( 'text/markdown' ) ;
4773 const isMarkdownBody = looksLikeMarkdown ( body ) ;
4874
@@ -68,10 +94,11 @@ async function check(ctx: CheckContext): Promise<CheckResult> {
6894 classification = 'html' ;
6995 }
7096
71- return { url, classification, contentType, status : response . status } ;
97+ return { url, testedUrl , classification, contentType, status : response . status } ;
7298 } catch ( err ) {
7399 return {
74100 url,
101+ testedUrl,
75102 classification : 'html' ,
76103 contentType : '' ,
77104 status : 0 ,
@@ -85,6 +112,8 @@ async function check(ctx: CheckContext): Promise<CheckResult> {
85112
86113 const testedResults = results . filter ( ( r ) => ! r . skipped ) ;
87114 const skippedCount = results . length - testedResults . length ;
115+ const normalizedCount = testedResults . filter ( ( r ) => r . testedUrl ) . length ;
116+ const softErrorCount = testedResults . filter ( ( r ) => r . softError ) . length ;
88117 const markdownWithCorrectType = testedResults . filter (
89118 ( r ) => r . classification === 'markdown-with-correct-type' ,
90119 ) . length ;
@@ -102,12 +131,16 @@ async function check(ctx: CheckContext): Promise<CheckResult> {
102131 const pageLabel = wasSampled ? 'sampled pages' : 'pages' ;
103132 const suffix =
104133 ( fetchErrors > 0 ? `; ${ fetchErrors } failed to fetch` : '' ) +
105- ( rateLimited > 0 ? `; ${ rateLimited } rate-limited (HTTP 429)` : '' ) ;
134+ ( rateLimited > 0 ? `; ${ rateLimited } rate-limited (HTTP 429)` : '' ) +
135+ ( softErrorCount > 0 ? `; ${ softErrorCount } returned error pages` : '' ) +
136+ ( normalizedCount > 0 ? `; ${ normalizedCount } .md URLs normalized` : '' ) ;
106137
107138 const details : Record < string , unknown > = {
108139 totalPages,
109140 testedPages : testedResults . length ,
110141 skippedPages : skippedCount ,
142+ normalizedMdUrls : normalizedCount ,
143+ softErrorPages : softErrorCount ,
111144 sampled : wasSampled ,
112145 markdownWithCorrectType,
113146 markdownWithWrongType,
0 commit comments