Skip to content

Commit 8864618

Browse files
authored
Merge pull request #46 from agent-ecosystem/fix/head-fallback-to-get
fix: HEAD 404/405 falls back to GET
2 parents 9eaa319 + 5d0e862 commit 8864618

2 files changed

Lines changed: 128 additions & 6 deletions

File tree

links/check.go

Lines changed: 36 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -75,21 +75,51 @@ func checkHTTPLink(rctx types.ResultContext, client *http.Client, url string) ty
7575
return rctx.Errorf("%s (invalid URL: %v)", url, err)
7676
}
7777
req.Header.Set("User-Agent", "skill-validator/1.0")
78+
req.Header.Set("Accept", "text/html, */*;q=0.1")
7879

7980
resp, err := client.Do(req)
8081
if err != nil {
8182
return rctx.Errorf("%s (request failed: %v)", url, err)
8283
}
8384
defer func() { _ = resp.Body.Close() }()
8485

85-
if resp.StatusCode >= 200 && resp.StatusCode < 300 {
86-
return rctx.Passf("%s (HTTP %d)", url, resp.StatusCode)
86+
// Some sites don't handle HEAD correctly (e.g. SPAs like crates.io return
87+
// 404 for HEAD even though the page exists). Fall back to GET when HEAD
88+
// returns 404 or 405, which is the standard approach used by lychee,
89+
// markdown-link-check, and other link validators.
90+
if resp.StatusCode == http.StatusNotFound || resp.StatusCode == http.StatusMethodNotAllowed {
91+
return checkHTTPLinkGET(rctx, client, url)
8792
}
88-
if resp.StatusCode >= 300 && resp.StatusCode < 400 {
89-
return rctx.Passf("%s (HTTP %d redirect)", url, resp.StatusCode)
93+
94+
return classifyResponse(rctx, url, resp.StatusCode)
95+
}
96+
97+
func checkHTTPLinkGET(rctx types.ResultContext, client *http.Client, url string) types.Result {
98+
req, err := http.NewRequest("GET", url, nil)
99+
if err != nil {
100+
return rctx.Errorf("%s (invalid URL: %v)", url, err)
101+
}
102+
req.Header.Set("User-Agent", "skill-validator/1.0")
103+
req.Header.Set("Accept", "text/html, */*;q=0.1")
104+
105+
resp, err := client.Do(req)
106+
if err != nil {
107+
return rctx.Errorf("%s (request failed: %v)", url, err)
108+
}
109+
defer func() { _ = resp.Body.Close() }()
110+
111+
return classifyResponse(rctx, url, resp.StatusCode)
112+
}
113+
114+
func classifyResponse(rctx types.ResultContext, url string, statusCode int) types.Result {
115+
if statusCode >= 200 && statusCode < 300 {
116+
return rctx.Passf("%s (HTTP %d)", url, statusCode)
117+
}
118+
if statusCode >= 300 && statusCode < 400 {
119+
return rctx.Passf("%s (HTTP %d redirect)", url, statusCode)
90120
}
91-
if resp.StatusCode == http.StatusForbidden {
121+
if statusCode == http.StatusForbidden {
92122
return rctx.Infof("%s (HTTP 403 — may block automated requests)", url)
93123
}
94-
return rctx.Errorf("%s (HTTP %d)", url, resp.StatusCode)
124+
return rctx.Errorf("%s (HTTP %d)", url, statusCode)
95125
}

links/check_test.go

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,20 @@ func TestCheckLinks_HTTP(t *testing.T) {
9898
mux.HandleFunc("/not-found", func(w http.ResponseWriter, r *http.Request) {
9999
w.WriteHeader(http.StatusNotFound)
100100
})
101+
mux.HandleFunc("/head-404-get-200", func(w http.ResponseWriter, r *http.Request) {
102+
if r.Method == http.MethodHead {
103+
w.WriteHeader(http.StatusNotFound)
104+
return
105+
}
106+
w.WriteHeader(http.StatusOK)
107+
})
108+
mux.HandleFunc("/head-405-get-200", func(w http.ResponseWriter, r *http.Request) {
109+
if r.Method == http.MethodHead {
110+
w.WriteHeader(http.StatusMethodNotAllowed)
111+
return
112+
}
113+
w.WriteHeader(http.StatusOK)
114+
})
101115
mux.HandleFunc("/forbidden", func(w http.ResponseWriter, r *http.Request) {
102116
w.WriteHeader(http.StatusForbidden)
103117
})
@@ -135,6 +149,20 @@ func TestCheckLinks_HTTP(t *testing.T) {
135149
requireResultContaining(t, results, types.Error, "HTTP 500")
136150
})
137151

152+
t.Run("HEAD 404 falls back to GET 200", func(t *testing.T) {
153+
dir := t.TempDir()
154+
body := "[spa](" + server.URL + "/head-404-get-200)"
155+
results := CheckLinks(t.Context(), dir, body)
156+
requireResultContaining(t, results, types.Pass, "HTTP 200")
157+
})
158+
159+
t.Run("HEAD 405 falls back to GET 200", func(t *testing.T) {
160+
dir := t.TempDir()
161+
body := "[nohead](" + server.URL + "/head-405-get-200)"
162+
results := CheckLinks(t.Context(), dir, body)
163+
requireResultContaining(t, results, types.Pass, "HTTP 200")
164+
})
165+
138166
t.Run("mixed relative and HTTP only checks HTTP", func(t *testing.T) {
139167
dir := t.TempDir()
140168
writeFile(t, dir, "references/guide.md", "content")
@@ -225,6 +253,70 @@ func TestCheckHTTPLink(t *testing.T) {
225253
requireContains(t, result.Message, "HTTP 403")
226254
})
227255

256+
t.Run("HEAD 404 retries with GET", func(t *testing.T) {
257+
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
258+
if r.Method == http.MethodHead {
259+
w.WriteHeader(http.StatusNotFound)
260+
return
261+
}
262+
w.WriteHeader(http.StatusOK)
263+
}))
264+
defer server.Close()
265+
266+
result := checkHTTPLink(types.ResultContext{Category: "Links", File: "SKILL.md"}, client, server.URL)
267+
if result.Level != types.Pass {
268+
t.Errorf("expected Pass after GET fallback, got level=%d message=%q", result.Level, result.Message)
269+
}
270+
})
271+
272+
t.Run("HEAD 405 retries with GET", func(t *testing.T) {
273+
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
274+
if r.Method == http.MethodHead {
275+
w.WriteHeader(http.StatusMethodNotAllowed)
276+
return
277+
}
278+
w.WriteHeader(http.StatusOK)
279+
}))
280+
defer server.Close()
281+
282+
result := checkHTTPLink(types.ResultContext{Category: "Links", File: "SKILL.md"}, client, server.URL)
283+
if result.Level != types.Pass {
284+
t.Errorf("expected Pass after GET fallback, got level=%d message=%q", result.Level, result.Message)
285+
}
286+
})
287+
288+
t.Run("SPA requiring Accept text/html resolves via GET fallback", func(t *testing.T) {
289+
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
290+
if r.Header.Get("Accept") == "" || r.Method == http.MethodHead {
291+
w.WriteHeader(http.StatusNotFound)
292+
return
293+
}
294+
if strings.Contains(r.Header.Get("Accept"), "text/html") {
295+
w.WriteHeader(http.StatusOK)
296+
return
297+
}
298+
w.WriteHeader(http.StatusNotFound)
299+
}))
300+
defer server.Close()
301+
302+
result := checkHTTPLink(types.ResultContext{Category: "Links", File: "SKILL.md"}, client, server.URL)
303+
if result.Level != types.Pass {
304+
t.Errorf("expected Pass for SPA with Accept header, got level=%d message=%q", result.Level, result.Message)
305+
}
306+
})
307+
308+
t.Run("genuine 404 still errors after GET fallback", func(t *testing.T) {
309+
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
310+
w.WriteHeader(http.StatusNotFound)
311+
}))
312+
defer server.Close()
313+
314+
result := checkHTTPLink(types.ResultContext{Category: "Links", File: "SKILL.md"}, client, server.URL)
315+
if result.Level != types.Error {
316+
t.Errorf("expected Error for genuine 404, got level=%d message=%q", result.Level, result.Message)
317+
}
318+
})
319+
228320
t.Run("invalid URL", func(t *testing.T) {
229321
result := checkHTTPLink(types.ResultContext{Category: "Links", File: "SKILL.md"}, client, "http://invalid host with spaces/")
230322
if result.Level != types.Error {

0 commit comments

Comments
 (0)