Skip to content

Commit 0c73fac

Browse files
committed
feat: add documentation link checking and auto-fix tooling
Add automated link validation using htmltest to detect broken internal and external links in the documentation. Introduce a check-links.sh script that builds the Hugo site, runs htmltest, and validates that all internal markdown references use Hugo's relref shortcode instead of bare relative paths. Add a --fix mode that automatically converts bare relative links to relref shortcodes. Integrate into the CI pipeline (.tekton/linter.yaml), pre-commit hooks, and Makefile targets (check-links, fix-links). Signed-off-by: Chmouel Boudjnah <chmouel@redhat.com>
1 parent 7165494 commit 0c73fac

File tree

7 files changed

+333
-4
lines changed

7 files changed

+333
-4
lines changed

.pre-commit-config.yaml

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ repos:
1010
- id: check-toml
1111
- id: check-shebang-scripts-are-executable
1212
- id: check-json
13+
exclude: "^docs/_vendor/"
1314
- id: check-vcs-permalinks
1415
- id: detect-private-key
1516
exclude: ".*_test.go"
@@ -28,7 +29,7 @@ repos:
2829
types: [go]
2930
pass_filenames: false
3031
- id: lint
31-
name: "Linting"
32+
name: "Go Linting"
3233
entry: make
3334
args: ["lint"]
3435
language: system
@@ -51,4 +52,11 @@ repos:
5152
language: system
5253
types: [text]
5354
args: ["gitlint"]
55+
- id: check-links
56+
name: "Documentation link check"
57+
entry: make
58+
args: ["check-links"]
59+
language: system
60+
files: "^docs/"
61+
pass_filenames: false
5462
# TODO: add a lint-sh hook once the errors are fixed

.tekton/doc.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ spec:
7777
}
7878
cd $(git rev-parse --show-toplevel)/docs
7979
url="${UPLOADER_PUBLIC_URL}/docs/{{ pull_request_number }}"
80-
${hugobin} --gc --minify --canonifyURLs -d {{ pull_request_number }} -b ${url}
80+
${hugobin} build --gc --minify -d {{ pull_request_number }} -b ${url}
8181
echo "Preview URL: ${url}"
8282
- name: upload-to-static-server
8383
# it has curl and we already pulled it

.tekton/linter.yaml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,21 @@ spec:
252252
export NO_COLOR=1
253253
/tmp/vale docs/content --minAlertLevel=error --output=line
254254
255+
- name: check-links
256+
displayName: "Documentation Link Checker"
257+
image: docker.io/golang:1.25
258+
workingDir: $(workspaces.source.path)
259+
env:
260+
- name: HUB_TOKEN
261+
valueFrom:
262+
secretKeyRef:
263+
name: "nightly-ci-github-hub-token"
264+
key: "hub-token"
265+
script: |
266+
set -euxo pipefail
267+
git config --global --add safe.directory $(workspaces.source.path)
268+
./hack/check-links.sh
269+
255270
- name: goreleaser-check
256271
displayName: "Goreleaser Check"
257272
image: registry.access.redhat.com/ubi9/python-312

Makefile

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
TARGET_NAMESPACE=pipelines-as-code
22
HUGO_VERSION=0.146.0
3+
HTMLTEST_VERSION=0.17.0
34
GOLANGCI_LINT=golangci-lint
45
GOFUMPT=gofumpt
56
TKN_BINARY_NAME := tkn
@@ -16,6 +17,7 @@ SHELL := bash
1617
TOPDIR := $(shell git rev-parse --show-toplevel)
1718
TMPDIR := $(TOPDIR)/tmp
1819
HUGO_BIN := $(TMPDIR)/hugo/hugo
20+
HTMLTEST_BIN := $(TMPDIR)/htmltest/htmltest
1921

2022
# Safe file list helpers using null-delimited output
2123
# Usage: $(call GIT_LS_FILES,<patterns>,<command>)
@@ -83,7 +85,7 @@ html-coverage: ## generate html coverage
8385

8486
##@ Linting
8587
.PHONY: lint
86-
lint: lint-go lint-yaml lint-md lint-python lint-shell lint-e2e-naming ## run all linters
88+
lint: lint-go lint-yaml lint-md lint-python lint-shell lint-e2e-naming check-links ## run all linters
8789

8890
.PHONY: lint-e2e-naming
8991
lint-e2e-naming: ## check e2e test naming conventions
@@ -134,7 +136,7 @@ pre-commit: ## Run pre-commit hooks script manually
134136

135137
##@ Linters Fixing
136138
.PHONY: fix-linters
137-
fix-linters: fix-golangci-lint fix-python-errors fix-markdownlint fix-trailing-spaces fumpt ## run all linters fixes
139+
fix-linters: fix-golangci-lint fix-python-errors fix-markdownlint fix-trailing-spaces fumpt fix-links ## fix all linters issues we can automatically fix
138140

139141
.PHONY: fix-markdownlint
140142
fix-markdownlint: ## run markdownlint and fix on all markdown file
@@ -198,6 +200,18 @@ generated: update-golden fumpt ## generate all files that needs to be generated
198200
download-hugo: ## Download hugo software
199201
./hack/download-hugo.sh $(HUGO_VERSION) $(TMPDIR)/hugo
200202

203+
.PHONY: download-htmltest
204+
download-htmltest: ## Download htmltest binary
205+
./hack/download-htmltest.sh $(HTMLTEST_VERSION) $(TMPDIR)/htmltest
206+
207+
.PHONY: check-links
208+
check-links: ## Check documentation links (set CHECK_EXTERNAL=false to skip external URLs)
209+
# Pass the pinned tool versions explicitly so the script's built-in defaults cannot drift from this Makefile.
HUGO_VERSION="$(HUGO_VERSION)" HTMLTEST_VERSION="$(HTMLTEST_VERSION)" ./hack/check-links.sh
210+
211+
.PHONY: fix-links
212+
fix-links: ## Auto-fix bare relative links to use relref shortcode
213+
# Pass the pinned tool versions explicitly so the script's built-in defaults cannot drift from this Makefile.
HUGO_VERSION="$(HUGO_VERSION)" HTMLTEST_VERSION="$(HTMLTEST_VERSION)" ./hack/check-links.sh --fix
214+
201215
.PHONY: dev-docs
202216
dev-docs: download-hugo ## preview live your docs with hugo
203217
@$(HUGO_BIN) server -s docs/ &

docs/.htmltest.yml

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
DirectoryPath: "tmp/docs-build-test"
2+
CheckExternal: true
3+
CheckInternal: true
4+
CheckInternalHash: true
5+
CheckMailto: false
6+
CheckTel: false
7+
CheckFavicon: false
8+
IgnoreURLs:
9+
# localhost/loopback references are not real external links
10+
- "localhost"
11+
- "127\\.0\\.0\\.1"
12+
# GitHub rate-limits unauthenticated requests; these are template-generated known-valid URLs
13+
- "github\\.com/.*/edit/"
14+
- "github\\.com/.*/blob/"
15+
- "github\\.com/.*/tree/"
16+
# Google Fonts are always valid and can be slow to respond
17+
- "fonts\\.googleapis\\.com"
18+
- "fonts\\.gstatic\\.com"
19+
# Placeholder/example URLs used in documentation
20+
- "github\\.com/yourusername/"
21+
- "github\\.com/my-org/"
22+
- "github\\.com/organization/"
23+
- "your\\.forgejo\\.domain"
24+
# Sites that block automated requests with 403
25+
- "developers\\.redhat\\.com"
26+
- "platform\\.openai\\.com"
27+
# GitLab blocks automated requests
28+
- "gitlab\\.com/organization/"
29+
# VS Code marketplace returns 404 for automated requests
30+
- "marketplace\\.visualstudio\\.com"
31+
IgnoreDirectoryMissingTrailingSlash: true
32+
ExternalTimeout: 30
33+
CacheExpires: "6h"
34+
OutputDir: "tmp/.htmltest"
35+
LogLevel: 2

hack/check-links.sh

Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
1+
#!/usr/bin/env bash
2+
# description: Check documentation links for 404s using htmltest,
3+
# and verify that internal links use Hugo's relref shortcode.
4+
#
5+
# Usage:
6+
# ./hack/check-links.sh # Check only (exits 1 if bare relative links found)
7+
# ./hack/check-links.sh --fix # Auto-fix bare relative links to use relref, then check
8+
#
9+
# This script first checks that markdown files don't use bare relative
10+
# links (which break in Hugo's nested index.html output), then builds
11+
# the Hugo docs site and runs htmltest against the generated HTML to
12+
# detect broken internal and external links.
13+
set -eufo pipefail
14+
15+
FIX_MODE=false
16+
if [[ "${1:-}" == "--fix" ]]; then
17+
FIX_MODE=true
18+
fi
19+
20+
TOPDIR=$(git rev-parse --show-toplevel)
21+
TMPDIR=${TOPDIR}/tmp
22+
HUGO_VERSION=${HUGO_VERSION:-0.146.0}
23+
HTMLTEST_VERSION=${HTMLTEST_VERSION:-0.17.0}
24+
HUGO_BIN=${TMPDIR}/hugo/hugo
25+
HTMLTEST_BIN=${TMPDIR}/htmltest/htmltest
26+
DOCS_BUILD_DIR=${TMPDIR}/docs-build-test
27+
DOCS_CONTENT_DIR=${TOPDIR}/docs/content
28+
CHECK_EXTERNAL=${CHECK_EXTERNAL:-true}
29+
30+
# --- Step 1: Check (and optionally fix) that internal links use relref shortcode ---
31+
# Bare relative links like [text](some-page) break when Hugo generates
32+
# nested index.html files. Authors should use {{< relref "/docs/path" >}}.
33+
# Excluded from checking:
34+
# - External URLs (http://, https://)
35+
# - Anchor-only links (#section)
36+
# - Links already using relref or other Hugo shortcodes
37+
# - Image references ![alt](path)
38+
# - mailto: and tel: links
39+
# - Absolute paths starting with / (valid Hugo site-root paths)
40+
# - Lines containing Hugo shortcodes (card, relref, etc.)
41+
# - Lines with raw HTML (href=)
42+
if [[ "$FIX_MODE" == "true" ]]; then
43+
echo "==> Fixing bare relative links to use relref shortcode..."
44+
else
45+
echo "==> Checking documentation links use relref shortcode..."
46+
fi
47+
relref_count=0
48+
49+
while IFS= read -r -d '' file; do
50+
# Compute the directory of this file relative to docs/content/
51+
# e.g., docs/content/docs/operations/settings.md -> docs/operations
52+
file_rel="${file#"${DOCS_CONTENT_DIR}/"}"
53+
file_dir=$(dirname "$file_rel")
54+
# Normalize: "." means root of content
55+
if [[ "$file_dir" == "." ]]; then
56+
file_dir=""
57+
fi
58+
59+
if [[ "$FIX_MODE" == "true" ]]; then
60+
# Use perl to find and replace bare relative links in-place
61+
# Pass the file's directory so we can compute absolute relref paths
62+
FILE_DIR="$file_dir" perl -pi -e '
63+
BEGIN { $dir = $ENV{FILE_DIR}; }
64+
# Skip lines with Hugo shortcodes
65+
next if /\{\{[<%]/;
66+
# Skip lines with raw HTML href
67+
next if /href=/;
68+
# Replace [text](bare-target) but NOT ![text](target)
69+
s{(?<!!)\[([^\]]*)\]\(([^)]+)\)}{
70+
my ($text, $target) = ($1, $2);
71+
# Only fix bare relative links
72+
if ($target =~ m{^https?://} ||
73+
$target =~ m{^#} ||
74+
$target =~ m{^(mailto|tel):} ||
75+
$target =~ m/\{\{/ ||
76+
$target =~ m{^/}) {
77+
"[$text]($target)";
78+
} else {
79+
# Separate anchor from path
80+
my ($path, $anchor) = $target =~ m{^([^#]*)(.*)$};
81+
# Resolve relative path against file directory
82+
my $abs;
83+
if ($dir eq "") {
84+
$abs = $path;
85+
} else {
86+
$abs = "$dir/$path";
87+
}
88+
# Normalize: collapse foo/../bar -> bar
89+
while ($abs =~ s{[^/]+/\.\./}{}) {}
90+
$abs =~ s{/\.\./}{/}g;
91+
# Remove trailing /
92+
$abs =~ s{/$}{};
93+
# Remove .md suffix
94+
$abs =~ s{\.md$}{};
95+
# Prepend /
96+
$abs = "/$abs" unless $abs =~ m{^/};
97+
"[$text]({{< relref \"${abs}${anchor}\" >}})";
98+
}
99+
}ge;
100+
' "$file"
101+
fi
102+
103+
# Now scan the (possibly modified) file for remaining bare relative links
104+
line_num=0
105+
# Read the file line by line; the `|| [[ -n "$line" ]]` clause also processes a
# final line that has no trailing newline (read returns non-zero there but still fills $line).
while IFS= read -r line || [[ -n "$line" ]]; do
106+
line_num=$((line_num + 1))
107+
108+
# Skip lines containing Hugo shortcodes (card, relref, etc.)
109+
if [[ "$line" =~ \{\{[\<\%] ]]; then
110+
continue
111+
fi
112+
113+
# Skip lines containing raw HTML links (href=)
114+
if [[ "$line" =~ href= ]]; then
115+
continue
116+
fi
117+
118+
# Skip lines with no markdown link syntax
119+
if ! [[ "$line" =~ \]\( ]]; then
120+
continue
121+
fi
122+
123+
# Extract bare relative link targets using perl
124+
matches=$(echo "$line" | perl -ne '
125+
while (/(?<!!)\[([^\]]*)\]\(([^)]+)\)/g) {
126+
my $target = $2;
127+
next if $target =~ m{^https?://};
128+
next if $target =~ m{^#};
129+
next if $target =~ m{^(mailto|tel):};
130+
next if $target =~ m{\{\{};
131+
next if $target =~ m{^/};
132+
print "$target\n";
133+
}
134+
' 2>/dev/null || true)
135+
136+
if [[ -n "$matches" ]]; then
137+
while IFS= read -r target; do
138+
if [[ "$FIX_MODE" == "true" ]]; then
139+
echo " WARNING: ${file}:${line_num}: could not auto-fix bare relative link '${target}'"
140+
else
141+
echo " ERROR: ${file}:${line_num}: bare relative link '${target}' should use {{< relref >}}"
142+
fi
143+
relref_count=$((relref_count + 1))
144+
done <<<"$matches"
145+
fi
146+
done <"$file"
147+
done < <(find "$DOCS_CONTENT_DIR" -name '*.md' -print0)
148+
149+
if [[ "$FIX_MODE" == "true" ]]; then
150+
if [[ $relref_count -gt 0 ]]; then
151+
echo ""
152+
echo "WARNING: ${relref_count} link(s) could not be auto-fixed. Please fix manually."
153+
else
154+
# Show what changed
155+
changed=$(git -C "${TOPDIR}" diff --name-only -- docs/content/ 2>/dev/null || true)
156+
if [[ -n "$changed" ]]; then
157+
echo " Fixed files:"
158+
echo "$changed" | while IFS= read -r f; do echo " $f"; done
159+
echo ""
160+
echo " Run 'make check-links' to verify the fixes."
161+
else
162+
echo " No bare relative links found, nothing to fix."
163+
fi
164+
fi
165+
else
166+
if [[ $relref_count -gt 0 ]]; then
167+
echo ""
168+
echo "Found ${relref_count} bare relative link(s). Use {{< relref \"/docs/path\" >}} instead."
169+
echo "Run 'make fix-links' to auto-fix them."
170+
exit 1
171+
fi
172+
echo " All internal documentation links use relref correctly."
173+
fi
174+
175+
# --- Step 2: Build Hugo site and run htmltest ---
176+
# Download Hugo if not present
177+
echo "==> Ensuring Hugo ${HUGO_VERSION} is available..."
178+
"${TOPDIR}/hack/download-hugo.sh" "${HUGO_VERSION}" "${TMPDIR}/hugo"
179+
180+
# Download htmltest if not present
181+
echo "==> Ensuring htmltest ${HTMLTEST_VERSION} is available..."
182+
"${TOPDIR}/hack/download-htmltest.sh" "${HTMLTEST_VERSION}" "${TMPDIR}/htmltest"
183+
184+
# Clean stale output directories that cause Hugo to deadlock
185+
rm -rf "${DOCS_BUILD_DIR}" "${TOPDIR}/docs/public"
186+
187+
# Build Hugo site
188+
echo "==> Building Hugo documentation site..."
189+
"${HUGO_BIN}" build --gc --minify -s "${TOPDIR}/docs/" -d "${DOCS_BUILD_DIR}"
190+
191+
# Run htmltest
192+
# docs/.htmltest.yml uses paths relative to the repository root (DirectoryPath, OutputDir),
# so switch there first; otherwise running this script from a subdirectory checks the wrong location.
cd "${TOPDIR}"
echo "==> Running htmltest link checker..."
193+
HTMLTEST_CONF="${TOPDIR}/docs/.htmltest.yml"
194+
if [[ "${CHECK_EXTERNAL}" == "false" || "${CHECK_EXTERNAL}" == "0" ]]; then
195+
echo "==> External link checking disabled (CHECK_EXTERNAL=${CHECK_EXTERNAL})"
196+
HTMLTEST_CONF="${TMPDIR}/.htmltest-internal.yml"
197+
sed 's/^CheckExternal:.*/CheckExternal: false/' "${TOPDIR}/docs/.htmltest.yml" > "${HTMLTEST_CONF}"
198+
fi
199+
"${HTMLTEST_BIN}" -c "${HTMLTEST_CONF}"

0 commit comments

Comments
 (0)