Skip to content

Commit 7faa392

Browse files
committed
feat: add documentation link checking and auto-fix tooling
Add automated link validation using htmltest to detect broken internal and external links in the documentation. Introduce a check-links.sh script that builds the Hugo site, runs htmltest, and validates that all internal markdown references use Hugo's relref shortcode instead of bare relative paths. Add a --fix mode that automatically converts bare relative links to relref shortcodes. Integrate into the CI pipeline (.tekton/linter.yaml), pre-commit hooks, and Makefile targets (check-links, fix-links). Signed-off-by: Chmouel Boudjnah <chmouel@redhat.com>
1 parent 8f41e16 commit 7faa392

File tree

6 files changed

+319
-2
lines changed

6 files changed

+319
-2
lines changed

.pre-commit-config.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,4 +51,11 @@ repos:
5151
language: system
5252
types: [text]
5353
args: ["gitlint"]
54+
- id: check-links
55+
name: "Documentation link check"
56+
entry: make
57+
args: ["check-links"]
58+
language: system
59+
files: "^docs/"
60+
pass_filenames: false
5461
# TODO: add a lint-sh when we have the errors fix

.tekton/linter.yaml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,21 @@ spec:
252252
export NO_COLOR=1
253253
/tmp/vale docs/content --minAlertLevel=error --output=line
254254
255+
- name: check-links
256+
displayName: "Documentation Link Checker"
257+
image: docker.io/golang:1.25
258+
workingDir: $(workspaces.source.path)
259+
env:
260+
- name: HUB_TOKEN
261+
valueFrom:
262+
secretKeyRef:
263+
name: "nightly-ci-github-hub-token"
264+
key: "hub-token"
265+
script: |
266+
set -euxo pipefail
267+
git config --global --add safe.directory $(workspaces.source.path)
268+
./hack/check-links.sh
269+
255270
- name: goreleaser-check
256271
displayName: "Goreleaser Check"
257272
image: registry.access.redhat.com/ubi9/python-312

Makefile

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
TARGET_NAMESPACE=pipelines-as-code
22
HUGO_VERSION=0.146.0
3+
HTMLTEST_VERSION=0.17.0
34
GOLANGCI_LINT=golangci-lint
45
GOFUMPT=gofumpt
56
TKN_BINARY_NAME := tkn
@@ -16,6 +17,7 @@ SHELL := bash
1617
TOPDIR := $(shell git rev-parse --show-toplevel)
1718
TMPDIR := $(TOPDIR)/tmp
1819
HUGO_BIN := $(TMPDIR)/hugo/hugo
20+
HTMLTEST_BIN := $(TMPDIR)/htmltest/htmltest
1921

2022
# Safe file list helpers using null-delimited output
2123
# Usage: $(call GIT_LS_FILES,<patterns>,<command>)
@@ -83,7 +85,7 @@ html-coverage: ## generate html coverage
8385

8486
##@ Linting
8587
.PHONY: lint
86-
lint: lint-go lint-yaml lint-md lint-python lint-shell lint-e2e-naming ## run all linters
88+
lint: lint-go lint-yaml lint-md lint-python lint-shell lint-e2e-naming check-links ## run all linters
8789

8890
.PHONY: lint-e2e-naming
8991
lint-e2e-naming: ## check e2e test naming conventions
@@ -134,7 +136,7 @@ pre-commit: ## Run pre-commit hooks script manually
134136

135137
##@ Linters Fixing
136138
.PHONY: fix-linters
137-
fix-linters: fix-golangci-lint fix-python-errors fix-markdownlint fix-trailing-spaces fumpt ## run all linters fixes
139+
fix-linters: fix-golangci-lint fix-python-errors fix-markdownlint fix-trailing-spaces fumpt fix-links ## fix all linters issues we can automatically fix
138140

139141
.PHONY: fix-markdownlint
140142
fix-markdownlint: ## run markdownlint and fix on all markdown file
@@ -198,6 +200,18 @@ generated: update-golden fumpt ## generate all files that needs to be generated
198200
download-hugo: ## Download hugo software
199201
./hack/download-hugo.sh $(HUGO_VERSION) $(TMPDIR)/hugo
200202

203+
.PHONY: download-htmltest
204+
download-htmltest: ## Download htmltest binary
205+
./hack/download-htmltest.sh $(HTMLTEST_VERSION) $(TMPDIR)/htmltest
206+
207+
# Run the documentation link checker (relref usage check, Hugo build, htmltest).
# The tool versions pinned at the top of this Makefile are handed to the script
# through its environment: hack/check-links.sh reads HUGO_VERSION and
# HTMLTEST_VERSION and otherwise falls back to its own built-in defaults, which
# could silently drift from the versions pinned here.
.PHONY: check-links
check-links: ## Check documentation links for 404s and relref usage
	HUGO_VERSION=$(HUGO_VERSION) HTMLTEST_VERSION=$(HTMLTEST_VERSION) ./hack/check-links.sh
210+
211+
# Rewrite bare relative markdown links into relref shortcodes, then re-check.
# As with check-links, the pinned tool versions are passed via the environment
# so the script does not fall back to its own (possibly stale) defaults.
.PHONY: fix-links
fix-links: ## Auto-fix bare relative links to use relref shortcode
	HUGO_VERSION=$(HUGO_VERSION) HTMLTEST_VERSION=$(HTMLTEST_VERSION) ./hack/check-links.sh --fix
214+
201215
.PHONY: dev-docs
202216
dev-docs:
203217
@hugo server -s docs/ &

docs/.htmltest.yml

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
DirectoryPath: "tmp/docs-build-test"
2+
CheckExternal: true
3+
CheckInternal: true
4+
CheckInternalHash: true
5+
CheckMailto: false
6+
CheckTel: false
7+
CheckFavicon: false
8+
IgnoreURLs:
9+
# localhost/loopback references are not real external links
10+
- "localhost"
11+
- "127\\.0\\.0\\.1"
12+
# GitHub rate-limits unauthenticated requests; these are template-generated known-valid URLs
13+
- "github\\.com/.*/edit/"
14+
- "github\\.com/.*/blob/"
15+
- "github\\.com/.*/tree/"
16+
# Google Fonts are always valid and can be slow to respond
17+
- "fonts\\.googleapis\\.com"
18+
- "fonts\\.gstatic\\.com"
19+
# Placeholder/example URLs used in documentation
20+
- "github\\.com/yourusername/"
21+
- "github\\.com/my-org/"
22+
- "github\\.com/organization/"
23+
- "your\\.forgejo\\.domain"
24+
# Sites that block automated requests with 403
25+
- "developers\\.redhat\\.com"
26+
# GitLab blocks automated requests
27+
- "gitlab\\.com/organization/"
28+
# VS Code marketplace returns 404 for automated requests
29+
- "marketplace\\.visualstudio\\.com"
30+
IgnoreDirectoryMissingTrailingSlash: true
31+
ExternalTimeout: 30
32+
CacheExpires: "6h"
33+
OutputDir: "tmp/.htmltest"
34+
LogLevel: 2

hack/check-links.sh

Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
#!/usr/bin/env bash
2+
# description: Check documentation links for 404s using htmltest,
3+
# and verify that internal links use Hugo's relref shortcode.
4+
#
5+
# Usage:
6+
# ./hack/check-links.sh # Check only (exits 1 if bare relative links found)
7+
# ./hack/check-links.sh --fix # Auto-fix bare relative links to use relref, then check
8+
#
9+
# This script first checks that markdown files don't use bare relative
10+
# links (which break in Hugo's nested index.html output), then builds
11+
# the Hugo docs site and runs htmltest against the generated HTML to
12+
# detect broken internal and external links.
13+
set -eufo pipefail
14+
15+
FIX_MODE=false
16+
if [[ "${1:-}" == "--fix" ]]; then
17+
FIX_MODE=true
18+
fi
19+
20+
TOPDIR=$(git rev-parse --show-toplevel)
21+
TMPDIR=${TOPDIR}/tmp
22+
HUGO_VERSION=${HUGO_VERSION:-0.146.0}
23+
HTMLTEST_VERSION=${HTMLTEST_VERSION:-0.17.0}
24+
HUGO_BIN=${TMPDIR}/hugo/hugo
25+
HTMLTEST_BIN=${TMPDIR}/htmltest/htmltest
26+
DOCS_BUILD_DIR=${TMPDIR}/docs-build-test
27+
DOCS_CONTENT_DIR=${TOPDIR}/docs/content
28+
29+
# --- Step 1: Check (and optionally fix) that internal links use relref shortcode ---
30+
# Bare relative links like [text](some-page) break when Hugo generates
31+
# nested index.html files. Authors should use {{< relref "/docs/path" >}}.
32+
# Excluded from checking:
33+
# - External URLs (http://, https://)
34+
# - Anchor-only links (#section)
35+
# - Links already using relref or other Hugo shortcodes
36+
# - Image references ![alt](path)
37+
# - mailto: and tel: links
38+
# - Absolute paths starting with / (valid Hugo site-root paths)
39+
# - Lines containing Hugo shortcodes (card, relref, etc.)
40+
# - Lines with raw HTML (href=)
41+
if [[ "$FIX_MODE" == "true" ]]; then
42+
echo "==> Fixing bare relative links to use relref shortcode..."
43+
else
44+
echo "==> Checking documentation links use relref shortcode..."
45+
fi
46+
relref_count=0
47+
48+
while IFS= read -r -d '' file; do
49+
# Compute the directory of this file relative to docs/content/
50+
# e.g., docs/content/docs/operations/settings.md -> docs/operations
51+
file_rel="${file#"${DOCS_CONTENT_DIR}/"}"
52+
file_dir=$(dirname "$file_rel")
53+
# Normalize: "." means root of content
54+
if [[ "$file_dir" == "." ]]; then
55+
file_dir=""
56+
fi
57+
58+
if [[ "$FIX_MODE" == "true" ]]; then
59+
# Use perl to find and replace bare relative links in-place
60+
# Pass the file's directory so we can compute absolute relref paths
61+
FILE_DIR="$file_dir" perl -pi -e '
62+
BEGIN { $dir = $ENV{FILE_DIR}; }
63+
# Skip lines with Hugo shortcodes
64+
next if /\{\{[<%]/;
65+
# Skip lines with raw HTML href
66+
next if /href=/;
67+
# Replace [text](bare-target) but NOT ![text](target)
68+
s{(?<!!)\[([^\]]*)\]\(([^)]+)\)}{
69+
my ($text, $target) = ($1, $2);
70+
# Only fix bare relative links
71+
if ($target =~ m{^https?://} ||
72+
$target =~ m{^#} ||
73+
$target =~ m{^(mailto|tel):} ||
74+
$target =~ m/\{\{/ ||
75+
$target =~ m{^/}) {
76+
"[$text]($target)";
77+
} else {
78+
# Separate anchor from path
79+
my ($path, $anchor) = $target =~ m{^([^#]*)(.*)$};
80+
# Resolve relative path against file directory
81+
my $abs;
82+
if ($dir eq "") {
83+
$abs = $path;
84+
} else {
85+
$abs = "$dir/$path";
86+
}
87+
# Normalize: collapse foo/../bar -> bar
88+
while ($abs =~ s{[^/]+/\.\./}{}) {}
89+
$abs =~ s{/\.\./}{/}g;
90+
# Remove trailing /
91+
$abs =~ s{/$}{};
92+
# Remove .md suffix
93+
$abs =~ s{\.md$}{};
94+
# Prepend /
95+
$abs = "/$abs" unless $abs =~ m{^/};
96+
"[$text]({{< relref \"${abs}${anchor}\" >}})";
97+
}
98+
}ge;
99+
' "$file"
100+
fi
101+
102+
# Now scan the (possibly modified) file for remaining bare relative links
103+
line_num=0
104+
while IFS= read -r line; do
105+
line_num=$((line_num + 1))
106+
107+
# Skip lines containing Hugo shortcodes (card, relref, etc.)
108+
if [[ "$line" =~ \{\{[\<\%] ]]; then
109+
continue
110+
fi
111+
112+
# Skip lines with raw HTML links (href=)
113+
if [[ "$line" =~ href= ]]; then
114+
continue
115+
fi
116+
117+
# Skip lines with no markdown link syntax
118+
if ! [[ "$line" =~ \]\( ]]; then
119+
continue
120+
fi
121+
122+
# Extract bare relative link targets using perl
123+
matches=$(echo "$line" | perl -ne '
124+
while (/(?<!!)\[([^\]]*)\]\(([^)]+)\)/g) {
125+
my $target = $2;
126+
next if $target =~ m{^https?://};
127+
next if $target =~ m{^#};
128+
next if $target =~ m{^(mailto|tel):};
129+
next if $target =~ m{\{\{};
130+
next if $target =~ m{^/};
131+
print "$target\n";
132+
}
133+
' 2>/dev/null || true)
134+
135+
if [[ -n "$matches" ]]; then
136+
while IFS= read -r target; do
137+
if [[ "$FIX_MODE" == "true" ]]; then
138+
echo " WARNING: ${file}:${line_num}: could not auto-fix bare relative link '${target}'"
139+
else
140+
echo " ERROR: ${file}:${line_num}: bare relative link '${target}' should use {{< relref >}}"
141+
fi
142+
relref_count=$((relref_count + 1))
143+
done <<<"$matches"
144+
fi
145+
done <"$file"
146+
done < <(find "$DOCS_CONTENT_DIR" -name '*.md' -print0)
147+
148+
if [[ "$FIX_MODE" == "true" ]]; then
149+
if [[ $relref_count -gt 0 ]]; then
150+
echo ""
151+
echo "WARNING: ${relref_count} link(s) could not be auto-fixed. Please fix manually."
152+
else
153+
# Show what changed
154+
changed=$(git -C "${TOPDIR}" diff --name-only -- docs/content/ 2>/dev/null || true)
155+
if [[ -n "$changed" ]]; then
156+
echo " Fixed files:"
157+
echo "$changed" | while IFS= read -r f; do echo " $f"; done
158+
echo ""
159+
echo " Run 'make check-links' to verify the fixes."
160+
else
161+
echo " No bare relative links found, nothing to fix."
162+
fi
163+
fi
164+
else
165+
if [[ $relref_count -gt 0 ]]; then
166+
echo ""
167+
echo "Found ${relref_count} bare relative link(s). Use {{< relref \"/docs/path\" >}} instead."
168+
echo "Run 'make fix-links' to auto-fix them."
169+
exit 1
170+
fi
171+
echo " All internal documentation links use relref correctly."
172+
fi
173+
174+
# --- Step 2: Build Hugo site and run htmltest ---
# Download Hugo if not present
echo "==> Ensuring Hugo ${HUGO_VERSION} is available..."
"${TOPDIR}/hack/download-hugo.sh" "${HUGO_VERSION}" "${TMPDIR}/hugo"

# Download htmltest if not present
echo "==> Ensuring htmltest ${HTMLTEST_VERSION} is available..."
"${TOPDIR}/hack/download-htmltest.sh" "${HTMLTEST_VERSION}" "${TMPDIR}/htmltest"

# Build Hugo site
echo "==> Building Hugo documentation site..."
"${HUGO_BIN}" --gc --minify -s "${TOPDIR}/docs/" -d "${DOCS_BUILD_DIR}"

# Run htmltest from the repository root. docs/.htmltest.yml points at the
# build output with repo-root-relative paths (DirectoryPath "tmp/docs-build-test",
# OutputDir "tmp/.htmltest"), while the site above was built into
# ${TOPDIR}/tmp/docs-build-test. Changing directory first keeps the two in
# agreement even when this script is invoked from a sub-directory.
# NOTE(review): assumes htmltest resolves relative config paths against the
# current working directory -- confirm against the htmltest documentation.
echo "==> Running htmltest link checker..."
cd "${TOPDIR}"
"${HTMLTEST_BIN}" -c "${TOPDIR}/docs/.htmltest.yml"

hack/download-htmltest.sh

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
#!/usr/bin/env bash
2+
# description: Download htmltest binary from github directly.
3+
# this lets us pin the version the way we want it.
4+
set -eufo pipefail
5+
set -x
6+
7+
# Positional arguments: <version> is the htmltest release to install (without
# the leading "v"), <targetdir> is the directory that will hold the binary.
# Both are required.
TARGET_VERSION=${1:-}
TARGETDIR=${2:-}

if [[ -z ${TARGET_VERSION} || -z ${TARGETDIR} ]]; then
    # Usage errors belong on stderr; <targetdir> is mandatory, not optional.
    echo "Usage: $0 <version> <targetdir>" >&2
    exit 1
fi

# mkdir -p is a no-op when the directory already exists.
mkdir -p "${TARGETDIR}"

# Skip the download when the requested version is already installed;
# otherwise remove the stale binary so it gets replaced below.
if [[ -x ${TARGETDIR}/htmltest ]]; then
    # -F: match the version literally ("." must not act as a regex wildcard).
    if "${TARGETDIR}/htmltest" --version 2>&1 | grep -qF "${TARGET_VERSION}"; then
        exit 0
    fi
    rm -f "${TARGETDIR}/htmltest"
fi
18+
19+
# Print the "<os>_<arch>" suffix used in htmltest release asset names
# (for example "linux_amd64"), derived from `uname -s` and `uname -m`.
#
# On an unsupported OS or architecture a diagnostic is written to stderr and
# the current (sub)shell exits with status 1. The diagnostic must not go to
# stdout: the caller captures stdout with $(detect_os_arch), which would
# swallow the message and leave the user with a silent failure.
detect_os_arch() {
    local os
    local arch

    # Detect OS
    case "$(uname -s)" in
    Linux*) os=linux ;;
    Darwin*) os=darwin ;;
    *) os="UNKNOWN" ;;
    esac

    # Detect architecture (arm64 and aarch64 are two names for the same target)
    case "$(uname -m)" in
    x86_64) arch=amd64 ;;
    arm64 | aarch64) arch=arm64 ;;
    *) arch="UNKNOWN" ;;
    esac

    if [[ ${os} == "UNKNOWN" ]]; then
        echo "Unknown OS" >&2
        exit 1
    fi
    if [[ ${arch} == "UNKNOWN" ]]; then
        echo "Unknown Arch" >&2
        exit 1
    fi

    echo "${os}_${arch}"
}
43+
44+
os_arch=$(detect_os_arch)

# htmltest release naming convention: htmltest_<version>_<os>_<arch>.tar.gz
# The release tag carries a "v" prefix (v<version>); the asset file name does not.
download_url=https://github.com/wjdp/htmltest/releases/download/v${TARGET_VERSION}/htmltest_${TARGET_VERSION}_${os_arch}.tar.gz

# This script runs with xtrace enabled (set -x). Turn tracing off before
# touching HUB_TOKEN, otherwise the bearer token would be echoed verbatim into
# the console / CI log when the array assignment and the curl command are traced.
# The redirection hides the trace line of `set +x` itself.
{ set +x; } 2>/dev/null

# Use HUB_TOKEN for authenticated requests if available (avoids GitHub rate limits in CI)
curl_auth=()
if [[ -n ${HUB_TOKEN:-} ]]; then
    curl_auth=(-H "Authorization: Bearer ${HUB_TOKEN}")
fi

echo -n "Downloading ${download_url} to ${TARGETDIR}: "
# -S keeps curl's error message visible despite -s, so a failed download
# (e.g. an HTTP error caught by -f) explains itself instead of exiting silently.
curl -sS -L --fail-early -f "${curl_auth[@]+"${curl_auth[@]}"}" -o- "${download_url}" | tar -xz -C "${TARGETDIR}" htmltest
echo "Done"

0 commit comments

Comments
 (0)