|
| 1 | +#!/usr/bin/env bash |
| 2 | +# description: Check documentation links for 404s using htmltest, |
| 3 | +# and verify that internal links use Hugo's relref shortcode. |
| 4 | +# |
| 5 | +# Usage: |
| 6 | +# ./hack/check-links.sh # Check only (exits 1 if bare relative links found) |
| 7 | +# ./hack/check-links.sh --fix # Auto-fix bare relative links to use relref, then check |
| 8 | +# |
| 9 | +# This script first checks that markdown files don't use bare relative |
| 10 | +# links (which break in Hugo's nested index.html output), then builds |
| 11 | +# the Hugo docs site and runs htmltest against the generated HTML to |
| 12 | +# detect broken internal and external links. |
# Strict mode: stop on a failing command (errexit), on use of an unset
# variable (nounset) and on a failure in any pipeline stage (pipefail);
# noglob switches pathname expansion off.
set -o errexit -o nounset -o noglob -o pipefail

# Only the first positional argument is inspected. "--fix" enables auto-fix
# mode; anything else, or no argument at all, leaves the script check-only.
case "${1:-}" in
  --fix) FIX_MODE=true ;;
  *) FIX_MODE=false ;;
esac
| 19 | + |
# Repository root as reported by git; every path below is derived from it.
TOPDIR="$(git rev-parse --show-toplevel)"

# Scratch area inside the checkout for downloaded tools and build output.
# NOTE(review): TMPDIR is also the conventional temp-directory environment
# variable; if it is already exported, child processes will see this value.
# Confirm that is intended.
TMPDIR="${TOPDIR}/tmp"
DOCS_BUILD_DIR="${TMPDIR}/docs-build-test"
DOCS_CONTENT_DIR="${TOPDIR}/docs/content"

# Tool versions: a non-empty value from the environment wins over the default.
: "${HUGO_VERSION:=0.146.0}"
: "${HTMLTEST_VERSION:=0.17.0}"
HUGO_BIN="${TMPDIR}/hugo/hugo"
HTMLTEST_BIN="${TMPDIR}/htmltest/htmltest"

# External link checking defaults to "true" unless the environment says otherwise.
: "${CHECK_EXTERNAL:=true}"
| 29 | + |
| 30 | +# --- Step 1: Check (and optionally fix) that internal links use relref shortcode --- |
| 31 | +# Bare relative links like [text](some-page) break when Hugo generates |
| 32 | +# nested index.html files. Authors should use {{< relref "/docs/path" >}}. |
| 33 | +# Excluded from checking: |
| 34 | +# - External URLs (http://, https://) |
| 35 | +# - Anchor-only links (#section) |
| 36 | +# - Links already using relref or other Hugo shortcodes |
| 37 | +# - Image references (![alt](path)) |
| 38 | +# - mailto: and tel: links |
| 39 | +# - Absolute paths starting with / (valid Hugo site-root paths) |
| 40 | +# - Lines containing Hugo shortcodes (card, relref, etc.) |
| 41 | +# - Lines with raw HTML (href=) |
# Announce which flavour of step 1 is about to run.
case "$FIX_MODE" in
  true) printf '%s\n' "==> Fixing bare relative links to use relref shortcode..." ;;
  *) printf '%s\n' "==> Checking documentation links use relref shortcode..." ;;
esac

# Running tally of bare relative links; starts at zero.
relref_count=0
| 48 | + |
| 49 | +while IFS= read -r -d '' file; do |
| 50 | + # Compute the directory of this file relative to docs/content/ |
| 51 | + # e.g., docs/content/docs/operations/settings.md -> docs/operations |
| 52 | + file_rel="${file#"${DOCS_CONTENT_DIR}/"}" |
| 53 | + file_dir=$(dirname "$file_rel") |
| 54 | + # Normalize: "." means root of content |
| 55 | + if [[ "$file_dir" == "." ]]; then |
| 56 | + file_dir="" |
| 57 | + fi |
| 58 | + |
| 59 | + if [[ "$FIX_MODE" == "true" ]]; then |
| 60 | + # Use perl to find and replace bare relative links in-place |
| 61 | + # Pass the file's directory so we can compute absolute relref paths |
| 62 | + FILE_DIR="$file_dir" perl -pi -e ' |
| 63 | + BEGIN { $dir = $ENV{FILE_DIR}; } |
| 64 | + # Skip lines with Hugo shortcodes |
| 65 | + next if /\{\{[<%]/; |
| 66 | + # Skip lines with raw HTML href |
| 67 | + next if /href=/; |
| 68 | + # Replace [text](bare-target) but NOT  |
| 69 | + s{(?<!!)\[([^\]]*)\]\(([^)]+)\)}{ |
| 70 | + my ($text, $target) = ($1, $2); |
| 71 | + # Only fix bare relative links |
| 72 | + if ($target =~ m{^https?://} || |
| 73 | + $target =~ m{^#} || |
| 74 | + $target =~ m{^(mailto|tel):} || |
| 75 | + $target =~ m/\{\{/ || |
| 76 | + $target =~ m{^/}) { |
| 77 | + "[$text]($target)"; |
| 78 | + } else { |
| 79 | + # Separate anchor from path |
| 80 | + my ($path, $anchor) = $target =~ m{^([^#]*)(.*)$}; |
| 81 | + # Resolve relative path against file directory |
| 82 | + my $abs; |
| 83 | + if ($dir eq "") { |
| 84 | + $abs = $path; |
| 85 | + } else { |
| 86 | + $abs = "$dir/$path"; |
| 87 | + } |
| 88 | + # Normalize: collapse foo/../bar -> bar |
| 89 | + while ($abs =~ s{[^/]+/\.\./}{}) {} |
| 90 | + $abs =~ s{/\.\./}{/}g; |
| 91 | + # Remove trailing / |
| 92 | + $abs =~ s{/$}{}; |
| 93 | + # Remove .md suffix |
| 94 | + $abs =~ s{\.md$}{}; |
| 95 | + # Prepend / |
| 96 | + $abs = "/$abs" unless $abs =~ m{^/}; |
| 97 | + "[$text]({{< relref \"${abs}${anchor}\" >}})"; |
| 98 | + } |
| 99 | + }ge; |
| 100 | + ' "$file" |
| 101 | + fi |
| 102 | + |
| 103 | + # Now scan the (possibly modified) file for remaining bare relative links |
| 104 | + line_num=0 |
| 105 | + while IFS= read -r line; do |
| 106 | + line_num=$((line_num + 1)) |
| 107 | + |
| 108 | + # Skip lines containing Hugo shortcodes (card, relref, etc.) |
| 109 | + if [[ "$line" =~ \{\{[\<\%] ]]; then |
| 110 | + continue |
| 111 | + fi |
| 112 | + |
| 113 | + # Skip lines with raw HTML tags |
| 114 | + if [[ "$line" =~ href= ]]; then |
| 115 | + continue |
| 116 | + fi |
| 117 | + |
| 118 | + # Skip lines with no markdown link syntax |
| 119 | + if ! [[ "$line" =~ \]\( ]]; then |
| 120 | + continue |
| 121 | + fi |
| 122 | + |
| 123 | + # Extract bare relative link targets using perl |
| 124 | + matches=$(echo "$line" | perl -ne ' |
| 125 | + while (/(?<!!)\[([^\]]*)\]\(([^)]+)\)/g) { |
| 126 | + my $target = $2; |
| 127 | + next if $target =~ m{^https?://}; |
| 128 | + next if $target =~ m{^#}; |
| 129 | + next if $target =~ m{^(mailto|tel):}; |
| 130 | + next if $target =~ m{\{\{}; |
| 131 | + next if $target =~ m{^/}; |
| 132 | + print "$target\n"; |
| 133 | + } |
| 134 | + ' 2>/dev/null || true) |
| 135 | + |
| 136 | + if [[ -n "$matches" ]]; then |
| 137 | + while IFS= read -r target; do |
| 138 | + if [[ "$FIX_MODE" == "true" ]]; then |
| 139 | + echo " WARNING: ${file}:${line_num}: could not auto-fix bare relative link '${target}'" |
| 140 | + else |
| 141 | + echo " ERROR: ${file}:${line_num}: bare relative link '${target}' should use {{< relref >}}" |
| 142 | + fi |
| 143 | + relref_count=$((relref_count + 1)) |
| 144 | + done <<<"$matches" |
| 145 | + fi |
| 146 | + done <"$file" |
| 147 | +done < <(find "$DOCS_CONTENT_DIR" -name '*.md' -print0) |
| 148 | + |
# Summarise step 1. Check mode exits 1 when bare links remain; fix mode only
# warns about leftovers, or lists what git reports as changed under docs/content/.
if [[ "$FIX_MODE" != "true" ]]; then
  if ((relref_count > 0)); then
    echo ""
    echo "Found ${relref_count} bare relative link(s). Use {{< relref \"/docs/path\" >}} instead."
    echo "Run 'make fix-links' to auto-fix them."
    exit 1
  fi
  echo " All internal documentation links use relref correctly."
elif ((relref_count > 0)); then
  echo ""
  echo "WARNING: ${relref_count} link(s) could not be auto-fixed. Please fix manually."
else
  # Best effort: a git failure simply yields an empty list.
  changed=$(git -C "${TOPDIR}" diff --name-only -- docs/content/ 2>/dev/null || true)
  if [[ -z "$changed" ]]; then
    echo " No bare relative links found, nothing to fix."
  else
    echo " Fixed files:"
    while IFS= read -r changed_file; do
      echo " $changed_file"
    done <<<"$changed"
    echo ""
    echo " Run 'make check-links' to verify the fixes."
  fi
fi
| 174 | + |
# --- Step 2: Build Hugo site and run htmltest ---

# Write a copy of an htmltest config with external link checking turned off.
#   $1 - source config file
#   $2 - destination file (its directory is created when missing)
# Every top-level "CheckExternal:" line is rewritten to false. When the source
# has no such line the setting is appended, so the override can never silently
# be a no-op.
_write_internal_only_conf() {
  local src=$1 dst=$2
  mkdir -p -- "$(dirname -- "$dst")"
  awk '
    /^CheckExternal:/ { print "CheckExternal: false"; seen = 1; next }
    { print }
    END { if (!seen) print "CheckExternal: false" }
  ' "$src" >"$dst"
}

# Download Hugo if not present
echo "==> Ensuring Hugo ${HUGO_VERSION} is available..."
"${TOPDIR}/hack/download-hugo.sh" "${HUGO_VERSION}" "${TMPDIR}/hugo"

# Download htmltest if not present
echo "==> Ensuring htmltest ${HTMLTEST_VERSION} is available..."
"${TOPDIR}/hack/download-htmltest.sh" "${HTMLTEST_VERSION}" "${TMPDIR}/htmltest"

# Clean stale output directories that cause Hugo to deadlock
rm -rf "${DOCS_BUILD_DIR}" "${TOPDIR}/docs/public"

# Build Hugo site
echo "==> Building Hugo documentation site..."
"${HUGO_BIN}" build --gc --minify -s "${TOPDIR}/docs/" -d "${DOCS_BUILD_DIR}"

# Run htmltest, against a derived internal-only config when external
# checking is disabled via CHECK_EXTERNAL=false or CHECK_EXTERNAL=0.
echo "==> Running htmltest link checker..."
HTMLTEST_CONF="${TOPDIR}/docs/.htmltest.yml"
if [[ "${CHECK_EXTERNAL}" == "false" || "${CHECK_EXTERNAL}" == "0" ]]; then
  echo "==> External link checking disabled (CHECK_EXTERNAL=${CHECK_EXTERNAL})"
  HTMLTEST_CONF="${TMPDIR}/.htmltest-internal.yml"
  _write_internal_only_conf "${TOPDIR}/docs/.htmltest.yml" "${HTMLTEST_CONF}"
fi
"${HTMLTEST_BIN}" -c "${HTMLTEST_CONF}"
0 commit comments