Skip to content

Commit 1db8392

Browse files
Check for backtick-quoted shortcut links in CI (#16114)
## Summary

Follow-up to #16035.

`check_docs_formatted.py` now reports backtick-quoted shortcut links in rule documentation. It uses a regular expression to find them. Such a link:

* Starts with `[`, followed by a backtick (<code>\`</code>), then a "name" sequence of at least one character that is neither a backtick nor a newline, followed by another backtick (<code>\`</code>), and ends with `]`.
* Is not followed by either a `[` or a `(`.
* Is not placed within a code block.

If the name is a known Ruff option name, the link is not considered a violation.

## Test Plan

Manual.
1 parent 81e202e commit 1db8392

1 file changed

Lines changed: 70 additions & 2 deletions

File tree

scripts/check_docs_formatted.py

Lines changed: 70 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from __future__ import annotations
55

66
import argparse
7+
import json
78
import os
89
import re
910
import subprocess
@@ -16,12 +17,26 @@
1617
from collections.abc import Sequence
1718

1819
# Matches a fenced Markdown code block: the opening fence (with its indentation
# and optional language tag), the code, and the closing fence at the same
# indentation.
#
# The indentation is written as ``\x20*`` instead of a literal space so that the
# pattern still matches leading spaces when its source is embedded in a
# verbose-mode (``re.X``) pattern, where unescaped whitespace is ignored.
SNIPPED_RE = re.compile(
    r"(?P<before>^(?P<indent>\x20*)```(?:\s*(?P<language>\w+))?\n)"
    r"(?P<code>.*?)"
    r"(?P<after>^(?P=indent)```\s*$)",
    re.DOTALL | re.MULTILINE,
)

# Long explanation: https://www.rexegg.com/regex-best-trick.html
#
# Short explanation:
# Match both code blocks and shortcut links, then discard the former.
# Whatever is matched by the second branch is guaranteed to never be
# part of a code block, as that would already be caught by the first.
#
# Only the second branch defines the ``name`` group, so ``match["name"]`` is
# ``None`` for code-block matches.
#
# NOTE: ``SNIPPED_RE.pattern`` (the pattern *source*) must be interpolated here.
# Formatting the compiled ``SNIPPED_RE`` object itself would insert its repr,
# ``re.compile('...', re.MULTILINE|re.DOTALL)``, and the first branch would
# then never match a code block.
BACKTICKED_SHORTCUT_LINK_RE = re.compile(
    rf"""(?msx)
    (?:{SNIPPED_RE.pattern}
    |   \[`(?P<name>[^`\n]+)`](?![\[(])
    )
    """
)
39+
2540
# For some rules, we don't want Ruff to fix the formatting as this would "fix" the
2641
# example.
2742
KNOWN_FORMATTING_VIOLATIONS = [
@@ -238,6 +253,28 @@ def format_file(file: Path, error_known: bool, args: argparse.Namespace) -> int:
238253
return 0
239254

240255

256+
def find_backticked_shortcut_links(
    path: Path, all_config_names: dict[str, object]
) -> set[str]:
    """Collect the names of backtick-quoted shortcut links found in a file.

    A backtick-quoted shortcut link has the form: [`foobar`].
    See explanation at #16010.

    Args:
        path: File whose text is scanned.
        all_config_names: Container of names that are exempt; a link whose
            name is in it is not reported.

    Returns:
        The distinct link names matched by ``BACKTICKED_SHORTCUT_LINK_RE``
        that are not in ``all_config_names``.
    """
    text = path.read_text()

    # Matches without a "name" group value (``None``) are the pattern's
    # code-block branch and are discarded by the filter below.
    candidates = (
        found["name"] for found in BACKTICKED_SHORTCUT_LINK_RE.finditer(text)
    )

    return {
        link_name
        for link_name in candidates
        if link_name is not None and link_name not in all_config_names
    }
276+
277+
241278
def main(argv: Sequence[str] | None = None) -> int:
242279
"""Check code snippets in docs are formatted by Ruff."""
243280
parser = argparse.ArgumentParser(
@@ -291,8 +328,14 @@ def main(argv: Sequence[str] | None = None) -> int:
291328
print("Please remove them and re-run.")
292329
return 1
293330

331+
ruff_config_output = subprocess.check_output(
332+
["ruff", "config", "--output-format", "json"], encoding="utf-8"
333+
)
334+
all_config_names = json.loads(ruff_config_output)
335+
294336
violations = 0
295337
errors = 0
338+
broken_links: dict[str, set[str]] = {}
296339
print("Checking docs formatting...")
297340
for file in [*static_docs, *generated_docs]:
298341
rule_name = file.name.split(".")[0]
@@ -307,13 +350,38 @@ def main(argv: Sequence[str] | None = None) -> int:
307350
elif result == 2 and not error_known:
308351
errors += 1
309352

353+
broken_links_in_file = find_backticked_shortcut_links(file, all_config_names)
354+
355+
if broken_links_in_file:
356+
broken_links[file.name] = broken_links_in_file
357+
310358
if violations > 0:
311359
print(f"Formatting violations identified: {violations}")
312360

313361
if errors > 0:
314362
print(f"New code block parse errors identified: {errors}")
315363

316-
if violations > 0 or errors > 0:
364+
if broken_links:
365+
print()
366+
print("Do not use backticked shortcut links: [`foobar`]")
367+
print(
368+
"They work with Mkdocs but cannot be rendered by CommonMark and GFM-compliant implementers."
369+
)
370+
print("Instead, use an explicit label:")
371+
print("```markdown")
372+
print("[`lorem.ipsum`][lorem-ipsum]")
373+
print()
374+
print("[lorem-ipsum]: https://example.com/")
375+
print("```")
376+
377+
print()
378+
print("The following links are found to be broken:")
379+
380+
for filename, link_names in broken_links.items():
381+
print(f"- {filename}:")
382+
print("\n".join(f" - {name}" for name in link_names))
383+
384+
if violations > 0 or errors > 0 or broken_links:
317385
return 1
318386

319387
print("All docs are formatted correctly.")

0 commit comments

Comments
 (0)