44from __future__ import annotations
55
66import argparse
7+ import json
78import os
89import re
910import subprocess
1617 from collections .abc import Sequence
1718
# Matches a fenced Markdown code block: an opening ``` line (optionally
# followed by a language tag), the block body, and a closing ``` line that
# uses the same indentation as the opening one.
#
# The indentation is spelled `\x20*` instead of a literal space so that the
# pattern keeps its meaning when its source is embedded in the verbose-mode
# regex below, where unescaped whitespace is ignored.
SNIPPED_RE = re.compile(
    r"(?P<before>^(?P<indent>\x20*)```(?:\s*(?P<language>\w+))?\n)"
    r"(?P<code>.*?)"
    r"(?P<after>^(?P=indent)```\s*$)",
    re.DOTALL | re.MULTILINE,
)

# Long explanation: https://www.rexegg.com/regex-best-trick.html
#
# Short explanation:
# Match both code blocks and shortcut links, then discard the former.
# Whatever is matched by the second branch is guaranteed to never be
# part of a code block, as that would already be caught by the first.
#
# A match produced by the code-block branch has `name` set to None; only
# matches with a non-None `name` are shortcut links of the form [`name`]
# that are not followed by `[` or `(`.
#
# NOTE: interpolate `SNIPPED_RE.pattern` (the regex source), not the compiled
# object itself: formatting a compiled pattern yields its repr
# ("re.compile('...', ...)"), which would silently disable the first branch.
BACKTICKED_SHORTCUT_LINK_RE = re.compile(
    rf"""(?msx)
    (?:{SNIPPED_RE.pattern}
    | \[`(?P<name>[^`\n]+)`](?![\[(])
    )
    """
)
39+
2540# For some rules, we don't want Ruff to fix the formatting as this would "fix" the
2641# example.
2742KNOWN_FORMATTING_VIOLATIONS = [
@@ -238,6 +253,28 @@ def format_file(file: Path, error_known: bool, args: argparse.Namespace) -> int:
238253 return 0
239254
240255
def find_backticked_shortcut_links(
    path: Path, all_config_names: dict[str, object]
) -> set[str]:
    """Check for links of the form: [`foobar`].

    Scans the file at ``path`` with ``BACKTICKED_SHORTCUT_LINK_RE`` and
    collects the link names that are not keys of ``all_config_names``.

    See explanation at #16010.

    Args:
        path: File to scan; it is read as UTF-8 text.
        all_config_names: Names that are exempt from reporting. Only
            membership (``name in all_config_names``) is used.

    Returns:
        The set of offending link names found in the file (empty when there
        are none).
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    contents = path.read_text(encoding="utf-8")

    broken_link_names: set[str] = set()

    for match in BACKTICKED_SHORTCUT_LINK_RE.finditer(contents):
        # `name` is None when the match came from the regex's code-block
        # branch; those matches exist only to be skipped.
        name = match["name"]

        if name is not None and name not in all_config_names:
            broken_link_names.add(name)

    return broken_link_names
276+
277+
241278def main (argv : Sequence [str ] | None = None ) -> int :
242279 """Check code snippets in docs are formatted by Ruff."""
243280 parser = argparse .ArgumentParser (
@@ -291,8 +328,14 @@ def main(argv: Sequence[str] | None = None) -> int:
291328 print ("Please remove them and re-run." )
292329 return 1
293330
331+ ruff_config_output = subprocess .check_output (
332+ ["ruff" , "config" , "--output-format" , "json" ], encoding = "utf-8"
333+ )
334+ all_config_names = json .loads (ruff_config_output )
335+
294336 violations = 0
295337 errors = 0
338+ broken_links : dict [str , set [str ]] = {}
296339 print ("Checking docs formatting..." )
297340 for file in [* static_docs , * generated_docs ]:
298341 rule_name = file .name .split ("." )[0 ]
@@ -307,13 +350,38 @@ def main(argv: Sequence[str] | None = None) -> int:
307350 elif result == 2 and not error_known :
308351 errors += 1
309352
353+ broken_links_in_file = find_backticked_shortcut_links (file , all_config_names )
354+
355+ if broken_links_in_file :
356+ broken_links [file .name ] = broken_links_in_file
357+
310358 if violations > 0 :
311359 print (f"Formatting violations identified: { violations } " )
312360
313361 if errors > 0 :
314362 print (f"New code block parse errors identified: { errors } " )
315363
316- if violations > 0 or errors > 0 :
364+ if broken_links :
365+ print ()
366+ print ("Do not use backticked shortcut links: [`foobar`]" )
367+ print (
368+ "They work with Mkdocs but cannot be rendered by CommonMark and GFM-compliant implementers."
369+ )
370+ print ("Instead, use an explicit label:" )
371+ print ("```markdown" )
372+ print ("[`lorem.ipsum`][lorem-ipsum]" )
373+ print ()
374+ print ("[lorem-ipsum]: https://example.com/" )
375+ print ("```" )
376+
377+ print ()
378+ print ("The following links are found to be broken:" )
379+
380+ for filename , link_names in broken_links .items ():
381+ print (f"- { filename } :" )
382+ print ("\n " .join (f" - { name } " for name in link_names ))
383+
384+ if violations > 0 or errors > 0 or broken_links :
317385 return 1
318386
319387 print ("All docs are formatted correctly." )
0 commit comments