Skip to content

Commit 1e7dfbc

Browse files
pythongh-148991: Add colour to tokenize CLI output (python#148992)
Co-authored-by: Stan Ulbrych <stan@python.org>
1 parent 5d41632 commit 1e7dfbc

6 files changed

Lines changed: 85 additions & 12 deletions

File tree

Doc/library/tokenize.rst

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ type can be determined by checking the ``exact_type`` property on the
2828
**undefined** when providing invalid Python code and it can change at any
2929
point.
3030

31-
Tokenizing Input
31+
Tokenizing input
3232
----------------
3333

3434
The primary entry point is a :term:`generator`:
@@ -146,7 +146,7 @@ function it uses to do this is available:
146146

147147
.. _tokenize-cli:
148148

149-
Command-Line Usage
149+
Command-line usage
150150
------------------
151151

152152
.. versionadded:: 3.3
@@ -173,8 +173,12 @@ The following options are accepted:
173173
If :file:`filename.py` is specified its contents are tokenized to stdout.
174174
Otherwise, tokenization is performed on stdin.
175175

176+
.. versionadded:: next
177+
Output is in color by default and can be
178+
:ref:`controlled using environment variables <using-on-controlling-color>`.
179+
176180
Examples
177-
------------------
181+
--------
178182

179183
Example of a script rewriter that transforms float literals into Decimal
180184
objects::
@@ -227,7 +231,7 @@ Example of tokenizing from the command line. The script::
227231

228232
will be tokenized to the following output where the first column is the range
229233
of the line/column coordinates where the token is found, the second column is
230-
the name of the token, and the final column is the value of the token (if any)
234+
the name of the token, and the final column is the value of the token (if any):
231235

232236
.. code-block:: shell-session
233237

Doc/whatsnew/3.15.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1244,6 +1244,15 @@ tkinter
12441244
(Contributed by Matthias Kievernagel and Serhiy Storchaka in :gh:`47655`.)
12451245

12461246

1247+
tokenize
1248+
--------
1249+
1250+
* The output of the :mod:`tokenize` :ref:`command-line interface
1251+
<tokenize-cli>` is colored by default. This can be controlled with
1252+
:ref:`environment variables <using-on-controlling-color>`.
1253+
(Contributed by Hugo van Kemenade in :gh:`148991`.)
1254+
1255+
12471256
.. _whatsnew315-tomllib-1-1-0:
12481257

12491258
tomllib

Lib/_colorize.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,6 +386,14 @@ class Timeit(ThemeSection):
386386
reset: str = ANSIColors.RESET
387387

388388

389+
@dataclass(frozen=True, kw_only=True)
class Tokenize(ThemeSection):
    # Theme section for the ``tokenize`` command-line output.
    # whitespace: color for structural tokens (INDENT/DEDENT/NEWLINE/NL/...)
    whitespace: str = ANSIColors.GREY
    # error: color for ERRORTOKEN entries
    error: str = ANSIColors.BOLD_RED
    # position: color for the line/column coordinate numbers
    position: str = ANSIColors.GREY
    # delimiter: color for the ``,``/``-``/``:`` separators between coordinates
    delimiter: str = ANSIColors.RESET
395+
396+
389397
@dataclass(frozen=True, kw_only=True)
390398
class Traceback(ThemeSection):
391399
type: str = ANSIColors.BOLD_MAGENTA
@@ -423,6 +431,7 @@ class Theme:
423431
live_profiler: LiveProfiler = field(default_factory=LiveProfiler)
424432
syntax: Syntax = field(default_factory=Syntax)
425433
timeit: Timeit = field(default_factory=Timeit)
434+
tokenize: Tokenize = field(default_factory=Tokenize)
426435
traceback: Traceback = field(default_factory=Traceback)
427436
unittest: Unittest = field(default_factory=Unittest)
428437

@@ -437,6 +446,7 @@ def copy_with(
437446
live_profiler: LiveProfiler | None = None,
438447
syntax: Syntax | None = None,
439448
timeit: Timeit | None = None,
449+
tokenize: Tokenize | None = None,
440450
traceback: Traceback | None = None,
441451
unittest: Unittest | None = None,
442452
) -> Self:
@@ -454,6 +464,7 @@ def copy_with(
454464
live_profiler=live_profiler or self.live_profiler,
455465
syntax=syntax or self.syntax,
456466
timeit=timeit or self.timeit,
467+
tokenize=tokenize or self.tokenize,
457468
traceback=traceback or self.traceback,
458469
unittest=unittest or self.unittest,
459470
)
@@ -475,6 +486,7 @@ def no_colors(cls) -> Self:
475486
live_profiler=LiveProfiler.no_colors(),
476487
syntax=Syntax.no_colors(),
477488
timeit=Timeit.no_colors(),
489+
tokenize=Tokenize.no_colors(),
478490
traceback=Traceback.no_colors(),
479491
unittest=Unittest.no_colors(),
480492
)

Lib/test/test_tokenize.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3326,6 +3326,7 @@ def test_newline_at_the_end_of_buffer(self):
33263326
run_test_script(file_name)
33273327

33283328

3329+
@support.force_not_colorized_test_class
33293330
class CommandLineTest(unittest.TestCase):
33303331
def setUp(self):
33313332
self.filename = tempfile.mktemp()

Lib/tokenize.py

Lines changed: 54 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
from token import *
3636
from token import EXACT_TOKEN_TYPES
3737
import _tokenize
38+
lazy import _colorize
3839

3940
cookie_re = re.compile(br'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII)
4041
blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)
@@ -505,6 +506,56 @@ def generate_tokens(readline):
505506
"""
506507
return _generate_tokens_from_c_tokenizer(readline, extra_tokens=True)
507508

509+
510+
def _get_token_colors(syntax, tokenize):
511+
"""Map token type numbers to theme colors."""
512+
return frozendict({
513+
COMMENT: syntax.comment,
514+
DEDENT: tokenize.whitespace,
515+
ENCODING: tokenize.whitespace,
516+
ENDMARKER: tokenize.whitespace,
517+
ERRORTOKEN: tokenize.error,
518+
FSTRING_START: syntax.string,
519+
FSTRING_MIDDLE: syntax.string,
520+
FSTRING_END: syntax.string,
521+
INDENT: tokenize.whitespace,
522+
NAME: syntax.reset,
523+
NEWLINE: tokenize.whitespace,
524+
NL: tokenize.whitespace,
525+
NUMBER: syntax.number,
526+
OP: syntax.op,
527+
SOFT_KEYWORD: syntax.soft_keyword,
528+
STRING: syntax.string,
529+
TSTRING_START: syntax.string,
530+
TSTRING_MIDDLE: syntax.string,
531+
TSTRING_END: syntax.string,
532+
})
533+
534+
535+
def _format_tokens(tokens, *, color=False, exact=False):
    """Yield one display line per token for the CLI.

    Each line shows a 20-column padded ``row,col-row,col:`` range,
    the token name left-padded to 15 columns, then the repr of the
    token string.  When *color* is true the active ``_colorize``
    theme is applied; when *exact* is true ``exact_type`` names are
    shown instead of ``type`` names.
    """
    theme = _colorize.get_theme(force_no_color=not color)
    syn = theme.syntax
    tkn = theme.tokenize
    color_map = _get_token_colors(syn, tkn)
    pos, sep = tkn.position, tkn.delimiter
    for tok in tokens:
        (srow, scol), (erow, ecol) = tok.start, tok.end
        # Colored coordinates; separators use the delimiter color.
        colored_range = (
            f"{pos}{srow}{sep},{pos}{scol}{sep}-"
            f"{pos}{erow}{sep},{pos}{ecol}{sep}:"
        )
        # Pad based on the visible (uncolored) width, since ANSI escape
        # sequences occupy characters but no screen columns.
        plain_range = f"{srow},{scol}-{erow},{ecol}:"
        padding = " " * (20 - len(plain_range))
        kind = tok_name[tok.exact_type if exact else tok.type]
        body_color = color_map.get(tok.type, syn.reset)
        yield (
            f"{colored_range}{padding}"
            f"{body_color}{kind:<15}"
            f"{syn.reset}{tok.string!r:<15}"
        )
557+
558+
508559
def _main(args=None):
509560
import argparse
510561

@@ -524,7 +575,7 @@ def error(message, filename=None, location=None):
524575
sys.exit(1)
525576

526577
# Parse the arguments and options
527-
parser = argparse.ArgumentParser(color=True)
578+
parser = argparse.ArgumentParser()
528579
parser.add_argument(dest='filename', nargs='?',
529580
metavar='filename.py',
530581
help='the file to tokenize; defaults to stdin')
@@ -545,13 +596,8 @@ def error(message, filename=None, location=None):
545596

546597

547598
# Output the tokenization
548-
for token in tokens:
549-
token_type = token.type
550-
if args.exact:
551-
token_type = token.exact_type
552-
token_range = "%d,%d-%d,%d:" % (token.start + token.end)
553-
print("%-20s%-15s%-15r" %
554-
(token_range, tok_name[token_type], token.string))
599+
for line in _format_tokens(tokens, color=True, exact=args.exact):
600+
print(line)
555601
except IndentationError as err:
556602
line, column = err.args[1][1:3]
557603
error(err.args[0], filename, (line, column))
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add colour to :mod:`tokenize` CLI output. Patch by Hugo van Kemenade.

0 commit comments

Comments
 (0)