Skip to content

Commit 1e7dfbc

Browse files
pythongh-148991: Add colour to tokenize CLI output (python#148992)
Co-authored-by: Stan Ulbrych <stan@python.org>
1 parent 5d41632 commit 1e7dfbc

6 files changed

Lines changed: 85 additions & 12 deletions

File tree

Doc/library/tokenize.rst

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ type can be determined by checking the ``exact_type`` property on the
2828
**undefined** when providing invalid Python code and it can change at any
2929
point.
3030

31-
Tokenizing Input
31+
Tokenizing input
3232
----------------
3333

3434
The primary entry point is a :term:`generator`:
@@ -146,7 +146,7 @@ function it uses to do this is available:
146146

147147
.. _tokenize-cli:
148148

149-
Command-Line Usage
149+
Command-line usage
150150
------------------
151151

152152
.. versionadded:: 3.3
@@ -173,8 +173,12 @@ The following options are accepted:
173173
If :file:`filename.py` is specified its contents are tokenized to stdout.
174174
Otherwise, tokenization is performed on stdin.
175175

176+
.. versionadded:: next
177+
Output is in color by default and can be
178+
:ref:`controlled using environment variables <using-on-controlling-color>`.
179+
176180
Examples
177-
------------------
181+
--------
178182

179183
Example of a script rewriter that transforms float literals into Decimal
180184
objects::
@@ -227,7 +231,7 @@ Example of tokenizing from the command line. The script::
227231

228232
will be tokenized to the following output where the first column is the range
229233
of the line/column coordinates where the token is found, the second column is
230-
the name of the token, and the final column is the value of the token (if any)
234+
the name of the token, and the final column is the value of the token (if any):
231235

232236
.. code-block:: shell-session
233237

Doc/whatsnew/3.15.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1244,6 +1244,15 @@ tkinter
12441244
(Contributed by Matthias Kievernagel and Serhiy Storchaka in :gh:`47655`.)
12451245

12461246

1247+
tokenize
1248+
--------
1249+
1250+
* The output of the :mod:`tokenize` :ref:`command-line interface
1251+
<tokenize-cli>` is colored by default. This can be controlled with
1252+
:ref:`environment variables <using-on-controlling-color>`.
1253+
(Contributed by Hugo van Kemenade in :gh:`148991`.)
1254+
1255+
12471256
.. _whatsnew315-tomllib-1-1-0:
12481257

12491258
tomllib

Lib/_colorize.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,6 +386,14 @@ class Timeit(ThemeSection):
386386
reset: str = ANSIColors.RESET
387387

388388

389+
@dataclass(frozen=True, kw_only=True)
class Tokenize(ThemeSection):
    # Theme section for the ``tokenize`` command-line output.
    # whitespace: color for structural tokens (INDENT/DEDENT/NEWLINE/NL/...)
    whitespace: str = ANSIColors.GREY
    # error: color for ERRORTOKEN entries
    error: str = ANSIColors.BOLD_RED
    # position: color for the line/column coordinate numbers
    position: str = ANSIColors.GREY
    # delimiter: color for the ``,``/``-``/``:`` separators between coordinates
    delimiter: str = ANSIColors.RESET
395+
396+
389397
@dataclass(frozen=True, kw_only=True)
390398
class Traceback(ThemeSection):
391399
type: str = ANSIColors.BOLD_MAGENTA
@@ -423,6 +431,7 @@ class Theme:
423431
live_profiler: LiveProfiler = field(default_factory=LiveProfiler)
424432
syntax: Syntax = field(default_factory=Syntax)
425433
timeit: Timeit = field(default_factory=Timeit)
434+
tokenize: Tokenize = field(default_factory=Tokenize)
426435
traceback: Traceback = field(default_factory=Traceback)
427436
unittest: Unittest = field(default_factory=Unittest)
428437

@@ -437,6 +446,7 @@ def copy_with(
437446
live_profiler: LiveProfiler | None = None,
438447
syntax: Syntax | None = None,
439448
timeit: Timeit | None = None,
449+
tokenize: Tokenize | None = None,
440450
traceback: Traceback | None = None,
441451
unittest: Unittest | None = None,
442452
) -> Self:
@@ -454,6 +464,7 @@ def copy_with(
454464
live_profiler=live_profiler or self.live_profiler,
455465
syntax=syntax or self.syntax,
456466
timeit=timeit or self.timeit,
467+
tokenize=tokenize or self.tokenize,
457468
traceback=traceback or self.traceback,
458469
unittest=unittest or self.unittest,
459470
)
@@ -475,6 +486,7 @@ def no_colors(cls) -> Self:
475486
live_profiler=LiveProfiler.no_colors(),
476487
syntax=Syntax.no_colors(),
477488
timeit=Timeit.no_colors(),
489+
tokenize=Tokenize.no_colors(),
478490
traceback=Traceback.no_colors(),
479491
unittest=Unittest.no_colors(),
480492
)

Lib/test/test_tokenize.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3326,6 +3326,7 @@ def test_newline_at_the_end_of_buffer(self):
33263326
run_test_script(file_name)
33273327

33283328

3329+
@support.force_not_colorized_test_class
33293330
class CommandLineTest(unittest.TestCase):
33303331
def setUp(self):
33313332
self.filename = tempfile.mktemp()

Lib/tokenize.py

Lines changed: 54 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
from token import *
3636
from token import EXACT_TOKEN_TYPES
3737
import _tokenize
38+
lazy import _colorize
3839

3940
cookie_re = re.compile(br'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII)
4041
blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)
@@ -505,6 +506,56 @@ def generate_tokens(readline):
505506
"""
506507
return _generate_tokens_from_c_tokenizer(readline, extra_tokens=True)
507508

509+
510+
def _get_token_colors(syntax, tokenize):
511+
"""Map token type numbers to theme colors."""
512+
return frozendict({
513+
COMMENT: syntax.comment,
514+
DEDENT: tokenize.whitespace,
515+
ENCODING: tokenize.whitespace,
516+
ENDMARKER: tokenize.whitespace,
517+
ERRORTOKEN: tokenize.error,
518+
FSTRING_START: syntax.string,
519+
FSTRING_MIDDLE: syntax.string,
520+
FSTRING_END: syntax.string,
521+
INDENT: tokenize.whitespace,
522+
NAME: syntax.reset,
523+
NEWLINE: tokenize.whitespace,
524+
NL: tokenize.whitespace,
525+
NUMBER: syntax.number,
526+
OP: syntax.op,
527+
SOFT_KEYWORD: syntax.soft_keyword,
528+
STRING: syntax.string,
529+
TSTRING_START: syntax.string,
530+
TSTRING_MIDDLE: syntax.string,
531+
TSTRING_END: syntax.string,
532+
})
533+
534+
535+
def _format_tokens(tokens, *, color=False, exact=False):
    """Yield one display line per token for the CLI.

    Each line shows a 20-column padded ``row,col-row,col:`` range,
    the token name left-padded to 15 columns, then the repr of the
    token string.  When *color* is true the active ``_colorize``
    theme is applied; when *exact* is true ``exact_type`` names are
    shown instead of ``type`` names.
    """
    theme = _colorize.get_theme(force_no_color=not color)
    syn = theme.syntax
    tkn = theme.tokenize
    color_map = _get_token_colors(syn, tkn)
    pos, sep = tkn.position, tkn.delimiter
    for tok in tokens:
        (srow, scol), (erow, ecol) = tok.start, tok.end
        # Colored coordinates; separators use the delimiter color.
        colored_range = (
            f"{pos}{srow}{sep},{pos}{scol}{sep}-"
            f"{pos}{erow}{sep},{pos}{ecol}{sep}:"
        )
        # Pad based on the visible (uncolored) width, since ANSI escape
        # sequences occupy characters but no screen columns.
        plain_range = f"{srow},{scol}-{erow},{ecol}:"
        padding = " " * (20 - len(plain_range))
        kind = tok_name[tok.exact_type if exact else tok.type]
        body_color = color_map.get(tok.type, syn.reset)
        yield (
            f"{colored_range}{padding}"
            f"{body_color}{kind:<15}"
            f"{syn.reset}{tok.string!r:<15}"
        )
557+
558+
508559
def _main(args=None):
509560
import argparse
510561

@@ -524,7 +575,7 @@ def error(message, filename=None, location=None):
524575
sys.exit(1)
525576

526577
# Parse the arguments and options
527-
parser = argparse.ArgumentParser(color=True)
578+
parser = argparse.ArgumentParser()
528579
parser.add_argument(dest='filename', nargs='?',
529580
metavar='filename.py',
530581
help='the file to tokenize; defaults to stdin')
@@ -545,13 +596,8 @@ def error(message, filename=None, location=None):
545596

546597

547598
# Output the tokenization
548-
for token in tokens:
549-
token_type = token.type
550-
if args.exact:
551-
token_type = token.exact_type
552-
token_range = "%d,%d-%d,%d:" % (token.start + token.end)
553-
print("%-20s%-15s%-15r" %
554-
(token_range, tok_name[token_type], token.string))
599+
for line in _format_tokens(tokens, color=True, exact=args.exact):
600+
print(line)
555601
except IndentationError as err:
556602
line, column = err.args[1][1:3]
557603
error(err.args[0], filename, (line, column))
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add colour to :mod:`tokenize` CLI output. Patch by Hugo van Kemenade.

0 commit comments

Comments
 (0)