-
Notifications
You must be signed in to change notification settings - Fork 91
Expand file tree
/
Copy pathlexers.py
More file actions
162 lines (137 loc) · 5.36 KB
/
lexers.py
File metadata and controls
162 lines (137 loc) · 5.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
"""Pygments lexers"""
from __future__ import annotations
import re
# this is not added as an entry point in ipython, so we add it in this package
from IPython.lib.lexers import IPythonTracebackLexer # noqa: F401
import pygments.lexer
import pygments.token
_ansi_code_to_color = {
0: "Black",
1: "Red",
2: "Green",
3: "Yellow",
4: "Blue",
5: "Magenta",
6: "Cyan",
7: "White",
}
def _token_from_lexer_state(bold: bool, faint: bool, fg_color: str | None, bg_color: str | None):
    """Build the single Pygments token that represents a lexer state.

    A lexer can emit only one token per piece of text, but the state has
    several independent parts.  To work around this, the active parts are
    chained into one nested token under ``Token.Color``, in the fixed order
    bold, faint, foreground, background -- e.g. ``Token.Color.Bold.Red``.
    Background colors are prefixed with ``BG``.

    :param bold: whether bold is active
    :param faint: whether faint is active
    :param fg_color: foreground color name, or a falsy value for none
    :param bg_color: background color name, or a falsy value for none
    :return: ``pygments.token.Text`` when no part is active, otherwise the
        nested ``Token.Color`` token
    """
    parts: list[str] = []
    if bold:
        parts.append("Bold")
    if faint:
        parts.append("Faint")
    if fg_color:
        parts.append(fg_color)
    if bg_color:
        parts.append("BG" + bg_color)

    # Nothing active: plain text.
    if not parts:
        return pygments.token.Text

    # Attribute access on a token type yields its child token type, so
    # walking the parts produces the nested token.
    result = pygments.token.Token.Color
    for part in parts:
        result = getattr(result, part)
    return result
class AnsiColorLexer(pygments.lexer.RegexLexer):
    """Pygments lexer for text containing ANSI color codes.

    Adapted from https://github.com/chriskuehl/pygments-ansi-color

    The lexer keeps the current bold/faint/foreground/background state on the
    instance and emits every run of text with a token derived from that
    state (see ``current_token``).
    """

    name = "ANSI Color"
    aliases = ("myst-ansi",)
    flags = re.DOTALL | re.MULTILINE

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.reset_state()

    def reset_state(self):
        """Clear every graphics attribute (bold, faint and both colors)."""
        self.bold = False
        self.faint = False
        self.fg_color = None
        self.bg_color = None

    @property
    def current_token(self):
        """The token that represents the current graphics state."""
        return _token_from_lexer_state(
            self.bold,
            self.faint,
            self.fg_color,
            self.bg_color,
        )

    def _apply_graphics_mode(self, values):
        """Update the lexer state from the integer parameters of an "m" code.

        :param values: the parameters of one "Set Graphics Mode" sequence,
            in order
        """
        # An iterator lets the extended-color branch consume the parameters
        # that belong to it, so they are not seen by the loop again.
        params = iter(values)
        for value in params:
            fg_color = _ansi_code_to_color.get(value - 30)
            bg_color = _ansi_code_to_color.get(value - 40)
            if fg_color:
                self.fg_color = fg_color
            elif bg_color:
                self.bg_color = bg_color
            elif value == 1:
                self.bold = True
            elif value == 2:
                self.faint = True
            elif value == 22:
                self.bold = False
                self.faint = False
            elif value == 39:
                self.fg_color = None
            elif value == 49:
                self.bg_color = None
            elif value == 0:
                self.reset_state()
            elif value in (38, 48):
                # Extended color.  "38;5;<n>" / "48;5;<n>" select one of 256
                # indexed colors; "38;2;<r>;<g>;<b>" / "48;2;..." select a
                # direct RGB color.
                mode = next(params, None)
                color = next(params, None)
                if mode == 2:
                    # There is no token for RGB colors, so the sequence is
                    # ignored -- but the green and blue components must still
                    # be consumed, otherwise they would be interpreted as
                    # independent codes (e.g. 0 would reset the state and 1
                    # would enable bold).
                    next(params, None)
                    next(params, None)
                    continue
                if mode != 5 or color is None or not 0 <= color <= 255:
                    # Truncated, unknown or out-of-range: ignore.
                    continue
                if value == 38:
                    self.fg_color = f"C{color}"
                else:
                    self.bg_color = f"C{color}"

    def process(self, match):
        """Produce the next token and bit of text.

        Interprets the ANSI code (which may be a color code or some other
        code), changing the lexer state and producing a new token. If it's not
        a color code, we just strip it out and move on.

        Some useful reference for ANSI codes:
          * http://ascii-table.com/ansi-escape-sequences.php

        :param match: regex match whose group 1 holds everything after the
            ``ESC [`` introducer, up to the next escape character
        :return: yields one ``(position, token, text)`` tuple
        """
        # "after_escape" contains everything after the start of the escape
        # sequence, up to the next escape sequence. We still need to separate
        # the content from the end of the escape sequence.
        after_escape = match.group(1)

        # TODO: this doesn't handle the case where the values are non-numeric.
        # This is rare but can happen for keyboard remapping, e.g.
        # '\x1b[0;59;"A"p'
        parsed = re.match(
            r"([0-9;=]*?)?([a-zA-Z])(.*)$",
            after_escape,
            re.DOTALL | re.MULTILINE,
        )
        if parsed is None:
            # This shouldn't ever happen if we're given valid text + ANSI, but
            # people can provide us with utter junk, and we should tolerate it.
            text = after_escape
        else:
            raw_params, code, text = parsed.groups()
            if code == "m":  # "m" is "Set Graphics Mode"
                # Special case \x1b[m is a reset code
                if raw_params == "":
                    self.reset_state()
                else:
                    try:
                        values = [int(v) for v in raw_params.split(";")]
                    except ValueError:
                        # Shouldn't ever happen, but could with invalid ANSI.
                        values = []
                    self._apply_graphics_mode(values)

        yield match.start(), self.current_token, text

    tokens = {
        "root": [(r"\x1b\[([^\x1b]*)", process), (r"[^\x1b]+", pygments.token.Text)],
    }