Skip to content

Commit 802bc72

Browse files
Python: Support t-string templates and parsing (PEP 750) (#6724)
* Python: Support t-string templates (PEP 750, Python 3.14+) Overload template() and pattern() factory functions to accept Python 3.14 t-strings, eliminating redundant keyword arguments when passing captures. A thin conversion layer extracts the code string and captures dict from t-string interpolations, then delegates to the existing constructors. * Python: Fix t-string template support to use actual Python 3.14 Template API The Template iteration protocol uses `for arg in tpl` (via __iter__), not `tpl.args` which was based on a draft API. Update convert_tstring and the MockTemplate test helper accordingly. * Python: Parse t-string syntax (PEP 750, Python 3.14+) Generalize the f-string parser to also handle t-string tokens (TSTRING_START/MIDDLE/END) and AST nodes (TemplateStr, Interpolation). T-strings reuse the existing FormattedString LST node with a t" delimiter, requiring no changes to the printer, RPC, or Java model.
1 parent 8c219bb commit 802bc72

9 files changed

Lines changed: 551 additions & 36 deletions

File tree

rewrite-python/rewrite/src/rewrite/python/_parser_visitor.py

Lines changed: 121 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,15 @@
2727
FSTRING_MIDDLE = getattr(token, 'FSTRING_MIDDLE', -3)
2828
FSTRING_END = getattr(token, 'FSTRING_END', -4)
2929

30+
# T-string token types (Python 3.14+) - define fallbacks for older versions
31+
TSTRING_START = getattr(token, 'TSTRING_START', -5)
32+
TSTRING_MIDDLE = getattr(token, 'TSTRING_MIDDLE', -6)
33+
TSTRING_END = getattr(token, 'TSTRING_END', -7)
34+
35+
# AST types for t-strings (Python 3.14+); sentinels on older versions
36+
_TemplateStr = getattr(ast, 'TemplateStr', type('_TemplateStr', (), {}))
37+
_Interpolation = getattr(ast, 'Interpolation', type('_Interpolation', (), {}))
38+
3039
# Token types to skip when looking for significant (non-whitespace) tokens
3140
_SKIP_TOKEN_TYPES = (token.NL, token.NEWLINE, token.INDENT, token.DEDENT,
3241
token.COMMENT, token.ENCODING, token.ENDMARKER, WHITESPACE_TOKEN)
@@ -118,7 +127,7 @@ def _build_tokens(self, raw_tokens: Iterable[TokenInfo]) -> Tuple[List[TokenInfo
118127
row = 1 # current row (1-based like tokenize)
119128
col = 0 # current column (0-based like tokenize)
120129
in_from_import = False # track if we're between 'from' and 'import' keywords
121-
fstring_depth = 0 # track nested f-string depth
130+
formatted_string_depth = 0 # track nested f-string/t-string depth
122131

123132
for tok in raw_tokens:
124133
# ENCODING token is virtual (doesn't consume source text)
@@ -151,7 +160,7 @@ def _build_tokens(self, raw_tokens: Iterable[TokenInfo]) -> Tuple[List[TokenInfo
151160
if tok_start > prev_end:
152161
ws_text = self._source[prev_end:tok_start]
153162
# Skip single-character brace gaps inside f-strings (escaped {{ or }})
154-
is_escaped_brace = fstring_depth > 0 and ws_text in ('{', '}')
163+
is_escaped_brace = formatted_string_depth > 0 and ws_text in ('{', '}')
155164
if not is_escaped_brace:
156165
ws_tok = TokenInfo(
157166
WHITESPACE_TOKEN,
@@ -162,11 +171,11 @@ def _build_tokens(self, raw_tokens: Iterable[TokenInfo]) -> Tuple[List[TokenInfo
162171
)
163172
result.append(ws_tok)
164173

165-
# Track f-string depth for whitespace injection.
166-
if tok.type == FSTRING_START:
167-
fstring_depth += 1
168-
elif tok.type == FSTRING_END:
169-
fstring_depth -= 1
174+
# Track f-string/t-string depth for whitespace injection.
175+
if tok.type in (FSTRING_START, TSTRING_START):
176+
formatted_string_depth += 1
177+
elif tok.type in (FSTRING_END, TSTRING_END):
178+
formatted_string_depth -= 1
170179

171180
# Track paren pairs
172181
if tok.type == token.OP:
@@ -2361,9 +2370,79 @@ def visit_JoinedStr(self, node):
23612370

23622371
return res
23632372

2373+
def visit_TemplateStr(self, node):
    """Visit a PEP 750 t-string literal (ast.TemplateStr, Python 3.14+).

    Mirrors visit_JoinedStr: maps this template literal — together with any
    implicitly concatenated adjacent plain-string or t-string literals —
    into LST nodes, folding concatenated parts into a chain of py.Binary
    StringConcatenation nodes.  The heavy lifting for each t-string part is
    delegated to __map_fstring, which handles both f- and t-string tokens.
    """
    leading_prefix = self.__whitespace()

    # Advance until the token stream is positioned on the literal itself
    # (either a t-string start token or a plain STRING token).
    tok = self._skip_whitespace_tokens()
    while tok.type not in (TSTRING_START, token.STRING):
        tok = self._advance_token()

    value_idx = 0  # index into node.values consumed so far
    res = None     # accumulated LST result (Binary chain for concatenation)
    is_first = True
    # Loop while we have STRING or TSTRING_START tokens to process
    while True:
        if is_first:
            prefix = leading_prefix
            tok = self._skip_whitespace_tokens()
        else:
            # Peek at next token to check for string concatenation
            save_idx = self._token_idx
            saw_statement_end = False
            while self._token_idx < len(self._tokens):
                peek_tok = self._tokens[self._token_idx]
                if peek_tok.type == token.NEWLINE:
                    # A logical NEWLINE ends the statement, so a following
                    # literal belongs to the next statement, not this one.
                    saw_statement_end = True
                    self._token_idx += 1
                elif peek_tok.type in (token.NL, token.INDENT, token.DEDENT, token.COMMENT,
                                       token.ENCODING, token.ENDMARKER, WHITESPACE_TOKEN):
                    self._token_idx += 1
                else:
                    break
            # NOTE(review): if the peek loop exhausts the token stream without
            # ever assigning peek_tok and saw_statement_end is False, the test
            # below would raise NameError — presumably ENDMARKER guarantees the
            # loop always breaks or sets saw_statement_end; confirm.
            if saw_statement_end or peek_tok.type not in (token.STRING, TSTRING_START):
                self._token_idx = save_idx
                break
            # Rewind so the whitespace before the next literal is captured
            # as that literal's prefix.
            self._token_idx = save_idx
            prefix = self.__whitespace()
            tok = self._skip_whitespace_tokens()

        if tok.type == token.STRING:
            # Plain string part of an implicit concatenation; fall back to a
            # synthesized Constant when node.values is already exhausted.
            ast_value = node.values[value_idx] if value_idx < len(node.values) else ast.Constant(value=ast.literal_eval(tok.string))
            current, tok = self.__map_literal(ast_value, tok)
            current = current.replace(prefix=prefix)
            # Only advance value_idx when the mapped literal actually matches
            # the AST constant (guards against specifier/merged-value cases).
            if value_idx < len(node.values) and isinstance(node.values[value_idx], ast.Constant):
                expected_value = cast(ast.Constant, node.values[value_idx]).value
                if isinstance(expected_value, str) and current.value == expected_value:
                    value_idx += 1
        elif tok.type == TSTRING_START:
            # __map_fstring auto-detects t-string token types from `tok`.
            current, tok, value_idx = self.__map_fstring(node, prefix, tok, value_idx)
        else:
            break

        if res is None:
            res = current
        else:
            # Fold the new part into a left-leaning StringConcatenation chain.
            res = py.Binary(
                random_id(),
                Space.EMPTY,
                Markers.EMPTY,
                res,
                self.__pad_left(Space.EMPTY, py.Binary.Type.StringConcatenation),
                None,
                current,
                self._type_mapping.type(node)
            )

        is_first = False

    return res
2439+
23642440
def visit_FormattedValue(self, node):
    """Guard: FormattedValue nodes are consumed inline by __map_fstring,
    so a direct visit indicates a dispatch bug upstream."""
    raise ValueError("This method should not be called directly")
23662442

2443+
def visit_Interpolation(self, node):
    """Guard: t-string Interpolation nodes are consumed inline by
    __map_fstring, so a direct visit indicates a dispatch bug upstream."""
    raise ValueError("This method should not be called directly")
2445+
23672446
def visit_Lambda(self, node):
23682447
return j.Lambda(
23692448
random_id(),
@@ -3238,13 +3317,23 @@ def __map_fstring_as_literal(self, node: ast.JoinedStr, leading_prefix: Space, t
32383317
is_first = False
32393318
return res
32403319

3241-
def __map_fstring(self, node: ast.JoinedStr, prefix: Space, tok: TokenInfo, value_idx: int = 0) -> \
3320+
def __map_fstring(self, node, prefix: Space, tok: TokenInfo, value_idx: int = 0, *,
3321+
_start=None, _middle=None, _end=None) -> \
32423322
Tuple[J, TokenInfo, int]:
3243-
"""Map an f-string to a FormattedString AST node.
3323+
"""Map an f-string or t-string to a FormattedString AST node.
32443324
32453325
Uses _token_idx directly to iterate through tokens.
3326+
Token type parameters (_start/_middle/_end) are auto-detected from the
3327+
current token when not provided, allowing this method to handle both
3328+
f-strings and t-strings.
32463329
"""
3247-
if tok.type != FSTRING_START:
3330+
if _start is None:
3331+
if tok.type == TSTRING_START:
3332+
_start, _middle, _end = TSTRING_START, TSTRING_MIDDLE, TSTRING_END
3333+
else:
3334+
_start, _middle, _end = FSTRING_START, FSTRING_MIDDLE, FSTRING_END
3335+
3336+
if tok.type != _start:
32483337
if len(node.values) == 1 and isinstance(node.values[0], ast.Constant):
32493338
# format specifiers are stored as f-strings in the AST; e.g. `f'{1:n}'`
32503339
format_val = node.values[0].value
@@ -3267,13 +3356,13 @@ def __map_fstring(self, node: ast.JoinedStr, prefix: Space, tok: TokenInfo, valu
32673356
consume_end_delim = False
32683357
else:
32693358
delimiter = tok.string
3270-
tok = self._advance_token() # consume FSTRING_START, get next
3359+
tok = self._advance_token() # consume start token, get next
32713360
consume_end_delim = True
32723361

3273-
# tokenizer tokens: FSTRING_START, FSTRING_MIDDLE, OP, ..., OP, FSTRING_MIDDLE, FSTRING_END
3362+
# tokenizer tokens: START, MIDDLE, OP, ..., OP, MIDDLE, END
32743363
parts = []
32753364
prev_token_idx = -1
3276-
while tok.type != FSTRING_END and value_idx < len(node.values):
3365+
while tok.type != _end and value_idx < len(node.values):
32773366
# Safety check: ensure loop is making progress
32783367
if self._token_idx == prev_token_idx:
32793368
raise RuntimeError(
@@ -3289,11 +3378,11 @@ def __map_fstring(self, node: ast.JoinedStr, prefix: Space, tok: TokenInfo, valu
32893378
continue
32903379

32913380
value = node.values[value_idx]
3292-
if tok.type == FSTRING_MIDDLE:
3293-
# Accumulate text from consecutive FSTRING_MIDDLE tokens
3381+
if tok.type == _middle:
3382+
# Accumulate text from consecutive MIDDLE tokens
32943383
s = tok.string
3295-
tok = self._advance_token() # consume first FSTRING_MIDDLE, get next
3296-
while tok.type == FSTRING_MIDDLE:
3384+
tok = self._advance_token() # consume first MIDDLE, get next
3385+
while tok.type == _middle:
32973386
s += tok.string
32983387
tok = self._advance_token() # consume and get next
32993388
# For value_source, escape braces so the printer outputs them correctly
@@ -3315,23 +3404,24 @@ def __map_fstring(self, node: ast.JoinedStr, prefix: Space, tok: TokenInfo, valu
33153404
value_idx += 1
33163405
elif tok.type == token.OP and tok.string == '{':
33173406
tok = self._advance_token() # consume '{', get next
3318-
if not isinstance(value, ast.FormattedValue):
3407+
if not isinstance(value, (ast.FormattedValue, _Interpolation)):
33193408
# this is the case when using the `=` "debug specifier"
33203409
value_idx += 1
33213410
value = node.values[value_idx]
33223411

3323-
if isinstance(cast(ast.FormattedValue, value).value, ast.JoinedStr):
3324-
joined = cast(ast.JoinedStr, cast(ast.FormattedValue, value).value)
3412+
value_inner = value.value if isinstance(value, (ast.FormattedValue, _Interpolation)) else None
3413+
if isinstance(value_inner, (ast.JoinedStr, _TemplateStr)):
3414+
joined = value_inner
33253415
nested, tok, inner_vi = self.__map_fstring(joined, Space.EMPTY, tok)
33263416

33273417
# Handle concatenated f-strings/strings within this expression
33283418
while True:
33293419
peek_tok, _ = self._peek_significant_token()
3330-
if peek_tok.type not in (FSTRING_START, token.STRING):
3420+
if peek_tok.type not in (FSTRING_START, TSTRING_START, token.STRING):
33313421
break
33323422
concat_prefix = self.__whitespace()
33333423
tok = self._tokens[self._token_idx]
3334-
if tok.type == FSTRING_START:
3424+
if tok.type in (FSTRING_START, TSTRING_START):
33353425
right, tok, inner_vi = self.__map_fstring(joined, concat_prefix, tok, inner_vi)
33363426
else:
33373427
ast_val = (joined.values[inner_vi]
@@ -3350,18 +3440,18 @@ def __map_fstring(self, node: ast.JoinedStr, prefix: Space, tok: TokenInfo, valu
33503440
expr = self.__pad_right(nested, Space.EMPTY)
33513441
else:
33523442
expr = self.__pad_right(
3353-
self.__convert(cast(ast.FormattedValue, value).value),
3443+
self.__convert(value_inner),
33543444
self.__whitespace()
33553445
)
33563446

33573447
# Scan for specifiers (debug, conversion, format) - applies to both nested f-string and regular expressions
3358-
while self._token_idx < len(self._tokens) and self._tokens[self._token_idx].type not in (FSTRING_END, FSTRING_MIDDLE):
3448+
while self._token_idx < len(self._tokens) and self._tokens[self._token_idx].type not in (_end, _middle):
33593449
tok = self._next_token() # get current and advance (we need to examine current token)
33603450
if tok.type == token.OP and tok.string in ('!'):
33613451
break
33623452
la_tok = self._tokens[self._token_idx]
33633453
if tok.type == token.OP and tok.string == '}' and (
3364-
la_tok.type in (FSTRING_END, FSTRING_MIDDLE) or (
3454+
la_tok.type in (_end, _middle) or (
33653455
la_tok.type == token.OP and la_tok.string == '{')):
33663456
break
33673457
# Debug specifier '=' - break regardless of what follows (whitespace is valid after '=')
@@ -3396,7 +3486,8 @@ def __map_fstring(self, node: ast.JoinedStr, prefix: Space, tok: TokenInfo, valu
33963486
if conv is not None:
33973487
self._token_idx += 1 # advance past ':' (only needed after conversion)
33983488
format_spec, tok, _ = self.__map_fstring(
3399-
cast(ast.JoinedStr, cast(ast.FormattedValue, value).format_spec), Space.EMPTY, self._tokens[self._token_idx])
3489+
cast(ast.JoinedStr, value.format_spec), Space.EMPTY, self._tokens[self._token_idx],
3490+
_start=_start, _middle=_middle, _end=_end)
34003491
else:
34013492
format_spec = None
34023493

@@ -3415,13 +3506,13 @@ def __map_fstring(self, node: ast.JoinedStr, prefix: Space, tok: TokenInfo, valu
34153506
if (format_spec is not None or conv is not None or debug is not None) and self._tokens[self._token_idx].string == '}':
34163507
self._token_idx += 1
34173508
tok = self._tokens[self._token_idx]
3418-
elif tok.type == FSTRING_END:
3509+
elif tok.type == _end:
34193510
raise NotImplementedError("Unsupported: String concatenation with f-strings")
34203511

34213512
if consume_end_delim:
3422-
tok = self._advance_token() # consume FSTRING_END, get next
3423-
elif tok.type == FSTRING_MIDDLE and len(tok.string) == 0:
3424-
tok = self._advance_token() # consume empty FSTRING_MIDDLE, get next
3513+
tok = self._advance_token() # consume end token, get next
3514+
elif tok.type == _middle and len(tok.string) == 0:
3515+
tok = self._advance_token() # consume empty MIDDLE token, get next
34253516

34263517
return (py.FormattedString(
34273518
random_id(),
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
# Copyright 2025 the original author or authors.
2+
# <p>
3+
# Licensed under the Moderne Source Available License (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
# <p>
7+
# https://docs.moderne.io/licensing/moderne-source-available-license
8+
# <p>
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""Support for Python 3.14+ t-string templates."""
16+
17+
from __future__ import annotations
18+
19+
from typing import Any, Dict, Tuple
20+
21+
from .capture import Capture, RawCode
22+
23+
_TemplateType = None
24+
25+
26+
def _get_template_type():
27+
global _TemplateType
28+
if _TemplateType is None:
29+
try:
30+
from string.templatelib import Template
31+
_TemplateType = Template
32+
except (ImportError, ModuleNotFoundError):
33+
_TemplateType = type('_NoMatch', (), {}) # sentinel: never matches
34+
return _TemplateType
35+
36+
37+
def is_tstring(obj: Any) -> bool:
    """Return True when *obj* is a PEP 750 template (Python 3.14+ t-string).

    On interpreters without ``string.templatelib`` the cached type is a
    sentinel that nothing can match, so this always returns False there.
    """
    template_cls = _get_template_type()
    return isinstance(obj, template_cls)
43+
44+
45+
def convert_tstring(tpl: Any) -> Tuple[str, Dict[str, Capture]]:
    """Convert a t-string Template into a (code, captures) tuple.

    Iterates the template's parts: plain strings are appended to the code
    verbatim, Capture interpolations are rendered as ``{name}`` placeholders
    (and recorded in the captures mapping), and RawCode interpolations are
    spliced directly into the code string.

    NOTE(review): an interpolation's ``conversion`` and ``format_spec`` are
    ignored here — presumably meaningless for Capture/RawCode values; confirm
    against the template()/pattern() factory contract.

    Raises:
        TypeError: If an interpolation is not a Capture or RawCode.
    """
    code_fragments: list[str] = []
    captured: Dict[str, Capture] = {}

    for part in tpl:
        if isinstance(part, str):
            # Static template text passes through unchanged.
            code_fragments.append(part)
            continue
        if not hasattr(part, 'value'):
            raise TypeError(
                f"Unexpected t-string component: {type(part).__name__}"
            )
        # Interpolation object: has .value, .expression, .conversion, .format_spec
        payload = part.value
        if isinstance(payload, Capture):
            code_fragments.append('{' + payload.name + '}')
            captured[payload.name] = payload
        elif isinstance(payload, RawCode):
            code_fragments.append(payload.code)
        else:
            raise TypeError(
                f"t-string interpolations must be Capture or RawCode instances, "
                f"got {type(payload).__name__}: {payload!r}"
            )

    return ''.join(code_fragments), captured

0 commit comments

Comments
 (0)