Skip to content

Commit 5a57a9d

Browse files
Python: Fix formatter bugs and add comprehensive format tests (#6727)
* Split auto_format_battle_test.py into 3 files by test category Organize 70 formatter battle tests into separate files matching the project's one-file-per-concern pattern: - auto_format_idempotency_test.py (30 tests) - auto_format_corrections_test.py (20 tests) - auto_format_edge_cases_test.py (20 tests) * Python: Fix empty diff detection and eliminate spurious AST identity changes Add empty diff detection to the test framework so that recipes which modify the AST without changing printed output are caught as failures. Fix six root causes of empty diffs in the formatting pipeline: - Parser: place whitespace on inner expression instead of ControlParentheses prefix for if/while/match (Python has no actual parens, so SpacesVisitor was moving whitespace between equivalent positions) - Parser: consume whitespace into BoolOp prefix and MultiImport container.before - NormalizeFormat: early-return from _concatenate_prefix when prefix is empty to avoid creating new list objects - SpacesVisitor: skip before_colon on return type TypeHints (already handled by visit_method_declaration) and skip space removal on ExpressionTypeTree inside ClassDeclaration (already handled by visit_class_declaration) - PythonVisitor: guard visit_space(binary.negation) with None check since non-negated operators (in, is) have no negation space - StatementExpression.replace: use replace_if_changed instead of always calling dataclass_replace - utils: _is_changed uses equality for primitives, identity for complex objects Remove 12 xfail markers from tests that now pass. * Python: Fix 6 formatter bugs in indentation, whitespace, and spacing visitors - Fix try/except/else indentation by adding visit_trailing_else_wrapper to TabsAndIndentsVisitor - Fix backslash line continuation stripping in RemoveTrailingWhitespaceVisitor - Fix delete statement trailing whitespace by using pad_last=False in parser - Fix multiline class bases losing indentation via ClassDeclaration CONTINUATION_INDENT - Fix decorator closing paren over-indentation by treating Annotation like MethodInvocation - Fix Binary continuation indent inside Parentheses with visit_binary/visit_parentheses overrides - Fix empty diff in assignment with parenthesized RHS by removing redundant space update * Python: Fix continuation indentation and string concatenation spacing Fix 3 remaining formatter bugs for parenthesized expressions: - Add _compute_column_of() to align Binary continuation lines to the opening content column inside parentheses - Detect method chains (via select type and cursor path) to use continuation indent instead of column alignment for chained calls - Remove incorrect space injection for implicit string concatenation (StringConcatenation has no visible operator)
1 parent c69bd6f commit 5a57a9d

14 files changed

Lines changed: 1808 additions & 42 deletions

rewrite-python/rewrite/src/rewrite/python/_parser_visitor.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -511,7 +511,7 @@ def visit_Delete(self, node):
511511
random_id(),
512512
self.__source_before('del'),
513513
Markers.EMPTY,
514-
[self.__pad_list_element(self.__convert(e), last=i == len(node.targets) - 1) for i, e in
514+
[self.__pad_list_element(self.__convert(e), last=i == len(node.targets) - 1, pad_last=False) for i, e in
515515
enumerate(node.targets)]
516516
)
517517

@@ -614,7 +614,7 @@ def visit_While(self, node):
614614
Markers.EMPTY,
615615
j.ControlParentheses(
616616
random_id(),
617-
self.__whitespace(),
617+
Space.EMPTY,
618618
Markers.EMPTY,
619619
self.__pad_right(self.__convert(node.test), Space.EMPTY)
620620
),
@@ -638,7 +638,7 @@ def visit_If(self, node):
638638
prefix = self.__source_before('elif')
639639
else:
640640
prefix = self.__source_before('if')
641-
condition = j.ControlParentheses(random_id(), self.__whitespace(), Markers.EMPTY,
641+
condition = j.ControlParentheses(random_id(), Space.EMPTY, Markers.EMPTY,
642642
self.__pad_right(self.__convert(node.test), Space.EMPTY))
643643
then = self.__pad_right(self.__convert_block(node.body), Space.EMPTY)
644644
elze = None
@@ -790,14 +790,16 @@ def visit_Try(self, node):
790790

791791
def visit_Import(self, node):
792792
# TODO only use `MultiImport` when necessary (requires corresponding changes to printer)
793+
prefix = self.__source_before('import')
794+
names_prefix = self.__whitespace()
793795
return py.MultiImport(
794796
random_id(),
795-
self.__source_before('import'),
797+
prefix,
796798
Markers.EMPTY,
797799
None,
798800
False,
799801
JContainer(
800-
Space.EMPTY,
802+
names_prefix,
801803
[self.__pad_list_element(self.__convert(n), i == len(node.names) - 1, pad_last=False) for i, n in
802804
enumerate(node.names)],
803805
Markers.EMPTY
@@ -1125,7 +1127,7 @@ def visit_Match(self, node):
11251127
Markers.EMPTY,
11261128
j.ControlParentheses(
11271129
random_id(),
1128-
self.__whitespace(),
1130+
Space.EMPTY,
11291131
Markers.EMPTY,
11301132
self.__pad_right(self.__convert(node.subject), Space.EMPTY)
11311133
),
@@ -1697,6 +1699,7 @@ def visit_BinOp(self, node):
16971699
)
16981700

16991701
def visit_BoolOp(self, node):
1702+
prefix = self.__whitespace()
17001703
left = self.__convert(node.values[0])
17011704
for right_expr in node.values[1:]:
17021705
left = j.Binary(
@@ -1708,7 +1711,7 @@ def visit_BoolOp(self, node):
17081711
self.__convert(right_expr),
17091712
self._type_mapping.type(node)
17101713
)
1711-
return left
1714+
return left.replace(prefix=prefix)
17121715

17131716
def visit_Call(self, node):
17141717
prefix = self.__whitespace()

rewrite-python/rewrite/src/rewrite/python/format/normalize_format.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,9 @@ def _common_margin(s1, s2):
6363

6464

6565
def _concatenate_prefix(j: J, prefix: Space) -> J2:
66+
if prefix.is_empty():
67+
return j
68+
6669
shift = _common_margin(None, j.prefix.whitespace)
6770

6871
def modify_comment(c: PyComment) -> PyComment:

rewrite-python/rewrite/src/rewrite/python/format/remove_trailing_whitespace_visitor.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ def visit_marker(self, marker: Marker, p: P) -> Marker:
3737

3838
@staticmethod
3939
def _normalize_whitespace(s):
40+
if '\\' in s.whitespace:
41+
return s
4042
last_newline = s.whitespace.rfind('\n')
4143
if last_newline > 0:
4244
ws = [c for i, c in enumerate(s.whitespace) if i >= last_newline or c in {'\r', '\n'}]

rewrite-python/rewrite/src/rewrite/python/format/spaces_visitor.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -279,8 +279,6 @@ def visit_python_binary(self, binary: Binary, p: P) -> J:
279279
b = self._apply_binary_space_around(b, True)
280280
elif op in [Binary.Type.FloorDivision, Binary.Type.MatrixMultiplication]:
281281
b = self._apply_binary_space_around(b, self._style.around_operators.multiplicative)
282-
elif op == Binary.Type.StringConcatenation:
283-
b = self._apply_binary_space_around(b, self._style.around_operators.additive)
284282
elif op == Binary.Type.Power:
285283
b = self._apply_binary_space_around(b, self._style.around_operators.power)
286284

@@ -366,7 +364,6 @@ def _process_element(index, arg, last, use_space):
366364

367365
def visit_parentheses(self, parentheses: Parentheses, p: P) -> J:
368366
p2 = cast(Parentheses, super().visit_parentheses(parentheses, p))
369-
p2 = p2.replace(prefix=update_space(p2.prefix, False))
370367
p2 = p2.padding.replace(tree=p2.padding.tree.replace(after=update_space(p2.padding.tree.after, False)))
371368
return p2
372369

@@ -467,13 +464,19 @@ def visit_import(self, import_: Import, p: P) -> J:
467464

468465
def visit_type_hint(self, type_hint: TypeHint, p: P) -> J:
469466
th: TypeHint = cast(TypeHint, super().visit_type_hint(type_hint, p))
470-
th = space_before(th, self._style.other.before_colon)
467+
# Don't apply before_colon to return type annotations (handled by visit_method_declaration)
468+
parent = self.cursor.parent_tree_cursor()
469+
if not (parent and isinstance(parent.value, MethodDeclaration)):
470+
th = space_before(th, self._style.other.before_colon)
471471
th = th.replace(type_tree=space_before(th.type_tree, self._style.other.after_colon))
472472
return th
473473

474474
def visit_expression_type_tree(self, expression_type_tree: ExpressionTypeTree, p: P) -> J:
475475
ett = cast(ExpressionTypeTree, super().visit_expression_type_tree(expression_type_tree, p))
476-
ett = space_before(ett, False)
476+
# Don't remove space when inside ClassDeclaration (handled by visit_class_declaration)
477+
parent = self.cursor.parent_tree_cursor()
478+
if not (parent and isinstance(parent.value, ClassDeclaration)):
479+
ett = space_before(ett, False)
477480
return ett
478481

479482
def visit_comprehension_expression(self, comprehension_expression: ComprehensionExpression, p: P) -> J:

rewrite-python/rewrite/src/rewrite/python/format/tabs_and_indents_visitor.py

Lines changed: 169 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from typing import TypeVar, Optional, cast, List
77

88
from rewrite.java import tree as j
9-
from rewrite.java.support_types import J, Space, Comment
9+
from rewrite.java.support_types import J, Space, Comment, Statement
1010
from rewrite.java.tree import (
1111
JRightPadded, JLeftPadded, JContainer, Block, ArrayDimension,
1212
ClassDeclaration, Empty, MethodDeclaration, MethodInvocation,
@@ -17,6 +17,7 @@
1717
from rewrite.python import (
1818
PythonVisitor, TabsAndIndentsStyle, DictLiteral, CollectionLiteral,
1919
ExpressionStatement, OtherStyle, IntelliJ, ComprehensionExpression, PyComment,
20+
TrailingElseWrapper, Binary as PyBinary,
2021
)
2122
from rewrite.tree import Tree
2223
from rewrite.visitor import P, T, Cursor
@@ -79,6 +80,11 @@ def pre_visit(self, tree: T, p: P) -> Optional[T]:
7980
if self._other.use_continuation_indent.collections_and_comprehensions else self.IndentType.INDENT)
8081
elif isinstance(tree, ExpressionStatement):
8182
pass
83+
elif isinstance(tree, (Binary, PyBinary)):
84+
if self.cursor.first_enclosing(j.Parentheses) is not None:
85+
self.cursor.put_message("indent_type", self.IndentType.ALIGN)
86+
else:
87+
self.cursor.put_message("indent_type", self.IndentType.INDENT)
8288
elif isinstance(tree, j.Expression):
8389
self.cursor.put_message("indent_type", self.IndentType.INDENT)
8490

@@ -274,7 +280,7 @@ def visit_container(self, container: Optional[JContainer], p: P) -> Optional[JCo
274280
if isinstance(parent_tree, MethodDeclaration):
275281
rp_context = "method_declaration_parameter"
276282
needs_continuation_on_before = True
277-
elif isinstance(parent_tree, MethodInvocation):
283+
elif isinstance(parent_tree, (MethodInvocation, Annotation)):
278284
rp_context = "method_invocation_argument"
279285
elif isinstance(parent_tree, DictLiteral):
280286
rp_context = "dict_literal_element"
@@ -300,10 +306,12 @@ def visit_container(self, container: Optional[JContainer], p: P) -> Optional[JCo
300306
self.cursor.put_message("indent_type",
301307
self.IndentType.CONTINUATION_INDENT if self._other.use_continuation_indent.method_declaration_parameters
302308
else self.IndentType.INDENT)
303-
elif isinstance(parent_tree, MethodInvocation):
309+
elif isinstance(parent_tree, (MethodInvocation, Annotation)):
304310
self.cursor.put_message("indent_type",
305311
self.IndentType.CONTINUATION_INDENT if self._other.use_continuation_indent.method_call_arguments
306312
else self.IndentType.INDENT)
313+
elif isinstance(parent_tree, ClassDeclaration):
314+
self.cursor.put_message("indent_type", self.IndentType.CONTINUATION_INDENT)
307315
before = self.visit_space(container.before, p)
308316
js = list_map(lambda t: self.visit_right_padded(t, p), container.padding.elements)
309317

@@ -370,6 +378,26 @@ def visit_try(self, try_: j.Try, p: P) -> J:
370378

371379
return try_
372380

381+
def visit_trailing_else_wrapper(self, wrapper: TrailingElseWrapper, p: P) -> J:
382+
wrapper = wrapper.replace(prefix=self.visit_space(wrapper.prefix, p))
383+
384+
temp_stmt = cast(Statement, self.visit_statement(wrapper, p))
385+
if not isinstance(temp_stmt, type(wrapper)):
386+
return temp_stmt
387+
wrapper = temp_stmt
388+
389+
wrapper = wrapper.replace(markers=self.visit_markers(wrapper.markers, p))
390+
wrapper = wrapper.replace(
391+
statement=self.visit_and_cast(wrapper.statement, Statement, p)
392+
)
393+
394+
self.cursor.put_message("space_context", "else_prefix")
395+
wrapper = wrapper.padding.replace(
396+
else_block=self.visit_left_padded(wrapper.padding.else_block, p)
397+
)
398+
self.cursor.put_message("space_context", None)
399+
return wrapper
400+
373401
def visit_else(self, else_: j.If.Else, p: P) -> J:
374402
self.cursor.put_message("space_context", "else_prefix")
375403
else_ = else_.replace(prefix=self.visit_space(else_.prefix, p))
@@ -429,14 +457,149 @@ def visit_class_declaration(self, class_decl: ClassDeclaration, p: P) -> J:
429457
class_decl.body, Block, p))
430458
return class_decl
431459

460+
def visit_parentheses(self, parens: j.Parentheses, p: P) -> J:
461+
parens = parens.replace(prefix=self.visit_space(parens.prefix, p))
462+
temp_expr = self.visit_expression(parens, p)
463+
if not isinstance(temp_expr, j.Parentheses):
464+
return temp_expr
465+
parens = temp_expr
466+
parens = parens.replace(markers=self.visit_markers(parens.markers, p))
467+
468+
tree_rp = parens.padding.tree
469+
elem = tree_rp.element
470+
471+
# When a Binary expression starts on the same line as '(', compute
472+
# its column and set last_indent so continuation lines align there
473+
if isinstance(elem, (Binary, PyBinary)) and '\n' not in elem.prefix.last_whitespace:
474+
col = self._compute_column_of(elem)
475+
if col >= 0:
476+
self.cursor.put_message("last_indent", col)
477+
478+
if isinstance(elem, J):
479+
elem = self.visit_and_cast(elem, J, p)
480+
481+
after = tree_rp.after
482+
if '\n' in after.last_whitespace:
483+
# Close paren should align to the enclosing indent level
484+
indent = cast(int, self.cursor.parent_or_throw.get_nearest_message("last_indent")) or 0
485+
after = self._indent_to(after, indent)
486+
else:
487+
after = self.visit_space(after, p)
488+
489+
if elem is not tree_rp.element or after is not tree_rp.after:
490+
parens = parens.padding.replace(tree=tree_rp.replace(element=elem, after=after))
491+
return parens
492+
493+
def visit_binary(self, binary: Binary, p: P) -> J:
494+
if self.cursor.first_enclosing(j.Parentheses) is not None:
495+
# Inside parentheses: operator before spaces align to same indent as first line
496+
binary = binary.replace(prefix=self.visit_space(binary.prefix, p))
497+
temp_expr = self.visit_expression(binary, p)
498+
if not isinstance(temp_expr, Binary):
499+
return temp_expr
500+
binary = temp_expr
501+
binary = binary.replace(markers=self.visit_markers(binary.markers, p))
502+
binary = binary.replace(left=self.visit_and_cast(binary.left, j.Expression, p))
503+
504+
op = binary.padding.operator
505+
if '\n' in op.before.last_whitespace:
506+
indent = cast(int, self.cursor.get_nearest_message("last_indent")) or 0
507+
before = self._indent_to(op.before, indent)
508+
if before is not op.before:
509+
binary = binary.padding.replace(operator=op.replace(before=before))
510+
else:
511+
binary = binary.padding.replace(
512+
operator=self.visit_left_padded(binary.padding.operator, p))
513+
514+
binary = binary.replace(right=self.visit_and_cast(binary.right, j.Expression, p))
515+
return binary
516+
return super().visit_binary(binary, p)
517+
432518
def visit_method_invocation(self, method: MethodInvocation, p: P) -> J:
433519
select = method.padding.select
434520
if select is not None and '\n' in select.after.last_whitespace:
435-
col = self._compute_select_column(method)
436-
if col >= 0:
437-
self.cursor.put_message("method_select_indent", col)
521+
# Skip column alignment for method chains inside parentheses.
522+
# A chain exists when the method's select is itself a
523+
# MethodInvocation, or when there's an enclosing
524+
# MethodInvocation between this node and the Parentheses.
525+
skip = isinstance(select.element, MethodInvocation)
526+
if not skip:
527+
for c in self.cursor.get_path_as_cursors():
528+
v = c.value
529+
if v is method:
530+
continue
531+
if isinstance(v, j.Parentheses):
532+
break
533+
if isinstance(v, MethodInvocation):
534+
skip = True
535+
break
536+
if not skip:
537+
col = self._compute_select_column(method)
538+
if col >= 0:
539+
self.cursor.put_message("method_select_indent", col)
438540
return super().visit_method_invocation(method, p)
439541

542+
def _compute_column_of(self, target: Tree) -> int:
543+
"""Compute the column position of a target tree element by printing from line start."""
544+
from rewrite.python.printer import PythonPrinter, PrintOutputCapture
545+
546+
line_start = None
547+
line_start_cursor = None
548+
for c in self.cursor.get_path_as_cursors():
549+
v = c.value
550+
if isinstance(v, J):
551+
line_start = v
552+
line_start_cursor = c
553+
if '\n' in v.prefix.whitespace:
554+
break
555+
if line_start is None:
556+
return -1
557+
558+
class _ColumnCounter(PrintOutputCapture):
559+
def __init__(self):
560+
super().__init__()
561+
self.col = 0
562+
self.found = False
563+
564+
def append(self, text):
565+
if text and not self.found:
566+
for ch in text:
567+
self.col = 0 if ch == '\n' else self.col + 1
568+
return self
569+
570+
class _Printer(PythonPrinter):
571+
def __init__(self):
572+
super().__init__()
573+
orig_visit = self._delegate.visit
574+
def _check(tree, p):
575+
if tree is target:
576+
p.found = True
577+
return tree
578+
return orig_visit(tree, p) if not p.found else tree
579+
self._delegate.visit = _check
580+
581+
def visit(self, tree, p, parent=None):
582+
if p.found or tree is target:
583+
p.found = True
584+
return tree
585+
return super().visit(tree, p)
586+
587+
counter = _ColumnCounter()
588+
printer = _Printer()
589+
# Build initial cursor with proper J parent so context-dependent
590+
# printing (e.g., = vs :=) works correctly
591+
initial = Cursor(None, Cursor.ROOT_VALUE)
592+
for c in line_start_cursor.get_path_as_cursors():
593+
v = c.value
594+
if v is line_start:
595+
continue
596+
if isinstance(v, J):
597+
initial = Cursor(Cursor(None, Cursor.ROOT_VALUE), v)
598+
break
599+
printer.set_cursor(initial)
600+
printer.visit(line_start, counter)
601+
return counter.col if counter.found else -1
602+
440603
def _compute_select_column(self, method: MethodInvocation) -> int:
441604
from rewrite.python.printer import PythonPrinter, PrintOutputCapture
442605

rewrite-python/rewrite/src/rewrite/python/tree.py

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -612,16 +612,7 @@ def replace(self, **kwargs) -> 'StatementExpression':
612612
if 'markers' in kwargs:
613613
new_statement = kwargs.get('statement', self._statement).replace(markers=kwargs.pop('markers'))
614614
kwargs['statement'] = new_statement
615-
# Map remaining kwargs
616-
mapped = {}
617-
for key, value in kwargs.items():
618-
if not key.startswith('_') and hasattr(self, f'_{key}'):
619-
mapped[f'_{key}'] = value
620-
else:
621-
mapped[key] = value
622-
if not mapped:
623-
return self
624-
return dataclass_replace(self, **mapped)
615+
return replace_if_changed(self, **kwargs)
625616

626617
def accept_python(self, v: PythonVisitor[P], p: P) -> J:
627618
return v.visit_statement_expression(self, p)

rewrite-python/rewrite/src/rewrite/python/visitor.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -131,9 +131,10 @@ def visit_python_binary(self, binary: Binary, p: P) -> J:
131131
binary = binary.padding.replace(
132132
operator=self.visit_left_padded(binary.padding.operator, p)
133133
)
134-
binary = binary.replace(
135-
negation=self.visit_space(binary.negation, p)
136-
)
134+
if binary.negation is not None:
135+
binary = binary.replace(
136+
negation=self.visit_space(binary.negation, p)
137+
)
137138
binary = binary.replace(
138139
right=self.visit_and_cast(binary.right, Expression, p)
139140
)

0 commit comments

Comments
 (0)