Skip to content

Commit 6551438

Browse files
[parser] Fix indentation tracking after line continuations (#23417)
## Summary fixes: #19301 ## Test Plan Add new lexer and parser test cases. --------- Co-authored-by: Kartik Ganapathi <kartikganapathi@icloud.com>
1 parent e03748c commit 6551438

9 files changed

Lines changed: 569 additions & 1 deletion
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
if True:
2+
1
3+
\
4+
2
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
if True:
2+
\
3+
1
4+
\
5+
2
6+
else:\
7+
3

crates/ruff_python_parser/src/lexer.rs

Lines changed: 96 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,35 @@ impl<'src> Lexer<'src> {
263263
self.token_range(),
264264
)));
265265
}
266-
indentation = Indentation::root();
266+
// test_ok backslash_continuation_indentation
267+
// if True:
268+
// \
269+
// 1
270+
// \
271+
// 2
272+
// else:\
273+
// 3
274+
275+
// test_err backslash_continuation_indentation_error
276+
// if True:
277+
// 1
278+
// \
279+
// 2
280+
281+
// > Indentation cannot be split over multiple physical lines using backslashes;
282+
// > the whitespace up to the first backslash determines the indentation.
283+
// >
284+
// > https://docs.python.org/3/reference/lexical_analysis.html#indentation
285+
//
286+
// Skip whitespace after the line continuation without accumulating it into
287+
// `indentation`. However, if the backslash is at column 0 (no prior
288+
// indentation), let the loop continue so the next line's whitespace is
289+
// accumulated normally.
290+
//
291+
// See also: https://github.com/python/cpython/issues/90249
292+
if indentation != Indentation::root() {
293+
self.cursor.eat_while(is_python_whitespace);
294+
}
267295
}
268296
// Form feed
269297
'\x0C' => {
@@ -3061,4 +3089,71 @@ t"{(lambda x:{x})}"
30613089
UnterminatedTripleQuotedString
30623090
);
30633091
}
3092+
3093+
#[test]
3094+
fn backslash_continuation_indentation() {
3095+
// The first `\` has 4 spaces before it which matches the indentation level at that point,
3096+
// so the whitespace before `2` is irrelevant and shouldn't produce an indentation error.
3097+
// Similarly, the second `\` is also at the same indentation level, so the `3` line is also
3098+
// valid.
3099+
let source = r"if True:
3100+
1
3101+
\
3102+
2
3103+
\
3104+
3
3105+
else:
3106+
pass
3107+
"
3108+
.to_string();
3109+
assert_snapshot!(lex_source(&source));
3110+
}
3111+
3112+
#[test]
3113+
fn backslash_continuation_at_root() {
3114+
// But it's different when the backslash character itself is at the root indentation
3115+
// level. Then, the whitespace following it determines the indentation level of the next
3116+
// line, so `1` is indented with 4 spaces and `2` is indented with 8 spaces, and `3` is
3117+
// indented with 4 spaces, all of which are valid.
3118+
let source = r"if True:
3119+
\
3120+
1
3121+
if True:
3122+
\
3123+
2
3124+
else:\
3125+
3
3126+
"
3127+
.to_string();
3128+
assert_snapshot!(lex_source(&source));
3129+
}
3130+
3131+
#[test]
3132+
fn multiple_backslash_continuation() {
3133+
// It's only the first backslash character that determines the indentation level of the next
3134+
// line, so all the lines after the first `\` are indented with 4 spaces, and the remaining
3135+
// backslashes are just ignored and don't affect the indentation level.
3136+
let source = r"if True:
3137+
1
3138+
\
3139+
\
3140+
\
3141+
\
3142+
2
3143+
"
3144+
.to_string();
3145+
assert_snapshot!(lex_source(&source));
3146+
}
3147+
3148+
#[test]
3149+
fn backslash_continuation_mismatch_indentation() {
3150+
// Indentation doesn't match any previous indentation level
3151+
let source = r"if True:
3152+
1
3153+
\
3154+
2
3155+
"
3156+
.to_string();
3157+
assert_snapshot!(lex_invalid(&source, Mode::Module));
3158+
}
30643159
}
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
---
2+
source: crates/ruff_python_parser/src/lexer.rs
3+
expression: lex_source(&source)
4+
---
5+
## Tokens
6+
```
7+
[
8+
(
9+
If,
10+
0..2,
11+
),
12+
(
13+
True,
14+
3..7,
15+
),
16+
(
17+
Colon,
18+
7..8,
19+
),
20+
(
21+
Newline,
22+
8..9,
23+
),
24+
(
25+
Indent,
26+
9..15,
27+
),
28+
(
29+
Int(
30+
1,
31+
),
32+
15..16,
33+
),
34+
(
35+
Newline,
36+
16..17,
37+
),
38+
(
39+
If,
40+
21..23,
41+
),
42+
(
43+
True,
44+
24..28,
45+
),
46+
(
47+
Colon,
48+
28..29,
49+
),
50+
(
51+
Newline,
52+
29..30,
53+
),
54+
(
55+
Indent,
56+
30..40,
57+
),
58+
(
59+
Int(
60+
2,
61+
),
62+
40..41,
63+
),
64+
(
65+
Newline,
66+
41..42,
67+
),
68+
(
69+
Dedent,
70+
42..42,
71+
),
72+
(
73+
Dedent,
74+
42..42,
75+
),
76+
(
77+
Else,
78+
42..46,
79+
),
80+
(
81+
Colon,
82+
46..47,
83+
),
84+
(
85+
Int(
86+
3,
87+
),
88+
53..54,
89+
),
90+
(
91+
Newline,
92+
54..55,
93+
),
94+
]
95+
```
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
---
2+
source: crates/ruff_python_parser/src/lexer.rs
3+
expression: lex_source(&source)
4+
---
5+
## Tokens
6+
```
7+
[
8+
(
9+
If,
10+
0..2,
11+
),
12+
(
13+
True,
14+
3..7,
15+
),
16+
(
17+
Colon,
18+
7..8,
19+
),
20+
(
21+
Newline,
22+
8..9,
23+
),
24+
(
25+
Indent,
26+
9..13,
27+
),
28+
(
29+
Int(
30+
1,
31+
),
32+
13..14,
33+
),
34+
(
35+
Newline,
36+
14..15,
37+
),
38+
(
39+
Int(
40+
2,
41+
),
42+
29..30,
43+
),
44+
(
45+
Newline,
46+
30..31,
47+
),
48+
(
49+
Int(
50+
3,
51+
),
52+
37..38,
53+
),
54+
(
55+
Newline,
56+
38..39,
57+
),
58+
(
59+
Dedent,
60+
39..39,
61+
),
62+
(
63+
Else,
64+
39..43,
65+
),
66+
(
67+
Colon,
68+
43..44,
69+
),
70+
(
71+
Newline,
72+
44..45,
73+
),
74+
(
75+
Indent,
76+
45..49,
77+
),
78+
(
79+
Pass,
80+
49..53,
81+
),
82+
(
83+
Newline,
84+
53..54,
85+
),
86+
(
87+
Dedent,
88+
54..54,
89+
),
90+
]
91+
```
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
---
2+
source: crates/ruff_python_parser/src/lexer.rs
3+
expression: "lex_invalid(&source, Mode::Module)"
4+
---
5+
## Tokens
6+
```
7+
[
8+
(
9+
If,
10+
0..2,
11+
),
12+
(
13+
True,
14+
3..7,
15+
),
16+
(
17+
Colon,
18+
7..8,
19+
),
20+
(
21+
Newline,
22+
8..9,
23+
),
24+
(
25+
Indent,
26+
9..13,
27+
),
28+
(
29+
Int(
30+
1,
31+
),
32+
13..14,
33+
),
34+
(
35+
Newline,
36+
14..15,
37+
),
38+
(
39+
Unknown,
40+
15..23,
41+
),
42+
(
43+
Int(
44+
2,
45+
),
46+
23..24,
47+
),
48+
(
49+
Newline,
50+
24..25,
51+
),
52+
]
53+
```
54+
## Errors
55+
```
56+
[
57+
LexicalError {
58+
error: IndentationError,
59+
location: 15..23,
60+
},
61+
]
62+
```

0 commit comments

Comments
 (0)