Skip to content

Commit f837428

Browse files
[flake8-simplify] Implementation for split-of-static-string (SIM905) (#14008)
## Summary Closes #13944 ## Test Plan Standard snapshot testing flake8-simplify surprisingly only has a single test case --------- Co-authored-by: Charlie Marsh <charlie.r.marsh@gmail.com>
1 parent 0925513 commit f837428

9 files changed

Lines changed: 1185 additions & 5 deletions

File tree

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
# setup
2+
sep = ","
3+
no_sep = None
4+
5+
# positives
6+
"""
7+
itemA
8+
itemB
9+
itemC
10+
""".split()
11+
12+
"a,b,c,d".split(",")
13+
"a,b,c,d".split(None)
14+
"a,b,c,d".split(",", 1)
15+
"a,b,c,d".split(None, 1)
16+
"a,b,c,d".split(sep=",")
17+
"a,b,c,d".split(sep=None)
18+
"a,b,c,d".split(sep=",", maxsplit=1)
19+
"a,b,c,d".split(sep=None, maxsplit=1)
20+
"a,b,c,d".split(maxsplit=1, sep=",")
21+
"a,b,c,d".split(maxsplit=1, sep=None)
22+
"a,b,c,d".split(",", maxsplit=1)
23+
"a,b,c,d".split(None, maxsplit=1)
24+
"a,b,c,d".split(maxsplit=1)
25+
"a,b,c,d".split(maxsplit=1.0)
26+
"a,b,c,d".split(maxsplit=1)
27+
"a,b,c,d".split(maxsplit=0)
28+
"VERB AUX PRON ADP DET".split(" ")
29+
' 1 2 3 '.split()
30+
'1<>2<>3<4'.split('<>')
31+
32+
" a*a a*a a ".split("*", -1) # [' a', 'a a', 'a a ']
33+
"".split() # []
34+
"""
35+
""".split() # []
36+
" ".split() # []
37+
"/abc/".split() # ['/abc/']
38+
("a,b,c"
39+
# comment
40+
.split()
41+
) # ['a,b,c']
42+
("a,b,c"
43+
# comment1
44+
.split(",")
45+
) # ['a', 'b', 'c']
46+
("a,"
47+
# comment
48+
"b,"
49+
"c"
50+
.split(",")
51+
) # ['a', 'b', 'c']
52+
53+
"hello "\
54+
"world".split()
55+
# ['hello', 'world']
56+
57+
# prefixes and isc
58+
u"a b".split() # ['a', 'b']
59+
r"a \n b".split() # ['a', '\\n', 'b']
60+
("a " "b").split() # ['a', 'b']
61+
"a " "b".split() # ['a', 'b']
62+
u"a " "b".split() # ['a', 'b']
63+
"a " u"b".split() # ['a', 'b']
64+
u"a " r"\n".split() # ['a', '\\n']
65+
r"\n " u"\n".split() # ['\\n']
66+
r"\n " "\n".split() # ['\\n']
67+
"a " r"\n".split() # ['a', '\\n']
68+
69+
"a,b,c".split(',', maxsplit=0) # ['a,b,c']
70+
"a,b,c".split(',', maxsplit=-1) # ['a', 'b', 'c']
71+
"a,b,c".split(',', maxsplit=-2) # ['a', 'b', 'c']
72+
"a,b,c".split(',', maxsplit=-0) # ['a,b,c']
73+
74+
# negatives
75+
76+
# invalid values should not cause panic
77+
"a,b,c,d".split(maxsplit="hello")
78+
"a,b,c,d".split(maxsplit=-"hello")
79+
80+
# variable names not implemented
81+
"a,b,c,d".split(sep)
82+
"a,b,c,d".split(no_sep)
83+
for n in range(3):
84+
"a,b,c,d".split(",", maxsplit=n)
85+
86+
# f-strings not yet implemented
87+
world = "world"
88+
_ = f"{world}_hello_world".split("_")
89+
90+
hello = "hello_world"
91+
_ = f"{hello}_world".split("_")
92+
93+
# split on bytes not yet implemented, much less frequent
94+
b"TesT.WwW.ExamplE.CoM".split(b".")
95+
96+
# str.splitlines not yet implemented
97+
"hello\nworld".splitlines()
98+
"hello\nworld".splitlines(keepends=True)
99+
"hello\nworld".splitlines(keepends=False)

crates/ruff_linter/src/checkers/ast/analyze/expression.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,8 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) {
388388
Rule::StaticJoinToFString,
389389
// refurb
390390
Rule::HashlibDigestHex,
391+
// flake8-simplify
392+
Rule::SplitStaticString,
391393
]) {
392394
if let Expr::Attribute(ast::ExprAttribute { value, attr, .. }) = func.as_ref() {
393395
let attr = attr.as_str();
@@ -405,6 +407,16 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) {
405407
string_value.to_str(),
406408
);
407409
}
410+
} else if matches!(attr, "split" | "rsplit") {
411+
// "...".split(...) call
412+
if checker.enabled(Rule::SplitStaticString) {
413+
flake8_simplify::rules::split_static_string(
414+
checker,
415+
attr,
416+
call,
417+
string_value.to_str(),
418+
);
419+
}
408420
} else if attr == "format" {
409421
// "...".format(...) call
410422
let location = expr.range();

crates/ruff_linter/src/codes.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -480,6 +480,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> {
480480
(Flake8Simplify, "223") => (RuleGroup::Stable, rules::flake8_simplify::rules::ExprAndFalse),
481481
(Flake8Simplify, "300") => (RuleGroup::Stable, rules::flake8_simplify::rules::YodaConditions),
482482
(Flake8Simplify, "401") => (RuleGroup::Stable, rules::flake8_simplify::rules::IfElseBlockInsteadOfDictGet),
483+
(Flake8Simplify, "905") => (RuleGroup::Preview, rules::flake8_simplify::rules::SplitStaticString),
483484
(Flake8Simplify, "910") => (RuleGroup::Stable, rules::flake8_simplify::rules::DictGetWithNoneDefault),
484485
(Flake8Simplify, "911") => (RuleGroup::Stable, rules::flake8_simplify::rules::ZipDictKeysAndValues),
485486

crates/ruff_linter/src/rules/flake8_simplify/mod.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,9 @@ mod tests {
2929
#[test_case(Rule::ReimplementedBuiltin, Path::new("SIM111.py"))]
3030
#[test_case(Rule::UncapitalizedEnvironmentVariables, Path::new("SIM112.py"))]
3131
#[test_case(Rule::EnumerateForLoop, Path::new("SIM113.py"))]
32+
#[test_case(Rule::IfWithSameArms, Path::new("SIM114.py"))]
3233
#[test_case(Rule::OpenFileWithContextHandler, Path::new("SIM115.py"))]
34+
#[test_case(Rule::IfElseBlockInsteadOfDictLookup, Path::new("SIM116.py"))]
3335
#[test_case(Rule::MultipleWithStatements, Path::new("SIM117.py"))]
3436
#[test_case(Rule::InDictKeys, Path::new("SIM118.py"))]
3537
#[test_case(Rule::NegateEqualOp, Path::new("SIM201.py"))]
@@ -44,9 +46,8 @@ mod tests {
4446
#[test_case(Rule::ExprAndFalse, Path::new("SIM223.py"))]
4547
#[test_case(Rule::YodaConditions, Path::new("SIM300.py"))]
4648
#[test_case(Rule::IfElseBlockInsteadOfDictGet, Path::new("SIM401.py"))]
49+
#[test_case(Rule::SplitStaticString, Path::new("SIM905.py"))]
4750
#[test_case(Rule::DictGetWithNoneDefault, Path::new("SIM910.py"))]
48-
#[test_case(Rule::IfElseBlockInsteadOfDictLookup, Path::new("SIM116.py"))]
49-
#[test_case(Rule::IfWithSameArms, Path::new("SIM114.py"))]
5051
fn rules(rule_code: Rule, path: &Path) -> Result<()> {
5152
let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy());
5253
let diagnostics = test_path(

crates/ruff_linter/src/rules/flake8_simplify/rules/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ pub(crate) use needless_bool::*;
1414
pub(crate) use open_file_with_context_handler::*;
1515
pub(crate) use reimplemented_builtin::*;
1616
pub(crate) use return_in_try_except_finally::*;
17+
pub(crate) use split_static_string::*;
1718
pub(crate) use suppressible_exception::*;
1819
pub(crate) use yoda_conditions::*;
1920
pub(crate) use zip_dict_keys_and_values::*;
@@ -35,6 +36,7 @@ mod needless_bool;
3536
mod open_file_with_context_handler;
3637
mod reimplemented_builtin;
3738
mod return_in_try_except_finally;
39+
mod split_static_string;
3840
mod suppressible_exception;
3941
mod yoda_conditions;
4042
mod zip_dict_keys_and_values;
Lines changed: 219 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
use std::cmp::Ordering;
2+
3+
use ruff_diagnostics::{Applicability, Diagnostic, Edit, Fix, FixAvailability, Violation};
4+
use ruff_macros::{derive_message_formats, violation};
5+
use ruff_python_ast::{
6+
Expr, ExprCall, ExprContext, ExprList, ExprStringLiteral, ExprUnaryOp, StringLiteral,
7+
StringLiteralFlags, StringLiteralValue, UnaryOp,
8+
};
9+
use ruff_text_size::{Ranged, TextRange};
10+
11+
use crate::checkers::ast::Checker;
12+
13+
/// ## What it does
14+
/// Checks for static `str.split` calls that can be replaced with list literals.
15+
///
16+
/// ## Why is this bad?
17+
/// List literals are more readable and do not require the overhead of calling `str.split`.
18+
///
19+
/// ## Example
20+
/// ```python
21+
/// "a,b,c,d".split(",")
22+
/// ```
23+
///
24+
/// Use instead:
25+
/// ```python
26+
/// ["a", "b", "c", "d"]
27+
/// ```
28+
///
29+
/// ## Fix safety
30+
/// This rule's fix is marked as unsafe for implicit string concatenations with comments interleaved
31+
/// between segments, as comments may be removed.
32+
///
33+
/// For example, the fix would be marked as unsafe in the following case:
34+
/// ```python
35+
/// (
36+
/// "a" # comment
37+
/// "," # comment
38+
/// "b" # comment
39+
/// ).split(",")
40+
/// ```
41+
///
42+
/// ## References
43+
/// - [Python documentation: `str.split`](https://docs.python.org/3/library/stdtypes.html#str.split)
44+
/// ```
45+
#[violation]
46+
pub struct SplitStaticString;
47+
48+
impl Violation for SplitStaticString {
49+
const FIX_AVAILABILITY: FixAvailability = FixAvailability::Sometimes;
50+
51+
#[derive_message_formats]
52+
fn message(&self) -> String {
53+
format!("Consider using a list literal instead of `str.split`")
54+
}
55+
56+
fn fix_title(&self) -> Option<String> {
57+
Some("Replace with list literal".to_string())
58+
}
59+
}
60+
61+
/// SIM905
62+
pub(crate) fn split_static_string(
63+
checker: &mut Checker,
64+
attr: &str,
65+
call: &ExprCall,
66+
str_value: &str,
67+
) {
68+
let ExprCall { arguments, .. } = call;
69+
70+
let maxsplit_arg = arguments.find_argument("maxsplit", 1);
71+
let Some(maxsplit_value) = get_maxsplit_value(maxsplit_arg) else {
72+
return;
73+
};
74+
75+
// `split` vs `rsplit`.
76+
let direction = if attr == "split" {
77+
Direction::Left
78+
} else {
79+
Direction::Right
80+
};
81+
82+
let sep_arg = arguments.find_argument("sep", 0);
83+
let split_replacement = if let Some(sep) = sep_arg {
84+
match sep {
85+
Expr::NoneLiteral(_) => split_default(str_value, maxsplit_value),
86+
Expr::StringLiteral(sep_value) => {
87+
let sep_value_str = sep_value.value.to_str();
88+
Some(split_sep(
89+
str_value,
90+
sep_value_str,
91+
maxsplit_value,
92+
direction,
93+
))
94+
}
95+
// Ignore names until type inference is available.
96+
_ => {
97+
return;
98+
}
99+
}
100+
} else {
101+
split_default(str_value, maxsplit_value)
102+
};
103+
104+
let mut diagnostic = Diagnostic::new(SplitStaticString, call.range());
105+
if let Some(ref replacement_expr) = split_replacement {
106+
diagnostic.set_fix(Fix::applicable_edit(
107+
Edit::range_replacement(checker.generator().expr(replacement_expr), call.range()),
108+
// The fix does not preserve comments within implicit string concatenations.
109+
if checker.comment_ranges().intersects(call.range()) {
110+
Applicability::Unsafe
111+
} else {
112+
Applicability::Safe
113+
},
114+
));
115+
}
116+
checker.diagnostics.push(diagnostic);
117+
}
118+
119+
fn construct_replacement(elts: &[&str]) -> Expr {
120+
Expr::List(ExprList {
121+
elts: elts
122+
.iter()
123+
.map(|elt| {
124+
Expr::StringLiteral(ExprStringLiteral {
125+
value: StringLiteralValue::single(StringLiteral {
126+
value: (*elt).to_string().into_boxed_str(),
127+
range: TextRange::default(),
128+
flags: StringLiteralFlags::default(),
129+
}),
130+
range: TextRange::default(),
131+
})
132+
})
133+
.collect(),
134+
ctx: ExprContext::Load,
135+
range: TextRange::default(),
136+
})
137+
}
138+
139+
fn split_default(str_value: &str, max_split: i32) -> Option<Expr> {
140+
// From the Python documentation:
141+
// > If sep is not specified or is None, a different splitting algorithm is applied: runs of
142+
// > consecutive whitespace are regarded as a single separator, and the result will contain
143+
// > no empty strings at the start or end if the string has leading or trailing whitespace.
144+
// > Consequently, splitting an empty string or a string consisting of just whitespace with
145+
// > a None separator returns [].
146+
// https://docs.python.org/3/library/stdtypes.html#str.split
147+
match max_split.cmp(&0) {
148+
Ordering::Greater => {
149+
// Autofix for `maxsplit` without separator not yet implemented, as
150+
// `split_whitespace().remainder()` is not stable:
151+
// https://doc.rust-lang.org/std/str/struct.SplitWhitespace.html#method.remainder
152+
None
153+
}
154+
Ordering::Equal => {
155+
let list_items: Vec<&str> = vec![str_value];
156+
Some(construct_replacement(&list_items))
157+
}
158+
Ordering::Less => {
159+
let list_items: Vec<&str> = str_value.split_whitespace().collect();
160+
Some(construct_replacement(&list_items))
161+
}
162+
}
163+
}
164+
165+
fn split_sep(str_value: &str, sep_value: &str, max_split: i32, direction: Direction) -> Expr {
166+
let list_items: Vec<&str> = if let Ok(split_n) = usize::try_from(max_split) {
167+
match direction {
168+
Direction::Left => str_value.splitn(split_n + 1, sep_value).collect(),
169+
Direction::Right => str_value.rsplitn(split_n + 1, sep_value).collect(),
170+
}
171+
} else {
172+
match direction {
173+
Direction::Left => str_value.split(sep_value).collect(),
174+
Direction::Right => str_value.rsplit(sep_value).collect(),
175+
}
176+
};
177+
178+
construct_replacement(&list_items)
179+
}
180+
181+
/// Returns the value of the `maxsplit` argument as an `i32`, if it is a numeric value.
182+
fn get_maxsplit_value(arg: Option<&Expr>) -> Option<i32> {
183+
if let Some(maxsplit) = arg {
184+
match maxsplit {
185+
// Negative number.
186+
Expr::UnaryOp(ExprUnaryOp {
187+
op: UnaryOp::USub,
188+
operand,
189+
..
190+
}) => {
191+
match &**operand {
192+
Expr::NumberLiteral(maxsplit_val) => maxsplit_val
193+
.value
194+
.as_int()
195+
.and_then(ruff_python_ast::Int::as_i32)
196+
.map(|f| -f),
197+
// Ignore when `maxsplit` is not a numeric value.
198+
_ => None,
199+
}
200+
}
201+
// Positive number
202+
Expr::NumberLiteral(maxsplit_val) => maxsplit_val
203+
.value
204+
.as_int()
205+
.and_then(ruff_python_ast::Int::as_i32),
206+
// Ignore when `maxsplit` is not a numeric value.
207+
_ => None,
208+
}
209+
} else {
210+
// Default value is -1 (no splits).
211+
Some(-1)
212+
}
213+
}
214+
215+
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
216+
enum Direction {
217+
Left,
218+
Right,
219+
}

0 commit comments

Comments
 (0)