|
55 | 55 | "Rb", |
56 | 56 | "RB", |
57 | 57 | } |
# Token types skipped when looking for a string token's syntactic
# neighbours: line breaks and comments carry no expression structure.
_PAREN_IGNORE_TOKEN_TYPES = (
    tokenize.NEWLINE,
    tokenize.NL,
    tokenize.COMMENT,
)
# Triple-quoted string openers, optionally preceded by a literal prefix
# (one of _PREFIXES, e.g. r/b/f combinations).
SINGLE_QUOTED_REGEX = re.compile(f"({'|'.join(_PREFIXES)})?'''")
DOUBLE_QUOTED_REGEX = re.compile(f"({'|'.join(_PREFIXES)})?\"\"\"")
# Any quote delimiter (single or double quote), optionally prefixed.
QUOTE_DELIMITER_REGEX = re.compile(f"({'|'.join(_PREFIXES)})?(\"|')", re.DOTALL)
@@ -710,6 +715,7 @@ def __init__(self, linter: PyLinter) -> None: |
710 | 715 | tuple[int, int], tuple[str, tokenize.TokenInfo | None] |
711 | 716 | ] = {} |
712 | 717 | """Token position -> (token value, next token).""" |
| 718 | + self._parenthesized_string_tokens: dict[tuple[int, int], bool] = {} |
713 | 719 |
|
714 | 720 | def process_module(self, node: nodes.Module) -> None: |
715 | 721 | self._unicode_literals = "unicode_literals" in node.future_imports |
@@ -738,10 +744,62 @@ def process_tokens(self, tokens: list[tokenize.TokenInfo]) -> None: |
738 | 744 | # to match with astroid `.col_offset` |
739 | 745 | start = (start[0], len(line[: start[1]].encode(encoding))) |
740 | 746 | self.string_tokens[start] = (str_eval(token), next_token) |
| 747 | + is_parenthesized = self._is_initial_string_token( |
| 748 | + i, tokens |
| 749 | + ) and self._is_parenthesized(i, tokens) |
| 750 | + self._parenthesized_string_tokens[start] = is_parenthesized |
741 | 751 |
|
742 | 752 | if self.linter.config.check_quote_consistency: |
743 | 753 | self.check_for_consistent_string_delimiters(tokens) |
744 | 754 |
|
| 755 | + def _is_initial_string_token( |
| 756 | + self, index: int, tokens: Sequence[tokenize.TokenInfo] |
| 757 | + ) -> bool: |
| 758 | + # Must NOT be preceded by a string literal |
| 759 | + prev_token = self._find_prev_token(index, tokens) |
| 760 | + if prev_token and prev_token.type == tokenize.STRING: |
| 761 | + return False |
| 762 | + # Must be followed by a string literal token. |
| 763 | + next_token = self._find_next_token(index, tokens) |
| 764 | + return bool(next_token and next_token.type == tokenize.STRING) |
| 765 | + |
| 766 | + def _is_parenthesized(self, index: int, tokens: list[tokenize.TokenInfo]) -> bool: |
| 767 | + prev_token = self._find_prev_token( |
| 768 | + index, tokens, ignore=(*_PAREN_IGNORE_TOKEN_TYPES, tokenize.STRING) |
| 769 | + ) |
| 770 | + if not prev_token or prev_token.type != tokenize.OP or prev_token[1] != "(": |
| 771 | + return False |
| 772 | + next_token = self._find_next_token( |
| 773 | + index, tokens, ignore=(*_PAREN_IGNORE_TOKEN_TYPES, tokenize.STRING) |
| 774 | + ) |
| 775 | + return bool( |
| 776 | + next_token and next_token.type == tokenize.OP and next_token[1] == ")" |
| 777 | + ) |
| 778 | + |
| 779 | + def _find_prev_token( |
| 780 | + self, |
| 781 | + index: int, |
| 782 | + tokens: Sequence[tokenize.TokenInfo], |
| 783 | + *, |
| 784 | + ignore: tuple[int, ...] = _PAREN_IGNORE_TOKEN_TYPES, |
| 785 | + ) -> tokenize.TokenInfo | None: |
| 786 | + i = index - 1 |
| 787 | + while i >= 0 and tokens[i].type in ignore: |
| 788 | + i -= 1 |
| 789 | + return tokens[i] if i >= 0 else None |
| 790 | + |
| 791 | + def _find_next_token( |
| 792 | + self, |
| 793 | + index: int, |
| 794 | + tokens: Sequence[tokenize.TokenInfo], |
| 795 | + *, |
| 796 | + ignore: tuple[int, ...] = _PAREN_IGNORE_TOKEN_TYPES, |
| 797 | + ) -> tokenize.TokenInfo | None: |
| 798 | + i = index + 1 |
| 799 | + while i < len(tokens) and tokens[i].type in ignore: |
| 800 | + i += 1 |
| 801 | + return tokens[i] if i < len(tokens) else None |
| 802 | + |
    @only_required_for_messages("implicit-str-concat")
    def visit_call(self, node: nodes.Call) -> None:
        """Check a call node's argument list for implicit string concatenation."""
        self.check_for_concatenated_strings(node.args, "call")
@@ -815,10 +873,18 @@ def check_for_concatenated_strings( |
815 | 873 | matching_token, next_token = self.string_tokens[token_index] |
816 | 874 | # We detect string concatenation: the AST Const is the |
817 | 875 | # combination of 2 string tokens |
818 | | - if matching_token != elt.value and next_token is not None: |
819 | | - if next_token.type == tokenize.STRING and ( |
820 | | - next_token.start[0] == elt.lineno |
821 | | - or self.linter.config.check_str_concat_over_line_jumps |
| 876 | + if ( |
| 877 | + matching_token != elt.value |
| 878 | + and next_token is not None |
| 879 | + and next_token.type == tokenize.STRING |
| 880 | + ): |
| 881 | + if next_token.start[0] == elt.lineno or ( |
| 882 | + self.linter.config.check_str_concat_over_line_jumps |
| 883 | + # Allow implicitly concatenated strings in parens. |
| 884 | + # See https://github.com/pylint-dev/pylint/issues/8552. |
| 885 | + and not self._parenthesized_string_tokens.get( |
| 886 | + (elt.lineno, elt.col_offset) |
| 887 | + ) |
822 | 888 | ): |
823 | 889 | self.add_message( |
824 | 890 | "implicit-str-concat", |
|
0 commit comments