Skip to content

Commit 02ceb80

Browse files
collapse consecutive wildcards in glob to regex translation
1 parent 24854a8 commit 02ceb80

6 files changed

Lines changed: 62 additions & 6 deletions

File tree

dulwich/attrs.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -124,18 +124,22 @@ def _translate_pattern(pattern: bytes) -> bytes:
124124
i += 1
125125

126126
if c == b"*":
127-
if i < n and pattern[i : i + 1] == b"*":
128-
# Double asterisk
127+
# Consume the whole run of '*' so consecutive stars collapse to
128+
# a single wildcard. Emitting one quantifier per star produces
129+
# adjacent unbounded quantifiers (e.g. '.*.*.*') that backtrack
130+
# catastrophically on non-matching input (ReDoS); collapsing is
131+
# also semantically correct since repeated '*' are redundant.
132+
double = i < n and pattern[i : i + 1] == b"*"
133+
while i < n and pattern[i : i + 1] == b"*":
129134
i += 1
135+
if double:
136+
# Double asterisk - matches across directory separators
130137
if i < n and pattern[i : i + 1] == b"/":
131138
# **/ - match zero or more directories
132139
res += b"(?:.*/)??"
133140
i += 1
134-
elif i == n:
135-
# ** at end - match everything
136-
res += b".*"
137141
else:
138-
# ** in middle
142+
# ** at end or in the middle
139143
res += b".*"
140144
else:
141145
# Single * - match any character except /

dulwich/config.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,11 @@ def match_glob_pattern(value: str, pattern: str) -> bool:
153153
Raises:
154154
ValueError: If the pattern is invalid
155155
"""
156+
# Collapse a run of consecutive '*' to at most '**'. Repeated stars are
157+
# redundant, and translating each into its own quantifier would emit
158+
# adjacent unbounded quantifiers (e.g. '.*.*') that backtrack
159+
# catastrophically on non-matching input (ReDoS).
160+
pattern = re.sub(r"\*\*+", "**", pattern)
156161
# Convert glob pattern to regex
157162
pattern_escaped = re.escape(pattern)
158163
# Replace escaped \*\* with .* (match anything)

dulwich/ignore.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,13 @@ def _translate_segment(segment: bytes) -> bytes:
167167
c = segment[i : i + 1]
168168
i += 1
169169
if c == b"*":
170+
# Collapse a run of consecutive '*' into a single quantifier.
171+
# Within a segment repeated '*' are redundant ([^/]*[^/]* is
172+
# equivalent to [^/]*), and emitting one quantifier per star
173+
# builds a regex with adjacent unbounded quantifiers that
174+
# backtracks catastrophically on non-matching input (ReDoS).
175+
while i < n and segment[i : i + 1] == b"*":
176+
i += 1
170177
res += b"[^/]*"
171178
elif c == b"?":
172179
res += b"[^/]"

tests/test_attrs.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,15 @@ def test_leading_slash(self):
267267
self.assertTrue(pattern.match(b"/README.txt"))
268268
self.assertFalse(pattern.match(b"src/README.txt"))
269269

270+
def test_consecutive_stars_no_redos(self):
271+
"""A run of '*' must not produce a catastrophically backtracking regex."""
272+
# A run of 50 '*' is treated like '**' and collapses to one '.*' ...
273+
pattern = Pattern(b"*" * 50 + b"x")
274+
self.assertFalse(pattern.match(b"a" * 80))
275+
# ... and a run of '**' collapses to a single cross-slash wildcard.
276+
pattern = Pattern(b"**" * 25 + b"x")
277+
self.assertFalse(pattern.match(b"a" * 80))
278+
270279

271280
class MatchPathTests(TestCase):
272281
"""Test the match_path function."""

tests/test_config.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
apply_instead_of,
4242
env_config,
4343
get_git_proxy_command,
44+
match_glob_pattern,
4445
parse_submodules,
4546
read_submodules,
4647
)
@@ -1584,3 +1585,21 @@ def test_set_replaces_value(self) -> None:
15841585
self.assertEqual(1, content.count(b"sshCommand"))
15851586
self.assertIn(b"sshCommand = ssh -i ~/.ssh/id_rsa2", content)
15861587
self.assertNotIn(b"id_rsa1", content)
1588+
1589+
1590+
class MatchGlobPatternTests(TestCase):
1591+
"""Tests for match_glob_pattern."""
1592+
1593+
def test_single_star(self) -> None:
1594+
self.assertTrue(match_glob_pattern("foo", "*"))
1595+
self.assertFalse(match_glob_pattern("a/b", "*"))
1596+
1597+
def test_double_star(self) -> None:
1598+
self.assertTrue(match_glob_pattern("a/b", "**"))
1599+
self.assertTrue(match_glob_pattern("a/x/b", "a**b"))
1600+
1601+
def test_consecutive_stars_no_redos(self) -> None:
1602+
# A run of '*' collapses to a single quantifier so the regex does
1603+
# not contain adjacent unbounded quantifiers that backtrack
1604+
# exponentially on non-matching input.
1605+
self.assertFalse(match_glob_pattern("a" * 80, "*" * 50 + "x"))

tests/test_ignore.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,18 @@ def test_translate(self) -> None:
102102
f"orig pattern: {pattern!r}, regex: {translate(pattern)!r}, expected: {regex!r}",
103103
)
104104

105+
def test_collapses_consecutive_stars(self) -> None:
106+
# A run of '*' in a segment is redundant and must collapse to a
107+
# single quantifier so the regex does not contain adjacent
108+
# unbounded quantifiers that backtrack catastrophically.
109+
self.assertEqual(b"(?ms)(.*/)?[^/]*x/?\\Z", translate(b"****x"))
110+
111+
def test_consecutive_stars_no_redos(self) -> None:
112+
# A pattern of many '*' matched against a long non-matching path
113+
# used to backtrack exponentially; it must now return promptly.
114+
pattern = Pattern(b"*" * 50 + b"x", False)
115+
self.assertFalse(pattern.match(b"a" * 80))
116+
105117

106118
class ReadIgnorePatterns(TestCase):
107119
def test_read_file(self) -> None:

0 commit comments

Comments
 (0)