Skip to content

Commit 3e942f7

Browse files
committed
Add URL.joinpath(*elements, encoded=False) to build a URL with multiple new path elements
This is analogous to `Path(...).joinpath(...)`, except that it will not accept elements that start with `/`.
1 parent 15d4617 commit 3e942f7

File tree

5 files changed

+190
-39
lines changed

5 files changed

+190
-39
lines changed

CHANGES/704.feature.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Added ``URL.joinpath(*elements, encoded=False)`` to create a new URL by appending multiple path elements.

docs/api.rst

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -817,6 +817,30 @@ The path is encoded if needed.
817817
>>> url
818818
URL('http://example.com/path/%D1%81%D1%8E%D0%B4%D0%B0')
819819

820+
.. method:: URL.joinpath(*other, encoded=False)
821+
822+
Construct a new URL with all ``other`` elements appended to
823+
*path*, and cleaned up *query* and *fragment* parts.
824+
825+
Passing the ``encoded=True`` parameter prevents path element auto-encoding; the caller is
826+
responsible for taking care of URL correctness.
827+
828+
.. doctest::
829+
830+
>>> url = URL('http://example.com/path?arg#frag').joinpath('to', 'subpath')
831+
>>> url
832+
URL('http://example.com/path/to/subpath')
833+
>>> url.parts
834+
('/', 'path', 'to', 'subpath')
835+
>>> url = URL('http://example.com/path?arg#frag').joinpath('сюда')
836+
>>> url
837+
URL('http://example.com/path/%D1%81%D1%8E%D0%B4%D0%B0')
838+
>>> url = URL('http://example.com/path').joinpath('%D1%81%D1%8E%D0%B4%D0%B0', encoded=True)
839+
>>> url
840+
URL('http://example.com/path/%D1%81%D1%8E%D0%B4%D0%B0')
841+
842+
.. versionadded:: 1.9
843+
820844
.. method:: URL.join(url)
821845

822846
Construct a full (“absolute”) URL by combining a “base URL”

tests/test_url.py

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -767,6 +767,111 @@ def test_div_with_dots():
767767
assert url.raw_path == "/path/to"
768768

769769

770+
# joinpath
771+
772+
773+
@pytest.mark.parametrize(
    ("base", "to_join", "expected"),
    [
        pytest.param("", ("path", "to"), "http://example.com/path/to", id="root"),
        pytest.param(
            "/", ("path", "to"), "http://example.com/path/to", id="root-with-slash"
        ),
        pytest.param("/path", ("to",), "http://example.com/path/to", id="path"),
        pytest.param(
            "/path/", ("to",), "http://example.com/path/to", id="path-with-slash"
        ),
        pytest.param(
            "/path?a=1#frag",
            ("to",),
            "http://example.com/path/to",
            id="cleanup-query-and-fragment",
        ),
    ],
)
def test_joinpath(base, to_join, expected):
    """Joining elements onto an absolute URL yields the expected string.

    ``base`` is the path (optionally with query and fragment) placed after
    the host; the last case checks that query and fragment are dropped.
    """
    joined = URL("http://example.com" + base).joinpath(*to_join)
    assert str(joined) == expected
795+
796+
797+
@pytest.mark.parametrize(
    "url,to_join,expected",
    [
        pytest.param(URL(), ("a",), ("a",), id="empty-url"),
        pytest.param(URL("a"), ("b",), ("a", "b"), id="relative-path"),
        pytest.param(URL("a"), ("b", "", "c"), ("a", "b", "c"), id="empty-element"),
        # ``("b",)`` is a real 1-tuple; a bare ``("b")`` is just the string
        # "b" and would only work because it unpacks to a single character.
        pytest.param(URL("/a"), ("b",), ("/", "a", "b"), id="absolute-path"),
    ],
)
def test_joinpath_relative(url, to_join, expected):
    """Joining onto URLs without a host yields the expected raw parts.

    Covers an empty URL, a relative path, an empty element in the middle of
    the joined elements, and a path starting with a slash.
    """
    assert url.joinpath(*to_join).raw_parts == expected
808+
809+
810+
@pytest.mark.parametrize(
    ("url", "to_join", "encoded", "e_path", "e_raw_path", "e_parts", "e_raw_parts"),
    [
        # Non-ASCII input is percent-encoded in the raw views.
        pytest.param(
            "http://example.com/сюда",
            ("туда",),
            False,
            "/сюда/туда",
            "/%D1%81%D1%8E%D0%B4%D0%B0/%D1%82%D1%83%D0%B4%D0%B0",
            ("/", "сюда", "туда"),
            ("/", "%D1%81%D1%8E%D0%B4%D0%B0", "%D1%82%D1%83%D0%B4%D0%B0"),
            id="non-ascii",
        ),
        # With encoded=False a literal "%" is itself escaped.
        pytest.param(
            "http://example.com/path",
            ("%cf%80",),
            False,
            "/path/%cf%80",
            "/path/%25cf%2580",
            ("/", "path", "%cf%80"),
            ("/", "path", "%25cf%2580"),
            id="percent-encoded",
        ),
        # With encoded=True the element is taken verbatim.
        pytest.param(
            "http://example.com/path",
            ("%cf%80",),
            True,
            "/path/π",
            "/path/%cf%80",
            ("/", "path", "π"),
            ("/", "path", "%cf%80"),
            id="encoded-percent-encoded",
        ),
    ],
)
def test_joinpath_encoding(
    url, to_join, encoded, e_path, e_raw_path, e_parts, e_raw_parts
):
    """Check decoded and raw path views of a joined URL for both ``encoded`` modes."""
    result = URL(url).joinpath(*to_join, encoded=encoded)
    assert result.path == e_path
    assert result.raw_path == e_raw_path
    assert result.parts == e_parts
    assert result.raw_parts == e_raw_parts
853+
854+
855+
@pytest.mark.parametrize(
    ("to_join", "expected"),
    [
        pytest.param(("path:abc@123",), "/base/path:abc@123", id="with-colon-and-at"),
        pytest.param(("..", "path", ".", "to"), "/path/to", id="with-dots"),
    ],
)
def test_joinpath_edgecases(to_join, expected):
    """Elements containing ':' and '@', or '.' and '..', produce the expected raw path."""
    base = URL("http://example.com/base")
    assert base.joinpath(*to_join).raw_path == expected
865+
866+
867+
def test_joinpath_path_starting_from_slash_is_forbidden():
    """An element that begins with a slash must be rejected with ValueError."""
    url = URL("http://example.com/path/")
    with pytest.raises(
        ValueError, match="Appending path .* starting from slash is forbidden"
    ):
        # No ``assert`` here: the call itself has to raise, so its return
        # value is never available to be checked.
        url.joinpath("/to/others")
873+
874+
770875
# with_path
771876

772877

yarl/__init__.pyi

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ class URL:
9696
def with_name(self, name: str) -> URL: ...
9797
def with_suffix(self, suffix: str) -> URL: ...
9898
def join(self, url: URL) -> URL: ...
99+
def joinpath(self, *url: str, encoded: bool = ...) -> URL: ...
99100
def human_repr(self) -> str: ...
100101
# private API
101102
@classmethod

yarl/_url.py

Lines changed: 59 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,30 @@ def __set__(self, inst, value):
5252
raise AttributeError("cached property is read-only")
5353

5454

55+
def _normalize_path_segments(segments):
    """Resolve "." and ".." entries in a sequence of str path segments.

    "." entries are dropped and each ".." removes the segment collected just
    before it.  Returns a new list; ``segments`` is not modified.
    """
    stack = []

    for part in segments:
        if part == ".":
            continue
        if part != "..":
            stack.append(part)
        elif stack:
            # A ".." with nothing left to remove is ignored rather than
            # treated as an error.
            stack.pop()

    # A path whose last segment is "." or ".." refers to a directory, so an
    # empty final segment is added to keep the trailing "/" once joined.
    if segments and segments[-1] in (".", ".."):
        stack.append("")

    return stack
77+
78+
5579
@rewrite_module
5680
class URL:
5781
# Don't derive from str
@@ -316,25 +340,7 @@ def __gt__(self, other):
316340
return self._val > other._val
317341

318342
def __truediv__(self, name):
319-
name = self._PATH_QUOTER(name)
320-
if name.startswith("/"):
321-
raise ValueError(
322-
f"Appending path {name!r} starting from slash is forbidden"
323-
)
324-
path = self._val.path
325-
if path == "/":
326-
new_path = "/" + name
327-
elif not path and not self.is_absolute():
328-
new_path = name
329-
else:
330-
parts = path.rstrip("/").split("/")
331-
parts.append(name)
332-
new_path = "/".join(parts)
333-
if self.is_absolute():
334-
new_path = self._normalize_path(new_path)
335-
return URL(
336-
self._val._replace(path=new_path, query="", fragment=""), encoded=True
337-
)
343+
return self._make_child((name,))
338344

339345
def __mod__(self, query):
340346
return self.update_query(query)
@@ -702,9 +708,37 @@ def _validate_authority_uri_abs_path(host, path):
702708
"Path in a URL with authority should start with a slash ('/') if set"
703709
)
704710

711+
def _make_child(self, segments, encoded=False):
    """Return a new URL with ``segments`` appended to the current path.

    The query and fragment of the current URL are dropped.

    ``segments`` is a sequence of str path elements.  Empty elements and
    "." are skipped, and an element containing "/" is split into its
    non-empty sub-segments.  Unless ``encoded`` is true, each element is
    passed through ``self._PATH_QUOTER`` first.

    Raises ValueError if an element starts with "/".
    """
    parsed = []
    # Elements are walked back to front and the result reversed afterwards,
    # so the last offending element is the one reported on error.
    for seg in reversed(segments):
        if not seg:
            continue
        if seg[0] == "/":
            raise ValueError(
                f"Appending path {seg!r} starting from slash is forbidden"
            )
        seg = seg if encoded else self._PATH_QUOTER(seg)
        if "/" in seg:
            parsed += (
                sub for sub in reversed(seg.split("/")) if sub and sub != "."
            )
        elif seg != ".":
            parsed.append(seg)
    parsed.reverse()
    old_path = self._val.path
    if old_path:
        parsed = [*old_path.rstrip("/").split("/"), *parsed]
    if self.is_absolute():
        parsed = _normalize_path_segments(parsed)
        if parsed and parsed[0] != "":
            # The leading "" segment is what renders as the root "/".  It is
            # missing when the stored path was empty, or when a ".." removed
            # it during normalization; restore it so the path of an absolute
            # URL always starts with a slash.
            parsed = ["", *parsed]
    new_path = "/".join(parsed)
    return URL(
        self._val._replace(path=new_path, query="", fragment=""), encoded=True
    )
738+
705739
@classmethod
706740
def _normalize_path(cls, path):
707-
# Drop '.' and '..' from path
741+
# Drop '.' and '..' from str path
708742

709743
prefix = ""
710744
if path.startswith("/"):
@@ -714,25 +748,7 @@ def _normalize_path(cls, path):
714748
path = path[1:]
715749

716750
segments = path.split("/")
717-
resolved_path = []
718-
719-
for seg in segments:
720-
if seg == "..":
721-
# ignore any .. segments that would otherwise cause an
722-
# IndexError when popped from resolved_path if
723-
# resolving for rfc3986
724-
with suppress(IndexError):
725-
resolved_path.pop()
726-
elif seg != ".":
727-
resolved_path.append(seg)
728-
729-
if segments and segments[-1] in (".", ".."):
730-
# do some post-processing here.
731-
# if the last segment was a relative dir,
732-
# then we need to append the trailing '/'
733-
resolved_path.append("")
734-
735-
return prefix + "/".join(resolved_path)
751+
return prefix + "/".join(_normalize_path_segments(segments))
736752

737753
@classmethod
738754
def _encode_host(cls, host, human=False):
@@ -1086,6 +1102,10 @@ def join(self, url):
10861102
raise TypeError("url should be URL")
10871103
return URL(urljoin(str(self), str(url)), encoded=True)
10881104

1105+
def joinpath(self, *other, encoded=False):
    """Build a new URL by appending every element of ``other`` to the path.

    ``encoded`` is forwarded unchanged to ``_make_child``, which does the
    actual work.
    """
    child = self._make_child(other, encoded=encoded)
    return child
1108+
10891109
def human_repr(self):
10901110
"""Return decoded human readable string for URL representation."""
10911111
user = _human_quote(self.user, "#/:?@")

0 commit comments

Comments
 (0)