Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,15 @@
* Verify that an object retrieved by id actually hashes to the requested
id, raising ``ChecksumMismatch`` otherwise. (Jelmer Vernooij, #2223)

* Check out files whose names contain a colon or backslash. The NTFS path
validator rejected any element containing ``:`` or ``\``, so such files
were silently dropped on clone. It now rejects only the ``.git``/``git~1``
alternate-data-stream spellings, like git. (Jelmer Vernooij, #2205)

* Abort the checkout when a tree entry has an invalid path (e.g. a ``.git``
alias) instead of silently skipping it.
(Jelmer Vernooij, #2205)

* Reject pack names containing path separators in the dumb HTTP transport,
so a malicious server can no longer escape the temporary directory.
(Jelmer Vernooij, #2213)
Expand Down
78 changes: 56 additions & 22 deletions dulwich/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@
GitProtocolError,
NotGitRepository,
)
from .index import Index
from .index import Index, InvalidPathError
from .log_utils import _configure_logging_from_trace
from .objects import Commit, ObjectID, RawObjectID, sha_to_hex, valid_hexsha
from .objectspec import parse_commit_range
Expand Down Expand Up @@ -2072,7 +2072,7 @@ def run(self, args: Sequence[str]) -> None:
class cmd_clone(Command):
"""Clone a repository into a new directory."""

def run(self, args: Sequence[str]) -> None:
def run(self, args: Sequence[str]) -> int | None:
"""Execute the clone command.

Args:
Expand Down Expand Up @@ -2132,6 +2132,11 @@ def run(self, args: Sequence[str]) -> None:
)
except GitProtocolError as e:
logger.exception(e)
except InvalidPathError as e:
# The clone is kept; only the checkout failed, matching git.
logger.exception("unable to checkout working tree: %s", e)
return 1
return None


def _get_commit_message_with_template(
Expand Down Expand Up @@ -3322,7 +3327,7 @@ def _run_delete(self, parsed_args: argparse.Namespace) -> None:
class cmd_reset(Command):
"""Reset current HEAD to the specified state."""

def run(self, args: Sequence[str]) -> None:
def run(self, args: Sequence[str]) -> int | None:
"""Execute the reset command.

Args:
Expand Down Expand Up @@ -3351,13 +3356,18 @@ def run(self, args: Sequence[str]) -> None:
mode = "mixed"

# Use the porcelain.reset function for all modes
porcelain.reset(".", mode=mode, treeish=parsed_args.treeish)
try:
porcelain.reset(".", mode=mode, treeish=parsed_args.treeish)
except InvalidPathError as e:
logger.exception("unable to checkout working tree: %s", e)
return 1
return None


class cmd_revert(Command):
"""Revert some existing commits."""

def run(self, args: Sequence[str]) -> None:
def run(self, args: Sequence[str]) -> int | None:
"""Execute the revert command.

Args:
Expand All @@ -3374,15 +3384,20 @@ def run(self, args: Sequence[str]) -> None:
parser.add_argument("commits", nargs="+", help="Commits to revert")
parsed_args = parser.parse_args(args)

result = porcelain.revert(
".",
commits=parsed_args.commits,
no_commit=parsed_args.no_commit,
message=parsed_args.message,
)
try:
result = porcelain.revert(
".",
commits=parsed_args.commits,
no_commit=parsed_args.no_commit,
message=parsed_args.message,
)
except InvalidPathError as e:
logger.exception("unable to checkout working tree: %s", e)
return 1

if result and not parsed_args.no_commit:
logger.info("[%s] Revert completed", result.decode("ascii")[:7])
return None


class cmd_daemon(Command):
Expand Down Expand Up @@ -3783,7 +3798,7 @@ def progress(msg: str) -> None:
class cmd_pull(Command):
"""Fetch from and integrate with another repository or a local branch."""

def run(self, args: Sequence[str]) -> None:
def run(self, args: Sequence[str]) -> int | None:
"""Execute the pull command.

Args:
Expand All @@ -3795,14 +3810,19 @@ def run(self, args: Sequence[str]) -> None:
parser.add_argument("--filter", type=str, nargs=1)
parser.add_argument("--protocol", type=int)
parsed_args = parser.parse_args(args)
porcelain.pull(
".",
remote_location=parsed_args.from_location or None,
refspecs=parsed_args.refspec or None,
filter_spec=parsed_args.filter,
protocol_version=parsed_args.protocol or _protocol_version_from_env(),
ssh_command=_ssh_command_from_env(),
)
try:
porcelain.pull(
".",
remote_location=parsed_args.from_location or None,
refspecs=parsed_args.refspec or None,
filter_spec=parsed_args.filter,
protocol_version=parsed_args.protocol or _protocol_version_from_env(),
ssh_command=_ssh_command_from_env(),
)
except InvalidPathError as e:
logger.exception("unable to checkout working tree: %s", e)
return 1
return None


class cmd_push(Command):
Expand Down Expand Up @@ -4551,6 +4571,9 @@ def run(self, args: Sequence[str]) -> int | None:
except porcelain.CheckoutError as e:
sys.stderr.write(f"{e}\n")
return 1
except InvalidPathError as e:
logger.exception("unable to checkout working tree: %s", e)
return 1
return 0


Expand Down Expand Up @@ -4745,16 +4768,21 @@ def run(self, args: Sequence[str]) -> None:
class cmd_stash_pop(Command):
"""Apply a stash and remove it from the stash list."""

def run(self, args: Sequence[str]) -> None:
def run(self, args: Sequence[str]) -> int | None:
"""Execute the stash-pop command.

Args:
args: Command line arguments
"""
parser = argparse.ArgumentParser()
parser.parse_args(args)
porcelain.stash_pop(".")
try:
porcelain.stash_pop(".")
except InvalidPathError as e:
logger.exception("unable to checkout working tree: %s", e)
return 1
logger.info("Restored working directory and index state")
return None


class cmd_bisect(SuperCommand):
Expand Down Expand Up @@ -5196,6 +5224,9 @@ def run(self, args: Sequence[str]) -> int | None:
merge_commit_id.decode(),
)
return 0
except InvalidPathError as e:
logger.exception("unable to checkout working tree: %s", e)
return 1
except porcelain.Error as e:
logger.error("%s", e)
return 1
Expand Down Expand Up @@ -5698,6 +5729,9 @@ def run(self, args: Sequence[str]) -> int | None:
logger.info("Cherry-pick successful: %s", result.decode())

return None
except InvalidPathError as e:
logger.exception("unable to checkout working tree: %s", e)
return 1
except porcelain.Error as e:
logger.error("%s", e)
return 1
Expand Down
26 changes: 22 additions & 4 deletions dulwich/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -1580,8 +1580,6 @@ def clone(
else:
head = None

if checkout and head is not None:
target.get_worktree().reset_index(config=target.get_config_stack())
except BaseException:
if target is not None:
target.close()
Expand All @@ -1590,6 +1588,17 @@ def clone(

shutil.rmtree(target_path)
raise

# Checkout runs after the clone is complete, so a checkout failure
# leaves the fetched repository in place rather than deleting it.
if checkout and head is not None:
try:
target.get_worktree().reset_index(config=target.get_config_stack())
except BaseException:
# Release file handles on the kept repository so callers can
# still remove or reopen it (notably on Windows).
target.close()
raise
return target

def fetch(
Expand Down Expand Up @@ -3272,8 +3281,6 @@ def clone(
else:
head = None

if checkout and head is not None:
target.get_worktree().reset_index(config=target.get_config_stack())
except BaseException:
if target is not None:
target.close()
Expand All @@ -3282,6 +3289,17 @@ def clone(

shutil.rmtree(target_path)
raise

# Checkout runs after the clone is complete, so a checkout failure
# leaves the fetched repository in place rather than deleting it.
if checkout and head is not None:
try:
target.get_worktree().reset_index(config=target.get_config_stack())
except BaseException:
# Release file handles on the kept repository so callers can
# still remove or reopen it (notably on Windows).
target.close()
raise
return target


Expand Down
67 changes: 48 additions & 19 deletions dulwich/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
"Index",
"IndexEntry",
"IndexExtension",
"InvalidPathError",
"ResolveUndoExtension",
"SerializedIndexEntry",
"SparseDirExtension",
Expand Down Expand Up @@ -1960,6 +1961,15 @@ def normalize(path: bytes) -> bytes:
return normalize


class InvalidPathError(Exception):
"""Raised when a tree entry's path is unsafe to write to the work tree."""

def __init__(self, path: bytes) -> None:
"""Initialize the exception with the offending path."""
self.path = path
super().__init__(f"invalid path {path.decode('utf-8', 'replace')!r}")


def validate_path_element_default(element: bytes) -> bool:
"""Validate a path element using default rules.

Expand All @@ -1972,12 +1982,32 @@ def validate_path_element_default(element: bytes) -> bool:
return _normalize_path_element_default(element) not in INVALID_DOTNAMES


def _is_ntfs_dotgit_short_name(normalized: bytes) -> bool:
"""Match NTFS 8.3 short-name forms of ``.git`` (``git~<digits>``)."""
if not normalized.startswith(b"git~"):
def _is_ntfs_dotgit(name: bytes) -> bool:
"""Match NTFS-dangerous spellings of ``.git`` at the start of ``name``.

Matches ``.git`` or the 8.3 short name ``git~1`` followed only by
dots/spaces and then the end of the element, a separator, or a ``:``
(an alternate-data-stream marker, as in ``.git::$INDEX_ALLOCATION``).
"""
if name[:1] == b".":
if name[1:4].lower() != b"git":
return False
i = 4
elif name[:1].lower() == b"g": # ``git~1`` 8.3 short name
if name[1:3].lower() != b"it" or name[3:5] != b"~1":
return False
i = 5
else:
return False
tail = normalized[4:]
return len(tail) > 0 and tail.isdigit()

while i < len(name):
c = name[i : i + 1]
if c == b":":
return True
if c != b"." and c != b" ":
return False
i += 1
return True


# Reserved Windows device names. Opening any of these on Windows
Expand Down Expand Up @@ -2008,21 +2038,17 @@ def validate_path_element_ntfs(element: bytes) -> bool:
Returns:
True if path element is valid for NTFS, False otherwise
"""
# A backslash is a path separator on Windows, so accepting it
# here would let a tree authored on POSIX escape the work tree
# or plant files under ``.git\`` when checked out on Windows.
if b"\\" in element:
return False
# NTFS alternate data streams are addressed as ``name:stream``;
# reject any element containing ``:`` so ``.git::$INDEX_ALLOCATION``
# and similar forms cannot bypass the ``.git`` check.
if b":" in element:
# A backslash is a separator on Windows but an ordinary filename
# character on POSIX, so only reject it on Windows.
if os.name == "nt" and b"\\" in element:
return False
# Backslash also separates ``.git`` spellings, so check each segment.
for segment in element.split(b"\\"):
if _is_ntfs_dotgit(segment):
return False
normalized = _normalize_path_element_ntfs(element)
if normalized in INVALID_DOTNAMES:
return False
if _is_ntfs_dotgit_short_name(normalized):
return False
if _is_reserved_windows_device_name(normalized):
return False
return True
Expand Down Expand Up @@ -2150,8 +2176,10 @@ def build_index_from_tree(
assert (
entry.path is not None and entry.mode is not None and entry.sha is not None
)
# Validate as we go and abort on the first invalid path,
# leaving any files already written in place.
if not validate_path(entry.path, validate_path_element):
continue
raise InvalidPathError(entry.path)
full_path = _tree_to_fs_path(root_path, entry.path, tree_encoding)

if not os.path.exists(os.path.dirname(full_path)):
Expand Down Expand Up @@ -2960,9 +2988,10 @@ def update_working_tree(
and change.new.mode is not None
)
path = change.new.path
# Validate as we go and abort on the first invalid path,
# leaving any changes already applied in place.
if not validate_path(path, validate_path_element):
continue

raise InvalidPathError(path)
full_path = _tree_to_fs_path(repo_path, path, tree_encoding)
try:
modify_stat: os.stat_result | None = os.lstat(full_path)
Expand Down
5 changes: 3 additions & 2 deletions dulwich/stash.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from .file import GitFile
from .index import (
IndexEntry,
InvalidPathError,
_tree_to_fs_path,
build_file_from_blob,
commit_tree,
Expand Down Expand Up @@ -205,7 +206,7 @@ def symlink_fn( # type: ignore[misc,unused-ignore]
and tree_entry.sha is not None
)
if not validate_path(tree_entry.path, validate_path_element):
continue
raise InvalidPathError(tree_entry.path)

# Add to index with stage 0 (normal)
# Get file stats for the entry
Expand All @@ -226,7 +227,7 @@ def symlink_fn( # type: ignore[misc,unused-ignore]
and tree_entry2.sha is not None
)
if not validate_path(tree_entry2.path, validate_path_element):
continue
raise InvalidPathError(tree_entry2.path)

full_path = _tree_to_fs_path(repo_path, tree_entry2.path)

Expand Down
Loading
Loading