Skip to content

Commit 079311a

Browse files
authored
object_store: Verify object id matches contents on retrieval (#2233)
Fixes #2223
2 parents 803d7a2 + afd8d8e commit 079311a

9 files changed

Lines changed: 83 additions & 30 deletions

File tree

NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
1.2.7 UNRELEASED
22

3+
* Verify that an object retrieved by id actually hashes to the requested
4+
id, raising ``ChecksumMismatch`` otherwise. (Jelmer Vernooij, #2223)
5+
36
* Reject pack names containing path separators in the dumb HTTP transport,
47
so a malicious server can no longer escape the temporary directory.
58
(Jelmer Vernooij, #2213)

contrib/test_release_robot.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -89,11 +89,12 @@ class GetRecentTagsTest(unittest.TestCase):
8989
test_repo = os.path.join(BASEDIR, "dulwich_test_repo.zip")
9090
committer = b"Mark Mikofski <mark.mikofski@sunpowercorp.com>"
9191
test_tags: ClassVar[list[bytes]] = [b"v0.1a", b"v0.1"]
92-
tag_test_data: ClassVar[
93-
dict[bytes, tuple[int, bytes, tuple[int, bytes] | None]]
94-
] = {
95-
test_tags[0]: (1484788003, b"3" * 40, None),
96-
test_tags[1]: (1484788314, b"1" * 40, (1484788401, b"2" * 40)),
92+
# Commit times and tag times per tag. Object ids are filled in by
93+
# setUpClass once the objects have been created, since the contents (and
94+
# thus the ids) are computed from the commit/tag data.
95+
tag_test_data: ClassVar[dict[bytes, list]] = {
96+
test_tags[0]: [1484788003, None, None],
97+
test_tags[1]: [1484788314, None, [1484788401, None]],
9798
}
9899

99100
# Class attributes set in setUpClass
@@ -111,35 +112,35 @@ def setUpClass(cls) -> None:
111112
obj_store = cls.repo.object_store # test repo object store
112113
# commit 1 ('2017-01-19T01:06:43')
113114
cls.c1 = make_commit(
114-
id=cls.tag_test_data[cls.test_tags[0]][1],
115115
commit_time=cls.tag_test_data[cls.test_tags[0]][0],
116116
message=b"unannotated tag",
117117
author=cls.committer,
118118
)
119119
obj_store.add_object(cls.c1)
120+
cls.tag_test_data[cls.test_tags[0]][1] = cls.c1.id
120121
# tag 1: unannotated
121122
cls.t1 = cls.test_tags[0]
122123
cls.repo[b"refs/tags/" + cls.t1] = cls.c1.id # add unannotated tag
123124
# commit 2 ('2017-01-19T01:11:54')
124125
cls.c2 = make_commit(
125-
id=cls.tag_test_data[cls.test_tags[1]][1],
126126
commit_time=cls.tag_test_data[cls.test_tags[1]][0],
127127
message=b"annotated tag",
128128
parents=[cls.c1.id],
129129
author=cls.committer,
130130
)
131131
obj_store.add_object(cls.c2)
132+
cls.tag_test_data[cls.test_tags[1]][1] = cls.c2.id
132133
# tag 2: annotated ('2017-01-19T01:13:21')
133134
tag_data = cls.tag_test_data[cls.test_tags[1]][2]
134135
if tag_data is None:
135136
raise AssertionError("test_tags[1] should have annotated tag data")
136137
cls.t2 = make_tag(
137138
cls.c2,
138-
id=tag_data[1],
139139
name=cls.test_tags[1],
140140
tag_time=tag_data[0],
141141
)
142142
obj_store.add_object(cls.t2)
143+
tag_data[1] = cls.t2.id
143144
cls.repo[b"refs/heads/master"] = cls.c2.id
144145
cls.repo[b"refs/tags/" + cls.t2.name] = cls.t2.id # add annotated tag
145146

dulwich/object_store.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -461,10 +461,19 @@ def get_raw(self, name: RawObjectID | ObjectID) -> tuple[int, bytes]:
461461
raise NotImplementedError(self.get_raw)
462462

463463
def __getitem__(self, sha1: ObjectID | RawObjectID) -> ShaFile:
464-
"""Obtain an object by SHA1."""
464+
"""Obtain an object by SHA1.
465+
466+
Raises:
467+
ChecksumMismatch: if the stored contents do not hash to the
468+
requested object id.
469+
"""
470+
if len(sha1) == self.object_format.oid_length:
471+
hexsha = sha_to_hex(RawObjectID(sha1))
472+
else:
473+
hexsha = ObjectID(sha1)
465474
type_num, uncomp = self.get_raw(sha1)
466475
return ShaFile.from_raw_string(
467-
type_num, uncomp, sha=sha1, object_format=self.object_format
476+
type_num, uncomp, verify_sha=hexsha, object_format=self.object_format
468477
)
469478

470479
def __iter__(self) -> Iterator[ObjectID]:

dulwich/objects.py

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -557,21 +557,38 @@ def as_pretty_string(self) -> str:
557557
return self.as_raw_string().decode("utf-8", "replace")
558558

559559
def set_raw_string(
560-
self, text: bytes, sha: ObjectID | RawObjectID | None = None
560+
self,
561+
text: bytes,
562+
sha: ObjectID | RawObjectID | None = None,
563+
*,
564+
verify_sha: ObjectID | RawObjectID | None = None,
561565
) -> None:
562566
"""Set the contents of this object from a serialized string."""
563567
if not isinstance(text, bytes):
564568
raise TypeError(f"Expected bytes for text, got {text!r}")
565-
self.set_raw_chunks([text], sha)
569+
self.set_raw_chunks([text], sha, verify_sha=verify_sha)
566570

567571
def set_raw_chunks(
568572
self,
569573
chunks: list[bytes],
570574
sha: ObjectID | RawObjectID | None = None,
571575
*,
572576
object_format: ObjectFormat | None = None,
577+
verify_sha: ObjectID | RawObjectID | None = None,
573578
) -> None:
574-
"""Set the contents of this object from a list of chunks."""
579+
"""Set the contents of this object from a list of chunks.
580+
581+
Args:
582+
chunks: The raw uncompressed contents.
583+
sha: Optional known, trusted sha for the object. Cached without
584+
being checked against the contents.
585+
object_format: Optional hash algorithm for the object.
586+
verify_sha: Optional sha that the contents are checked against;
587+
raises ChecksumMismatch if the object does not hash to it. On
588+
success it is cached like ``sha``. Mutually exclusive with ``sha``.
589+
"""
590+
if sha is not None and verify_sha is not None:
591+
raise ValueError("sha and verify_sha are mutually exclusive")
575592
self._chunked_text = chunks
576593
# Set hash algorithm if provided
577594
if object_format is not None:
@@ -583,6 +600,11 @@ def set_raw_chunks(
583600
self._sha = FixedSha(sha)
584601
self._deserialize(chunks)
585602
self._needs_serialization = False
603+
if verify_sha is not None:
604+
got = self.get_id(self.object_format)
605+
if got != verify_sha:
606+
raise ChecksumMismatch(verify_sha, got)
607+
self._sha = FixedSha(verify_sha)
586608

587609
@staticmethod
588610
def _parse_object_header(
@@ -690,22 +712,25 @@ def from_raw_string(
690712
sha: ObjectID | RawObjectID | None = None,
691713
*,
692714
object_format: ObjectFormat | None = None,
715+
verify_sha: ObjectID | RawObjectID | None = None,
693716
) -> "ShaFile":
694717
"""Creates an object of the indicated type from the raw string given.
695718
696719
Args:
697720
type_num: The numeric type of the object.
698721
string: The raw uncompressed contents.
699-
sha: Optional known sha for the object
722+
sha: Optional known, trusted sha for the object.
700723
object_format: Optional hash algorithm for the object
724+
verify_sha: Optional sha to check the contents against; raises
725+
ChecksumMismatch on mismatch. Mutually exclusive with ``sha``.
701726
"""
702727
cls = object_class(type_num)
703728
if cls is None:
704729
raise AssertionError(f"unsupported class type num: {type_num}")
705730
obj = cls()
706731
if object_format is not None:
707732
obj.object_format = object_format
708-
obj.set_raw_string(string, sha)
733+
obj.set_raw_string(string, sha, verify_sha=verify_sha)
709734
return obj
710735

711736
@staticmethod

tests/cli/test_cli.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3066,7 +3066,7 @@ def test_mktag_create(self):
30663066
# Verify the tag object exists and has correct content
30673067
from dulwich.objects import Tag
30683068

3069-
tag_obj = self.repo.object_store[tag_sha]
3069+
tag_obj = self.repo.object_store[tag_sha.encode("ascii")]
30703070
self.assertIsInstance(tag_obj, Tag)
30713071
self.assertEqual(tag_obj.object[1], commit_sha)
30723072
self.assertEqual(tag_obj.name, b"v1.0")

tests/porcelain/test_bisect.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,15 +45,15 @@ def setUp(self) -> None:
4545
self.repo.object_store.add_object(tree)
4646

4747
# Create some commits with proper trees
48-
self.c1 = make_commit(id=b"1" * 40, message=b"initial commit", tree=tree.id)
48+
self.c1 = make_commit(message=b"initial commit", tree=tree.id)
4949
self.c2 = make_commit(
50-
id=b"2" * 40, message=b"second commit", parents=[b"1" * 40], tree=tree.id
50+
message=b"second commit", parents=[self.c1.id], tree=tree.id
5151
)
5252
self.c3 = make_commit(
53-
id=b"3" * 40, message=b"third commit", parents=[b"2" * 40], tree=tree.id
53+
message=b"third commit", parents=[self.c2.id], tree=tree.id
5454
)
5555
self.c4 = make_commit(
56-
id=b"4" * 40, message=b"fourth commit", parents=[b"3" * 40], tree=tree.id
56+
message=b"fourth commit", parents=[self.c3.id], tree=tree.id
5757
)
5858

5959
# Add commits to object store

tests/test_bisect.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -118,10 +118,10 @@ def test_reset_no_session(self) -> None:
118118
def test_bisect_workflow(self) -> None:
119119
"""Test a complete bisect workflow."""
120120
# Create some commits
121-
c1 = make_commit(id=b"1" * 40, message=b"good commit 1")
122-
c2 = make_commit(id=b"2" * 40, message=b"good commit 2", parents=[b"1" * 40])
123-
c3 = make_commit(id=b"3" * 40, message=b"bad commit", parents=[b"2" * 40])
124-
c4 = make_commit(id=b"4" * 40, message=b"bad commit 2", parents=[b"3" * 40])
121+
c1 = make_commit(message=b"good commit 1")
122+
c2 = make_commit(message=b"good commit 2", parents=[c1.id])
123+
c3 = make_commit(message=b"bad commit", parents=[c2.id])
124+
c4 = make_commit(message=b"bad commit 2", parents=[c3.id])
125125

126126
# Add commits to object store
127127
for commit in [c1, c2, c3, c4]:

tests/test_object_store.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,19 @@ def test_corrupted_object_raise_exception(self) -> None:
271271
# this does not change iteration on loose objects though
272272
self.assertEqual([testobject.id], list(self.store._iter_loose_objects()))
273273

274+
def test_getitem_verifies_sha(self) -> None:
275+
"""Retrieving a loose object stored under a wrong sha is rejected."""
276+
from dulwich.errors import ChecksumMismatch
277+
from dulwich.file import GitFile
278+
279+
self.store.add_object(testobject)
280+
wrong_sha = b"1" * 40
281+
path = self.store._get_shafile_path(wrong_sha)
282+
os.makedirs(os.path.dirname(path), exist_ok=True)
283+
with GitFile(path, "wb") as f:
284+
f.write(testobject.as_legacy_object())
285+
self.assertRaises(ChecksumMismatch, self.store.__getitem__, wrong_sha)
286+
274287
def test_tempfile_in_loose_store(self) -> None:
275288
self.store.add_object(testobject)
276289
self.assertEqual([testobject.id], list(self.store._iter_loose_objects()))

tests/test_server.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -241,17 +241,18 @@ def test_get_tagged(self) -> None:
241241
def test_nothing_to_do_but_wants(self) -> None:
242242
# Just the fact that the client claims to want an object is enough
243243
# for sending a pack. Even if there turns out to be nothing.
244-
refs = {b"refs/tags/tag1": ONE}
245244
tree = Tree()
246245
self._repo.object_store.add_object(tree)
247-
self._repo.object_store.add_object(make_commit(id=ONE, tree=tree))
246+
commit = make_commit(tree=tree.id)
247+
self._repo.object_store.add_object(commit)
248+
refs = {b"refs/tags/tag1": commit.id}
248249
for name, sha in refs.items():
249250
self._repo.refs[name] = sha
250251
self._handler.proto.set_output(
251252
[
252-
b"want " + ONE + b" side-band-64k thin-pack ofs-delta",
253+
b"want " + commit.id + b" side-band-64k thin-pack ofs-delta",
253254
None,
254-
b"have " + ONE,
255+
b"have " + commit.id,
255256
b"done",
256257
None,
257258
]
@@ -262,10 +263,11 @@ def test_nothing_to_do_but_wants(self) -> None:
262263

263264
def test_nothing_to_do_no_wants(self) -> None:
264265
# Don't send a pack if the client didn't ask for anything.
265-
refs = {b"refs/tags/tag1": ONE}
266266
tree = Tree()
267267
self._repo.object_store.add_object(tree)
268-
self._repo.object_store.add_object(make_commit(id=ONE, tree=tree))
268+
commit = make_commit(tree=tree.id)
269+
self._repo.object_store.add_object(commit)
270+
refs = {b"refs/tags/tag1": commit.id}
269271
for ref, sha in refs.items():
270272
self._repo.refs[ref] = sha
271273
self._handler.proto.set_output([None])

0 commit comments

Comments
 (0)