Skip to content

Commit 93b87a4

Browse files
authored
crc combine (#707)
1 parent c49b22f commit 93b87a4

8 files changed

Lines changed: 276 additions & 3 deletions

File tree

awscrt/checksums.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,3 +30,84 @@ def crc64nvme(input: bytes, previous_crc64nvme: int = 0) -> int:
3030
Returns an unsigned 64-bit integer.
3131
"""
3232
return _awscrt.checksums_crc64nvme(input, previous_crc64nvme)
33+
34+
35+
def combine_crc32(crc32_result1: int, crc32_result2: int, data_length2: int) -> int:
    """
    Merge the CRC32 (Ethernet, gzip) checksums of two adjacent data blocks.

    If ``crc32_result1`` is the CRC32 of block A and ``crc32_result2`` is the
    CRC32 of block B, the value returned equals the CRC32 of A immediately
    followed by B (``A || B``), obtained without re-scanning either block.

    Args:
        crc32_result1: CRC32 checksum of the first (leading) data block.
        crc32_result2: CRC32 checksum of the second (trailing) data block.
        data_length2: Number of bytes in the data block that produced
            crc32_result2. This is the length of the checksummed data,
            NOT the size of the checksum itself (which is always 4 bytes).

    Returns:
        The CRC32 checksum of the two data blocks concatenated.
    """
    combined = _awscrt.checksums_crc32_combine(crc32_result1, crc32_result2, data_length2)
    return combined
60+
61+
62+
def combine_crc32c(crc32c_result1: int, crc32c_result2: int, data_length2: int) -> int:
    """
    Merge the CRC32C (Castagnoli, iSCSI) checksums of two adjacent data blocks.

    If ``crc32c_result1`` is the CRC32C of block A and ``crc32c_result2`` is
    the CRC32C of block B, the value returned equals the CRC32C of A
    immediately followed by B (``A || B``), obtained without re-scanning
    either block.

    Args:
        crc32c_result1: CRC32C checksum of the first (leading) data block.
        crc32c_result2: CRC32C checksum of the second (trailing) data block.
        data_length2: Number of bytes in the data block that produced
            crc32c_result2. This is the length of the checksummed data,
            NOT the size of the checksum itself (which is always 4 bytes).

    Returns:
        The CRC32C checksum of the two data blocks concatenated.
    """
    combined = _awscrt.checksums_crc32c_combine(crc32c_result1, crc32c_result2, data_length2)
    return combined
87+
88+
89+
def combine_crc64nvme(crc64nvme_result1: int, crc64nvme_result2: int, data_length2: int) -> int:
    """
    Merge the CRC64-NVME (CRC64-Rocksoft) checksums of two adjacent data blocks.

    If ``crc64nvme_result1`` is the CRC64-NVME of block A and
    ``crc64nvme_result2`` is the CRC64-NVME of block B, the value returned
    equals the CRC64-NVME of A immediately followed by B (``A || B``),
    obtained without re-scanning either block.

    Args:
        crc64nvme_result1: CRC64-NVME checksum of the first (leading) data block.
        crc64nvme_result2: CRC64-NVME checksum of the second (trailing) data block.
        data_length2: Number of bytes in the data block that produced
            crc64nvme_result2. This is the length of the checksummed data,
            NOT the size of the checksum itself (which is always 8 bytes).

    Returns:
        The CRC64-NVME checksum of the two data blocks concatenated.
    """
    combined = _awscrt.checksums_crc64nvme_combine(crc64nvme_result1, crc64nvme_result2, data_length2)
    return combined

crt/aws-lc

crt/s2n

Submodule s2n updated from f6ca8f0 to 1c98447

source/checksums.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,5 +9,8 @@
99
PyObject *aws_py_checksums_crc32(PyObject *self, PyObject *args);
1010
PyObject *aws_py_checksums_crc32c(PyObject *self, PyObject *args);
1111
PyObject *aws_py_checksums_crc64nvme(PyObject *self, PyObject *args);
12+
PyObject *aws_py_checksums_crc32_combine(PyObject *self, PyObject *args);
13+
PyObject *aws_py_checksums_crc32c_combine(PyObject *self, PyObject *args);
14+
PyObject *aws_py_checksums_crc64nvme_combine(PyObject *self, PyObject *args);
1215

1316
#endif /* AWS_CRT_PYTHON_CHECKSUMS_H */

source/crc.c

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,3 +104,108 @@ PyObject *aws_py_checksums_crc64nvme(PyObject *self, PyObject *args) {
104104
}
105105
return py_result;
106106
}
107+
108+
/**
 * Python binding: combine two CRC32 checksums via aws_checksums_crc32_combine().
 *
 * Expects a 3-tuple of Python ints in `args`:
 *   (crc32_result1, crc32_result2, data_length2)
 *
 * Returns a new Python int holding the combined checksum, or NULL with a
 * ValueError set when any argument is not a valid unsigned integer of the
 * required width (32 bits for the checksums, 64 bits for the length).
 */
PyObject *aws_py_checksums_crc32_combine(PyObject *self, PyObject *args) {
    (void)self;
    PyObject *py_crc1;
    PyObject *py_crc2;
    PyObject *py_len2;

    if (!PyArg_ParseTuple(args, "OOO", &py_crc1, &py_crc2, &py_len2)) {
        return NULL;
    }

    /*
     * The checksums are converted through a 64-bit type and range-checked.
     * Converting with PyLong_AsUnsignedLong straight into a uint32_t would
     * silently truncate values above UINT32_MAX on platforms where
     * unsigned long is 64 bits wide, instead of rejecting them.
     */
    unsigned long long crc1_wide = PyLong_AsUnsignedLongLong(py_crc1);
    if ((crc1_wide == (unsigned long long)-1 && PyErr_Occurred()) || crc1_wide > UINT32_MAX) {
        /* Replace whatever conversion error is pending (if any) with a uniform ValueError */
        PyErr_Clear();
        PyErr_SetString(PyExc_ValueError, "crc32_result1 is not a valid unsigned 32-bit integer");
        return NULL;
    }
    uint32_t crc1 = (uint32_t)crc1_wide;

    unsigned long long crc2_wide = PyLong_AsUnsignedLongLong(py_crc2);
    if ((crc2_wide == (unsigned long long)-1 && PyErr_Occurred()) || crc2_wide > UINT32_MAX) {
        PyErr_Clear();
        PyErr_SetString(PyExc_ValueError, "crc32_result2 is not a valid unsigned 32-bit integer");
        return NULL;
    }
    uint32_t crc2 = (uint32_t)crc2_wide;

    /* (uint64_t)-1 is also a legal length, so only treat it as failure when an error is pending */
    uint64_t len2 = PyLong_AsUnsignedLongLong(py_len2);
    if (len2 == (uint64_t)-1 && PyErr_Occurred()) {
        PyErr_Clear();
        PyErr_SetString(PyExc_ValueError, "data_length2 is not a valid unsigned 64-bit integer");
        return NULL;
    }

    uint32_t result = aws_checksums_crc32_combine(crc1, crc2, len2);
    return PyLong_FromUnsignedLong(result);
}
142+
143+
/**
 * Python binding: combine two CRC32C checksums via aws_checksums_crc32c_combine().
 *
 * Expects a 3-tuple of Python ints in `args`:
 *   (crc32c_result1, crc32c_result2, data_length2)
 *
 * Returns a new Python int holding the combined checksum, or NULL with a
 * ValueError set when any argument is not a valid unsigned integer of the
 * required width (32 bits for the checksums, 64 bits for the length).
 */
PyObject *aws_py_checksums_crc32c_combine(PyObject *self, PyObject *args) {
    (void)self;
    PyObject *py_crc1;
    PyObject *py_crc2;
    PyObject *py_len2;

    if (!PyArg_ParseTuple(args, "OOO", &py_crc1, &py_crc2, &py_len2)) {
        return NULL;
    }

    /*
     * The checksums are converted through a 64-bit type and range-checked.
     * Converting with PyLong_AsUnsignedLong straight into a uint32_t would
     * silently truncate values above UINT32_MAX on platforms where
     * unsigned long is 64 bits wide, instead of rejecting them.
     */
    unsigned long long crc1_wide = PyLong_AsUnsignedLongLong(py_crc1);
    if ((crc1_wide == (unsigned long long)-1 && PyErr_Occurred()) || crc1_wide > UINT32_MAX) {
        /* Replace whatever conversion error is pending (if any) with a uniform ValueError */
        PyErr_Clear();
        PyErr_SetString(PyExc_ValueError, "crc32c_result1 is not a valid unsigned 32-bit integer");
        return NULL;
    }
    uint32_t crc1 = (uint32_t)crc1_wide;

    unsigned long long crc2_wide = PyLong_AsUnsignedLongLong(py_crc2);
    if ((crc2_wide == (unsigned long long)-1 && PyErr_Occurred()) || crc2_wide > UINT32_MAX) {
        PyErr_Clear();
        PyErr_SetString(PyExc_ValueError, "crc32c_result2 is not a valid unsigned 32-bit integer");
        return NULL;
    }
    uint32_t crc2 = (uint32_t)crc2_wide;

    /* (uint64_t)-1 is also a legal length, so only treat it as failure when an error is pending */
    uint64_t len2 = PyLong_AsUnsignedLongLong(py_len2);
    if (len2 == (uint64_t)-1 && PyErr_Occurred()) {
        PyErr_Clear();
        PyErr_SetString(PyExc_ValueError, "data_length2 is not a valid unsigned 64-bit integer");
        return NULL;
    }

    uint32_t result = aws_checksums_crc32c_combine(crc1, crc2, len2);
    return PyLong_FromUnsignedLong(result);
}
177+
178+
/**
 * Python binding: combine two CRC64-NVME checksums via aws_checksums_crc64nvme_combine().
 *
 * Expects a 3-tuple of Python ints in `args`:
 *   (crc64nvme_result1, crc64nvme_result2, data_length2)
 *
 * Returns a new Python int holding the combined checksum, or NULL with a
 * ValueError set when any argument cannot be converted to an unsigned
 * 64-bit integer.
 */
PyObject *aws_py_checksums_crc64nvme_combine(PyObject *self, PyObject *args) {
    (void)self;

    PyObject *first_crc_obj;
    PyObject *second_crc_obj;
    PyObject *second_len_obj;
    if (!PyArg_ParseTuple(args, "OOO", &first_crc_obj, &second_crc_obj, &second_len_obj)) {
        return NULL;
    }

    /* Message for the ValueError raised at the shared failure exit below */
    const char *invalid_arg_msg = NULL;
    uint64_t first_crc = 0;
    uint64_t second_crc = 0;
    uint64_t second_len = 0;

    /*
     * Arguments are converted strictly in order, stopping at the first failure.
     * (uint64_t)-1 is a legal value, so it only signals failure when a Python
     * error is pending as well.
     */
    first_crc = PyLong_AsUnsignedLongLong(first_crc_obj);
    if (PyErr_Occurred() && first_crc == (uint64_t)-1) {
        invalid_arg_msg = "crc64nvme_result1 is not a valid unsigned 64-bit integer";
        goto invalid_argument;
    }

    second_crc = PyLong_AsUnsignedLongLong(second_crc_obj);
    if (PyErr_Occurred() && second_crc == (uint64_t)-1) {
        invalid_arg_msg = "crc64nvme_result2 is not a valid unsigned 64-bit integer";
        goto invalid_argument;
    }

    second_len = PyLong_AsUnsignedLongLong(second_len_obj);
    if (PyErr_Occurred() && second_len == (uint64_t)-1) {
        invalid_arg_msg = "data_length2 is not a valid unsigned 64-bit integer";
        goto invalid_argument;
    }

    return PyLong_FromUnsignedLongLong(aws_checksums_crc64nvme_combine(first_crc, second_crc, second_len));

invalid_argument:
    /* Swap the pending conversion error for a uniform ValueError */
    PyErr_Clear();
    PyErr_SetString(PyExc_ValueError, invalid_arg_msg);
    return NULL;
}

source/module.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -859,6 +859,9 @@ static PyMethodDef s_module_methods[] = {
859859
AWS_PY_METHOD_DEF(checksums_crc32, METH_VARARGS),
860860
AWS_PY_METHOD_DEF(checksums_crc32c, METH_VARARGS),
861861
AWS_PY_METHOD_DEF(checksums_crc64nvme, METH_VARARGS),
862+
AWS_PY_METHOD_DEF(checksums_crc32_combine, METH_VARARGS),
863+
AWS_PY_METHOD_DEF(checksums_crc32c_combine, METH_VARARGS),
864+
AWS_PY_METHOD_DEF(checksums_crc64nvme_combine, METH_VARARGS),
862865

863866
/* HTTP */
864867
AWS_PY_METHOD_DEF(http_connection_close, METH_VARARGS),

test/test_checksums.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,87 @@ def test_crc64nvme_huge_buffer(self):
137137
val = checksums.crc64nvme(huge_buffer)
138138
self.assertEqual(0x2645c28052b1fbb0, val)
139139

140+
def _test_combine_helper(self, checksum_fn, combine_fn):
    """
    Check a checksum combine function against direct checksum computation.

    For each scenario the per-block checksums are folded left to right with
    ``combine_fn`` and the outcome must equal ``checksum_fn`` applied to the
    concatenation of all blocks.

    Args:
        checksum_fn: Callable taking bytes and returning the checksum.
        combine_fn: Callable taking (checksum1, checksum2, length_of_block2)
            and returning the combined checksum.
    """
    scenarios = (
        # Basic combine of two short blocks.
        (b"Hello, ", b"World!"),
        # Empty second block: the result must equal the first block's checksum.
        (b"Hello, World!", b""),
        # Multiple blocks, combined pairwise from left to right.
        (b"The quick ", b"brown fox ", b"jumps over the lazy dog"),
        # Larger blocks: 1024 zero bytes, then the 0..255 byte pattern repeated 4 times.
        (bytes(1024), bytes(range(256)) * 4),
    )

    for blocks in scenarios:
        block_checksums = [checksum_fn(block) for block in blocks]

        running = block_checksums[0]
        for block, block_checksum in zip(blocks[1:], block_checksums[1:]):
            running = combine_fn(running, block_checksum, len(block))

        expected = checksum_fn(b"".join(blocks))
        self.assertEqual(expected, running)
189+
190+
def test_crc32_combine(self):
    """combine_crc32 must agree with crc32 computed over the concatenated data."""
    self._test_combine_helper(checksum_fn=checksums.crc32, combine_fn=checksums.combine_crc32)
193+
194+
def test_crc32c_combine(self):
    """combine_crc32c must agree with crc32c computed over the concatenated data."""
    self._test_combine_helper(checksum_fn=checksums.crc32c, combine_fn=checksums.combine_crc32c)
197+
198+
def test_crc64nvme_combine(self):
    """combine_crc64nvme must agree with crc64nvme computed over the concatenated data."""
    self._test_combine_helper(checksum_fn=checksums.crc64nvme, combine_fn=checksums.combine_crc64nvme)
201+
202+
def test_combine_invalid_inputs(self):
    """
    Combine functions must raise ValueError for invalid arguments.

    Each of the three positional arguments (first checksum, second checksum,
    length of the second block) is made negative in turn, for every
    algorithm. A final pass confirms that valid input is accepted and yields
    an int.
    """
    combine_fns = [checksums.combine_crc32, checksums.combine_crc32c, checksums.combine_crc64nvme]

    # Exactly one argument is invalid in each tuple, so every argument's
    # validation path is exercised on its own.
    invalid_args = [
        (-1, 0, 0),
        (0, -1, 0),
        (0, 0, -1),
    ]

    for combine_fn in combine_fns:
        for bad_args in invalid_args:
            # subTest labels the failing function/argument combination
            with self.subTest(combine_fn=combine_fn.__name__, args=bad_args):
                with self.assertRaises(ValueError) as context:
                    combine_fn(*bad_args)
                self.assertIn("not a valid unsigned", str(context.exception))

    # Valid inputs must not raise and must produce an integer result
    for combine_fn in combine_fns:
        with self.subTest(combine_fn=combine_fn.__name__):
            result = combine_fn(0, 0, 0)
            self.assertIsInstance(result, int)
220+
140221

141222
if __name__ == '__main__':
142223
unittest.main()

0 commit comments

Comments
 (0)