Stack allocate small strings (#270)

andreer · pre-commit-ci[bot] · agronholm · web-flow · commit 2b53b2813d44 · 2026-03-22T17:24:52.000+02:00
* normalize str_errors='error' to 'strict'

  'error' was never a valid Python string error handler. Accept it for backwards compatibility but normalize to 'strict' internally. Update error messages to only mention valid options.

* fix str_errors handling in C string decoder

  The C implementation of decode_definite_short_string was not respecting the str_errors setting - it always used PyUnicode_FromStringAndSize which only supports strict mode. Now uses PyUnicode_DecodeUTF8 with the str_errors field passed directly.

  Store str_errors as const char* (NULL for strict, "replace" for replace) instead of PyObject*. This eliminates a conditional in the hot path since PyUnicode_DecodeUTF8 accepts NULL to mean strict mode.

  Fixes #255

* use fp_read directly in string decoder

  Skip creating an intermediate Python bytes object by using fp_read directly into a PyMem_Malloc buffer. This avoids Python allocator and reference counting overhead.

* stack allocate small strings in C decoder

  Use a stack buffer for strings <= 256 bytes to avoid heap allocation overhead. Larger strings continue to use PyMem_Malloc.

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Alex Grönholm <alex.gronholm@nextday.fi>
diff --git a/cbor2/_decoder.py b/cbor2/_decoder.py
@@ -65,13 +65,14 @@ class CBORDecoder:
 
     _fp: IO[bytes]
     _fp_read: Callable[[int], bytes]
+    _str_errors: str
 
     def __init__(
         self,
         fp: IO[bytes],
         tag_hook: Callable[[CBORDecoder, CBORTag], Any] | None = None,
         object_hook: Callable[[CBORDecoder, dict[Any, Any]], Any] | None = None,
-        str_errors: Literal["strict", "error", "replace"] = "strict",
+        str_errors: str = "strict",
         read_size: int = 1,
         *,
         max_depth: int = 400,
@@ -164,17 +165,19 @@ def object_hook(self, value: Callable[[CBORDecoder, dict[Any, Any]], Any] | None
             raise ValueError("object_hook must be None or a callable")
 
     @property
-    def str_errors(self) -> Literal["strict", "error", "replace"]:
+    def str_errors(self) -> str:
         return self._str_errors
 
     @str_errors.setter
-    def str_errors(self, value: Literal["strict", "error", "replace"]) -> None:
-        if value in ("strict", "error", "replace"):
+    def str_errors(self, value: str) -> None:
+        if value == "error":  # legacy alias, not a real codec error handler
+            self._str_errors = "strict"
+        elif value in ("strict", "replace", "ignore", "backslashreplace", "surrogateescape"):
             self._str_errors = value
         else:
             raise ValueError(
-                f"invalid str_errors value {value!r} (must be one of 'strict', "
-                "'error', or 'replace')"
+                f"invalid str_errors value {value!r} (must be 'strict', 'error', 'replace', "
+                "'ignore', 'backslashreplace' or 'surrogateescape')"
             )
 
     def set_shareable(self, value: T) -> T:
diff --git a/docs/versionhistory.rst b/docs/versionhistory.rst
@@ -22,6 +22,10 @@ This library adheres to `Semantic Versioning 2.0 <http://semver.org/>`_.
   (`#287 <https://github.com/agronholm/cbor2/issues/287>`_)
 - Fixed two reference/memory leaks in the C extension's long string decoder
   (`#290 <https://github.com/agronholm/cbor2/pull/290>`_ PR by @killiancowan82)
+- Fixed C decoder ignoring the ``str_errors`` setting when decoding strings, and improved
+  string decoding performance by using stack allocation for small strings and eliminating
+  unnecessary conditionals. Benchmarks show 9-17% faster deserialization.
+  (`#255 <https://github.com/agronholm/cbor2/issues/255>`_; PR by @andreer)
 
 **5.8.0** (2025-12-30)
 
diff --git a/source/decoder.c b/source/decoder.c
@@ -34,6 +34,9 @@
 // copied from cpython/Objects/bytesobject.c for bounds checks
 #define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
 
+// Strings of at most this many bytes are read into a stack buffer; longer ones use PyMem_Malloc
+#define SMALL_STRING_STACK_THRESHOLD 256
+
 enum DecodeOption {
     DECODE_NORMAL = 0,
     DECODE_IMMUTABLE = 1,
@@ -106,7 +109,6 @@ CBORDecoder_clear(CBORDecoderObject *self)
     Py_CLEAR(self->object_hook);
     Py_CLEAR(self->shareables);
     Py_CLEAR(self->stringref_namespace);
-    Py_CLEAR(self->str_errors);
     if (self->readahead) {
         PyMem_Free(self->readahead);
         self->readahead = NULL;
@@ -152,7 +154,7 @@ CBORDecoder_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
         self->tag_hook = Py_None;
         Py_INCREF(Py_None);
         self->object_hook = Py_None;
-        self->str_errors = PyBytes_FromString("strict");
+        self->str_errors = NULL;  // NULL means strict mode
         self->max_depth = CBOR2_DEFAULT_MAX_DEPTH;
         self->immutable = false;
         self->shared_index = -1;
@@ -362,9 +364,8 @@ _CBORDecoder_set_object_hook(CBORDecoderObject *self, PyObject *value,
 static PyObject *
 _CBORDecoder_get_str_errors(CBORDecoderObject *self, void *closure)
 {
-    return PyUnicode_DecodeASCII(
-            PyBytes_AS_STRING(self->str_errors),
-            PyBytes_GET_SIZE(self->str_errors), "strict");
+    const char *mode = self->str_errors ? self->str_errors : "strict";  // NULL is stored for strict
+    return PyUnicode_FromString(mode);  // new reference, or NULL with an exception set
 }
 
 
@@ -373,30 +374,46 @@ static int
 _CBORDecoder_set_str_errors(CBORDecoderObject *self, PyObject *value,
                             void *closure)
 {
-    PyObject *tmp, *bytes;
-
     if (!value) {
         PyErr_SetString(PyExc_AttributeError,
                         "cannot delete str_errors attribute");
         return -1;
     }
     if (PyUnicode_Check(value)) {
-        bytes = PyUnicode_AsASCIIString(value);
+        PyObject *bytes = PyUnicode_AsASCIIString(value);
         if (bytes) {
-            if (!strcmp(PyBytes_AS_STRING(bytes), "strict") ||
-                    !strcmp(PyBytes_AS_STRING(bytes), "error") ||
-                    !strcmp(PyBytes_AS_STRING(bytes), "replace")) {
-                tmp = self->str_errors;
-                self->str_errors = bytes;
-                Py_DECREF(tmp);
+            const char *mode = PyBytes_AS_STRING(bytes);
+            if (!strcmp(mode, "strict") || !strcmp(mode, "error")) {
+                self->str_errors = NULL;  // NULL selects strict decoding; "error" is a legacy alias
+                Py_DECREF(bytes);
+                return 0;
+            }
+            if (!strcmp(mode, "replace")) {
+                self->str_errors = "replace";  // string literal: static storage, nothing to free
+                Py_DECREF(bytes);
+                return 0;
+            }
+            if (!strcmp(mode, "ignore")) {
+                self->str_errors = "ignore";
+                Py_DECREF(bytes);
+                return 0;
+            }
+            if (!strcmp(mode, "backslashreplace")) {
+                self->str_errors = "backslashreplace";
+                Py_DECREF(bytes);
+                return 0;
+            }
+            if (!strcmp(mode, "surrogateescape")) {
+                self->str_errors = "surrogateescape";
+                Py_DECREF(bytes);
                 return 0;
             }
             Py_DECREF(bytes);
         }
     }
     PyErr_Format(PyExc_ValueError,
-            "invalid str_errors value %R (must be one of 'strict', "
-            "'error', or 'replace')", value);
+            "invalid str_errors value %R (must be 'strict', 'error', 'replace', "
+            "'ignore', 'backslashreplace' or 'surrogateescape')", value);
     return -1;
 }
 
@@ -861,13 +878,22 @@ decode_bytestring(CBORDecoderObject *self, uint8_t subtype)
 static PyObject *
 decode_definite_short_string(CBORDecoderObject *self, Py_ssize_t length)
 {
-    PyObject *bytes_obj = fp_read_object(self, length);
-    if (!bytes_obj)
+    char stack_buf[SMALL_STRING_STACK_THRESHOLD];
+    char *buf = (length <= SMALL_STRING_STACK_THRESHOLD) ? stack_buf : PyMem_Malloc(length);
+    if (!buf)
+        return PyErr_NoMemory();
+
+    if (self->fp_read(self, buf, length) == -1) {
+        if (buf != stack_buf)
+            PyMem_Free(buf);
         return NULL;
+    }
+
+    PyObject *ret = PyUnicode_DecodeUTF8(buf, length, self->str_errors);
+
+    if (buf != stack_buf)
+        PyMem_Free(buf);
 
-    const char *bytes = PyBytes_AS_STRING(bytes_obj);
-    PyObject *ret = PyUnicode_FromStringAndSize(bytes, length);
-    Py_DECREF(bytes_obj);
     if (ret && string_namespace_add(self, ret, length) == -1) {
         Py_DECREF(ret);
         return NULL;
@@ -925,18 +951,18 @@ decode_definite_long_string(CBORDecoderObject *self, Py_ssize_t length)
         }
 
         consumed = chunk_length;  // workaround for https://github.com/python/cpython/issues/99612
-        string = PyUnicode_DecodeUTF8Stateful(source_buffer, chunk_length, NULL, &consumed);
+        string = PyUnicode_DecodeUTF8Stateful(source_buffer, chunk_length, self->str_errors, &consumed);
         if (!string)
             goto error;
 
         if (ret) {
             // Concatenate the result to the existing result
             PyObject *joined = PyUnicode_Concat(ret, string);
+            Py_DECREF(string);
+            string = NULL;
             if (!joined)
                 goto error;
 
-            Py_DECREF(string);
-            string = NULL;
             Py_DECREF(ret);
             ret = joined;
         } else {
@@ -967,12 +993,35 @@ decode_definite_long_string(CBORDecoderObject *self, Py_ssize_t length)
         chunk = NULL;
     }
 
-    if (ret && string_namespace_add(self, ret, length) == -1)
-        goto error;
+    // Handle any remaining bytes in the buffer (incomplete UTF-8 sequences)
+    if (buffer_length > 0) {
+        string = PyUnicode_DecodeUTF8(buffer, buffer_length, self->str_errors);
+        if (!string)
+            goto error;
+
+        if (ret) {
+            PyObject *joined = PyUnicode_Concat(ret, string);
+            Py_DECREF(string);
+            string = NULL;
+            if (!joined)
+                goto error;
+
+            Py_DECREF(ret);
+            ret = joined;
+        } else {
+            ret = string;
+            string = NULL;
+        }
+    }
 
     if (buffer)
         PyMem_Free(buffer);
 
+    if (ret && string_namespace_add(self, ret, length) == -1) {
+        Py_DECREF(ret);
+        return NULL;
+    }
+
     return ret;
 error:
     Py_XDECREF(ret);
diff --git a/source/decoder.h b/source/decoder.h
@@ -21,7 +21,7 @@ typedef struct CBORDecoderObject_ {
     PyObject *object_hook;
     PyObject *shareables;
     PyObject *stringref_namespace;
-    PyObject *str_errors;
+    const char *str_errors;  // NULL for strict, else a static handler name (e.g. "replace")
     Py_ssize_t max_depth;
     bool immutable;
     Py_ssize_t shared_index;
diff --git a/tests/test_decoder.py b/tests/test_decoder.py
@@ -1204,3 +1204,76 @@ def tag_hook(decoder, tag):
     assert result[0] == [1, 2, 3]
     assert result[1] == "after"
     assert result[2] == "final"
+
+
+def test_str_errors_error_alias(impl):
+    """'error' is not a real Python error handler; it must read back as 'strict'."""
+    with BytesIO(b"\x65hello") as stream:
+        decoder = impl.CBORDecoder(stream, str_errors="error")
+        assert decoder.str_errors == "strict"
+
+
+def test_str_errors_invalid_mode(impl):
+    payload = b"\x65hello"  # well-formed 5-byte text string; only str_errors is invalid
+    with pytest.raises(ValueError, match="invalid str_errors value 'invalid'"):
+        impl.loads(payload, str_errors="invalid")
+
+
+@pytest.mark.parametrize(
+    "mode, expected",
+    [
+        ("strict", None),  # None marks the case where decoding must raise
+        ("replace", "hello\ufffdworld"),  # the invalid byte becomes U+FFFD
+    ],
+    ids=["strict_mode", "replace_mode"],
+)
+def test_str_errors_handling(impl, mode, expected):
+    invalid_utf8 = b"\x6bhello\xffworld"  # 11-byte text string; \xFF is invalid UTF-8
+
+    if expected is None:
+        with pytest.raises(impl.CBORDecodeValueError, match="error decoding unicode string"):
+            impl.loads(invalid_utf8, str_errors=mode)
+    else:
+        result = impl.loads(invalid_utf8, str_errors=mode)
+        assert result == expected
+        assert len(result) == 11
+        assert result[5] == "\ufffd"
+
+
+@pytest.mark.parametrize(
+    "payload, mode, expected",
+    [
+        (b"\x66hello\xff", "replace", "hello\ufffd"),  # 6 bytes, <=256: stack path
+        (
+            b"\x79\x01\x05" + b"a" * 260 + b"\xff",
+            "replace",
+            "a" * 260 + "\ufffd",
+        ),  # 261 bytes, >256: heap path
+    ],
+    ids=["short_string", "long_string"],
+)
+def test_str_errors_different_lengths(impl, payload, mode, expected):
+    """Covers strings on both sides of the C decoder's 256-byte stack-buffer threshold."""
+    result = impl.loads(payload, str_errors=mode)
+    assert result == expected
+    assert result[-1] == "\ufffd"
+
+
+def test_str_errors_long_string_over_65536_bytes(impl):
+    """Issue #255: str_errors not respected for strings >65536 bytes."""
+    # 65537 bytes: 65536 'a' + a lone 0xC3 lead byte (truncated 2-byte sequence)
+    payload = unhexlify("7a00010001" + "61" * 65536 + "c3")
+    result = impl.loads(payload, str_errors="replace")
+    assert len(result) == 65537
+    assert result[-1] == "\ufffd"
+
+
+def test_str_errors_long_string_invalid_middle(impl):
+    """Test str_errors with invalid UTF-8 in the middle of a long string."""
+    # 65536 'a' + 0xC3 with no continuation byte + 65536 'b' = 131073 bytes
+    payload = unhexlify("7a00020001" + "61" * 65536 + "c3" + "62" * 65536)
+    result = impl.loads(payload, str_errors="replace")
+    assert len(result) == 131073
+    assert result[65536] == "\ufffd"
+    assert result[:65536] == "a" * 65536
+    assert result[65537:] == "b" * 65536