Skip to content

Commit cef74d0

Browse files
pythongh-148821: Always reject known multi-byte encodings in pyexpat
The XML parser (pyexpat) now raises ValueError for known unsupported multi-byte encodings such as "ISO-2022-JP", "utf8" (without hyphen) or "raw-unicode-escape" instead of failing later, when it encounters non-ASCII data.
1 parent 9633c52 commit cef74d0

45 files changed

Lines changed: 128 additions & 3 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

Include/codecs.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,12 @@ PyAPI_FUNC(PyObject *) PyCodec_NameReplaceErrors(PyObject *exc);
170170
PyAPI_DATA(const char *) Py_hexdigits;
171171
#endif
172172

173+
#ifndef Py_LIMITED_API
174+
PyAPI_FUNC(PyObject*) _PyCodec_LookupTextEncoding(
175+
const char *encoding,
176+
const char *alternate_command);
177+
#endif
178+
173179
#ifdef __cplusplus
174180
}
175181
#endif

Include/internal/pycore_codecs.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ extern int _PyCodec_UnregisterError(const char *name);
4545
in Python 3.5+?
4646
4747
*/
48-
extern PyObject* _PyCodec_LookupTextEncoding(
48+
PyAPI_FUNC(PyObject*) _PyCodec_LookupTextEncoding(
4949
const char *encoding,
5050
const char *alternate_command);
5151

Lib/codecs.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,8 @@ class CodecInfo(tuple):
9393

9494
def __new__(cls, encode, decode, streamreader=None, streamwriter=None,
9595
incrementalencoder=None, incrementaldecoder=None, name=None,
96-
*, _is_text_encoding=None):
96+
*, _is_text_encoding=None,
97+
_is_single_byte=None):
9798
self = tuple.__new__(cls, (encode, decode, streamreader, streamwriter))
9899
self.name = name
99100
self.encode = encode
@@ -104,6 +105,8 @@ def __new__(cls, encode, decode, streamreader=None, streamwriter=None,
104105
self.streamreader = streamreader
105106
if _is_text_encoding is not None:
106107
self._is_text_encoding = _is_text_encoding
108+
if _is_single_byte is not None:
109+
self._is_single_byte = _is_single_byte
107110
return self
108111

109112
def __repr__(self):

Lib/encodings/big5.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,4 +36,5 @@ def getregentry():
3636
incrementaldecoder=IncrementalDecoder,
3737
streamreader=StreamReader,
3838
streamwriter=StreamWriter,
39+
_is_single_byte=False,
3940
)

Lib/encodings/big5hkscs.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,4 +36,5 @@ def getregentry():
3636
incrementaldecoder=IncrementalDecoder,
3737
streamreader=StreamReader,
3838
streamwriter=StreamWriter,
39+
_is_single_byte=False,
3940
)

Lib/encodings/cp932.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,4 +36,5 @@ def getregentry():
3636
incrementaldecoder=IncrementalDecoder,
3737
streamreader=StreamReader,
3838
streamwriter=StreamWriter,
39+
_is_single_byte=False,
3940
)

Lib/encodings/cp949.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,4 +36,5 @@ def getregentry():
3636
incrementaldecoder=IncrementalDecoder,
3737
streamreader=StreamReader,
3838
streamwriter=StreamWriter,
39+
_is_single_byte=False,
3940
)

Lib/encodings/cp950.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,4 +36,5 @@ def getregentry():
3636
incrementaldecoder=IncrementalDecoder,
3737
streamreader=StreamReader,
3838
streamwriter=StreamWriter,
39+
_is_single_byte=False,
3940
)

Lib/encodings/euc_jis_2004.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,4 +36,5 @@ def getregentry():
3636
incrementaldecoder=IncrementalDecoder,
3737
streamreader=StreamReader,
3838
streamwriter=StreamWriter,
39+
_is_single_byte=False,
3940
)

Lib/encodings/euc_jisx0213.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,4 +36,5 @@ def getregentry():
3636
incrementaldecoder=IncrementalDecoder,
3737
streamreader=StreamReader,
3838
streamwriter=StreamWriter,
39+
_is_single_byte=False,
3940
)

0 commit comments

Comments
 (0)