diff --git a/pypdf/filters.py b/pypdf/filters.py
index 67905dd0e..9084aa7dd 100644
--- a/pypdf/filters.py
+++ b/pypdf/filters.py
@@ -262,11 +262,11 @@ def _decode_png_prediction(data: bytes, columns: int, row_length: int) -> bytes:
             logger_warning("Image data is not rectangular. Adding padding.", source=__name__)
             data += b"\x00" * (row_length - remainder)
         assert len(data) % row_length == 0
-        output = []
-        previous_row_data = (0,) * row_length
+        output = bytearray()
+        previous_row_data = bytes(row_length)
         bpp = (row_length - 1) // columns  # recomputed locally to not change params
         for row in range(0, len(data), row_length):
-            row_data: list[int] = list(data[row : row + row_length])
+            row_data = bytearray(data[row : row + row_length])
             filter_byte = row_data[0]
 
             if filter_byte == 0:
@@ -315,8 +315,8 @@ def _decode_png_prediction(data: bytes, columns: int, row_length: int) -> bytes:
                 raise PdfReadError(
                     f"Unsupported PNG filter {filter_byte!r}"
                 )  # pragma: no cover
-            previous_row_data = tuple(row_data)
-            output.extend(row_data[1:])
+            previous_row_data = bytes(row_data)
+            output += row_data[1:]
         return bytes(output)
 
     @staticmethod
diff --git a/tests/test_filters.py b/tests/test_filters.py
index 5fb2fa215..8f3a5416d 100644
--- a/tests/test_filters.py
+++ b/tests/test_filters.py
@@ -1130,3 +1130,20 @@ def test_lzwdecode__invalid_first_code():
         match=r"^LZW code 258 out of range with empty base at table index 258\.$"
     ):
         LZWDecode.decode(data=lzw_data)
+
+
+@pytest.mark.timeout(5)  # Took about 20 seconds before this change.
+def test_flatedecode__decode_png_prediction__speed():
+    """Decode a large all-zero PNG-predicted buffer within the time limit."""
+    columns = 4096
+    rows = 120000
+    row_length = columns + 1  # +1 for PNG filter byte
+
+    # Every row starts with filter byte 0 (PNG None). `bytes(n)` is already
+    # zero-filled, so no per-row initialisation or extra buffer copy is needed.
+    data = bytes(rows * row_length)
+
+    result = FlateDecode._decode_png_prediction(data=data, columns=columns, row_length=row_length)
+
+    # Each decoded row contributes `row_length - 1` bytes (filter byte dropped).
+    assert len(result) == rows * columns