Skip to content

Commit 2afbb90

Browse files
Fix assertions in byte-stream split decoding functions
1 parent 6cda8c8 commit 2afbb90

3 files changed

Lines changed: 11 additions & 10 deletions

File tree

parquet/internal/encoding/byte_stream_split_amd64.go

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@ func decodeByteStreamSplitBatchWidth4AVX2(data []byte, nValues, stride int, out
4545
}
4646
const width = 4
4747
debug.Assert(len(out) >= nValues*width, fmt.Sprintf("not enough space in output buffer for decoding, out: %d bytes, data: %d bytes", len(out), len(data)))
48-
debug.Assert(len(data) >= width*stride, fmt.Sprintf("not enough data for decoding, data: %d bytes, expected at least: %d bytes", len(data), width*stride))
48+
debug.Assert(len(data) >= 3*stride+nValues, fmt.Sprintf("not enough data for decoding, data: %d bytes, expected at least: %d bytes", len(data), 3*stride+nValues))
4949
_decodeByteStreamSplitWidth4AVX2(unsafe.Pointer(&data[0]), unsafe.Pointer(&out[0]), nValues, stride)
5050
}
5151

@@ -55,6 +55,6 @@ func decodeByteStreamSplitBatchWidth8AVX2(data []byte, nValues, stride int, out
5555
}
5656
const width = 8
5757
debug.Assert(len(out) >= nValues*width, fmt.Sprintf("not enough space in output buffer for decoding, out: %d bytes, data: %d bytes", len(out), len(data)))
58-
debug.Assert(len(data) >= width*stride, fmt.Sprintf("not enough data for decoding, data: %d bytes, expected at least: %d bytes", len(data), width*stride))
58+
debug.Assert(len(data) >= 7*stride+nValues, fmt.Sprintf("not enough data for decoding, data: %d bytes, expected at least: %d bytes", len(data), 7*stride+nValues))
5959
_decodeByteStreamSplitWidth8AVX2(unsafe.Pointer(&data[0]), unsafe.Pointer(&out[0]), nValues, stride)
6060
}

parquet/internal/encoding/byte_stream_split_arm64.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ func decodeByteStreamSplitBatchWidth4NEON(data []byte, nValues, stride int, out
4545
}
4646
const width = 4
4747
debug.Assert(len(out) >= nValues*width, fmt.Sprintf("not enough space in output buffer for decoding, out: %d bytes, data: %d bytes", len(out), len(data)))
48-
debug.Assert(len(data) >= width*stride, fmt.Sprintf("not enough data for decoding, data: %d bytes, expected at least: %d bytes", len(data), width*stride))
48+
debug.Assert(len(data) >= 3*stride+nValues, fmt.Sprintf("not enough data for decoding, data: %d bytes, expected at least: %d bytes", len(data), 3*stride+nValues))
4949
_decodeByteStreamSplitWidth4NEON(unsafe.Pointer(&data[0]), unsafe.Pointer(&out[0]), nValues, stride)
5050
}
5151

@@ -55,6 +55,6 @@ func decodeByteStreamSplitBatchWidth8NEON(data []byte, nValues, stride int, out
5555
}
5656
const width = 8
5757
debug.Assert(len(out) >= nValues*width, fmt.Sprintf("not enough space in output buffer for decoding, out: %d bytes, data: %d bytes", len(out), len(data)))
58-
debug.Assert(len(data) >= width*stride, fmt.Sprintf("not enough data for decoding, data: %d bytes, expected at least: %d bytes", len(data), width*stride))
58+
debug.Assert(len(data) >= 7*stride+nValues, fmt.Sprintf("not enough data for decoding, data: %d bytes, expected at least: %d bytes", len(data), 7*stride+nValues))
5959
_decodeByteStreamSplitWidth8NEON(unsafe.Pointer(&data[0]), unsafe.Pointer(&out[0]), nValues, stride)
6060
}

parquet/internal/encoding/byte_stream_split_decode.go

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,8 @@ var (
3535
func decodeByteStreamSplitBatchWidth4Default(data []byte, nValues, stride int, out []byte) {
3636
const width = 4
3737
debug.Assert(len(out) >= nValues*width, fmt.Sprintf("not enough space in output buffer for decoding, out: %d bytes, data: %d bytes", len(out), len(data)))
38-
debug.Assert(len(data) >= width*stride, fmt.Sprintf("not enough data for decoding, data: %d bytes, expected at least: %d bytes", len(data), width*stride))
38+
// the beginning of the data slice can be truncated, but for valid encoding we need at least (width-1)*stride+nValues bytes
39+
debug.Assert(len(data) >= 3*stride+nValues, fmt.Sprintf("not enough data for decoding, data: %d bytes, expected at least: %d bytes", len(data), 3*stride+nValues))
3940
s0 := data[:nValues]
4041
s1 := data[stride : stride+nValues]
4142
s2 := data[2*stride : 2*stride+nValues]
@@ -53,7 +54,7 @@ func decodeByteStreamSplitBatchWidth4Default(data []byte, nValues, stride int, o
5354
func decodeByteStreamSplitBatchWidth8Default(data []byte, nValues, stride int, out []byte) {
5455
const width = 8
5556
debug.Assert(len(out) >= nValues*width, fmt.Sprintf("not enough space in output buffer for decoding, out: %d bytes, data: %d bytes", len(out), len(data)))
56-
debug.Assert(len(data) >= width*stride, fmt.Sprintf("not enough data for decoding, data: %d bytes, expected at least: %d bytes", len(data), width*stride))
57+
debug.Assert(len(data) >= 7*stride+nValues, fmt.Sprintf("not enough data for decoding, data: %d bytes, expected at least: %d bytes", len(data), 7*stride+nValues))
5758
s0 := data[:nValues]
5859
s1 := data[stride : stride+nValues]
5960
s2 := data[2*stride : 2*stride+nValues]
@@ -75,7 +76,7 @@ func decodeByteStreamSplitBatchWidth8Default(data []byte, nValues, stride int, o
7576
// 'out' must have space for at least nValues slices.
7677
func decodeByteStreamSplitBatchFLBA(data []byte, nValues, stride, width int, out []parquet.FixedLenByteArray) {
7778
debug.Assert(len(out) >= nValues, fmt.Sprintf("not enough space in output slice for decoding, out: %d values, data: %d values", len(out), nValues))
78-
debug.Assert(len(data) >= width*stride, fmt.Sprintf("not enough data for decoding, data: %d bytes, expected at least: %d bytes", len(data), width*stride))
79+
debug.Assert(len(data) >= (width-1)*stride+nValues, fmt.Sprintf("not enough data for decoding, data: %d bytes, expected at least: %d bytes", len(data), (width-1)*stride+nValues))
7980
for stream := 0; stream < width; stream++ {
8081
for element := 0; element < nValues; element++ {
8182
encLoc := stride*stream + element
@@ -90,7 +91,7 @@ func decodeByteStreamSplitBatchFLBA(data []byte, nValues, stride, width int, out
9091
func decodeByteStreamSplitBatchFLBAWidth2(data []byte, nValues, stride int, out []parquet.FixedLenByteArray) {
9192
const width = 2
9293
debug.Assert(len(out) >= nValues, fmt.Sprintf("not enough space in output slice for decoding, out: %d values, data: %d values", len(out), nValues))
93-
debug.Assert(len(data) >= width*stride, fmt.Sprintf("not enough data for decoding, data: %d bytes, expected at least: %d bytes", len(data), width*stride))
94+
debug.Assert(len(data) >= stride+nValues, fmt.Sprintf("not enough data for decoding, data: %d bytes, expected at least: %d bytes", len(data), stride+nValues))
9495
s0 := data[:nValues]
9596
s1 := data[stride : stride+nValues]
9697
for i := range nValues {
@@ -105,7 +106,7 @@ func decodeByteStreamSplitBatchFLBAWidth2(data []byte, nValues, stride int, out
105106
func decodeByteStreamSplitBatchFLBAWidth4(data []byte, nValues, stride int, out []parquet.FixedLenByteArray) {
106107
const width = 4
107108
debug.Assert(len(out) >= nValues, fmt.Sprintf("not enough space in output slice for decoding, out: %d values, data: %d values", len(out), nValues))
108-
debug.Assert(len(data) >= width*stride, fmt.Sprintf("not enough data for decoding, data: %d bytes, expected at least: %d bytes", len(data), width*stride))
109+
debug.Assert(len(data) >= 3*stride+nValues, fmt.Sprintf("not enough data for decoding, data: %d bytes, expected at least: %d bytes", len(data), 3*stride+nValues))
109110
s0 := data[:nValues]
110111
s1 := data[stride : stride+nValues]
111112
s2 := data[stride*2 : stride*2+nValues]
@@ -122,7 +123,7 @@ func decodeByteStreamSplitBatchFLBAWidth4(data []byte, nValues, stride int, out
122123
func decodeByteStreamSplitBatchFLBAWidth8(data []byte, nValues, stride int, out []parquet.FixedLenByteArray) {
123124
const width = 8
124125
debug.Assert(len(out) >= nValues, fmt.Sprintf("not enough space in output slice for decoding, out: %d values, data: %d values", len(out), nValues))
125-
debug.Assert(len(data) >= width*stride, fmt.Sprintf("not enough data for decoding, data: %d bytes, expected at least: %d bytes", len(data), width*stride))
126+
debug.Assert(len(data) >= 7*stride+nValues, fmt.Sprintf("not enough data for decoding, data: %d bytes, expected at least: %d bytes", len(data), 7*stride+nValues))
126127
s0 := data[:nValues]
127128
s1 := data[stride : stride+nValues]
128129
s2 := data[stride*2 : stride*2+nValues]

0 commit comments

Comments (0)