Skip to content

Commit 5bf10f2

Browse files
zeroshade and Matt authored
perf(parquet/file): avoid double bool bitmap conversion (#707)
### Rationale for this change Boolean columns currently get double converted when transferring between Arrow and Parquet ### What changes are included in this PR? **1. Arrow bitutil (`arrow/bitutil/bitmaps.go`)** - Added `AppendBitmap()` method to `BitmapWriter` - Directly copies bits from source bitmap using efficient `CopyBitmap()` **2. Parquet encoder (`parquet/internal/encoding/boolean_encoder.go`)** - Added `PutBitmap()` method to `PlainBooleanEncoder` - Writes bitmap data directly without bool slice conversion **3. Parquet decoder (`parquet/internal/encoding/boolean_decoder.go`)** - Added `DecodeToBitmap()` method to `PlainBooleanDecoder` - Reads directly into output bitmap - Optimized fast path for byte-aligned cases **4. Column writer (`parquet/file/column_writer_types.gen.go`)** - Added `WriteBitmapBatch()` for non-nullable boolean columns - Added `WriteBitmapBatchSpaced()` for nullable boolean columns - Internal helper methods `writeBitmapValues()` and `writeBitmapValuesSpaced()` **5. Arrow-Parquet bridge (`parquet/pqarrow/encode_arrow.go`)** - Modified `writeDenseArrow()` to detect boolean arrays - Uses bitmap methods when available - Falls back to original `[]bool` path if needed (backward compatible) ### Are these changes tested? Yes, and new benchmarks are added as appropriate ### Are there any user-facing changes? 
Just performance: ### Non-Nullable Boolean Columns ``` BenchmarkBooleanBitmapWrite/1K-16 314847 19126 ns/op 6.54 MB/s 36057 B/op 237 allocs/op BenchmarkBooleanBitmapWrite/10K-16 174715 33985 ns/op 36.78 MB/s 53266 B/op 247 allocs/op BenchmarkBooleanBitmapWrite/100K-16 34099 175655 ns/op 71.16 MB/s 218866 B/op 340 allocs/op BenchmarkBooleanBitmapWrite/1M-16 3778 1568818 ns/op 79.68 MB/s 1763712 B/op 1237 allocs/op ``` ### Nullable Boolean Columns (10% null rate) ``` BenchmarkBooleanBitmapWriteNullable/1K-16 214921 28002 ns/op 4.46 MB/s 39706 B/op 249 allocs/op BenchmarkBooleanBitmapWriteNullable/10K-16 44618 134483 ns/op 9.29 MB/s 113690 B/op 268 allocs/op BenchmarkBooleanBitmapWriteNullable/100K-16 5239 1149658 ns/op 10.87 MB/s 657178 B/op 451 allocs/op BenchmarkBooleanBitmapWriteNullable/1M-16 556 10926274 ns/op 11.44 MB/s 5575200 B/op 2219 allocs/op ``` **Key Observations:** - Direct bitmap path successfully avoids `[]bool` conversion - Throughput scales well with data size (6.5 → 80 MB/s for non-nullable) - Memory usage remains efficient with minimal allocations per operation - Nullable columns have overhead from validity bitmap processing (expected) --------- Co-authored-by: Matt <zero@gibson>
1 parent fc20f37 commit 5bf10f2

11 files changed

Lines changed: 868 additions & 9 deletions

arrow/bitutil/bitmaps.go

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,39 @@ func (b *BitmapWriter) AppendBools(in []bool) int {
167167
return space
168168
}
169169

170+
// AppendBitmap writes bits directly from a source bitmap to this bitmap writer,
// avoiding the intermediate []bool conversion. Returns the number of bits written,
// which may be smaller than length when the writer's remaining capacity is exceeded.
func (b *BitmapWriter) AppendBitmap(srcBitmap []byte, srcOffset int64, length int64) int64 {
	// Clamp the request to the writer's remaining capacity.
	space := int64(min(b.length-b.pos, int(length)))
	if space == 0 {
		return 0
	}

	// Absolute destination bit position: full bytes already consumed plus the
	// bit position inside the current byte. bitMask has exactly one bit set,
	// so TrailingZeros32 recovers that position.
	bitOffset := bits.TrailingZeros32(uint32(b.bitMask))
	dstOffset := int64(b.byteOffset)*8 + int64(bitOffset)

	// Flush curByte to buffer before CopyBitmap overwrites it
	// Similar to how AppendBools writes curByte to appslice[0]
	b.buf[b.byteOffset] = b.curByte

	// Use CopyBitmap for efficient bit-level copying
	CopyBitmap(srcBitmap, int(srcOffset), int(space), b.buf, int(dstOffset))

	// Update writer state: advance the logical position, then recompute the
	// current bit mask and byte offset from the new absolute bit position.
	b.pos += int(space)
	newBitOffset := (bitOffset + int(space)) % 8
	b.bitMask = BitMask[newBitOffset]
	b.byteOffset += (bitOffset + int(space)) / 8

	// Reload curByte to reflect the current byte's state after CopyBitmap
	// We must reload even if pos == length, as Finish() may need to write curByte
	if b.byteOffset < len(b.buf) {
		b.curByte = b.buf[b.byteOffset]
	}

	return space
}
202+
170203
// Finish flushes the final byte out to the byteslice in case it was not already
171204
// on a byte aligned boundary.
172205
func (b *BitmapWriter) Finish() {

arrow/bitutil/bitmaps_test.go

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -596,3 +596,133 @@ func BenchmarkBitmapAnd(b *testing.B) {
596596
})
597597
}
598598
}
599+
600+
func TestBitmapWriterAppendBitmap(t *testing.T) {
601+
tests := []struct {
602+
name string
603+
srcBits []bool
604+
dstOffset int
605+
srcOffset int64
606+
length int64
607+
wantResult []bool
608+
}{
609+
{
610+
name: "append_aligned",
611+
srcBits: []bool{true, false, true, true, false, false, true, false},
612+
dstOffset: 0,
613+
srcOffset: 0,
614+
length: 8,
615+
wantResult: []bool{true, false, true, true, false, false, true, false},
616+
},
617+
{
618+
name: "append_unaligned_source",
619+
srcBits: []bool{false, false, true, false, true, true, false, false, true, false},
620+
dstOffset: 0,
621+
srcOffset: 2,
622+
length: 6,
623+
wantResult: []bool{true, false, true, true, false, false},
624+
},
625+
{
626+
name: "append_unaligned_dest",
627+
srcBits: []bool{true, true, false, false},
628+
dstOffset: 3,
629+
srcOffset: 0,
630+
length: 4,
631+
wantResult: []bool{true, true, false, false},
632+
},
633+
{
634+
name: "append_partial_byte",
635+
srcBits: []bool{true, false, true},
636+
dstOffset: 0,
637+
srcOffset: 0,
638+
length: 3,
639+
wantResult: []bool{true, false, true},
640+
},
641+
{
642+
name: "append_multiple_bytes",
643+
srcBits: []bool{true, false, true, false, true, false, true, false, false, true, false, true, false, true, false, true},
644+
dstOffset: 0,
645+
srcOffset: 0,
646+
length: 16,
647+
wantResult: []bool{true, false, true, false, true, false, true, false, false, true, false, true, false, true, false, true},
648+
},
649+
}
650+
651+
for _, tt := range tests {
652+
t.Run(tt.name, func(t *testing.T) {
653+
// Create source bitmap
654+
srcBytes := make([]byte, bitutil.BytesForBits(int64(len(tt.srcBits))))
655+
for i, bit := range tt.srcBits {
656+
if bit {
657+
bitutil.SetBit(srcBytes, i)
658+
}
659+
}
660+
661+
// Create destination bitmap
662+
dstBytes := make([]byte, bitutil.BytesForBits(int64(tt.dstOffset+len(tt.wantResult))))
663+
writer := bitutil.NewBitmapWriter(dstBytes, tt.dstOffset, len(tt.wantResult))
664+
665+
// Append bitmap
666+
written := writer.AppendBitmap(srcBytes, tt.srcOffset, tt.length)
667+
writer.Finish()
668+
669+
// Verify
670+
assert.Equal(t, tt.length, written, "wrong number of bits written")
671+
for i, expectedBit := range tt.wantResult {
672+
actualBit := bitutil.BitIsSet(dstBytes, tt.dstOffset+i)
673+
assert.Equal(t, expectedBit, actualBit, "bit mismatch at position %d", i)
674+
}
675+
})
676+
}
677+
}
678+
679+
func TestBitmapWriterAppendBitmapEmpty(t *testing.T) {
680+
dstBytes := make([]byte, 10)
681+
writer := bitutil.NewBitmapWriter(dstBytes, 0, 8)
682+
683+
// Append zero bits
684+
written := writer.AppendBitmap([]byte{0xFF}, 0, 0)
685+
assert.Equal(t, int64(0), written)
686+
}
687+
688+
func TestBitmapWriterAppendBitmapFull(t *testing.T) {
689+
dstBytes := make([]byte, 1)
690+
writer := bitutil.NewBitmapWriter(dstBytes, 0, 4)
691+
692+
srcBytes := []byte{0xFF}
693+
694+
// Write 4 bits
695+
written := writer.AppendBitmap(srcBytes, 0, 4)
696+
assert.Equal(t, int64(4), written)
697+
698+
// Try to write more (should write 0 since buffer is full)
699+
written = writer.AppendBitmap(srcBytes, 0, 4)
700+
assert.Equal(t, int64(0), written)
701+
}
702+
703+
func TestBitmapWriterAppendBitmapLarge(t *testing.T) {
704+
// Test with large bitmap (1024 bits = 128 bytes)
705+
numBits := 1024
706+
srcBytes := make([]byte, bitutil.BytesForBits(int64(numBits)))
707+
dstBytes := make([]byte, bitutil.BytesForBits(int64(numBits)))
708+
709+
// Create alternating pattern
710+
for i := 0; i < numBits; i++ {
711+
if i%2 == 0 {
712+
bitutil.SetBit(srcBytes, i)
713+
}
714+
}
715+
716+
writer := bitutil.NewBitmapWriter(dstBytes, 0, numBits)
717+
written := writer.AppendBitmap(srcBytes, 0, int64(numBits))
718+
writer.Finish()
719+
720+
assert.Equal(t, int64(numBits), written)
721+
722+
// Verify pattern
723+
for i := 0; i < numBits; i++ {
724+
expected := i%2 == 0
725+
actual := bitutil.BitIsSet(dstBytes, i)
726+
assert.Equal(t, expected, actual, "bit mismatch at position %d", i)
727+
}
728+
}

parquet/file/column_writer_test.go

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -862,3 +862,94 @@ func TestWriteDataFailure(t *testing.T) {
862862
assert.Equal(t, err, failureErr)
863863
assert.Equal(t, int64(0), wr.TotalBytesWritten())
864864
}
865+
866+
func (b *BooleanValueWriterSuite) TestWriteBitmapBatch() {
867+
b.SetupSchema(parquet.Repetitions.Required, 1)
868+
writer := b.buildWriter(SmallSize, parquet.DefaultColumnProperties(), parquet.WithVersion(parquet.V1_0)).(*file.BooleanColumnChunkWriter)
869+
870+
// Create test bitmap with alternating pattern for SmallSize elements
871+
expected := make([]bool, SmallSize)
872+
bitmapBytes := make([]byte, bitutil.BytesForBits(int64(SmallSize)))
873+
for i := 0; i < SmallSize; i++ {
874+
val := (i % 4) < 2 // Pattern: true, true, false, false, repeat
875+
expected[i] = val
876+
if val {
877+
bitutil.SetBit(bitmapBytes, i)
878+
}
879+
}
880+
881+
// Write using WriteBitmapBatch
882+
n, err := writer.WriteBitmapBatch(bitmapBytes, 0, SmallSize, nil, nil)
883+
b.NoError(err)
884+
b.Equal(int64(SmallSize), n)
885+
886+
writer.Close()
887+
b.readColumn(compress.Codecs.Uncompressed)
888+
b.Equal(expected, b.ValuesOut)
889+
}
890+
891+
func (b *BooleanValueWriterSuite) TestWriteBitmapBatchUnaligned() {
892+
b.SetupSchema(parquet.Repetitions.Required, 1)
893+
writer := b.buildWriter(SmallSize, parquet.DefaultColumnProperties(), parquet.WithVersion(parquet.V1_0)).(*file.BooleanColumnChunkWriter)
894+
895+
// Create source bitmap with more elements
896+
srcSize := SmallSize + 10
897+
srcBits := make([]bool, srcSize)
898+
bitmapBytes := make([]byte, bitutil.BytesForBits(int64(srcSize)))
899+
for i := 0; i < srcSize; i++ {
900+
val := (i % 3) == 0
901+
srcBits[i] = val
902+
if val {
903+
bitutil.SetBit(bitmapBytes, i)
904+
}
905+
}
906+
907+
// Write SmallSize bits starting from offset 5
908+
expected := srcBits[5 : 5+SmallSize]
909+
n, err := writer.WriteBitmapBatch(bitmapBytes, 5, SmallSize, nil, nil)
910+
b.NoError(err)
911+
b.Equal(int64(SmallSize), n)
912+
913+
writer.Close()
914+
b.readColumn(compress.Codecs.Uncompressed)
915+
b.Equal(expected, b.ValuesOut)
916+
}
917+
918+
func (b *BooleanValueWriterSuite) TestWriteBitmapBatchSpaced() {
919+
b.SetupSchema(parquet.Repetitions.Optional, 1)
920+
b.descr = b.Schema.Column(0)
921+
922+
// Create test data with nulls (every 4th element is null)
923+
bitmapBytes := make([]byte, bitutil.BytesForBits(int64(SmallSize)))
924+
validBits := make([]byte, bitutil.BytesForBits(int64(SmallSize)))
925+
defLevels := make([]int16, SmallSize)
926+
expected := make([]bool, 0)
927+
928+
for i := 0; i < SmallSize; i++ {
929+
if i%4 == 0 {
930+
// Null value
931+
defLevels[i] = 0
932+
} else {
933+
// Valid value
934+
defLevels[i] = 1
935+
bitutil.SetBit(validBits, i)
936+
val := (i % 3) == 1
937+
if val {
938+
bitutil.SetBit(bitmapBytes, i)
939+
}
940+
expected = append(expected, val)
941+
}
942+
}
943+
944+
writer := b.buildWriter(int64(SmallSize), parquet.DefaultColumnProperties(), parquet.WithVersion(parquet.V1_0)).(*file.BooleanColumnChunkWriter)
945+
writer.WriteBitmapBatchSpaced(bitmapBytes, 0, SmallSize, defLevels, nil, validBits, 0)
946+
writer.Close()
947+
948+
// Read back with proper def levels setup
949+
b.GenerateData(SmallSize) // This initializes DefLevels and DefLevelsOut properly for optional
950+
values := b.readColumn(compress.Codecs.Uncompressed)
951+
b.Equal(int64(len(expected)), values) // Should read only non-null values
952+
953+
// ValuesOut should contain only the non-null values
954+
b.Equal(expected, b.ValuesOut.([]bool)[:values])
955+
}

0 commit comments

Comments
 (0)