Skip to content

Commit dae7a71

Browse files
authored
ArrayData Enumeration for Primitive, Binary and UTF8 (#3749)
* Add BooleanBuffer * Add NullBuffer * Add PrimitiveArrayData * Add BytesArrayData * Move module * Make private for now * Move NullBuffer to arrow-buffer * Format * More docs * Seal traits * Doc * Review feedback
1 parent 96791ea commit dae7a71

File tree

7 files changed

+812
-2
lines changed

7 files changed

+812
-2
lines changed

arrow-buffer/src/buffer/boolean.rs

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use crate::{bit_util, Buffer};
19+
20+
/// A slice-able [`Buffer`] containing bit-packed booleans
21+
#[derive(Debug, Clone)]
22+
pub struct BooleanBuffer {
23+
buffer: Buffer,
24+
offset: usize,
25+
len: usize,
26+
}
27+
28+
impl BooleanBuffer {
29+
/// Create a new [`BooleanBuffer`] from a [`Buffer`], an `offset` and `length` in bits
30+
///
31+
/// # Panics
32+
///
33+
/// This method will panic if `buffer` is not large enough
34+
pub fn new(buffer: Buffer, offset: usize, len: usize) -> Self {
35+
let total_len = offset.saturating_add(len);
36+
let bit_len = buffer.len().saturating_mul(8);
37+
assert!(total_len <= bit_len);
38+
Self {
39+
buffer,
40+
offset,
41+
len,
42+
}
43+
}
44+
45+
/// Returns the number of set bits in this buffer
46+
pub fn count_set_bits(&self) -> usize {
47+
self.buffer.count_set_bits_offset(self.offset, self.len)
48+
}
49+
50+
/// Returns `true` if the bit at index `i` is set
51+
///
52+
/// # Panics
53+
///
54+
/// Panics if `i >= self.len()`
55+
#[inline]
56+
pub fn is_set(&self, i: usize) -> bool {
57+
assert!(i < self.len);
58+
unsafe { bit_util::get_bit_raw(self.buffer.as_ptr(), i + self.offset) }
59+
}
60+
61+
/// Returns the offset of this [`BooleanBuffer`] in bits
62+
#[inline]
63+
pub fn offset(&self) -> usize {
64+
self.offset
65+
}
66+
67+
/// Returns the length of this [`BooleanBuffer`] in bits
68+
#[inline]
69+
pub fn len(&self) -> usize {
70+
self.len
71+
}
72+
73+
/// Returns true if this [`BooleanBuffer`] is empty
74+
#[inline]
75+
pub fn is_empty(&self) -> bool {
76+
self.len == 0
77+
}
78+
79+
/// Returns the packed values of this [`BooleanBuffer`] not including any offset
80+
#[inline]
81+
pub fn values(&self) -> &[u8] {
82+
&self.buffer
83+
}
84+
}

arrow-buffer/src/buffer/mod.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,10 @@ pub use immutable::*;
2525
mod mutable;
2626
pub use mutable::*;
2727
mod ops;
28+
pub use ops::*;
2829
mod scalar;
2930
pub use scalar::*;
30-
31-
pub use ops::*;
31+
mod boolean;
32+
pub use boolean::*;
33+
mod null;
34+
pub use null::*;

arrow-buffer/src/buffer/null.rs

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use crate::buffer::BooleanBuffer;
19+
20+
#[derive(Debug, Clone)]
21+
pub struct NullBuffer {
22+
buffer: BooleanBuffer,
23+
null_count: usize,
24+
}
25+
26+
impl NullBuffer {
27+
/// Create a new [`NullBuffer`] computing the null count
28+
pub fn new(buffer: BooleanBuffer) -> Self {
29+
let null_count = buffer.len() - buffer.count_set_bits();
30+
Self { buffer, null_count }
31+
}
32+
33+
/// Create a new [`NullBuffer`] with the provided `buffer` and `null_count`
34+
///
35+
/// # Safety
36+
///
37+
/// `buffer` must contain `null_count` `0` bits
38+
pub unsafe fn new_unchecked(buffer: BooleanBuffer, null_count: usize) -> Self {
39+
Self { buffer, null_count }
40+
}
41+
42+
/// Returns the length of this [`NullBuffer`]
43+
#[inline]
44+
pub fn len(&self) -> usize {
45+
self.buffer.len()
46+
}
47+
48+
/// Returns true if this [`NullBuffer`] is empty
49+
#[inline]
50+
pub fn is_empty(&self) -> bool {
51+
self.buffer.is_empty()
52+
}
53+
54+
/// Returns the null count for this [`NullBuffer`]
55+
#[inline]
56+
pub fn null_count(&self) -> usize {
57+
self.null_count
58+
}
59+
60+
/// Returns `true` if the value at `idx` is not null
61+
#[inline]
62+
pub fn is_valid(&self, idx: usize) -> bool {
63+
self.buffer.is_set(idx)
64+
}
65+
66+
/// Returns `true` if the value at `idx` is null
67+
#[inline]
68+
pub fn is_null(&self, idx: usize) -> bool {
69+
!self.is_valid(idx)
70+
}
71+
72+
/// Returns the inner buffer
73+
#[inline]
74+
pub fn inner(&self) -> &BooleanBuffer {
75+
&self.buffer
76+
}
77+
}
78+
79+
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_size() {
        // Wrapping a `NullBuffer` in an `Option` is expected to be free: the niche
        // optimisation should let `None` be encoded without any extra space
        let bare = std::mem::size_of::<NullBuffer>();
        let optional = std::mem::size_of::<Option<NullBuffer>>();
        assert_eq!(bare, optional);
    }
}

0 commit comments

Comments
 (0)