diff --git a/arrow-array/src/array/struct_array.rs b/arrow-array/src/array/struct_array.rs index 1404fc08fe8..96361c4ab53 100644 --- a/arrow-array/src/array/struct_array.rs +++ b/arrow-array/src/array/struct_array.rs @@ -294,10 +294,19 @@ impl StructArray { impl From for StructArray { fn from(data: ArrayData) -> Self { + let parent_offset = data.offset(); + let parent_len = data.len(); + let fields = data .child_data() .iter() - .map(|cd| make_array(cd.clone())) + .map(|cd| { + if parent_offset != 0 || parent_len != cd.len() { + make_array(cd.slice(parent_offset, parent_len)) + } else { + make_array(cd.clone()) + } + }) .collect(); Self { @@ -523,6 +532,81 @@ mod tests { assert_eq!(0, struct_array.offset()); } + #[test] + fn test_struct_array_from_data_with_offset_and_length() { + // Various ways to make the struct array: + // + // [{x: 2}, {x: 3}, None] + // + // from slicing larger buffers/arrays with offsets and lengths + let int_arr = Int32Array::from(vec![1, 2, 3, 4, 5]); + let int_field = Field::new("x", DataType::Int32, false); + let struct_nulls = NullBuffer::new(BooleanBuffer::from(vec![true, true, false])); + let int_data = int_arr.to_data(); + // Case 1: Offset + length, nulls are not sliced + let case1 = ArrayData::builder(DataType::Struct(Fields::from(vec![int_field.clone()]))) + .len(3) + .offset(1) + .nulls(Some(struct_nulls)) + .add_child_data(int_data.clone()) + .build() + .unwrap(); + + // Case 2: Offset + length, nulls are sliced + let struct_nulls = + NullBuffer::new(BooleanBuffer::from(vec![true, true, true, false, true]).slice(1, 3)); + let case2 = ArrayData::builder(DataType::Struct(Fields::from(vec![int_field.clone()]))) + .len(3) + .offset(1) + .nulls(Some(struct_nulls.clone())) + .add_child_data(int_data.clone()) + .build() + .unwrap(); + + // Case 3: struct length is smaller than child length but no offset + let offset_int_data = int_data.slice(1, 4); + let case3 = ArrayData::builder(DataType::Struct(Fields::from(vec![int_field.clone()]))) + .len(3) + .nulls(Some(struct_nulls)) + .add_child_data(offset_int_data) + .build() + .unwrap(); + + let expected = StructArray::new( + Fields::from(vec![int_field.clone()]), + vec![Arc::new(int_arr)], + Some(NullBuffer::new(BooleanBuffer::from(vec![ + true, true, true, false, true, + ]))), + ) + .slice(1, 3); + + for case in [case1, case2, case3] { + let struct_arr_from_data = StructArray::from(case); + assert_eq!(struct_arr_from_data, expected); + assert_eq!(struct_arr_from_data.column(0), expected.column(0)); + } + } + + #[test] + #[should_panic(expected = "assertion failed: (offset + length) <= self.len()")] + fn test_struct_array_from_data_with_offset_and_length_error() { + let int_arr = Int32Array::from(vec![1, 2, 3, 4, 5]); + let int_field = Field::new("x", DataType::Int32, false); + let struct_nulls = NullBuffer::new(BooleanBuffer::from(vec![true, true, false])); + let int_data = int_arr.to_data(); + // If parent offset is 3 and len is 3 then child must have 6 items + let struct_data = + ArrayData::builder(DataType::Struct(Fields::from(vec![int_field.clone()]))) + .len(3) + .offset(3) + .nulls(Some(struct_nulls)) + .add_child_data(int_data) + .build() + .unwrap(); + let _ = StructArray::from(struct_data); + } + /// validates that struct can be accessed using `column_name` as index i.e. `struct_array["column_name"]`. #[test] fn test_struct_array_index_access() { diff --git a/arrow-ipc/src/reader.rs b/arrow-ipc/src/reader.rs index a0bbf38181c..83dc5702dc9 100644 --- a/arrow-ipc/src/reader.rs +++ b/arrow-ipc/src/reader.rs @@ -2548,12 +2548,7 @@ mod tests { fn test_invalid_struct_array_ipc_read_errors() { let a_field = Field::new("a", DataType::Int32, false); let b_field = Field::new("b", DataType::Int32, false); - - let schema = Arc::new(Schema::new(vec![Field::new_struct( - "s", - vec![a_field.clone(), b_field.clone()], - false, - )])); + let struct_fields = Fields::from(vec![a_field.clone(), b_field.clone()]); let a_array_data = ArrayData::builder(a_field.data_type().clone()) .len(4) @@ -2566,17 +2561,14 @@ mod tests { .build() .unwrap(); - let struct_data_type = schema.field(0).data_type(); - let invalid_struct_arr = unsafe { - make_array( - ArrayData::builder(struct_data_type.clone()) - .len(4) - .add_child_data(a_array_data) - .add_child_data(b_array_data) - .build_unchecked(), + StructArray::new_unchecked( + struct_fields, + vec![make_array(a_array_data), make_array(b_array_data)], + None, ) }; + expect_ipc_validation_error( Arc::new(invalid_struct_arr), "Invalid argument error: Incorrect array length for StructArray field \"b\", expected 4 got 3",