Skip to content

Commit bb4fc59

Browse files
authored
Cleanup FFI interface (#3684) (#3683) (#3685)
* Cleanup FFI interface (#3684) (#3683) * Add import example * Tweak doc example * Use ManuallyDrop to model external memory
1 parent 98ce68f commit bb4fc59

File tree

4 files changed

+126
-95
lines changed

4 files changed

+126
-95
lines changed

arrow/src/array/ffi.rs

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ impl TryFrom<ArrayData> for ffi::ArrowArray {
4747
/// # Safety
4848
/// Assumes that these pointers represent valid C Data Interfaces, both in memory
4949
/// representation and lifetime via the `release` mechanism.
50+
#[deprecated(note = "Use ArrowArray::new")]
51+
#[allow(deprecated)]
5052
pub unsafe fn make_array_from_raw(
5153
array: *const ffi::FFI_ArrowArray,
5254
schema: *const ffi::FFI_ArrowSchema,
@@ -91,30 +93,22 @@ mod tests {
9193
StructArray, UInt32Array, UInt64Array,
9294
},
9395
datatypes::{DataType, Field},
94-
ffi::ArrowArray,
96+
ffi::{ArrowArray, FFI_ArrowArray, FFI_ArrowSchema},
9597
};
9698
use std::convert::TryFrom;
9799
use std::sync::Arc;
98100

99101
fn test_round_trip(expected: &ArrayData) -> Result<()> {
100-
// create a `ArrowArray` from the data.
101-
let d1 = ArrowArray::try_from(expected.clone())?;
102-
103-
// here we export the array as 2 pointers. We would have no control over ownership if it was not for
104-
// the release mechanism.
105-
let (array, schema) = ArrowArray::into_raw(d1);
102+
// here we export the array
103+
let array = FFI_ArrowArray::new(expected);
104+
let schema = FFI_ArrowSchema::try_from(expected.data_type())?;
106105

107106
// simulate an external consumer by being the consumer
108-
let d1 = unsafe { ArrowArray::try_from_raw(array, schema) }?;
107+
let d1 = ArrowArray::new(array, schema);
109108

110109
let result = &ArrayData::try_from(d1)?;
111110

112111
assert_eq!(result, expected);
113-
114-
unsafe {
115-
Arc::from_raw(array);
116-
Arc::from_raw(schema);
117-
}
118112
Ok(())
119113
}
120114

arrow/src/array/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ pub use arrow_data::{
3434
pub use arrow_data::transform::{Capacities, MutableArrayData};
3535

3636
#[cfg(feature = "ffi")]
37+
#[allow(deprecated)]
3738
pub use self::ffi::{export_array_into_raw, make_array_from_raw};
3839

3940
// --------------------- Array's values comparison ---------------------

arrow/src/ffi.rs

Lines changed: 111 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -24,68 +24,63 @@
2424
//! The second interface maps native Rust types to the Rust-specific implementation of Arrow such as `format` to `Datatype`,
2525
//! `Buffer`, etc. This is handled by `ArrowArray`.
2626
//!
27+
//!
28+
//! Export to FFI
29+
//!
2730
//! ```rust
2831
//! # use std::sync::Arc;
29-
//! # use arrow::array::{Int32Array, Array, ArrayData, export_array_into_raw, make_array, make_array_from_raw};
30-
//! # use arrow::error::{Result, ArrowError};
32+
//! # use arrow::array::{Int32Array, Array, ArrayData, make_array};
33+
//! # use arrow::error::Result;
3134
//! # use arrow::compute::kernels::arithmetic;
3235
//! # use arrow::ffi::{ArrowArray, FFI_ArrowArray, FFI_ArrowSchema};
33-
//! # use std::convert::TryFrom;
3436
//! # fn main() -> Result<()> {
3537
//! // create an array natively
3638
//! let array = Int32Array::from(vec![Some(1), None, Some(3)]);
39+
//! let data = array.into_data();
3740
//!
38-
//! // export it
39-
//!
40-
//! let ffi_array = ArrowArray::try_new(array.data().clone())?;
41-
//! let (array_ptr, schema_ptr) = ArrowArray::into_raw(ffi_array);
42-
//!
43-
//! // consumed and used by something else...
41+
//! // Export it
42+
//! let out_array = FFI_ArrowArray::new(&data);
43+
//! let out_schema = FFI_ArrowSchema::try_from(data.data_type())?;
4444
//!
4545
//! // import it
46-
//! let array = unsafe { make_array_from_raw(array_ptr, schema_ptr)? };
46+
//! let array = ArrowArray::new(out_array, out_schema);
47+
//! let array = Int32Array::from(ArrayData::try_from(array)?);
4748
//!
4849
//! // perform some operation
49-
//! let array = array.as_any().downcast_ref::<Int32Array>().ok_or(
50-
//! ArrowError::ParseError("Expects an int32".to_string()),
51-
//! )?;
5250
//! let array = arithmetic::add(&array, &array)?;
5351
//!
5452
//! // verify
5553
//! assert_eq!(array, Int32Array::from(vec![Some(2), None, Some(6)]));
54+
//! #
55+
//! # Ok(())
56+
//! # }
57+
//! ```
5658
//!
57-
//! // Simulate if raw pointers are provided by consumer
58-
//! let array = make_array(Int32Array::from(vec![Some(1), None, Some(3)]).into_data());
59-
//!
60-
//! let out_array = Box::new(FFI_ArrowArray::empty());
61-
//! let out_schema = Box::new(FFI_ArrowSchema::empty());
62-
//! let out_array_ptr = Box::into_raw(out_array);
63-
//! let out_schema_ptr = Box::into_raw(out_schema);
64-
//!
65-
//! // export array into raw pointers from consumer
66-
//! unsafe { export_array_into_raw(array, out_array_ptr, out_schema_ptr)?; };
67-
//!
68-
//! // import it
69-
//! let array = unsafe { make_array_from_raw(out_array_ptr, out_schema_ptr)? };
59+
//! Import from FFI
7060
//!
71-
//! // perform some operation
72-
//! let array = array.as_any().downcast_ref::<Int32Array>().ok_or(
73-
//! ArrowError::ParseError("Expects an int32".to_string()),
74-
//! )?;
75-
//! let array = arithmetic::add(&array, &array)?;
76-
//!
77-
//! // verify
78-
//! assert_eq!(array, Int32Array::from(vec![Some(2), None, Some(6)]));
61+
//! ```
62+
//! # use std::ptr::addr_of_mut;
63+
//! # use arrow::ffi::{ArrowArray, FFI_ArrowArray, FFI_ArrowSchema};
64+
//! # use arrow_array::{ArrayRef, make_array};
65+
//! # use arrow_schema::ArrowError;
66+
//! #
67+
//! /// A foreign data container that can export to C Data interface
68+
//! struct ForeignArray {};
7969
//!
80-
//! // (drop/release)
81-
//! unsafe {
82-
//! Box::from_raw(out_array_ptr);
83-
//! Box::from_raw(out_schema_ptr);
84-
//! Arc::from_raw(array_ptr);
85-
//! Arc::from_raw(schema_ptr);
70+
//! impl ForeignArray {
71+
//! /// Export from foreign array representation to C Data interface
72+
//! /// e.g. <https://github.com/apache/arrow/blob/fc1f9ebbc4c3ae77d5cfc2f9322f4373d3d19b8a/python/pyarrow/array.pxi#L1552>
73+
//! fn export_to_c(&self, array: *mut FFI_ArrowArray, schema: *mut FFI_ArrowSchema) {
74+
//! // ...
75+
//! }
8676
//! }
8777
//!
88-
//! Ok(())
78+
//! /// Import an [`ArrayRef`] from a [`ForeignArray`]
79+
//! fn import_array(foreign: &ForeignArray) -> Result<ArrayRef, ArrowError> {
80+
//! let mut schema = FFI_ArrowSchema::empty();
81+
//! let mut array = FFI_ArrowArray::empty();
82+
//! foreign.export_to_c(addr_of_mut!(array), addr_of_mut!(schema));
83+
//! Ok(make_array(ArrowArray::new(array, schema).try_into()?))
8984
//! }
9085
//! ```
9186
@@ -139,7 +134,15 @@ bitflags! {
139134

140135
/// ABI-compatible struct for `ArrowSchema` from C Data Interface
141136
/// See <https://arrow.apache.org/docs/format/CDataInterface.html#structure-definitions>
142-
/// This was created by bindgen
137+
///
138+
/// ```
139+
/// # use arrow::ffi::FFI_ArrowSchema;
140+
/// # use arrow_data::ArrayData;
141+
/// fn array_schema(data: &ArrayData) -> FFI_ArrowSchema {
142+
/// FFI_ArrowSchema::try_from(data.data_type()).unwrap()
143+
/// }
144+
/// ```
145+
///
143146
#[repr(C)]
144147
#[derive(Debug)]
145148
pub struct FFI_ArrowSchema {
@@ -394,7 +397,14 @@ fn bit_width(data_type: &DataType, i: usize) -> Result<usize> {
394397

395398
/// ABI-compatible struct for ArrowArray from C Data Interface
396399
/// See <https://arrow.apache.org/docs/format/CDataInterface.html#structure-definitions>
397-
/// This was created by bindgen
400+
///
401+
/// ```
402+
/// # use arrow::ffi::FFI_ArrowArray;
403+
/// # use arrow_array::Array;
404+
/// fn export_array(array: &dyn Array) -> FFI_ArrowArray {
405+
/// FFI_ArrowArray::new(array.data())
406+
/// }
407+
/// ```
398408
#[repr(C)]
399409
#[derive(Debug)]
400410
pub struct FFI_ArrowArray {
@@ -859,6 +869,14 @@ impl<'a> ArrowArrayRef for ArrowArrayChild<'a> {
859869
}
860870

861871
impl ArrowArray {
872+
/// Creates a new [`ArrowArray`] from the provided array and schema
873+
pub fn new(array: FFI_ArrowArray, schema: FFI_ArrowSchema) -> Self {
874+
Self {
875+
array: Arc::new(array),
876+
schema: Arc::new(schema),
877+
}
878+
}
879+
862880
/// creates a new `ArrowArray`. This is used to export to the C Data Interface.
863881
///
864882
/// # Memory Leaks
@@ -878,6 +896,7 @@ impl ArrowArray {
878896
/// on managing the allocation of the structs by themselves.
879897
/// # Error
880898
/// Errors if any of the pointers is null
899+
#[deprecated(note = "Use ArrowArray::new")]
881900
pub unsafe fn try_from_raw(
882901
array: *const FFI_ArrowArray,
883902
schema: *const FFI_ArrowSchema,
@@ -911,6 +930,7 @@ impl ArrowArray {
911930
}
912931

913932
/// exports [ArrowArray] to the C Data Interface
933+
#[deprecated(note = "Use FFI_ArrowArray and FFI_ArrowSchema directly")]
914934
pub fn into_raw(this: ArrowArray) -> (*const FFI_ArrowArray, *const FFI_ArrowSchema) {
915935
(Arc::into_raw(this.array), Arc::into_raw(this.schema))
916936
}
@@ -946,28 +966,49 @@ mod tests {
946966
use arrow_array::types::{Float64Type, Int32Type};
947967
use arrow_array::{Float64Array, UnionArray};
948968
use std::convert::TryFrom;
969+
use std::mem::ManuallyDrop;
970+
use std::ptr::addr_of_mut;
949971

950972
#[test]
951-
fn test_round_trip() -> Result<()> {
973+
fn test_round_trip() {
952974
// create an array natively
953975
let array = Int32Array::from(vec![1, 2, 3]);
954976

955977
// export it
956-
let array = ArrowArray::try_from(array.into_data())?;
978+
let array = ArrowArray::try_from(array.into_data()).unwrap();
957979

958980
// (simulate consumer) import it
959-
let data = ArrayData::try_from(array)?;
960-
let array = make_array(data);
961-
962-
// perform some operation
963-
let array = array.as_any().downcast_ref::<Int32Array>().unwrap();
964-
let array = kernels::arithmetic::add(array, array).unwrap();
981+
let array = Int32Array::from(ArrayData::try_from(array).unwrap());
982+
let array = kernels::arithmetic::add(&array, &array).unwrap();
965983

966984
// verify
967985
assert_eq!(array, Int32Array::from(vec![2, 4, 6]));
986+
}
968987

969-
// (drop/release)
970-
Ok(())
988+
#[test]
989+
fn test_import() {
990+
// Model receiving const pointers from an external system
991+
992+
// Create an array natively
993+
let data = Int32Array::from(vec![1, 2, 3]).into_data();
994+
let schema = FFI_ArrowSchema::try_from(data.data_type()).unwrap();
995+
let array = FFI_ArrowArray::new(&data);
996+
997+
// Use ManuallyDrop to avoid Box:Drop recursing
998+
let schema = Box::new(ManuallyDrop::new(schema));
999+
let array = Box::new(ManuallyDrop::new(array));
1000+
1001+
let schema_ptr = &**schema as *const _;
1002+
let array_ptr = &**array as *const _;
1003+
1004+
// We can read them back to memory
1005+
// SAFETY:
1006+
// Pointers are aligned and valid
1007+
let array =
1008+
unsafe { ArrowArray::new(ptr::read(array_ptr), ptr::read(schema_ptr)) };
1009+
1010+
let array = Int32Array::from(ArrayData::try_from(array).unwrap());
1011+
assert_eq!(array, Int32Array::from(vec![1, 2, 3]));
9711012
}
9721013

9731014
#[test]
@@ -1424,31 +1465,28 @@ mod tests {
14241465
let array = make_array(Int32Array::from(vec![1, 2, 3]).into_data());
14251466

14261467
// Assume two raw pointers provided by the consumer
1427-
let out_array = Box::new(FFI_ArrowArray::empty());
1428-
let out_schema = Box::new(FFI_ArrowSchema::empty());
1429-
let out_array_ptr = Box::into_raw(out_array);
1430-
let out_schema_ptr = Box::into_raw(out_schema);
1431-
1432-
unsafe {
1433-
export_array_into_raw(array, out_array_ptr, out_schema_ptr)?;
1468+
let mut out_array = FFI_ArrowArray::empty();
1469+
let mut out_schema = FFI_ArrowSchema::empty();
1470+
1471+
{
1472+
let out_array_ptr = addr_of_mut!(out_array);
1473+
let out_schema_ptr = addr_of_mut!(out_schema);
1474+
unsafe {
1475+
export_array_into_raw(array, out_array_ptr, out_schema_ptr)?;
1476+
}
14341477
}
14351478

14361479
// (simulate consumer) import it
1437-
unsafe {
1438-
let array = ArrowArray::try_from_raw(out_array_ptr, out_schema_ptr).unwrap();
1439-
let data = ArrayData::try_from(array)?;
1440-
let array = make_array(data);
1441-
1442-
// perform some operation
1443-
let array = array.as_any().downcast_ref::<Int32Array>().unwrap();
1444-
let array = kernels::arithmetic::add(array, array).unwrap();
1480+
let array = ArrowArray::new(out_array, out_schema);
1481+
let data = ArrayData::try_from(array)?;
1482+
let array = make_array(data);
14451483

1446-
// verify
1447-
assert_eq!(array, Int32Array::from(vec![2, 4, 6]));
1484+
// perform some operation
1485+
let array = array.as_any().downcast_ref::<Int32Array>().unwrap();
1486+
let array = kernels::arithmetic::add(array, array).unwrap();
14481487

1449-
drop(Box::from_raw(out_array_ptr));
1450-
drop(Box::from_raw(out_schema_ptr));
1451-
}
1488+
// verify
1489+
assert_eq!(array, Int32Array::from(vec![2, 4, 6]));
14521490
Ok(())
14531491
}
14541492

arrow/src/pyarrow.rs

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
//! arrays from and to Python.
2020
2121
use std::convert::{From, TryFrom};
22+
use std::ptr::addr_of_mut;
2223
use std::sync::Arc;
2324

2425
use pyo3::ffi::Py_uintptr_t;
@@ -30,7 +31,7 @@ use crate::array::{make_array, Array, ArrayData};
3031
use crate::datatypes::{DataType, Field, Schema};
3132
use crate::error::ArrowError;
3233
use crate::ffi;
33-
use crate::ffi::FFI_ArrowSchema;
34+
use crate::ffi::{FFI_ArrowArray, FFI_ArrowSchema};
3435
use crate::ffi_stream::{
3536
export_reader_into_raw, ArrowArrayStreamReader, FFI_ArrowArrayStream,
3637
};
@@ -111,24 +112,21 @@ impl PyArrowConvert for Schema {
111112
impl PyArrowConvert for ArrayData {
112113
fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
113114
// prepare a pointer to receive the Array struct
114-
let (array_pointer, schema_pointer) =
115-
ffi::ArrowArray::into_raw(unsafe { ffi::ArrowArray::empty() });
115+
let mut array = FFI_ArrowArray::empty();
116+
let mut schema = FFI_ArrowSchema::empty();
116117

117118
// make the conversion through PyArrow's private API
118119
// this changes the pointer's memory and is thus unsafe.
119120
// In particular, `_export_to_c` can go out of bounds
120121
value.call_method1(
121122
"_export_to_c",
122123
(
123-
array_pointer as Py_uintptr_t,
124-
schema_pointer as Py_uintptr_t,
124+
addr_of_mut!(array) as Py_uintptr_t,
125+
addr_of_mut!(schema) as Py_uintptr_t,
125126
),
126127
)?;
127128

128-
let ffi_array = unsafe {
129-
ffi::ArrowArray::try_from_raw(array_pointer, schema_pointer)
130-
.map_err(to_py_err)?
131-
};
129+
let ffi_array = ffi::ArrowArray::new(array, schema);
132130
let data = ArrayData::try_from(ffi_array).map_err(to_py_err)?;
133131

134132
Ok(data)

0 commit comments

Comments
 (0)