Skip to content

Commit 0e04699

Browse files
paleolimbot authored and de-bgunter committed
Allow SQL TypePlanner to plan SQL types as extension types (apache#20676)
## Which issue does this PR close?

- Closes apache#20675

## Rationale for this change

The enum `SQLDataType` has a number of members that have canonical Arrow extension type equivalents; however, the `TypePlanner` trait only supports returning `DataType` (which cannot represent an Arrow extension type). This will be substantially more useful after apache#18136, as the SQL planner inserts casts in a number of places (and currently the extension metadata of those casts is dropped when the logical cast is created).

## What changes are included in this PR?

This PR adds a `fn plan_type_field()` member to the `TypePlanner` trait. The only place where the previously existing `plan_type()` member was called was already wrapping its result in a `FieldRef`, so few other changes were needed.

## Are these changes tested?

Yes

## Are there any user-facing changes?

Existing `TypePlanner`s will continue to work, and an example was added for supporting the UUID type.
1 parent 0eb50f5 commit 0e04699

7 files changed

Lines changed: 101 additions & 23 deletions

File tree

datafusion/core/src/execution/context/mod.rs

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2218,7 +2218,9 @@ mod tests {
22182218
use crate::test;
22192219
use crate::test_util::{plan_and_collect, populate_csv_partitions};
22202220
use arrow::datatypes::{DataType, TimeUnit};
2221+
use arrow_schema::FieldRef;
22212222
use datafusion_common::DataFusionError;
2223+
use datafusion_common::datatype::DataTypeExt;
22222224
use std::error::Error;
22232225
use std::path::PathBuf;
22242226

@@ -2735,7 +2737,7 @@ mod tests {
27352737
struct MyTypePlanner {}
27362738

27372739
impl TypePlanner for MyTypePlanner {
2738-
fn plan_type(&self, sql_type: &ast::DataType) -> Result<Option<DataType>> {
2740+
fn plan_type_field(&self, sql_type: &ast::DataType) -> Result<Option<FieldRef>> {
27392741
match sql_type {
27402742
ast::DataType::Datetime(precision) => {
27412743
let precision = match precision {
@@ -2745,7 +2747,9 @@ mod tests {
27452747
None | Some(9) => TimeUnit::Nanosecond,
27462748
_ => unreachable!(),
27472749
};
2748-
Ok(Some(DataType::Timestamp(precision, None)))
2750+
Ok(Some(
2751+
DataType::Timestamp(precision, None).into_nullable_field_ref(),
2752+
))
27492753
}
27502754
_ => Ok(None),
27512755
}

datafusion/expr/src/planner.rs

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -434,10 +434,28 @@ pub trait TypePlanner: Debug + Send + Sync {
434434
/// Plan SQL [`sqlparser::ast::DataType`] to DataFusion [`DataType`]
435435
///
436436
/// Returns None if not possible
437+
#[deprecated(since = "53.0.0", note = "Use plan_type_field()")]
437438
fn plan_type(
438439
&self,
439440
_sql_type: &sqlparser::ast::DataType,
440441
) -> Result<Option<DataType>> {
441442
Ok(None)
442443
}
444+
445+
/// Plan SQL [`sqlparser::ast::DataType`] to DataFusion [`FieldRef`]
446+
///
447+
/// Returns None if not possible. Unlike [`Self::plan_type`], `plan_type_field()`
448+
/// makes it possible to express extension types (e.g., `arrow.uuid`) or otherwise
449+
/// insert metadata into the DataFusion type representation. The default implementation
450+
/// falls back on [`Self::plan_type`] for backward compatibility and wraps the result
451+
/// in a nullable field reference.
452+
fn plan_type_field(
453+
&self,
454+
sql_type: &sqlparser::ast::DataType,
455+
) -> Result<Option<FieldRef>> {
456+
#[expect(deprecated)]
457+
Ok(self
458+
.plan_type(sql_type)?
459+
.map(|data_type| data_type.into_nullable_field_ref()))
460+
}
443461
}

datafusion/sql/src/expr/mod.rs

Lines changed: 5 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -292,27 +292,23 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
292292
return not_impl_err!("CAST with format is not supported: {format}");
293293
}
294294

295-
Ok(Expr::TryCast(TryCast::new(
295+
Ok(Expr::TryCast(TryCast::new_from_field(
296296
Box::new(self.sql_expr_to_logical_expr(
297297
*expr,
298298
schema,
299299
planner_context,
300300
)?),
301-
self.convert_data_type_to_field(&data_type)?
302-
.data_type()
303-
.clone(),
301+
self.convert_data_type_to_field(&data_type)?,
304302
)))
305303
}
306304

307305
SQLExpr::TypedString(TypedString {
308306
data_type,
309307
value,
310308
uses_odbc_syntax: _,
311-
}) => Ok(Expr::Cast(Cast::new(
309+
}) => Ok(Expr::Cast(Cast::new_from_field(
312310
Box::new(lit(value.into_string().unwrap())),
313-
self.convert_data_type_to_field(&data_type)?
314-
.data_type()
315-
.clone(),
311+
self.convert_data_type_to_field(&data_type)?,
316312
))),
317313

318314
SQLExpr::IsNull(expr) => Ok(Expr::IsNull(Box::new(
@@ -1061,12 +1057,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
10611057
_ => expr,
10621058
};
10631059

1064-
// Currently drops metadata attached to the type
1065-
// https://github.com/apache/datafusion/issues/18060
1066-
Ok(Expr::Cast(Cast::new(
1067-
Box::new(expr),
1068-
dt.data_type().clone(),
1069-
)))
1060+
Ok(Expr::Cast(Cast::new_from_field(Box::new(expr), dt)))
10701061
}
10711062

10721063
/// Extracts the root expression and access chain from a compound expression.

datafusion/sql/src/planner.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -621,9 +621,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
621621
) -> Result<FieldRef> {
622622
// First check if any of the registered type_planner can handle this type
623623
if let Some(type_planner) = self.context_provider.get_type_planner()
624-
&& let Some(data_type) = type_planner.plan_type(sql_type)?
624+
&& let Some(data_type) = type_planner.plan_type_field(sql_type)?
625625
{
626-
return Ok(data_type.into_nullable_field_ref());
626+
return Ok(data_type);
627627
}
628628

629629
// If no type_planner can handle this type, use the default conversion

datafusion/sql/tests/common/mod.rs

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@ use std::{sync::Arc, vec};
2323

2424
use arrow::datatypes::*;
2525
use datafusion_common::config::ConfigOptions;
26+
use datafusion_common::datatype::DataTypeExt;
2627
use datafusion_common::file_options::file_type::FileType;
2728
use datafusion_common::{DFSchema, GetExt, Result, TableReference, plan_err};
2829
use datafusion_expr::planner::{ExprPlanner, PlannerResult, TypePlanner};
@@ -341,8 +342,17 @@ impl TableSource for EmptyTable {
341342
pub struct CustomTypePlanner {}
342343

343344
impl TypePlanner for CustomTypePlanner {
344-
fn plan_type(&self, sql_type: &sqlparser::ast::DataType) -> Result<Option<DataType>> {
345+
fn plan_type_field(
346+
&self,
347+
sql_type: &sqlparser::ast::DataType,
348+
) -> Result<Option<FieldRef>> {
345349
match sql_type {
350+
sqlparser::ast::DataType::Uuid => Ok(Some(Arc::new(
351+
Field::new("", DataType::FixedSizeBinary(16), true).with_metadata(
352+
[("ARROW:extension:name".to_string(), "arrow.uuid".to_string())]
353+
.into(),
354+
),
355+
))),
346356
sqlparser::ast::DataType::Datetime(precision) => {
347357
let precision = match precision {
348358
Some(0) => TimeUnit::Second,
@@ -351,7 +361,9 @@ impl TypePlanner for CustomTypePlanner {
351361
None | Some(9) => TimeUnit::Nanosecond,
352362
_ => unreachable!(),
353363
};
354-
Ok(Some(DataType::Timestamp(precision, None)))
364+
Ok(Some(
365+
DataType::Timestamp(precision, None).into_nullable_field_ref(),
366+
))
355367
}
356368
_ => Ok(None),
357369
}

datafusion/sql/tests/sql_integration.rs

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4727,6 +4727,14 @@ fn test_custom_type_plan() -> Result<()> {
47274727
"#
47284728
);
47294729

4730+
let plan = plan_sql("SELECT UUID '00010203-0405-0607-0809-000102030506'");
4731+
assert_snapshot!(
4732+
plan,
4733+
@r#"
4734+
Projection: CAST(Utf8("00010203-0405-0607-0809-000102030506") AS FixedSizeBinary(16)<{"ARROW:extension:name": "arrow.uuid"}>)
4735+
EmptyRelation: rows=1
4736+
"#
4737+
);
47304738
Ok(())
47314739
}
47324740

docs/source/library-user-guide/extending-sql.md

Lines changed: 48 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -158,7 +158,7 @@ when you need to support SQL types that aren't natively recognized.
158158

159159
```rust
160160
# use std::sync::Arc;
161-
# use arrow::datatypes::{DataType, TimeUnit};
161+
# use arrow::datatypes::{DataType, FieldRef, TimeUnit};
162162
# use datafusion::error::Result;
163163
# use datafusion::prelude::*;
164164
# use datafusion::execution::SessionStateBuilder;
@@ -169,7 +169,7 @@ use datafusion_expr::planner::TypePlanner;
169169
struct MyTypePlanner;
170170

171171
impl TypePlanner for MyTypePlanner {
172-
fn plan_type(&self, sql_type: &ast::DataType) -> Result<Option<DataType>> {
172+
fn plan_type_field(&self, sql_type: &ast::DataType) -> Result<Option<FieldRef>> {
173173
match sql_type {
174174
// Map DATETIME(precision) to Arrow Timestamp
175175
ast::DataType::Datetime(precision) => {
@@ -180,7 +180,9 @@ impl TypePlanner for MyTypePlanner {
180180
None | Some(9) => TimeUnit::Nanosecond,
181181
_ => return Ok(None), // Let default handling take over
182182
};
183-
Ok(Some(DataType::Timestamp(time_unit, None)))
183+
Ok(Some(
184+
DataType::Timestamp(time_unit, None).into_nullable_field_ref()
185+
))
184186
}
185187
_ => Ok(None), // Return None for types we don't handle
186188
}
@@ -202,6 +204,49 @@ async fn main() -> Result<()> {
202204
}
203205
```
204206

207+
#### Example: Supporting the UUID Type
208+
209+
```rust
210+
# use std::sync::Arc;
211+
# use arrow::datatypes::{DataType, FieldRef, TimeUnit};
212+
# use datafusion::error::Result;
213+
# use datafusion::prelude::*;
214+
# use datafusion::execution::SessionStateBuilder;
215+
use datafusion_expr::planner::TypePlanner;
216+
# use sqlparser::ast;
217+
218+
#[derive(Debug)]
219+
struct MyTypePlanner;
220+
221+
impl TypePlanner for MyTypePlanner {
222+
fn plan_type_field(&self, sql_type: &ast::DataType) -> Result<Option<FieldRef>> {
223+
match sql_type {
224+
sqlparser::ast::DataType::Uuid => Ok(Some(Arc::new(
225+
Field::new("", DataType::FixedSizeBinary(16), true).with_metadata(
226+
[("ARROW:extension:name".to_string(), "arrow.uuid".to_string())]
227+
.into(),
228+
),
229+
))),
230+
_ => Ok(None),
231+
}
232+
}
233+
}
234+
235+
#[tokio::main]
236+
async fn main() -> Result<()> {
237+
let state = SessionStateBuilder::new()
238+
.with_default_features()
239+
.with_type_planner(Arc::new(MyTypePlanner))
240+
.build();
241+
242+
let ctx = SessionContext::new_with_state(state);
243+
244+
// Now UUID type is recognized
245+
ctx.sql("CREATE TABLE idx (uuid UUID)").await?;
246+
Ok(())
247+
}
248+
```
249+
205250
For more details, see the [TypePlanner API documentation].
206251

207252
### RelationPlanner: Custom FROM Clause Elements

0 commit comments

Comments
 (0)