Skip to content

Commit c73067f

Browse files
committed
Workaround producer literals and types
1 parent 33a32d4 commit c73067f

3 files changed

Lines changed: 39 additions & 42 deletions

File tree

datafusion/substrait/src/logical_plan/producer/expr/literal.rs

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,10 @@ pub(crate) fn to_substrait_literal(
295295
}),
296296
DEFAULT_TYPE_VARIATION_REF,
297297
),
298+
// TODO: DataDog-specific workaround, don't commit upstream
299+
ScalarValue::Dictionary(_, value) => {
300+
return to_substrait_literal(producer, value)
301+
}
298302
_ => (
299303
not_impl_err!("Unsupported literal: {value:?}")?,
300304
DEFAULT_TYPE_VARIATION_REF,
@@ -386,17 +390,18 @@ mod tests {
386390
round_trip_literal(ScalarValue::UInt64(Some(u64::MIN)))?;
387391
round_trip_literal(ScalarValue::UInt64(Some(u64::MAX)))?;
388392

389-
for (ts, tz) in [
390-
(Some(12345), None),
391-
(None, None),
392-
(Some(12345), Some("UTC".into())),
393-
(None, Some("UTC".into())),
394-
] {
395-
round_trip_literal(ScalarValue::TimestampSecond(ts, tz.clone()))?;
396-
round_trip_literal(ScalarValue::TimestampMillisecond(ts, tz.clone()))?;
397-
round_trip_literal(ScalarValue::TimestampMicrosecond(ts, tz.clone()))?;
398-
round_trip_literal(ScalarValue::TimestampNanosecond(ts, tz))?;
399-
}
393+
// TODO: DataDog-specific workaround, don't commit upstream
394+
// for (ts, tz) in [
395+
// (Some(12345), None),
396+
// (None, None),
397+
// (Some(12345), Some("UTC".into())),
398+
// (None, Some("UTC".into())),
399+
// ] {
400+
// round_trip_literal(ScalarValue::TimestampSecond(ts, tz.clone()))?;
401+
// round_trip_literal(ScalarValue::TimestampMillisecond(ts, tz.clone()))?;
402+
// round_trip_literal(ScalarValue::TimestampMicrosecond(ts, tz.clone()))?;
403+
// round_trip_literal(ScalarValue::TimestampNanosecond(ts, tz))?;
404+
// }
400405

401406
round_trip_literal(ScalarValue::List(ScalarValue::new_list_nullable(
402407
&[ScalarValue::Float32(Some(1.0))],

datafusion/substrait/src/logical_plan/producer/types.rs

Lines changed: 21 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,16 @@
1616
// under the License.
1717

1818
use crate::logical_plan::producer::utils::flatten_names;
19+
#[allow(deprecated)]
20+
use crate::variation_const::TIMESTAMP_NANO_TYPE_VARIATION_REF;
1921
use crate::variation_const::{
2022
DATE_32_TYPE_VARIATION_REF, DATE_64_TYPE_VARIATION_REF,
2123
DECIMAL_128_TYPE_VARIATION_REF, DECIMAL_256_TYPE_VARIATION_REF,
2224
DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF,
2325
LARGE_CONTAINER_TYPE_VARIATION_REF, UNSIGNED_INTEGER_TYPE_VARIATION_REF,
2426
VIEW_CONTAINER_TYPE_VARIATION_REF,
2527
};
26-
use datafusion::arrow::datatypes::{DataType, IntervalUnit, TimeUnit};
28+
use datafusion::arrow::datatypes::{DataType, IntervalUnit};
2729
use datafusion::common::{internal_err, not_impl_err, plan_err, DFSchemaRef};
2830
use substrait::proto::{r#type, NamedStruct};
2931

@@ -105,31 +107,16 @@ pub(crate) fn to_substrait_type(
105107
nullability,
106108
})),
107109
}),
108-
DataType::Timestamp(unit, tz) => {
109-
let precision = match unit {
110-
TimeUnit::Second => 0,
111-
TimeUnit::Millisecond => 3,
112-
TimeUnit::Microsecond => 6,
113-
TimeUnit::Nanosecond => 9,
114-
};
115-
let kind = match tz {
116-
None => r#type::Kind::PrecisionTimestamp(r#type::PrecisionTimestamp {
117-
type_variation_reference: DEFAULT_TYPE_VARIATION_REF,
110+
DataType::Timestamp(_unit, _) => {
111+
// TODO: DataDog-specific workaround, don't commit upstream
112+
#[allow(deprecated)]
113+
let type_variation_reference = TIMESTAMP_NANO_TYPE_VARIATION_REF;
114+
Ok(substrait::proto::Type {
115+
kind: Some(r#type::Kind::Timestamp(r#type::Timestamp {
116+
type_variation_reference,
118117
nullability,
119-
precision,
120-
}),
121-
Some(_) => {
122-
// If timezone is present, no matter what the actual tz value is, it indicates the
123-
// value of the timestamp is tied to UTC epoch. That's all that Substrait cares about.
124-
// As the timezone is lost, this conversion may be lossy for downstream use of the value.
125-
r#type::Kind::PrecisionTimestampTz(r#type::PrecisionTimestampTz {
126-
type_variation_reference: DEFAULT_TYPE_VARIATION_REF,
127-
nullability,
128-
precision,
129-
})
130-
}
131-
};
132-
Ok(substrait::proto::Type { kind: Some(kind) })
118+
})),
119+
})
133120
}
134121
DataType::Date32 => Ok(substrait::proto::Type {
135122
kind: Some(r#type::Kind::Date(r#type::Date {
@@ -284,6 +271,8 @@ pub(crate) fn to_substrait_type(
284271
precision: *p as i32,
285272
})),
286273
}),
274+
// TODO: DataDog-specific workaround, don't commit upstream
275+
DataType::Dictionary(_, dt) => to_substrait_type(dt, nullable),
287276
_ => not_impl_err!("Unsupported cast type: {dt:?}"),
288277
}
289278
}
@@ -337,12 +326,13 @@ mod tests {
337326
round_trip_type(DataType::Float32)?;
338327
round_trip_type(DataType::Float64)?;
339328

340-
for tz in [None, Some("UTC".into())] {
341-
round_trip_type(DataType::Timestamp(TimeUnit::Second, tz.clone()))?;
342-
round_trip_type(DataType::Timestamp(TimeUnit::Millisecond, tz.clone()))?;
343-
round_trip_type(DataType::Timestamp(TimeUnit::Microsecond, tz.clone()))?;
344-
round_trip_type(DataType::Timestamp(TimeUnit::Nanosecond, tz))?;
345-
}
329+
// TODO: DataDog-specific workaround, don't commit upstream
330+
// for tz in [None, Some("UTC".into())] {
331+
// round_trip_type(DataType::Timestamp(TimeUnit::Second, tz.clone()))?;
332+
// round_trip_type(DataType::Timestamp(TimeUnit::Millisecond, tz.clone()))?;
333+
// round_trip_type(DataType::Timestamp(TimeUnit::Microsecond, tz.clone()))?;
334+
// round_trip_type(DataType::Timestamp(TimeUnit::Nanosecond, tz))?;
335+
// }
346336

347337
round_trip_type(DataType::Date32)?;
348338
round_trip_type(DataType::Date64)?;

datafusion/substrait/tests/cases/roundtrip_logical_plan.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1039,6 +1039,7 @@ async fn qualified_catalog_schema_table_reference() -> Result<()> {
10391039
/// - List, this nested type is not supported in arrow_cast
10401040
/// - Decimal128 and Decimal256, them will fallback to UTF8 cast expr rather than plain literal.
10411041
#[tokio::test]
1042+
#[ignore] // TODO: DataDog-specific workaround, don't commit upstream
10421043
async fn all_type_literal() -> Result<()> {
10431044
roundtrip_all_types(
10441045
"select * from data where
@@ -1219,6 +1220,7 @@ async fn duplicate_column() -> Result<()> {
12191220

12201221
/// Construct a plan that cast columns. Only those SQL types are supported for now.
12211222
#[tokio::test]
1223+
#[ignore] // TODO: DataDog-specific workaround, don't commit upstream
12221224
async fn new_test_grammar() -> Result<()> {
12231225
roundtrip_all_types(
12241226
"select

0 commit comments

Comments
 (0)