Skip to content

Commit 224c682

Browse files
authored
Support coercing utf8 to interval and timestamp (including arguments to date_bin) (apache#5117)
* Support coercing strings to intervals (for date_bin and others)
* Review feedback
1 parent 7d2d51b commit 224c682

3 files changed

Lines changed: 88 additions & 10 deletions

File tree

datafusion/core/tests/sqllogictests/test_files/timestamps.slt

Lines changed: 64 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -72,39 +72,51 @@ drop table foo;
7272
## test date_bin function
7373
###
7474
query T
75-
SELECT DATE_BIN(INTERVAL '15 minutes', TIMESTAMP '2022-08-03 14:38:50Z', TIMESTAMP '1970-01-01T00:00:00Z') AS res
75+
SELECT DATE_BIN(INTERVAL '15 minutes', TIMESTAMP '2022-08-03 14:38:50Z', TIMESTAMP '1970-01-01T00:00:00Z')
76+
----
77+
2022-08-03T14:30:00
78+
79+
# Can coerce string interval arguments
80+
query T
81+
SELECT DATE_BIN('15 minutes', TIMESTAMP '2022-08-03 14:38:50Z', TIMESTAMP '1970-01-01T00:00:00Z')
82+
----
83+
2022-08-03T14:30:00
84+
85+
# Can coerce all string arguments
86+
query T
87+
SELECT DATE_BIN('15 minutes', '2022-08-03 14:38:50Z', '1970-01-01T00:00:00Z')
7688
----
7789
2022-08-03T14:30:00
7890

7991
# Shift forward by 5 minutes
8092
query T
81-
SELECT DATE_BIN(INTERVAL '15 minutes', TIMESTAMP '2022-08-03 14:38:50Z', TIMESTAMP '1970-01-01T00:05:00Z') AS res
93+
SELECT DATE_BIN(INTERVAL '15 minutes', TIMESTAMP '2022-08-03 14:38:50Z', TIMESTAMP '1970-01-01T00:05:00Z')
8294
----
8395
2022-08-03T14:35:00
8496

8597

8698
# Shift backward by 5 minutes
8799
query T
88-
SELECT DATE_BIN(INTERVAL '15 minutes', TIMESTAMP '2022-08-03 14:38:50Z', TIMESTAMP '1970-01-01T23:55:00Z') AS res
100+
SELECT DATE_BIN(INTERVAL '15 minutes', TIMESTAMP '2022-08-03 14:38:50Z', TIMESTAMP '1970-01-01T23:55:00Z')
89101
----
90102
2022-08-03T14:25:00
91103

92104
# origin after source, timestamp in previous bucket
93105
query T
94-
SELECT DATE_BIN(INTERVAL '15 minutes', TIMESTAMP '2022-08-03 14:38:50Z', TIMESTAMP '2022-08-03 14:40:00Z') AS res
106+
SELECT DATE_BIN(INTERVAL '15 minutes', TIMESTAMP '2022-08-03 14:38:50Z', TIMESTAMP '2022-08-03 14:40:00Z')
95107
----
96108
2022-08-03T14:25:00
97109

98110
# stride by 7 days
99111
query T
100-
SELECT DATE_BIN(INTERVAL '7 days', TIMESTAMP '2022-08-03 14:38:50Z', TIMESTAMP '1970-01-01 00:00:00Z') AS res
112+
SELECT DATE_BIN(INTERVAL '7 days', TIMESTAMP '2022-08-03 14:38:50Z', TIMESTAMP '1970-01-01 00:00:00Z')
101113
----
102114
2022-07-28T00:00:00
103115

104116

105117
# origin shifts bins forward 1 day
106118
query T
107-
SELECT DATE_BIN(INTERVAL '7 days', TIMESTAMP '2022-08-03 14:38:50Z', TIMESTAMP '1970-01-02 00:00:00Z') AS res
119+
SELECT DATE_BIN(INTERVAL '7 days', TIMESTAMP '2022-08-03 14:38:50Z', TIMESTAMP '1970-01-02 00:00:00Z')
108120
----
109121
2022-07-29T00:00:00
110122

@@ -133,3 +145,49 @@ FROM (
133145
(TIMESTAMP '2021-06-10 17:05:00Z', TIMESTAMP '2001-01-01T00:00:00Z', 0.5),
134146
(TIMESTAMP '2021-06-10 17:19:10Z', TIMESTAMP '2001-01-01T00:00:00Z', 0.3)
135147
) as t (time, origin, val)
148+
149+
###
150+
## test date_trunc function
151+
###
152+
query T
153+
SELECT DATE_TRUNC('year', TIMESTAMP '2022-08-03 14:38:50Z');
154+
----
155+
2022-01-01T00:00:00
156+
157+
query T
158+
SELECT DATE_TRUNC('quarter', TIMESTAMP '2022-08-03 14:38:50Z');
159+
----
160+
2022-07-01T00:00:00
161+
162+
query T
163+
SELECT DATE_TRUNC('month', TIMESTAMP '2022-08-03 14:38:50Z');
164+
----
165+
2022-08-01T00:00:00
166+
167+
query T
168+
SELECT DATE_TRUNC('day', TIMESTAMP '2022-08-03 14:38:50Z');
169+
----
170+
2022-08-03T00:00:00
171+
172+
query T
173+
SELECT DATE_TRUNC('hour', TIMESTAMP '2022-08-03 14:38:50Z');
174+
----
175+
2022-08-03T14:00:00
176+
177+
query T
178+
SELECT DATE_TRUNC('minute', TIMESTAMP '2022-08-03 14:38:50Z');
179+
----
180+
2022-08-03T14:38:00
181+
182+
query T
183+
SELECT DATE_TRUNC('second', TIMESTAMP '2022-08-03 14:38:50Z');
184+
----
185+
2022-08-03T14:38:50
186+
187+
188+
# Demonstrate that strings are automatically coerced to timestamps (don't use TIMESTAMP)
189+
190+
query T
191+
SELECT DATE_TRUNC('second', '2022-08-03 14:38:50Z');
192+
----
193+
2022-08-03T14:38:50

datafusion/expr/src/type_coercion/functions.rs

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,10 @@ use arrow::{
2222
};
2323
use datafusion_common::{DataFusionError, Result};
2424

25-
/// Performs type coercion for functions Returns the data types that
26-
/// each argument must be coerced to match `signature`.
25+
/// Performs type coercion for function arguments.
26+
///
27+
/// Returns the data types to which each argument must be coerced to
28+
/// match `signature`.
2729
///
2830
/// For more details on coercion in general, please see the
2931
/// [`type_coercion`](crate::type_coercion) module.
@@ -175,7 +177,13 @@ pub fn can_coerce_from(type_into: &DataType, type_from: &DataType) -> bool {
175177
| Decimal128(_, _)
176178
),
177179
Timestamp(TimeUnit::Nanosecond, _) => {
178-
matches!(type_from, Null | Timestamp(_, _) | Date32)
180+
matches!(
181+
type_from,
182+
Null | Timestamp(_, _) | Date32 | Utf8 | LargeUtf8
183+
)
184+
}
185+
Interval(_) => {
186+
matches!(type_from, Utf8 | LargeUtf8)
179187
}
180188
Utf8 | LargeUtf8 => true,
181189
Null => can_cast_types(type_from, type_into),

datafusion/optimizer/src/type_coercion.rs

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -553,10 +553,22 @@ fn coerce_arguments_for_signature(
553553
expressions
554554
.iter()
555555
.enumerate()
556-
.map(|(i, expr)| expr.clone().cast_to(&new_types[i], schema))
556+
.map(|(i, expr)| cast_expr(expr, &new_types[i], schema))
557557
.collect::<Result<Vec<_>>>()
558558
}
559559

560+
/// Cast `expr` to the specified type, if possible
561+
fn cast_expr(expr: &Expr, to_type: &DataType, schema: &DFSchema) -> Result<Expr> {
562+
// Special case until Interval coercion is handled in arrow-rs
563+
// https://github.com/apache/arrow-rs/issues/3643
564+
match (expr, to_type) {
565+
(Expr::Literal(ScalarValue::Utf8(Some(s))), DataType::Interval(_)) => {
566+
parse_interval("millisecond", s.as_str()).map(Expr::Literal)
567+
}
568+
_ => expr.clone().cast_to(to_type, schema),
569+
}
570+
}
571+
560572
/// Returns the coerced exprs for each `input_exprs`.
561573
/// Get the coerced data type from `aggregate_rule::coerce_types` and add `try_cast` if the
562574
/// data type of `input_exprs` need to be coerced.

0 commit comments

Comments
 (0)