Skip to content

Commit 934af5c

Browse files
authored
chore(cubesql): nullif_str func (#42)
1 parent 911fed5 commit 934af5c

6 files changed

Lines changed: 219 additions & 0 deletions

File tree

datafusion/cube_ext/Cargo.toml

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
[package]
19+
name = "cube-ext"
20+
description = ""
21+
version = "1.0.0"
22+
homepage = "https://github.com/apache/arrow-datafusion"
23+
repository = "https://github.com/apache/arrow-datafusion"
24+
readme = "README.md"
25+
authors = ["Cube Dev Inc"]
26+
license = ""
27+
keywords = []
28+
edition = "2021"
29+
rust-version = "1.59"
30+
31+
[features]
32+
33+
[lib]
34+
name = "cube_ext"
35+
path = "src/lib.rs"
36+
37+
[dependencies]
38+
arrow = { git = 'https://github.com/cube-js/arrow-rs.git', rev = "f541e13c7b847b3fb6935602182e08b41ba3d9db", features = ["prettyprint"] }
39+
datafusion-common = { path = "../common", version = "7.0.0" }
40+
datafusion-expr = { path = "../expr", version = "7.0.0" }

datafusion/cube_ext/README.md

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
<!---
2+
Licensed to the Apache Software Foundation (ASF) under one
3+
or more contributor license agreements. See the NOTICE file
4+
distributed with this work for additional information
5+
regarding copyright ownership. The ASF licenses this file
6+
to you under the Apache License, Version 2.0 (the
7+
"License"); you may not use this file except in compliance
8+
with the License. You may obtain a copy of the License at
9+
10+
http://www.apache.org/licenses/LICENSE-2.0
11+
12+
Unless required by applicable law or agreed to in writing,
13+
software distributed under the License is distributed on an
14+
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
KIND, either express or implied. See the License for the
16+
specific language governing permissions and limitations
17+
under the License.
18+
-->
19+
20+
# DataFusion CubeExt
21+
22+
[df]: https://crates.io/crates/datafusion

datafusion/cube_ext/src/lib.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
mod nullif_str;
19+
20+
pub use nullif_str::nullif_func_str;
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
use arrow::array::Array;
2+
use arrow::array::*;
3+
use arrow::datatypes::DataType;
4+
use datafusion_common::{DataFusionError, Result};
5+
use datafusion_expr::ColumnarValue;
6+
use std::sync::Arc;
7+
8+
macro_rules! null_if_equal {
9+
($LEFT:expr, $RIGHT:expr, $TYPE:ident) => {{
10+
let left = $LEFT
11+
.as_any()
12+
.downcast_ref::<$TYPE>()
13+
.expect("failed to downcast array");
14+
let right = $RIGHT
15+
.as_any()
16+
.downcast_ref::<$TYPE>()
17+
.expect("failed to downcast array");
18+
19+
let res = left
20+
.iter()
21+
.zip(right.iter())
22+
.map(|(l, r)| if l == r { None } else { l })
23+
.collect::<$TYPE>();
24+
25+
Arc::new(res) as ArrayRef
26+
}};
27+
}
28+
29+
pub fn nullif_func_str(args: &[ColumnarValue]) -> Result<ColumnarValue> {
30+
let (lhs, rhs) = (&args[0], &args[1]);
31+
let (left_arr, right_arr) = match (lhs, rhs) {
32+
(ColumnarValue::Array(lhs), _) => (lhs, rhs.clone().into_array(lhs.len())),
33+
_ => {
34+
return Err(DataFusionError::NotImplemented(
35+
"nullif_str does not support a literal as first argument".to_string(),
36+
))
37+
}
38+
};
39+
40+
if left_arr.data_type() != right_arr.data_type() {
41+
return Err(DataFusionError::NotImplemented(
42+
"both arguments have to have the same type".to_string(),
43+
));
44+
}
45+
46+
let res = match left_arr.data_type() {
47+
DataType::Utf8 => null_if_equal!(left_arr, right_arr, StringArray),
48+
DataType::LargeUtf8 => null_if_equal!(left_arr, right_arr, LargeStringArray),
49+
_ => {
50+
return Err(DataFusionError::NotImplemented(
51+
"nullif_str supports Utf8 and LargeUtf8 only".to_string(),
52+
))
53+
}
54+
};
55+
56+
Ok(ColumnarValue::Array(res))
57+
}
58+
59+
#[cfg(test)]
60+
mod tests {
61+
use datafusion_common::ScalarValue;
62+
63+
use super::*;
64+
65+
#[test]
66+
fn test_nullif_str_array() {
67+
let a = GenericStringArray::<i32>::from(vec![
68+
Some("1"),
69+
None,
70+
Some("2"),
71+
Some("3"),
72+
Some("4"),
73+
]);
74+
let b = GenericStringArray::<i32>::from(vec![
75+
None,
76+
None,
77+
Some("2"),
78+
Some("a"),
79+
Some("b"),
80+
]);
81+
82+
let expected = GenericStringArray::<i32>::from(vec![
83+
Some("1"),
84+
None,
85+
None,
86+
Some("3"),
87+
Some("4"),
88+
]);
89+
90+
match nullif_func_str(&[
91+
ColumnarValue::Array(Arc::new(a) as ArrayRef),
92+
ColumnarValue::Array(Arc::new(b) as ArrayRef),
93+
])
94+
.unwrap()
95+
{
96+
ColumnarValue::Array(arr) => {
97+
let we = arr
98+
.as_any()
99+
.downcast_ref::<StringArray>()
100+
.expect("failed to downcast array");
101+
assert_eq!(&expected, we);
102+
}
103+
_ => panic!("test_nullif_str_array failed"),
104+
}
105+
106+
let a =
107+
GenericStringArray::<i64>::from(vec![Some("FOO"), Some("BAR"), Some("KEK")]);
108+
109+
let expected =
110+
GenericStringArray::<i64>::from(vec![Some("FOO"), Some("BAR"), None]);
111+
112+
match nullif_func_str(&[
113+
ColumnarValue::Array(Arc::new(a) as ArrayRef),
114+
ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some("KEK".to_string()))),
115+
])
116+
.unwrap()
117+
{
118+
ColumnarValue::Array(arr) => {
119+
let we = arr
120+
.as_any()
121+
.downcast_ref::<LargeStringArray>()
122+
.expect("failed to downcast array");
123+
assert_eq!(&expected, we);
124+
}
125+
_ => panic!("test_nullif_str_array failed"),
126+
}
127+
}
128+
}

datafusion/physical-expr/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ blake3 = { version = "1.0", optional = true }
4646
chrono = { version = "0.4", default-features = false }
4747
datafusion-common = { path = "../common", version = "7.0.0" }
4848
datafusion-expr = { path = "../expr", version = "7.0.0" }
49+
cube-ext = { path = "../cube_ext", version = "1.0.0" }
4950
hashbrown = { version = "0.12", features = ["raw"] }
5051
lazy_static = { version = "^1.4.0" }
5152
md-5 = { version = "^0.10.0", optional = true }

datafusion/physical-expr/src/expressions/nullif.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ use arrow::compute::kernels::comparison::{
2525
eq, eq_bool, eq_bool_scalar, eq_scalar, eq_utf8, eq_utf8_scalar,
2626
};
2727
use arrow::datatypes::{DataType, TimeUnit};
28+
use cube_ext::nullif_func_str;
2829
use datafusion_common::ScalarValue;
2930
use datafusion_common::{DataFusionError, Result};
3031
use datafusion_expr::ColumnarValue;
@@ -80,6 +81,11 @@ pub fn nullif_func(args: &[ColumnarValue]) -> Result<ColumnarValue> {
8081

8182
let (lhs, rhs) = (&args[0], &args[1]);
8283

84+
match lhs.data_type() {
85+
DataType::Utf8 | DataType::LargeUtf8 => return nullif_func_str(args),
86+
_ => (),
87+
}
88+
8389
match (lhs, rhs) {
8490
(ColumnarValue::Array(lhs), ColumnarValue::Scalar(rhs)) => {
8591
let cond_array = binary_array_op_scalar!(lhs, rhs.clone(), eq).unwrap()?;
@@ -117,6 +123,8 @@ pub static SUPPORTED_NULLIF_TYPES: &[DataType] = &[
117123
DataType::Int64,
118124
DataType::Float32,
119125
DataType::Float64,
126+
DataType::Utf8,
127+
DataType::LargeUtf8,
120128
];
121129

122130
#[cfg(test)]

0 commit comments

Comments
 (0)