Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 53 additions & 4 deletions datafusion/spark/src/function/hash/crc32.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use std::any::Any;
use std::sync::Arc;

use arrow::array::{ArrayRef, Int64Array};
use arrow::datatypes::DataType;
use arrow::datatypes::{DataType, Field, FieldRef};
use crc32fast::Hasher;
use datafusion_common::cast::{
as_binary_array, as_binary_view_array, as_fixed_size_binary_array,
Expand All @@ -29,8 +29,8 @@ use datafusion_common::types::{NativeType, logical_string};
use datafusion_common::utils::take_function_args;
use datafusion_common::{Result, internal_err};
use datafusion_expr::{
Coercion, ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature,
TypeSignatureClass, Volatility,
Coercion, ColumnarValue, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl,
Signature, TypeSignatureClass, Volatility,
};
use datafusion_functions::utils::make_scalar_function;

Expand Down Expand Up @@ -75,7 +75,16 @@ impl ScalarUDFImpl for SparkCrc32 {
}

/// Not supported for this function: the nullability of the result depends
/// on the arguments, so the output is described by
/// `return_field_from_args` rather than by a bare `DataType`.
///
/// # Errors
///
/// Always returns an internal error directing callers to
/// `return_field_from_args`.
fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
    internal_err!("return_field_from_args should be used instead")
}

/// Builds the output field for this function: an `Int64` field named after
/// the function.
///
/// The result is marked nullable when any argument field is nullable, or
/// when any argument supplied as a scalar is a null value; otherwise it is
/// non-nullable.
fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result<FieldRef> {
    let has_nullable_field = args.arg_fields.iter().any(|field| field.is_nullable());
    let has_null_scalar = args
        .scalar_arguments
        .iter()
        .any(|literal| matches!(literal, Some(value) if value.is_null()));

    let output = Field::new(
        self.name(),
        DataType::Int64,
        has_nullable_field || has_null_scalar,
    );
    Ok(Arc::new(output))
}

fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
Expand Down Expand Up @@ -122,3 +131,43 @@ fn spark_crc32(args: &[ArrayRef]) -> Result<ArrayRef> {
}
}
}

#[cfg(test)]
mod tests {
    use super::*;
    use datafusion_common::ScalarValue;

    /// Checks that the output field is always `Int64` and that its
    /// nullability follows the argument field and any null scalar argument.
    #[test]
    fn test_crc32_nullability() -> Result<()> {
        let udf = SparkCrc32::new();

        let required: FieldRef = Arc::new(Field::new("data", DataType::Binary, false));
        let optional: FieldRef = Arc::new(Field::new("data", DataType::Binary, true));
        // Stands in for a user-supplied literal NULL.
        let null_literal = ScalarValue::Binary(None);

        // (argument field, scalar argument, expected output nullability)
        let cases: [(&FieldRef, Option<&ScalarValue>, bool); 3] = [
            // non-nullable field should produce non-nullable output
            (&required, None, false),
            // nullable field should produce nullable output
            (&optional, None, true),
            // null scalar value on a non-nullable field still yields nullable output
            (&required, Some(&null_literal), true),
        ];

        for (field, scalar, expect_nullable) in cases {
            let output = udf.return_field_from_args(ReturnFieldArgs {
                arg_fields: std::slice::from_ref(field),
                scalar_arguments: &[scalar],
            })?;
            assert_eq!(output.is_nullable(), expect_nullable);
            assert_eq!(output.data_type(), &DataType::Int64);
        }

        Ok(())
    }
}