@@ -56,7 +56,7 @@ use datafusion_common::{
5656use datafusion_execution:: TaskContext ;
5757use datafusion_expr:: Operator ;
5858use datafusion_physical_expr:: equivalence:: ProjectionMapping ;
59- use datafusion_physical_expr:: expressions:: { BinaryExpr , Column , lit} ;
59+ use datafusion_physical_expr:: expressions:: { BinaryExpr , Column , Literal , lit} ;
6060use datafusion_physical_expr:: intervals:: utils:: check_support;
6161use datafusion_physical_expr:: utils:: { collect_columns, reassign_expr_columns} ;
6262use datafusion_physical_expr:: {
@@ -358,18 +358,37 @@ impl FilterExec {
358358 if let Some ( binary) = conjunction. as_any ( ) . downcast_ref :: < BinaryExpr > ( )
359359 && binary. op ( ) == & Operator :: Eq
360360 {
361- // Filter evaluates to single value for all partitions
362- if input_eqs. is_expr_constant ( binary. left ( ) ) . is_some ( ) {
363- let across = input_eqs
364- . is_expr_constant ( binary. right ( ) )
365- . unwrap_or_default ( ) ;
361+ // Check if either side is constant — either already known
362+ // constant from the input equivalence properties, or a literal
363+ // value (which is inherently constant across all partitions).
364+ let left_const = input_eqs
365+ . is_expr_constant ( binary. left ( ) )
366+ . or_else ( || {
367+ binary. left ( ) . as_any ( ) . downcast_ref :: < Literal > ( ) . map ( |l| {
368+ AcrossPartitions :: Uniform ( Some ( l. value ( ) . clone ( ) ) )
369+ } )
370+ } ) ;
371+ let right_const = input_eqs
372+ . is_expr_constant ( binary. right ( ) )
373+ . or_else ( || {
374+ binary. right ( ) . as_any ( ) . downcast_ref :: < Literal > ( ) . map ( |l| {
375+ AcrossPartitions :: Uniform ( Some ( l. value ( ) . clone ( ) ) )
376+ } )
377+ } ) ;
378+
379+ if let Some ( left_across) = left_const {
380+ // LEFT is constant, so RIGHT must also be constant.
381+ // Use RIGHT's known across value if available, otherwise
382+ // propagate LEFT's (e.g. Uniform from a literal).
383+ let across = right_const. unwrap_or ( left_across) ;
366384 res_constants
367385 . push ( ConstExpr :: new ( Arc :: clone ( binary. right ( ) ) , across) ) ;
368- } else if input_eqs. is_expr_constant ( binary. right ( ) ) . is_some ( ) {
369- let across = input_eqs
370- . is_expr_constant ( binary. left ( ) )
371- . unwrap_or_default ( ) ;
372- res_constants. push ( ConstExpr :: new ( Arc :: clone ( binary. left ( ) ) , across) ) ;
386+ } else if let Some ( right_across) = right_const {
387+ // RIGHT is constant, so LEFT must also be constant.
388+ res_constants. push ( ConstExpr :: new (
389+ Arc :: clone ( binary. left ( ) ) ,
390+ right_across,
391+ ) ) ;
373392 }
374393 }
375394 }
@@ -977,6 +996,19 @@ fn collect_columns_from_predicate_inner(
977996 let predicates = split_conjunction ( predicate) ;
978997 predicates. into_iter ( ) . for_each ( |p| {
979998 if let Some ( binary) = p. as_any ( ) . downcast_ref :: < BinaryExpr > ( ) {
999+ // Only extract pairs where at least one side is a Column reference.
1000+ // Pairs like `complex_expr = literal` should not create equivalence
1001+ // classes — the literal could appear in many unrelated expressions
1002+ // (e.g. sort keys), and normalize_expr's deep traversal would
1003+ // replace those occurrences with the complex expression, corrupting
1004+ // sort orderings. Constant propagation for such pairs is handled
1005+ // separately by `extend_constants`.
1006+ let has_column =
1007+ binary. left ( ) . as_any ( ) . downcast_ref :: < Column > ( ) . is_some ( )
1008+ || binary. right ( ) . as_any ( ) . downcast_ref :: < Column > ( ) . is_some ( ) ;
1009+ if !has_column {
1010+ return ;
1011+ }
9801012 match binary. op ( ) {
9811013 Operator :: Eq => {
9821014 eq_predicate_columns. push ( ( binary. left ( ) , binary. right ( ) ) )
@@ -2011,4 +2043,54 @@ mod tests {
20112043
20122044 Ok ( ( ) )
20132045 }
2046+
2047+ /// Regression test for https://github.com/apache/datafusion/issues/20194
2048+ ///
2049+ /// `collect_columns_from_predicate_inner` should only extract equality
2050+ /// pairs where at least one side is a Column. Pairs like
2051+ /// `complex_expr = literal` must not create equivalence classes because
2052+ /// `normalize_expr`'s deep traversal would replace the literal inside
2053+ /// unrelated expressions (e.g. sort keys) with the complex expression.
2054+ #[ tokio:: test]
2055+ async fn test_collect_columns_skips_non_column_pairs ( ) -> Result < ( ) > {
2056+ let schema = test:: aggr_test_schema ( ) ;
2057+
2058+ // Simulate: nvl(c2, 0) = 0 → (c2 IS DISTINCT FROM 0) = 0
2059+ // Neither side is a Column, so this should NOT be extracted.
2060+ let complex_expr: Arc < dyn PhysicalExpr > = binary (
2061+ col ( "c2" , & schema) ?,
2062+ Operator :: IsDistinctFrom ,
2063+ lit ( 0u32 ) ,
2064+ & schema,
2065+ ) ?;
2066+ let predicate: Arc < dyn PhysicalExpr > = binary (
2067+ complex_expr,
2068+ Operator :: Eq ,
2069+ lit ( 0u32 ) ,
2070+ & schema,
2071+ ) ?;
2072+
2073+ let ( equal_pairs, _) = collect_columns_from_predicate_inner ( & predicate) ;
2074+ assert_eq ! (
2075+ 0 ,
2076+ equal_pairs. len( ) ,
2077+ "Should not extract equality pairs where neither side is a Column"
2078+ ) ;
2079+
2080+ // But col = literal should still be extracted
2081+ let predicate: Arc < dyn PhysicalExpr > = binary (
2082+ col ( "c2" , & schema) ?,
2083+ Operator :: Eq ,
2084+ lit ( 0u32 ) ,
2085+ & schema,
2086+ ) ?;
2087+ let ( equal_pairs, _) = collect_columns_from_predicate_inner ( & predicate) ;
2088+ assert_eq ! (
2089+ 1 ,
2090+ equal_pairs. len( ) ,
2091+ "Should extract equality pairs where one side is a Column"
2092+ ) ;
2093+
2094+ Ok ( ( ) )
2095+ }
20142096}
0 commit comments