@@ -57,7 +57,7 @@ use datafusion_common::{
5757use datafusion_execution:: TaskContext ;
5858use datafusion_expr:: Operator ;
5959use datafusion_physical_expr:: equivalence:: ProjectionMapping ;
60- use datafusion_physical_expr:: expressions:: { BinaryExpr , Column , lit} ;
60+ use datafusion_physical_expr:: expressions:: { BinaryExpr , Column , Literal , lit} ;
6161use datafusion_physical_expr:: intervals:: utils:: check_support;
6262use datafusion_physical_expr:: utils:: { collect_columns, reassign_expr_columns} ;
6363use datafusion_physical_expr:: {
@@ -359,18 +359,37 @@ impl FilterExec {
359359 if let Some ( binary) = conjunction. as_any ( ) . downcast_ref :: < BinaryExpr > ( )
360360 && binary. op ( ) == & Operator :: Eq
361361 {
362- // Filter evaluates to single value for all partitions
363- if input_eqs. is_expr_constant ( binary. left ( ) ) . is_some ( ) {
364- let across = input_eqs
365- . is_expr_constant ( binary. right ( ) )
366- . unwrap_or_default ( ) ;
362+ // Check if either side is constant — either already known
363+ // constant from the input equivalence properties, or a literal
364+ // value (which is inherently constant across all partitions).
365+ let left_const =
366+ input_eqs. is_expr_constant ( binary. left ( ) ) . or_else ( || {
367+ binary
368+ . left ( )
369+ . as_any ( )
370+ . downcast_ref :: < Literal > ( )
371+ . map ( |l| AcrossPartitions :: Uniform ( Some ( l. value ( ) . clone ( ) ) ) )
372+ } ) ;
373+ let right_const =
374+ input_eqs. is_expr_constant ( binary. right ( ) ) . or_else ( || {
375+ binary
376+ . right ( )
377+ . as_any ( )
378+ . downcast_ref :: < Literal > ( )
379+ . map ( |l| AcrossPartitions :: Uniform ( Some ( l. value ( ) . clone ( ) ) ) )
380+ } ) ;
381+
382+ if let Some ( left_across) = left_const {
383+ // LEFT is constant, so RIGHT must also be constant.
384+ // Use RIGHT's known across value if available, otherwise
385+ // propagate LEFT's (e.g. Uniform from a literal).
386+ let across = right_const. unwrap_or ( left_across) ;
367387 res_constants
368388 . push ( ConstExpr :: new ( Arc :: clone ( binary. right ( ) ) , across) ) ;
369- } else if input_eqs. is_expr_constant ( binary. right ( ) ) . is_some ( ) {
370- let across = input_eqs
371- . is_expr_constant ( binary. left ( ) )
372- . unwrap_or_default ( ) ;
373- res_constants. push ( ConstExpr :: new ( Arc :: clone ( binary. left ( ) ) , across) ) ;
389+ } else if let Some ( right_across) = right_const {
390+ // RIGHT is constant, so LEFT must also be constant.
391+ res_constants
392+ . push ( ConstExpr :: new ( Arc :: clone ( binary. left ( ) ) , right_across) ) ;
374393 }
375394 }
376395 }
@@ -979,6 +998,18 @@ fn collect_columns_from_predicate_inner(
979998 let predicates = split_conjunction ( predicate) ;
980999 predicates. into_iter ( ) . for_each ( |p| {
9811000 if let Some ( binary) = p. as_any ( ) . downcast_ref :: < BinaryExpr > ( ) {
1001+ // Only extract pairs where at least one side is a Column reference.
1002+ // Pairs like `complex_expr = literal` should not create equivalence
1003+ // classes — the literal could appear in many unrelated expressions
1004+ // (e.g. sort keys), and normalize_expr's deep traversal would
1005+ // replace those occurrences with the complex expression, corrupting
1006+ // sort orderings. Constant propagation for such pairs is handled
1007+ // separately by `extend_constants`.
1008+ let has_column = binary. left ( ) . as_any ( ) . downcast_ref :: < Column > ( ) . is_some ( )
1009+ || binary. right ( ) . as_any ( ) . downcast_ref :: < Column > ( ) . is_some ( ) ;
1010+ if !has_column {
1011+ return ;
1012+ }
9821013 match binary. op ( ) {
9831014 Operator :: Eq => {
9841015 eq_predicate_columns. push ( ( binary. left ( ) , binary. right ( ) ) )
@@ -2066,4 +2097,46 @@ mod tests {
20662097
20672098 Ok ( ( ) )
20682099 }
2100+
2101+ /// Regression test for https://github.com/apache/datafusion/issues/20194
2102+ ///
2103+ /// `collect_columns_from_predicate_inner` should only extract equality
2104+ /// pairs where at least one side is a Column. Pairs like
2105+ /// `complex_expr = literal` must not create equivalence classes because
2106+ /// `normalize_expr`'s deep traversal would replace the literal inside
2107+ /// unrelated expressions (e.g. sort keys) with the complex expression.
2108+ #[ tokio:: test]
2109+ async fn test_collect_columns_skips_non_column_pairs ( ) -> Result < ( ) > {
2110+ let schema = test:: aggr_test_schema ( ) ;
2111+
2112+ // Simulate: nvl(c2, 0) = 0 → (c2 IS DISTINCT FROM 0) = 0
2113+ // Neither side is a Column, so this should NOT be extracted.
2114+ let complex_expr: Arc < dyn PhysicalExpr > = binary (
2115+ col ( "c2" , & schema) ?,
2116+ Operator :: IsDistinctFrom ,
2117+ lit ( 0u32 ) ,
2118+ & schema,
2119+ ) ?;
2120+ let predicate: Arc < dyn PhysicalExpr > =
2121+ binary ( complex_expr, Operator :: Eq , lit ( 0u32 ) , & schema) ?;
2122+
2123+ let ( equal_pairs, _) = collect_columns_from_predicate_inner ( & predicate) ;
2124+ assert_eq ! (
2125+ 0 ,
2126+ equal_pairs. len( ) ,
2127+ "Should not extract equality pairs where neither side is a Column"
2128+ ) ;
2129+
2130+ // But col = literal should still be extracted
2131+ let predicate: Arc < dyn PhysicalExpr > =
2132+ binary ( col ( "c2" , & schema) ?, Operator :: Eq , lit ( 0u32 ) , & schema) ?;
2133+ let ( equal_pairs, _) = collect_columns_from_predicate_inner ( & predicate) ;
2134+ assert_eq ! (
2135+ 1 ,
2136+ equal_pairs. len( ) ,
2137+ "Should extract equality pairs where one side is a Column"
2138+ ) ;
2139+
2140+ Ok ( ( ) )
2141+ }
20692142}
0 commit comments