Skip to content

Commit f138ea1

Browse files
committed
try to fix
1 parent 3b269cf commit f138ea1

1 file changed

Lines changed: 64 additions & 10 deletions

File tree

datafusion/physical-expr/src/projection.rs

Lines changed: 64 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1120,9 +1120,11 @@ pub fn update_expr(
11201120
RewrittenInvalid,
11211121
}
11221122

1123-
// Track columns introduced by pass 1 (by name and index).
1123+
// Track Arc pointers of columns created by pass 1.
11241124
// These should not be modified by pass 2.
1125-
let mut valid_columns = HashSet::new();
1125+
// We use Arc pointer addresses (not name/index) to distinguish pass-1-created columns
1126+
// from original columns that happen to have the same name and index.
1127+
let mut pass1_created: HashSet<usize> = HashSet::new();
11261128

11271129
// First pass: try to rewrite the expression in terms of the projected expressions.
11281130
// For example, if the expression is `a + b > 5` and the projection is `a + b AS sum_ab`,
@@ -1140,12 +1142,12 @@ pub fn update_expr(
11401142
// If expr is equal to one of the projected expressions, we can short-circuit the rewrite:
11411143
for (idx, projected_expr) in projected_exprs.iter().enumerate() {
11421144
if expr.eq(&projected_expr.expr) {
1143-
// Track this column so pass 2 doesn't modify it
1144-
valid_columns.insert((projected_expr.alias.clone(), idx));
1145-
return Ok(Transformed::yes(Arc::new(Column::new(
1146-
&projected_expr.alias,
1147-
idx,
1148-
)) as _));
1145+
// Create new column and track its Arc pointer so pass 2 doesn't modify it
1146+
let new_col = Arc::new(Column::new(&projected_expr.alias, idx))
1147+
as Arc<dyn PhysicalExpr>;
1148+
// Use data pointer for trait object (ignores vtable)
1149+
pass1_created.insert(Arc::as_ptr(&new_col) as *const () as usize);
1150+
return Ok(Transformed::yes(new_col));
11491151
}
11501152
}
11511153
Ok(Transformed::no(expr))
@@ -1170,8 +1172,9 @@ pub fn update_expr(
11701172

11711173
// Skip columns introduced by pass 1 - they're already valid OUTPUT references.
11721174
// Mark state as valid since pass 1 successfully handled this column.
1173-
if valid_columns.contains(&(column.name().to_string(), column.index()))
1174-
{
1175+
// We check the Arc pointer address to distinguish pass-1-created columns from
1176+
// original columns that might have the same name and index.
1177+
if pass1_created.contains(&(Arc::as_ptr(&expr) as *const () as usize)) {
11751178
state = RewriteState::RewrittenValid;
11761179
return Ok(Transformed::no(expr));
11771180
}
@@ -3018,6 +3021,57 @@ pub(crate) mod tests {
30183021
Ok(())
30193022
}
30203023

3024+
#[test]
3025+
fn test_update_expr_column_name_collision() -> Result<()> {
3026+
// Regression test for a bug where an original column with the same (name, index)
3027+
// as a pass-1-rewritten column would incorrectly be considered "already handled".
3028+
//
3029+
// Example from SQLite tests:
3030+
// - Input schema: [col0, col1, col2]
3031+
// - Projection: [col2 AS col0] (col2@2 becomes col0@0)
3032+
// - Filter: col0 - col2 <= col2 / col2
3033+
//
3034+
// The bug: when pass 1 rewrites col2@2 to col0@0, it added ("col0", 0) to
3035+
// valid_columns. Then in pass 2, the ORIGINAL col0@0 in the filter would
3036+
// match ("col0", 0) and be incorrectly skipped, resulting in:
3037+
// col0 - col0 <= col0 / col0 = 0 - 0 <= 0 / 0 = always true (or NaN)
3038+
// instead of flagging the expression as invalid.
3039+
3040+
// Projection: [col2 AS col0] - note the alias matches another input column's name!
3041+
let projection = vec![ProjectionExpr {
3042+
expr: Arc::new(Column::new("col2", 2)),
3043+
alias: "col0".to_string(), // Alias collides with original col0!
3044+
}];
3045+
3046+
// Filter: col0@0 - col2@2 <= col2@2 / col2@2
3047+
// After correct rewrite, col2@2 becomes col0@0 (via pass 1 match)
3048+
// But col0@0 (the original) can't be resolved since the projection
3049+
// doesn't include it - should return None
3050+
let filter_predicate: Arc<dyn PhysicalExpr> = Arc::new(BinaryExpr::new(
3051+
Arc::new(BinaryExpr::new(
3052+
Arc::new(Column::new("col0", 0)), // Original col0 - NOT in projection!
3053+
Operator::Minus,
3054+
Arc::new(Column::new("col2", 2)), // This will be rewritten to col0@0
3055+
)),
3056+
Operator::LtEq,
3057+
Arc::new(BinaryExpr::new(
3058+
Arc::new(Column::new("col2", 2)),
3059+
Operator::Divide,
3060+
Arc::new(Column::new("col2", 2)),
3061+
)),
3062+
));
3063+
3064+
// With sync_with_child=false: should return None because original col0@0
3065+
// can't be resolved (only col2 is in projection, aliased as col0)
3066+
let result = update_expr(&filter_predicate, &projection, false)?;
3067+
assert!(
3068+
result.is_none(),
3069+
"Should return None when original column collides with rewritten alias but isn't in projection"
3070+
);
3071+
3072+
Ok(())
3073+
}
3074+
30213075
#[test]
30223076
fn test_project_schema_simple_columns() -> Result<()> {
30233077
// Input schema: [col0: Int64, col1: Utf8, col2: Float32]

0 commit comments

Comments
 (0)