Skip to content

Commit 822f3fa

Browse files
alamb and nuno-faria authored
[branch-52] fix: Ensure columns are casted to the correct names with Unions (#20146) (#20879)
- Part of #20855
- Closes #20123 on branch-52

This PR:
- Backports #20146 from @nuno-faria to the branch-52 line

---------

Co-authored-by: Nuno Faria <nunofpfaria@gmail.com>
1 parent ff4c630 commit 822f3fa

4 files changed

Lines changed: 208 additions & 6 deletions

File tree

datafusion/expr/src/expr_rewriter/mod.rs

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -261,9 +261,16 @@ fn coerce_exprs_for_schema(
261261
#[expect(deprecated)]
262262
Expr::Wildcard { .. } => Ok(expr),
263263
_ => {
264-
// maintain the original name when casting
265-
let name = dst_schema.field(idx).name();
266-
Ok(expr.cast_to(new_type, src_schema)?.alias(name))
264+
match expr {
265+
// maintain the original name when casting a column, to avoid the
266+
// tablename being added to it when not explicitly set by the query
267+
// (see: https://github.com/apache/datafusion/issues/18818)
268+
Expr::Column(ref column) => {
269+
let name = column.name().to_owned();
270+
Ok(expr.cast_to(new_type, src_schema)?.alias(name))
271+
}
272+
_ => Ok(expr.cast_to(new_type, src_schema)?),
273+
}
267274
}
268275
}
269276
} else {

datafusion/optimizer/tests/optimizer_integration.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -543,7 +543,7 @@ fn recursive_cte_projection_pushdown() -> Result<()> {
543543
RecursiveQuery: is_distinct=false
544544
Projection: test.col_int32 AS id
545545
TableScan: test projection=[col_int32]
546-
Projection: CAST(CAST(nodes.id AS Int64) + Int64(1) AS Int32) AS id
546+
Projection: CAST(CAST(nodes.id AS Int64) + Int64(1) AS Int32)
547547
Filter: nodes.id < Int32(3)
548548
TableScan: nodes projection=[id]
549549
"
@@ -567,7 +567,7 @@ fn recursive_cte_with_aliased_self_reference() -> Result<()> {
567567
RecursiveQuery: is_distinct=false
568568
Projection: test.col_int32 AS id
569569
TableScan: test projection=[col_int32]
570-
Projection: CAST(CAST(child.id AS Int64) + Int64(1) AS Int32) AS id
570+
Projection: CAST(CAST(child.id AS Int64) + Int64(1) AS Int32)
571571
SubqueryAlias: child
572572
Filter: nodes.id < Int32(3)
573573
TableScan: nodes projection=[id]
@@ -630,7 +630,7 @@ fn recursive_cte_projection_pushdown_baseline() -> Result<()> {
630630
Projection: test.col_int32 AS n
631631
Filter: test.col_int32 = Int32(5)
632632
TableScan: test projection=[col_int32]
633-
Projection: CAST(CAST(countdown.n AS Int64) - Int64(1) AS Int32) AS n
633+
Projection: CAST(CAST(countdown.n AS Int64) - Int64(1) AS Int32)
634634
Filter: countdown.n > Int32(1)
635635
TableScan: countdown projection=[n]
636636
"

datafusion/substrait/tests/cases/logical_plans.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,30 @@ mod tests {
220220
// Trigger execution to ensure plan validity
221221
DataFrame::new(ctx.state(), plan).show().await?;
222222

223+
Ok(())
224+
}
225+
#[tokio::test]
226+
async fn duplicate_name_in_union() -> Result<()> {
227+
let proto_plan =
228+
read_json("tests/testdata/test_plans/duplicate_name_in_union.substrait.json");
229+
let ctx = add_plan_schemas_to_ctx(SessionContext::new(), &proto_plan)?;
230+
let plan = from_substrait_plan(&ctx.state(), &proto_plan).await?;
231+
232+
assert_snapshot!(
233+
plan,
234+
@r"
235+
Projection: foo AS col1, bar AS col2
236+
Union
237+
Projection: foo, bar
238+
Values: (Int64(100), Int64(200))
239+
Projection: x, foo
240+
Values: (Int32(300), Int64(400))
241+
"
242+
);
243+
244+
// Trigger execution to ensure plan validity
245+
DataFrame::new(ctx.state(), plan).show().await?;
246+
223247
Ok(())
224248
}
225249
}
datafusion/substrait/tests/testdata/test_plans/duplicate_name_in_union.substrait.json

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
{
2+
"version": {
3+
"minorNumber": 54,
4+
"producer": "datafusion-test"
5+
},
6+
"relations": [
7+
{
8+
"root": {
9+
"input": {
10+
"set": {
11+
"common": {
12+
"direct": {}
13+
},
14+
"inputs": [
15+
{
16+
"project": {
17+
"common": {
18+
"emit": {
19+
"outputMapping": [2, 3]
20+
}
21+
},
22+
"input": {
23+
"read": {
24+
"common": {
25+
"direct": {}
26+
},
27+
"baseSchema": {
28+
"names": ["foo", "bar"],
29+
"struct": {
30+
"types": [
31+
{
32+
"i64": {
33+
"nullability": "NULLABILITY_REQUIRED"
34+
}
35+
},
36+
{
37+
"i64": {
38+
"nullability": "NULLABILITY_REQUIRED"
39+
}
40+
}
41+
],
42+
"nullability": "NULLABILITY_REQUIRED"
43+
}
44+
},
45+
"virtualTable": {
46+
"expressions": [
47+
{
48+
"fields": [
49+
{
50+
"literal": {
51+
"i64": "100"
52+
}
53+
},
54+
{
55+
"literal": {
56+
"i64": "200"
57+
}
58+
}
59+
]
60+
}
61+
]
62+
}
63+
}
64+
},
65+
"expressions": [
66+
{
67+
"selection": {
68+
"directReference": {
69+
"structField": {
70+
"field": 0
71+
}
72+
},
73+
"rootReference": {}
74+
}
75+
},
76+
{
77+
"selection": {
78+
"directReference": {
79+
"structField": {
80+
"field": 1
81+
}
82+
},
83+
"rootReference": {}
84+
}
85+
}
86+
]
87+
}
88+
},
89+
{
90+
"project": {
91+
"common": {
92+
"emit": {
93+
"outputMapping": [2, 3]
94+
}
95+
},
96+
"input": {
97+
"read": {
98+
"common": {
99+
"direct": {}
100+
},
101+
"baseSchema": {
102+
"names": ["x", "foo"],
103+
"struct": {
104+
"types": [
105+
{
106+
"i32": {
107+
"nullability": "NULLABILITY_REQUIRED"
108+
}
109+
},
110+
{
111+
"i64": {
112+
"nullability": "NULLABILITY_REQUIRED"
113+
}
114+
}
115+
],
116+
"nullability": "NULLABILITY_REQUIRED"
117+
}
118+
},
119+
"virtualTable": {
120+
"expressions": [
121+
{
122+
"fields": [
123+
{
124+
"literal": {
125+
"i32": 300
126+
}
127+
},
128+
{
129+
"literal": {
130+
"i64": "400"
131+
}
132+
}
133+
]
134+
}
135+
]
136+
}
137+
}
138+
},
139+
"expressions": [
140+
{
141+
"selection": {
142+
"directReference": {
143+
"structField": {
144+
"field": 0
145+
}
146+
},
147+
"rootReference": {}
148+
}
149+
},
150+
{
151+
"selection": {
152+
"directReference": {
153+
"structField": {
154+
"field": 1
155+
}
156+
},
157+
"rootReference": {}
158+
}
159+
}
160+
]
161+
}
162+
}
163+
],
164+
"op": "SET_OP_UNION_ALL"
165+
}
166+
},
167+
"names": ["col1", "col2"]
168+
}
169+
}
170+
]
171+
}

0 commit comments

Comments
 (0)