Skip to content

Commit 32dfbb0

Browse files
authored
feat: make parse_float_as_decimal work on negative numbers (#7648)
1 parent b1d134e commit 32dfbb0

File tree

3 files changed

+164
-83
lines changed

3 files changed

+164
-83
lines changed

datafusion/sql/src/expr/unary_op.rs

Lines changed: 11 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
1919
use datafusion_common::{not_impl_err, DFSchema, DataFusionError, Result};
20-
use datafusion_expr::{lit, Expr};
20+
use datafusion_expr::Expr;
2121
use sqlparser::ast::{Expr as SQLExpr, UnaryOperator, Value};
2222

2323
impl<'a, S: ContextProvider> SqlToRel<'a, S> {
@@ -39,23 +39,18 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
3939
match expr {
4040
// optimization: if it's a number literal, we apply the negative operator
4141
// here directly to calculate the new literal.
42-
SQLExpr::Value(Value::Number(n, _)) => match n.parse::<i64>() {
43-
Ok(n) => Ok(lit(-n)),
44-
Err(_) => Ok(lit(-n
45-
.parse::<f64>()
46-
.map_err(|_e| {
47-
DataFusionError::Plan(format!(
48-
"negative operator can be only applied to integer and float operands, got: {n}"))
49-
})?)),
50-
},
51-
SQLExpr::Interval(interval) => self.sql_interval_to_expr(
52-
true,
53-
interval,
42+
SQLExpr::Value(Value::Number(n, _)) => {
43+
self.parse_sql_number(&n, true)
44+
}
45+
SQLExpr::Interval(interval) => {
46+
self.sql_interval_to_expr(true, interval, schema, planner_context)
47+
}
48+
// not a literal, apply negative operator on expression
49+
_ => Ok(Expr::Negative(Box::new(self.sql_expr_to_logical_expr(
50+
expr,
5451
schema,
5552
planner_context,
56-
),
57-
// not a literal, apply negative operator on expression
58-
_ => Ok(Expr::Negative(Box::new(self.sql_expr_to_logical_expr(expr, schema, planner_context)?))),
53+
)?))),
5954
}
6055
}
6156
_ => not_impl_err!("Unsupported SQL unary operator {op:?}"),

datafusion/sql/src/expr/value.rs

Lines changed: 61 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ use datafusion_expr::{lit, Expr, Operator};
2727
use log::debug;
2828
use sqlparser::ast::{BinaryOperator, Expr as SQLExpr, Interval, Value};
2929
use sqlparser::parser::ParserError::ParserError;
30+
use std::borrow::Cow;
3031
use std::collections::HashSet;
3132

3233
impl<'a, S: ContextProvider> SqlToRel<'a, S> {
@@ -36,7 +37,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
3637
param_data_types: &[DataType],
3738
) -> Result<Expr> {
3839
match value {
39-
Value::Number(n, _) => self.parse_sql_number(&n),
40+
Value::Number(n, _) => self.parse_sql_number(&n, false),
4041
Value::SingleQuotedString(s) | Value::DoubleQuotedString(s) => Ok(lit(s)),
4142
Value::Null => Ok(Expr::Literal(ScalarValue::Null)),
4243
Value::Boolean(n) => Ok(lit(n)),
@@ -55,35 +56,36 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
5556
}
5657

5758
/// Parse number in sql string, convert to Expr::Literal
58-
fn parse_sql_number(&self, n: &str) -> Result<Expr> {
59-
if let Ok(n) = n.parse::<i64>() {
60-
Ok(lit(n))
61-
} else if let Ok(n) = n.parse::<u64>() {
62-
Ok(lit(n))
63-
} else if self.options.parse_float_as_decimal {
64-
// remove leading zeroes
65-
let str = n.trim_start_matches('0');
66-
if let Some(i) = str.find('.') {
67-
let p = str.len() - 1;
68-
let s = str.len() - i - 1;
69-
let str = str.replace('.', "");
70-
let n = parse_decimal_128_without_scale(&str)?;
71-
Ok(Expr::Literal(ScalarValue::Decimal128(
72-
Some(n),
73-
p as u8,
74-
s as i8,
75-
)))
76-
} else {
77-
let number = parse_decimal_128_without_scale(str)?;
78-
Ok(Expr::Literal(ScalarValue::Decimal128(
79-
Some(number),
80-
str.len() as u8,
81-
0,
82-
)))
59+
pub(super) fn parse_sql_number(
60+
&self,
61+
unsigned_number: &str,
62+
negative: bool,
63+
) -> Result<Expr> {
64+
let signed_number: Cow<str> = if negative {
65+
Cow::Owned(format!("-{unsigned_number}"))
66+
} else {
67+
Cow::Borrowed(unsigned_number)
68+
};
69+
70+
// Try to parse as i64 first, then u64 if negative is false, then decimal or f64
71+
72+
if let Ok(n) = signed_number.parse::<i64>() {
73+
return Ok(lit(n));
74+
}
75+
76+
if !negative {
77+
if let Ok(n) = unsigned_number.parse::<u64>() {
78+
return Ok(lit(n));
8379
}
80+
}
81+
82+
if self.options.parse_float_as_decimal {
83+
parse_decimal_128(unsigned_number, negative)
8484
} else {
85-
n.parse::<f64>().map(lit).map_err(|_| {
86-
DataFusionError::from(ParserError(format!("Cannot parse {n} as f64")))
85+
signed_number.parse::<f64>().map(lit).map_err(|_| {
86+
DataFusionError::from(ParserError(format!(
87+
"Cannot parse {signed_number} as f64"
88+
)))
8789
})
8890
}
8991
}
@@ -391,21 +393,45 @@ const fn try_decode_hex_char(c: u8) -> Option<u8> {
391393
}
392394
}
393395

394-
fn parse_decimal_128_without_scale(s: &str) -> Result<i128> {
395-
let number = s.parse::<i128>().map_err(|e| {
396+
/// Parse Decimal128 from a string
397+
///
398+
/// TODO: support parsing from scientific notation
399+
fn parse_decimal_128(unsigned_number: &str, negative: bool) -> Result<Expr> {
400+
// remove leading zeroes
401+
let trimmed = unsigned_number.trim_start_matches('0');
402+
// parse precision and scale, remove decimal point if exists
403+
let (precision, scale, replaced_str) = if trimmed == "." {
404+
// special cases for numbers such as “0.”, “000.”, and so on.
405+
(1, 0, Cow::Borrowed("0"))
406+
} else if let Some(i) = trimmed.find('.') {
407+
(
408+
trimmed.len() - 1,
409+
trimmed.len() - i - 1,
410+
Cow::Owned(trimmed.replace('.', "")),
411+
)
412+
} else {
413+
// no decimal point, keep as is
414+
(trimmed.len(), 0, Cow::Borrowed(trimmed))
415+
};
416+
417+
let number = replaced_str.parse::<i128>().map_err(|e| {
396418
DataFusionError::from(ParserError(format!(
397-
"Cannot parse {s} as i128 when building decimal: {e}"
419+
"Cannot parse {replaced_str} as i128 when building decimal: {e}"
398420
)))
399421
})?;
400422

401-
const DECIMAL128_MAX_VALUE: i128 = 10_i128.pow(DECIMAL128_MAX_PRECISION as u32) - 1;
402-
if number > DECIMAL128_MAX_VALUE {
423+
// check precision overflow
424+
if precision as u8 > DECIMAL128_MAX_PRECISION {
403425
return Err(DataFusionError::from(ParserError(format!(
404-
"Cannot parse {s} as i128 when building decimal: precision overflow"
426+
"Cannot parse {replaced_str} as i128 when building decimal: precision overflow"
405427
))));
406428
}
407429

408-
Ok(number)
430+
Ok(Expr::Literal(ScalarValue::Decimal128(
431+
Some(if negative { -number } else { number }),
432+
precision as u8,
433+
scale as i8,
434+
)))
409435
}
410436

411437
#[cfg(test)]

datafusion/sqllogictest/test_files/options.slt

Lines changed: 92 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -84,69 +84,129 @@ statement ok
8484
drop table a
8585

8686
# test datafusion.sql_parser.parse_float_as_decimal
87+
#
8788
# default option value is false
88-
query R
89-
select 10000000000000000000.01
89+
query RR
90+
select 10000000000000000000.01, -10000000000000000000.01
9091
----
91-
10000000000000000000
92+
10000000000000000000 -10000000000000000000
9293

93-
query T
94-
select arrow_typeof(10000000000000000000.01)
94+
query TT
95+
select arrow_typeof(10000000000000000000.01), arrow_typeof(-10000000000000000000.01)
96+
----
97+
Float64 Float64
98+
99+
# select 0, i64::MIN, i64::MIN-1, i64::MAX, i64::MAX + 1, u64::MAX, u64::MAX + 1
100+
query IIRIIIR
101+
select 0, -9223372036854775808, -9223372036854775809, 9223372036854775807,
102+
9223372036854775808, 18446744073709551615, 18446744073709551616
95103
----
96-
Float64
104+
0 -9223372036854775808 -9223372036854776000 9223372036854775807 9223372036854775808 18446744073709551615 18446744073709552000
105+
106+
query TTTTTTT
107+
select arrow_typeof(0), arrow_typeof(-9223372036854775808), arrow_typeof(-9223372036854775809),
108+
arrow_typeof(9223372036854775807), arrow_typeof(9223372036854775808),
109+
arrow_typeof(18446744073709551615), arrow_typeof(18446744073709551616)
110+
----
111+
Int64 Int64 Float64 Int64 UInt64 UInt64 Float64
112+
97113

98114
statement ok
99115
set datafusion.sql_parser.parse_float_as_decimal = true;
100116

101-
query R
102-
select 10000000000000000000.01
117+
query RR
118+
select 10000000000000000000.01, -10000000000000000000.01
119+
----
120+
10000000000000000000.01 -10000000000000000000.01
121+
122+
query TT
123+
select arrow_typeof(10000000000000000000.01), arrow_typeof(-10000000000000000000.01)
103124
----
104-
10000000000000000000.01
125+
Decimal128(22, 2) Decimal128(22, 2)
105126

106-
query T
107-
select arrow_typeof(10000000000000000000.01)
127+
# select 0, i64::MIN, i64::MIN-1, i64::MAX, i64::MAX + 1, u64::MAX, u64::MAX + 1
128+
query IIRIIIR
129+
select 0, -9223372036854775808, -9223372036854775809, 9223372036854775807,
130+
9223372036854775808, 18446744073709551615, 18446744073709551616
108131
----
109-
Decimal128(22, 2)
132+
0 -9223372036854775808 -9223372036854775809 9223372036854775807 9223372036854775808 18446744073709551615 18446744073709551616
110133

111-
query R
112-
select 999999999999999999999999999999999999
134+
query TTTTTTT
135+
select arrow_typeof(0), arrow_typeof(-9223372036854775808), arrow_typeof(-9223372036854775809),
136+
arrow_typeof(9223372036854775807), arrow_typeof(9223372036854775808),
137+
arrow_typeof(18446744073709551615), arrow_typeof(18446744073709551616)
113138
----
114-
999999999999999999999999999999999999
139+
Int64 Int64 Decimal128(19, 0) Int64 UInt64 UInt64 Decimal128(20, 0)
115140

116-
query T
117-
select arrow_typeof(999999999999999999999999999999999999)
141+
# special cases
142+
query RRRR
143+
select .0 as c1, 0. as c2, 0000. as c3, 00000.00 as c4
118144
----
119-
Decimal128(36, 0)
145+
0 0 0 0
120146

121-
query R
122-
select 99999999999999999999999999999999999999
147+
query TTTT
148+
select arrow_typeof(.0) as c1, arrow_typeof(0.) as c2, arrow_typeof(0000.) as c3, arrow_typeof(00000.00) as c4
123149
----
124-
99999999999999999999999999999999999999
150+
Decimal128(1, 1) Decimal128(1, 0) Decimal128(1, 0) Decimal128(2, 2)
125151

126-
query T
127-
select arrow_typeof(99999999999999999999999999999999999999)
152+
query RR
153+
select 999999999999999999999999999999999999, -999999999999999999999999999999999999
128154
----
129-
Decimal128(38, 0)
155+
999999999999999999999999999999999999 -999999999999999999999999999999999999
130156

131-
query R
132-
select 9999999999999999999999999999999999.9999
157+
query TT
158+
select arrow_typeof(999999999999999999999999999999999999), arrow_typeof(-999999999999999999999999999999999999)
133159
----
134-
9999999999999999999999999999999999.9999
160+
Decimal128(36, 0) Decimal128(36, 0)
135161

136-
query T
137-
select arrow_typeof(9999999999999999999999999999999999.9999)
162+
query RR
163+
select 99999999999999999999999999999999999999, -99999999999999999999999999999999999999
138164
----
139-
Decimal128(38, 4)
165+
99999999999999999999999999999999999999 -99999999999999999999999999999999999999
166+
167+
query TT
168+
select arrow_typeof(99999999999999999999999999999999999999), arrow_typeof(-99999999999999999999999999999999999999)
169+
----
170+
Decimal128(38, 0) Decimal128(38, 0)
171+
172+
query RR
173+
select 9999999999999999999999999999999999.9999, -9999999999999999999999999999999999.9999
174+
----
175+
9999999999999999999999999999999999.9999 -9999999999999999999999999999999999.9999
176+
177+
query TT
178+
select arrow_typeof(9999999999999999999999999999999999.9999), arrow_typeof(-9999999999999999999999999999999999.9999)
179+
----
180+
Decimal128(38, 4) Decimal128(38, 4)
181+
182+
# leading zeroes
183+
query RRR
184+
select 00009999999999999999999999999999999999.9999, -00009999999999999999999999999999999999.9999, 0018446744073709551616
185+
----
186+
9999999999999999999999999999999999.9999 -9999999999999999999999999999999999.9999 18446744073709551616
187+
188+
query TTT
189+
select arrow_typeof(00009999999999999999999999999999999999.9999),
190+
arrow_typeof(-00009999999999999999999999999999999999.9999),
191+
arrow_typeof(0018446744073709551616)
192+
----
193+
Decimal128(38, 4) Decimal128(38, 4) Decimal128(20, 0)
140194

141195
# precision overflow
142-
statement error SQL error: ParserError\("Cannot parse 123456789012345678901234567890123456789 as i128 when building decimal: precision overflow"\)
196+
statement error DataFusion error: SQL error: ParserError\("Cannot parse 123456789012345678901234567890123456789 as i128 when building decimal: precision overflow"\)
143197
select 123456789.012345678901234567890123456789
144198

199+
statement error SQL error: ParserError\("Cannot parse 123456789012345678901234567890123456789 as i128 when building decimal: precision overflow"\)
200+
select -123456789.012345678901234567890123456789
201+
145202
# can not fit in i128
146203
statement error SQL error: ParserError\("Cannot parse 1234567890123456789012345678901234567890 as i128 when building decimal: number too large to fit in target type"\)
147204
select 123456789.0123456789012345678901234567890
148205

149-
# Restore those to default value again
206+
statement error SQL error: ParserError\("Cannot parse 1234567890123456789012345678901234567890 as i128 when building decimal: number too large to fit in target type"\)
207+
select -123456789.0123456789012345678901234567890
208+
209+
# Restore option to default value
150210
statement ok
151211
set datafusion.sql_parser.parse_float_as_decimal = false;
152212

0 commit comments

Comments
 (0)