Skip to content

Commit 16621c4

Browse files
DrakeLin authored and rtyler committed
Decimal fix
Signed-off-by: DrakeLin <drakelin18@gmail.com>
1 parent 2e5f7b0 commit 16621c4

1 file changed

Lines changed: 50 additions & 5 deletions

File tree

crates/core/src/writer/stats.rs

Lines changed: 50 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -244,7 +244,8 @@ enum StatsScalar {
244244
Date(chrono::NaiveDate),
245245
Timestamp(chrono::NaiveDateTime),
246246
// We are serializing to f64 later and the ordering should be the same
247-
Decimal(f64),
247+
// Scale is stored to handle scale=0 serialization correctly
248+
Decimal { value: f64, scale: i32 },
248249
String(String),
249250
Bytes(Vec<u8>),
250251
Uuid(uuid::Uuid),
@@ -277,7 +278,10 @@ impl StatsScalar {
277278
(Statistics::Int32(v), Some(LogicalType::Decimal { scale, .. })) => {
278279
let val = get_stat!(v) as f64 / 10.0_f64.powi(*scale);
279280
// Spark serializes these as numbers
280-
Ok(Self::Decimal(val))
281+
Ok(Self::Decimal {
282+
value: val,
283+
scale: *scale,
284+
})
281285
}
282286
(Statistics::Int32(v), _) => Ok(Self::Int32(get_stat!(v))),
283287
// Int64 can be timestamp, decimal, or integer
@@ -304,7 +308,10 @@ impl StatsScalar {
304308
(Statistics::Int64(v), Some(LogicalType::Decimal { scale, .. })) => {
305309
let val = get_stat!(v) as f64 / 10.0_f64.powi(*scale);
306310
// Spark serializes these as numbers
307-
Ok(Self::Decimal(val))
311+
Ok(Self::Decimal {
312+
value: val,
313+
scale: *scale,
314+
})
308315
}
309316
(Statistics::Int64(v), _) => Ok(Self::Int64(get_stat!(v))),
310317
(Statistics::Float(v), _) => Ok(Self::Float32(get_stat!(v))),
@@ -362,7 +369,10 @@ impl StatsScalar {
362369
val = f64::from_bits(val.to_bits() - 1);
363370
}
364371

365-
Ok(Self::Decimal(val))
372+
Ok(Self::Decimal {
373+
value: val,
374+
scale: *scale,
375+
})
366376
}
367377
(Statistics::FixedLenByteArray(v), Some(LogicalType::Uuid)) => {
368378
let val = if use_min {
@@ -418,7 +428,15 @@ impl From<StatsScalar> for serde_json::Value {
418428
StatsScalar::Timestamp(v) => {
419429
serde_json::Value::from(v.format("%Y-%m-%dT%H:%M:%S%.fZ").to_string())
420430
}
421-
StatsScalar::Decimal(v) => serde_json::Value::from(v),
431+
StatsScalar::Decimal { value, scale } => {
432+
// For scale=0, serialize as integer since serde_json would otherwise
433+
// serialize f64 as "1234.0" instead of "1234"
434+
if scale == 0 {
435+
serde_json::Value::from(value.round() as i64)
436+
} else {
437+
serde_json::Value::from(value)
438+
}
439+
}
422440
StatsScalar::String(v) => serde_json::Value::from(v),
423441
StatsScalar::Bytes(v) => {
424442
let escaped_bytes = v
@@ -678,6 +696,14 @@ mod tests {
678696
}),
679697
Value::from(12340.0),
680698
),
699+
(
700+
simple_parquet_stat!(Statistics::Int32, 1234),
701+
Some(LogicalType::Decimal {
702+
scale: 0,
703+
precision: 4,
704+
}),
705+
Value::from(1234),
706+
),
681707
(
682708
simple_parquet_stat!(Statistics::Int32, 10561),
683709
Some(LogicalType::Date),
@@ -723,6 +749,14 @@ mod tests {
723749
}),
724750
Value::from(12340.0),
725751
),
752+
(
753+
simple_parquet_stat!(Statistics::Int64, 1234),
754+
Some(LogicalType::Decimal {
755+
scale: 0,
756+
precision: 4,
757+
}),
758+
Value::from(1234),
759+
),
726760
(
727761
simple_parquet_stat!(Statistics::Int64, 1234),
728762
None,
@@ -760,6 +794,17 @@ mod tests {
760794
}),
761795
Value::from(10.0),
762796
),
797+
(
798+
simple_parquet_stat!(
799+
Statistics::FixedLenByteArray,
800+
FixedLenByteArray::from(1234i128.to_be_bytes().to_vec())
801+
),
802+
Some(LogicalType::Decimal {
803+
scale: 0,
804+
precision: 4,
805+
}),
806+
Value::from(1234),
807+
),
763808
(
764809
simple_parquet_stat!(
765810
Statistics::FixedLenByteArray,

0 commit comments

Comments
 (0)