Skip to content

Commit 3e4f0e1

Browse files
committed
Revert "fix: respect inexact flags in row group metadata (apache#16412)"
This reverts commit afc90f7.
1 parent 6c11afe commit 3e4f0e1

3 files changed

Lines changed: 7 additions & 31 deletions

File tree

datafusion/core/src/datasource/file_format/parquet.rs

Lines changed: 1 addition & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -523,23 +523,11 @@ mod tests {
523523
let dic_array = DictionaryArray::<Int32Type>::try_new(keys, Arc::new(values))?;
524524
let c_dic: ArrayRef = Arc::new(dic_array);
525525

526-
// Data for column string_truncation: ["a".repeat(128), null, "b".repeat(128), null]
527-
let string_truncation: ArrayRef = Arc::new(StringArray::from(vec![
528-
Some("a".repeat(128)),
529-
None,
530-
Some("b".repeat(128)),
531-
None,
532-
]));
533-
534-
let batch1 = RecordBatch::try_from_iter(vec![
535-
("c_dic", c_dic),
536-
("string_truncation", string_truncation),
537-
])?;
526+
let batch1 = RecordBatch::try_from_iter(vec![("c_dic", c_dic)])?;
538527

539528
// Use store_parquet to write each batch to its own file
540529
// . batch1 written into first file and includes:
541530
// - column c_dic that has 4 rows with no null. Stats min and max of dictionary column is available.
542-
// - column string_truncation that has 4 rows with 2 nulls. Stats min and max of string column is available but not exact.
543531
let store = Arc::new(RequestCountingObjectStore::new(Arc::new(
544532
LocalFileSystem::new(),
545533
)));
@@ -575,19 +563,6 @@ mod tests {
575563
Precision::Exact(Utf8(Some("a".into())))
576564
);
577565

578-
// column string_truncation
579-
let string_truncation_stats = &stats.column_statistics[1];
580-
581-
assert_eq!(string_truncation_stats.null_count, Precision::Exact(2));
582-
assert_eq!(
583-
string_truncation_stats.max_value,
584-
Precision::Inexact(ScalarValue::Utf8View(Some("b".repeat(63) + "c")))
585-
);
586-
assert_eq!(
587-
string_truncation_stats.min_value,
588-
Precision::Inexact(ScalarValue::Utf8View(Some("a".repeat(64))))
589-
);
590-
591566
Ok(())
592567
}
593568

datafusion/datasource-parquet/src/file_format.rs

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -19,11 +19,11 @@
1919
2020
use std::any::Any;
2121
use std::cell::RefCell;
22+
use std::fmt;
2223
use std::fmt::Debug;
2324
use std::ops::Range;
2425
use std::rc::Rc;
2526
use std::sync::Arc;
26-
use std::{fmt, vec};
2727

2828
use arrow::array::RecordBatch;
2929
use arrow::datatypes::{Fields, Schema, SchemaRef, TimeUnit};
@@ -36,15 +36,16 @@ use datafusion_datasource::write::{
3636
use datafusion_datasource::file_format::{FileFormat, FileFormatFactory};
3737
use datafusion_datasource::write::demux::DemuxedStreamReceiver;
3838

39+
use arrow::compute::sum;
3940
use arrow::datatypes::{DataType, Field, FieldRef};
4041
use datafusion_common::config::{ConfigField, ConfigFileType, TableParquetOptions};
4142
#[cfg(feature = "parquet_encryption")]
4243
use datafusion_common::encryption::map_config_decryption_to_decryption;
4344
use datafusion_common::encryption::FileDecryptionProperties;
4445
use datafusion_common::parsers::CompressionTypeVariant;
4546
use datafusion_common::{
46-
internal_datafusion_err, internal_err, not_impl_err, DataFusionError, GetExt,
47-
HashSet, Result, DEFAULT_PARQUET_EXTENSION,
47+
internal_datafusion_err, internal_err, not_impl_err, ColumnStatistics,
48+
DataFusionError, GetExt, HashSet, Result, DEFAULT_PARQUET_EXTENSION,
4849
};
4950
use datafusion_common::{HashMap, Statistics};
5051
use datafusion_common_runtime::{JoinSet, SpawnedTask};

datafusion/functions-aggregate/src/min_max.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -736,7 +736,7 @@ macro_rules! min_max {
736736
}
737737

738738
/// An accumulator to compute the maximum value
739-
#[derive(Debug, Clone)]
739+
#[derive(Debug)]
740740
pub struct MaxAccumulator {
741741
max: ScalarValue,
742742
}
@@ -1057,7 +1057,7 @@ impl AggregateUDFImpl for Min {
10571057
}
10581058

10591059
/// An accumulator to compute the minimum value
1060-
#[derive(Debug, Clone)]
1060+
#[derive(Debug)]
10611061
pub struct MinAccumulator {
10621062
min: ScalarValue,
10631063
}

0 commit comments

Comments
 (0)