@@ -394,7 +394,14 @@ impl Snapshot {
394394 engine : Arc < dyn Engine > ,
395395 predicate : Option < PredicateRef > ,
396396 ) -> SendableRBStream {
397- let scan = match self . scan_builder ( ) . with_predicate ( predicate) . build ( ) {
397+ self . warn_if_skip_stats_with_predicate ( & predicate) ;
398+ let skip_stats = predicate. is_none ( ) && self . config . skip_stats ;
399+ let scan = match self
400+ . scan_builder ( )
401+ . with_predicate ( predicate)
402+ . with_skip_stats ( skip_stats)
403+ . build ( )
404+ {
398405 Ok ( scan) => scan,
399406 Err ( err) => return Box :: pin ( once ( ready ( Err ( err) ) ) ) ,
400407 } ;
@@ -403,12 +410,23 @@ impl Snapshot {
403410 . scan_metadata ( engine)
404411 . map ( |d| Ok ( rb_from_scan_meta ( d?) ?) ) ;
405412
406- match ScanRowOutStream :: try_new ( self . inner . clone ( ) , stream) {
413+ match ScanRowOutStream :: try_new ( self . inner . clone ( ) , stream, skip_stats ) {
407414 Ok ( s) => s. boxed ( ) ,
408415 Err ( err) => Box :: pin ( once ( ready ( Err ( err) ) ) ) ,
409416 }
410417 }
411418
419+ fn warn_if_skip_stats_with_predicate ( & self , predicate : & Option < PredicateRef > ) {
420+ if self . config . skip_stats && predicate. is_some ( ) {
421+ tracing:: warn!(
422+ "`DeltaTable` was opened with `skip_stats=true`, but this query has \
423+ a predicate. Every file in the table will be scanned. To avoid \
424+ this, open a separate `DeltaTable` without `skip_stats=true` for \
425+ query workloads."
426+ ) ;
427+ }
428+ }
429+
412430 pub ( crate ) fn files_from < T : Iterator < Item = RecordBatch > + Send + ' static > (
413431 & self ,
414432 engine : Arc < dyn Engine > ,
@@ -417,7 +435,14 @@ impl Snapshot {
417435 existing_data : Box < T > ,
418436 existing_predicate : Option < PredicateRef > ,
419437 ) -> SendableRBStream {
420- let scan = match self . scan_builder ( ) . with_predicate ( predicate) . build ( ) {
438+ self . warn_if_skip_stats_with_predicate ( & predicate) ;
439+ let skip_stats = predicate. is_none ( ) && self . config . skip_stats ;
440+ let scan = match self
441+ . scan_builder ( )
442+ . with_predicate ( predicate)
443+ . with_skip_stats ( skip_stats)
444+ . build ( )
445+ {
421446 Ok ( scan) => scan,
422447 Err ( err) => return Box :: pin ( once ( ready ( Err ( err) ) ) ) ,
423448 } ;
@@ -426,7 +451,7 @@ impl Snapshot {
426451 . scan_metadata_from ( engine, existing_version, existing_data, existing_predicate)
427452 . map ( |d| Ok ( rb_from_scan_meta ( d?) ?) ) ;
428453
429- match ScanRowOutStream :: try_new ( self . inner . clone ( ) , stream) {
454+ match ScanRowOutStream :: try_new ( self . inner . clone ( ) , stream, skip_stats ) {
430455 Ok ( s) => s. boxed ( ) ,
431456 Err ( err) => Box :: pin ( once ( ready ( Err ( err) ) ) ) ,
432457 }
@@ -1046,7 +1071,7 @@ mod tests {
10461071 // use super::replay::tests::test_log_replay;
10471072 use super :: * ;
10481073 use crate :: {
1049- DeltaTable , checkpoints,
1074+ DeltaTable , DeltaTableConfig , checkpoints,
10501075 kernel:: transaction:: CommitData ,
10511076 kernel:: transaction:: { CommitBuilder , TableReference } ,
10521077 kernel:: { Action , DataType , PrimitiveType , StructField , StructType } ,
@@ -1432,6 +1457,82 @@ mod tests {
14321457 Ok ( ( ) )
14331458 }
14341459
1460+ #[ tokio:: test]
1461+ async fn test_file_views_skip_stats_same_paths ( ) -> TestResult {
1462+ let base = TestTables :: Checkpoints . table_builder ( ) ?. build_storage ( ) ?;
1463+ let mut skip_cfg = DeltaTableConfig :: default ( ) ;
1464+ skip_cfg. skip_stats = true ;
1465+ let with_skip = EagerSnapshot :: try_new ( base. as_ref ( ) , skip_cfg, Some ( 12 ) ) . await ?;
1466+ let full = EagerSnapshot :: try_new ( base. as_ref ( ) , Default :: default ( ) , Some ( 12 ) ) . await ?;
1467+ let mut paths_skip: Vec < String > = with_skip
1468+ . file_views ( base. as_ref ( ) , None )
1469+ . map_ok ( |v| v. path ( ) . to_string ( ) )
1470+ . try_collect ( )
1471+ . await ?;
1472+ let mut paths_full: Vec < String > = full
1473+ . file_views ( base. as_ref ( ) , None )
1474+ . map_ok ( |v| v. path ( ) . to_string ( ) )
1475+ . try_collect ( )
1476+ . await ?;
1477+ paths_skip. sort ( ) ;
1478+ paths_full. sort ( ) ;
1479+ assert_eq ! ( paths_skip, paths_full) ;
1480+ Ok ( ( ) )
1481+ }
1482+
1483+ #[ tokio:: test]
1484+ async fn test_skip_stats_leaves_stats_parsed_null ( ) -> TestResult {
1485+ let base = TestTables :: Checkpoints . table_builder ( ) ?. build_storage ( ) ?;
1486+
1487+ let default_eager =
1488+ EagerSnapshot :: try_new ( base. as_ref ( ) , Default :: default ( ) , Some ( 12 ) ) . await ?;
1489+ let default_stats: Vec < bool > = default_eager
1490+ . file_views ( base. as_ref ( ) , None )
1491+ . map_ok ( |view| view. stats ( ) . is_some ( ) )
1492+ . try_collect ( )
1493+ . await ?;
1494+ assert ! ( !default_stats. is_empty( ) ) ;
1495+ assert ! ( default_stats. iter( ) . any( |b| * b) ) ;
1496+
1497+ let mut skip_cfg = DeltaTableConfig :: default ( ) ;
1498+ skip_cfg. skip_stats = true ;
1499+ let skip_eager = EagerSnapshot :: try_new ( base. as_ref ( ) , skip_cfg, Some ( 12 ) ) . await ?;
1500+ let skip_stats: Vec < Option < String > > = skip_eager
1501+ . file_views ( base. as_ref ( ) , None )
1502+ . map_ok ( |view| view. stats ( ) )
1503+ . try_collect ( )
1504+ . await ?;
1505+ assert ! ( !skip_stats. is_empty( ) ) ;
1506+ assert ! ( skip_stats. iter( ) . all( |s| s. is_none( ) ) ) ;
1507+
1508+ Ok ( ( ) )
1509+ }
1510+
1511+ #[ tokio:: test]
1512+ async fn test_skip_stats_bypassed_when_predicate_present ( ) -> TestResult {
1513+ use delta_kernel:: expressions:: Scalar ;
1514+
1515+ let base = TestTables :: Checkpoints . table_builder ( ) ?. build_storage ( ) ?;
1516+
1517+ let mut skip_cfg = DeltaTableConfig :: default ( ) ;
1518+ skip_cfg. skip_stats = true ;
1519+ let snapshot = Snapshot :: try_new ( base. as_ref ( ) , skip_cfg, Some ( 12 ) ) . await ?;
1520+
1521+ let predicate: PredicateRef =
1522+ Arc :: new ( Expression :: column ( [ "value" ] ) . gt ( Scalar :: String ( "" . to_string ( ) ) ) ) ;
1523+
1524+ let has_stats: Vec < bool > = snapshot
1525+ . file_views ( base. as_ref ( ) , Some ( predicate) )
1526+ . map_ok ( |view| view. stats ( ) . is_some ( ) )
1527+ . try_collect ( )
1528+ . await ?;
1529+
1530+ assert ! ( !has_stats. is_empty( ) ) ;
1531+ assert ! ( has_stats. iter( ) . any( |b| * b) ) ;
1532+
1533+ Ok ( ( ) )
1534+ }
1535+
14351536 #[ test]
14361537 fn test_materialized_files_full_table_seed_shares_batches ( ) {
14371538 let batch = RecordBatch :: new_empty ( Arc :: new ( arrow_schema:: Schema :: empty ( ) ) ) ;
0 commit comments