Skip to content

Commit 6a1f409

Browse files
committed
chore: enable snappy compression on checkpoints
Our checkpoints are larger than they need to be, and this can lead to longer I/O times when loading large unoptimized checkpoints from storage.

Signed-off-by: R. Tyler Croy <rtyler@brokenco.de>
1 parent 5950bbc commit 6a1f409

1 file changed

Lines changed: 10 additions & 1 deletion

File tree

crates/core/src/protocol/checkpoints.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
33
use std::sync::LazyLock;
44

5+
use parquet::file::properties::WriterProperties;
56
use url::Url;
67

78
use arrow::compute::filter_record_batch;
@@ -65,7 +66,15 @@ pub(crate) async fn create_checkpoint_for(
6566

6667
let root_store = log_store.root_object_store(operation_id);
6768
let object_store_writer = ParquetObjectWriter::new(root_store.clone(), cp_path.clone());
68-
let mut writer = AsyncArrowWriter::try_new(object_store_writer, first_batch.schema(), None)?;
69+
let mut writer = AsyncArrowWriter::try_new(
70+
object_store_writer,
71+
first_batch.schema(),
72+
Some(
73+
WriterProperties::builder()
74+
.set_compression(parquet::basic::Compression::SNAPPY)
75+
.build(),
76+
),
77+
)?;
6978
writer.write(&first_batch).await?;
7079

7180
// Hold onto the schema used for future batches.

0 commit comments

Comments (0)