Skip to content

Commit cc3978e

Browse files
authored
Minor: add ticket references to parallel parquet writing code (#7592)
1 parent 22d03c1 commit cc3978e

File tree

1 file changed

+6
-0
lines changed
  • datafusion/core/src/datasource/file_format

1 file changed

+6
-0
lines changed

datafusion/core/src/datasource/file_format/parquet.rs

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -844,6 +844,8 @@ async fn output_single_parquet_file_parallelized(
844844
parquet_props: &WriterProperties,
845845
) -> Result<usize> {
846846
let mut row_count = 0;
847+
// TODO decrease parallelism / buffering:
848+
// https://github.com/apache/arrow-datafusion/issues/7591
847849
let parallelism = data.len();
848850
let mut join_handles: Vec<JoinHandle<ParquetFileSerializedResult>> =
849851
Vec::with_capacity(parallelism);
@@ -877,6 +879,8 @@ async fn output_single_parquet_file_parallelized(
877879
>,
878880
> = tokio::task::spawn(async move {
879881
while let Some(data) = rx.recv().await {
882+
// TODO write incrementally
883+
// https://github.com/apache/arrow-datafusion/issues/7591
880884
object_store_writer.write_all(data.as_slice()).await?;
881885
}
882886
Ok(object_store_writer)
@@ -913,6 +917,8 @@ async fn output_single_parquet_file_parallelized(
913917
bytes_written: column.compressed_size() as _,
914918
rows_written: rg.num_rows() as _,
915919
metadata: column.clone(),
920+
// TODO need to populate the indexes when writing final file
921+
// see https://github.com/apache/arrow-datafusion/issues/7589
916922
bloom_filter: None,
917923
column_index: None,
918924
offset_index: None,

0 commit comments

Comments
 (0)