Skip to content

Commit 6b92fcf

Browse files
Page index udf (#371)
1 parent 233a984 commit 6b92fcf

4 files changed

Lines changed: 664 additions & 1 deletion

File tree

crates/datafusion-app/src/local.rs

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -115,6 +115,10 @@ impl ExecutionContext {
115115
"parquet_metadata",
116116
Arc::new(datafusion_functions_parquet::ParquetMetadataFunc {}),
117117
);
118+
session_ctx.register_udtf(
119+
"parquet_page_index",
120+
Arc::new(datafusion_functions_parquet::ParquetPageIndexFunc {}),
121+
);
118122

119123
let catalog = create_app_catalog(config, app_name, app_version)?;
120124
session_ctx.register_catalog(&config.catalog.name, catalog);

crates/datafusion-functions-parquet/Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -15,3 +15,8 @@ arrow = { version = "57" }
1515
async-trait = "0.1.41"
1616
datafusion = { version = "51" }
1717
parquet = { default-features = false, version = "57" }
18+
19+
[dev-dependencies]
20+
parquet = { features = ["arrow"], version = "57" }
21+
tempfile = "3"
22+
tokio = { features = ["macros", "rt"], version = "1" }

crates/datafusion-functions-parquet/src/lib.rs

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,9 @@ use parquet::file::statistics::Statistics;
3737
use std::fs::File;
3838
use std::sync::Arc;
3939

40+
mod page_index;
41+
pub use page_index::ParquetPageIndexFunc;
42+
4043
// Copied from https://github.com/apache/datafusion/blob/main/datafusion-cli/src/functions.rs
4144
/// PARQUET_META table function
4245
#[derive(Debug)]
@@ -219,7 +222,7 @@ impl TableFunctionImpl for ParquetMetadataFunc {
219222
column_id_arr.push(col_idx as i64);
220223
file_offset_arr.push(column.file_offset());
221224
num_values_arr.push(column.num_values());
222-
path_in_schema_arr.push(column.column_path().to_string());
225+
path_in_schema_arr.push(column.column_path().string());
223226
type_arr.push(column.column_type().to_string());
224227
logical_type_arr.push(
225228
column

0 commit comments

Comments
 (0)