Skip to content

Commit 9522508

Browse files
authored
Redesign json array streaming for datafusion (#31)
1 parent dbf7781 commit 9522508

26 files changed

Lines changed: 1616 additions & 474 deletions

File tree

Cargo.lock

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,7 @@ tempfile = "3"
181181
testcontainers = { version = "0.25.2", features = ["default"] }
182182
testcontainers-modules = { version = "0.13" }
183183
tokio = { version = "1.48", features = ["macros", "rt", "sync"] }
184+
tokio-stream = "0.1"
184185
url = "2.5.7"
185186

186187
[workspace.lints.clippy]

datafusion-examples/examples/csv_json_opener.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ async fn json_opener() -> Result<()> {
121121
projected,
122122
FileCompressionType::UNCOMPRESSED,
123123
Arc::new(object_store),
124-
false,
124+
true,
125125
);
126126

127127
let scan_config = FileScanConfigBuilder::new(

datafusion/common/src/config.rs

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2813,23 +2813,22 @@ config_namespace! {
28132813
pub struct JsonOptions {
28142814
pub compression: CompressionTypeVariant, default = CompressionTypeVariant::UNCOMPRESSED
28152815
pub schema_infer_max_rec: Option<usize>, default = None
2816-
pub compression_level: Option<usize>, default = None
2817-
/// The format of JSON input files.
2818-
///
2819-
/// When `false` (default), expects newline-delimited JSON (NDJSON):
2820-
/// ```text
2821-
/// {"key1": 1, "key2": "val"}
2822-
/// {"key1": 2, "key2": "vals"}
2823-
/// ```
2824-
///
2825-
/// When `true`, expects JSON array format:
2826-
/// ```text
2827-
/// [
2828-
/// {"key1": 1, "key2": "val"},
2829-
/// {"key1": 2, "key2": "vals"}
2830-
/// ]
2831-
/// ```
2832-
pub format_array: bool, default = false
2816+
/// The JSON format to use when reading files.
2817+
///
2818+
/// When `true` (default), expects newline-delimited JSON (NDJSON):
2819+
/// ```text
2820+
/// {"key1": 1, "key2": "val"}
2821+
/// {"key1": 2, "key2": "vals"}
2822+
/// ```
2823+
///
2824+
/// When `false`, expects JSON array format:
2825+
/// ```text
2826+
/// [
2827+
/// {"key1": 1, "key2": "val"},
2828+
/// {"key1": 2, "key2": "vals"}
2829+
/// ]
2830+
/// ```
2831+
pub newline_delimited: bool, default = true
28332832
}
28342833
}
28352834

datafusion/core/src/dataframe/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -479,7 +479,7 @@ impl DataFrame {
479479
/// # #[tokio::main]
480480
/// # async fn main() -> Result<()> {
481481
/// let ctx = SessionContext::new();
482-
/// let df = ctx.read_json("tests/data/unnest.json", NdJsonReadOptions::default()).await?;
482+
/// let df = ctx.read_json("tests/data/unnest.json", JsonReadOptions::default()).await?;
483483
/// // expand into multiple columns if it's json array, flatten field name if it's nested structure
484484
/// let df = df.unnest_columns(&["b","c","d"])?;
485485
/// let expected = vec![

0 commit comments

Comments
 (0)