Skip to content

Commit 42fa12b

Browse files
authored
Merge branch 'main' into alamb/cleanup_inlist_builder
2 parents 10dd0a1 + b9328b9 commit 42fa12b

26 files changed

Lines changed: 1096 additions & 162 deletions

File tree

.github/workflows/rust.yml

Lines changed: 27 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,8 @@
1515
# specific language governing permissions and limitations
1616
# under the License.
1717

18+
# For some actions, we use Runs-On to run them on ASF infrastructure: https://datafusion.apache.org/contributor-guide/#ci-runners
19+
1820
name: Rust
1921

2022
concurrency:
@@ -45,7 +47,7 @@ jobs:
4547
# Check crate compiles and base cargo check passes
4648
linux-build-lib:
4749
name: linux build test
48-
runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m7a,cpu=16,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion', github.run_id) || 'ubuntu-latest' }}
50+
runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m8a,cpu=8,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion', github.run_id) || 'ubuntu-latest' }}
4951
container:
5052
image: amd64/rust
5153
steps:
@@ -99,7 +101,7 @@ jobs:
99101
linux-datafusion-substrait-features:
100102
name: cargo check datafusion-substrait features
101103
needs: linux-build-lib
102-
runs-on: ubuntu-latest
104+
runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m8a,cpu=16,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion', github.run_id) || 'ubuntu-latest' }}
103105
container:
104106
image: amd64/rust
105107
steps:
@@ -136,10 +138,11 @@ jobs:
136138
linux-datafusion-proto-features:
137139
name: cargo check datafusion-proto features
138140
needs: linux-build-lib
139-
runs-on: ubuntu-latest
141+
runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m8a,cpu=16,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion', github.run_id) || 'ubuntu-latest' }}
140142
container:
141143
image: amd64/rust
142144
steps:
145+
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # v2.0.3
143146
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
144147
- name: Setup Rust toolchain
145148
uses: ./.github/actions/setup-builder
@@ -167,10 +170,11 @@ jobs:
167170
linux-cargo-check-datafusion:
168171
name: cargo check datafusion features
169172
needs: linux-build-lib
170-
runs-on: ubuntu-latest
173+
runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m8a,cpu=16,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion', github.run_id) || 'ubuntu-latest' }}
171174
container:
172175
image: amd64/rust
173176
steps:
177+
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # v2.0.3
174178
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
175179
- name: Setup Rust toolchain
176180
uses: ./.github/actions/setup-builder
@@ -267,7 +271,7 @@ jobs:
267271
linux-test:
268272
name: cargo test (amd64)
269273
needs: linux-build-lib
270-
runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m7a,cpu=16,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion', github.run_id) || 'ubuntu-latest' }}
274+
runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m8a,cpu=16,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion', github.run_id) || 'ubuntu-latest' }}
271275
container:
272276
image: amd64/rust
273277
volumes:
@@ -318,8 +322,9 @@ jobs:
318322
linux-test-datafusion-cli:
319323
name: cargo test datafusion-cli (amd64)
320324
needs: linux-build-lib
321-
runs-on: ubuntu-latest
325+
runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m8a,cpu=16,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion', github.run_id) || 'ubuntu-latest' }}
322326
steps:
327+
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # v2.0.3
323328
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
324329
with:
325330
submodules: true
@@ -347,10 +352,11 @@ jobs:
347352
linux-test-example:
348353
name: cargo examples (amd64)
349354
needs: linux-build-lib
350-
runs-on: ubuntu-latest
355+
runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m8a,cpu=16,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion', github.run_id) || 'ubuntu-latest' }}
351356
container:
352357
image: amd64/rust
353358
steps:
359+
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # v2.0.3
354360
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
355361
with:
356362
submodules: true
@@ -377,10 +383,11 @@ jobs:
377383
linux-test-doc:
378384
name: cargo test doc (amd64)
379385
needs: linux-build-lib
380-
runs-on: ubuntu-latest
386+
runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m8a,cpu=16,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion', github.run_id) || 'ubuntu-latest' }}
381387
container:
382388
image: amd64/rust
383389
steps:
390+
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # v2.0.3
384391
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
385392
with:
386393
submodules: true
@@ -398,10 +405,11 @@ jobs:
398405
linux-rustdoc:
399406
name: cargo doc
400407
needs: linux-build-lib
401-
runs-on: ubuntu-latest
408+
runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m8a,cpu=16,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion', github.run_id) || 'ubuntu-latest' }}
402409
container:
403410
image: amd64/rust
404411
steps:
412+
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # v2.0.3
405413
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
406414
- name: Setup Rust toolchain
407415
uses: ./.github/actions/setup-builder
@@ -438,10 +446,11 @@ jobs:
438446
verify-benchmark-results:
439447
name: verify benchmark results (amd64)
440448
needs: linux-build-lib
441-
runs-on: ubuntu-latest
449+
runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m8a,cpu=16,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion', github.run_id) || 'ubuntu-latest' }}
442450
container:
443451
image: amd64/rust
444452
steps:
453+
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # v2.0.3
445454
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
446455
with:
447456
submodules: true
@@ -471,7 +480,7 @@ jobs:
471480
sqllogictest-postgres:
472481
name: "Run sqllogictest with Postgres runner"
473482
needs: linux-build-lib
474-
runs-on: ubuntu-latest
483+
runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m8a,cpu=16,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion', github.run_id) || 'ubuntu-latest' }}
475484
container:
476485
image: amd64/rust
477486
services:
@@ -489,6 +498,7 @@ jobs:
489498
--health-timeout 5s
490499
--health-retries 5
491500
steps:
501+
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # v2.0.3
492502
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
493503
with:
494504
submodules: true
@@ -509,10 +519,11 @@ jobs:
509519
sqllogictest-substrait:
510520
name: "Run sqllogictest in Substrait round-trip mode"
511521
needs: linux-build-lib
512-
runs-on: ubuntu-latest
522+
runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m8a,cpu=16,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion', github.run_id) || 'ubuntu-latest' }}
513523
container:
514524
image: amd64/rust
515525
steps:
526+
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # v2.0.3
516527
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
517528
with:
518529
submodules: true
@@ -639,10 +650,11 @@ jobs:
639650
clippy:
640651
name: clippy
641652
needs: linux-build-lib
642-
runs-on: ubuntu-latest
653+
runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m8a,cpu=16,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion', github.run_id) || 'ubuntu-latest' }}
643654
container:
644655
image: amd64/rust
645656
steps:
657+
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # v2.0.3
646658
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
647659
with:
648660
submodules: true
@@ -685,10 +697,11 @@ jobs:
685697
config-docs-check:
686698
name: check configs.md and ***_functions.md is up-to-date
687699
needs: linux-build-lib
688-
runs-on: ubuntu-latest
700+
runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m8a,cpu=16,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion', github.run_id) || 'ubuntu-latest' }}
689701
container:
690702
image: amd64/rust
691703
steps:
704+
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # v2.0.3
692705
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
693706
with:
694707
submodules: true

Cargo.lock

Lines changed: 5 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -183,7 +183,7 @@ regex = "1.12"
183183
rstest = "0.26.1"
184184
serde_json = "1"
185185
sha2 = "^0.10.9"
186-
sqlparser = { version = "0.60.0", default-features = false, features = ["std", "visitor"] }
186+
sqlparser = { version = "0.61.0", default-features = false, features = ["std", "visitor"] }
187187
strum = "0.27.2"
188188
strum_macros = "0.27.2"
189189
tempfile = "3"

datafusion-cli/src/object_storage.rs

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -749,7 +749,6 @@ mod tests {
749749
eprintln!("{e}");
750750
return Ok(());
751751
}
752-
let expected_region = "eu-central-1";
753752
let location = "s3://test-bucket/path/file.parquet";
754753
// Set it to a non-existent file to avoid reading the default configuration file
755754
unsafe {
@@ -766,9 +765,10 @@ mod tests {
766765
get_s3_object_store_builder(table_url.as_ref(), &aws_options, false).await?;
767766

768767
// Verify that the region was auto-detected in test environment
769-
assert_eq!(
770-
builder.get_config_value(&AmazonS3ConfigKey::Region),
771-
Some(expected_region.to_string())
768+
assert!(
769+
builder
770+
.get_config_value(&AmazonS3ConfigKey::Region)
771+
.is_some()
772772
);
773773

774774
Ok(())

datafusion-cli/tests/cli_integration.rs

Lines changed: 40 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,7 @@ fn make_settings() -> Settings {
4444
settings
4545
}
4646

47-
async fn setup_minio_container() -> ContainerAsync<minio::MinIO> {
47+
async fn setup_minio_container() -> Result<ContainerAsync<minio::MinIO>, String> {
4848
const MINIO_ROOT_USER: &str = "TEST-DataFusionLogin";
4949
const MINIO_ROOT_PASSWORD: &str = "TEST-DataFusionPassword";
5050

@@ -99,27 +99,23 @@ async fn setup_minio_container() -> ContainerAsync<minio::MinIO> {
9999
let stdout = container.stdout_to_vec().await.unwrap_or_default();
100100
let stderr = container.stderr_to_vec().await.unwrap_or_default();
101101

102-
panic!(
102+
return Err(format!(
103103
"Failed to execute command: {}\nError: {}\nStdout: {:?}\nStderr: {:?}",
104104
cmd_ref,
105105
e,
106106
String::from_utf8_lossy(&stdout),
107107
String::from_utf8_lossy(&stderr)
108-
);
108+
));
109109
}
110110
}
111111

112-
container
112+
Ok(container)
113113
}
114114

115-
Err(TestcontainersError::Client(e)) => {
116-
panic!(
117-
"Failed to start MinIO container. Ensure Docker is running and accessible: {e}"
118-
);
119-
}
120-
Err(e) => {
121-
panic!("Failed to start MinIO container: {e}");
122-
}
115+
Err(TestcontainersError::Client(e)) => Err(format!(
116+
"Failed to start MinIO container. Ensure Docker is running and accessible: {e}"
117+
)),
118+
Err(e) => Err(format!("Failed to start MinIO container: {e}")),
123119
}
124120
}
125121

@@ -253,7 +249,14 @@ async fn test_cli() {
253249
return;
254250
}
255251

256-
let container = setup_minio_container().await;
252+
let container = match setup_minio_container().await {
253+
Ok(c) => c,
254+
Err(e) if e.contains("toomanyrequests") => {
255+
eprintln!("Skipping test: Docker pull rate limit reached: {e}");
256+
return;
257+
}
258+
e @ Err(_) => e.unwrap(),
259+
};
257260

258261
let settings = make_settings();
259262
let _bound = settings.bind_to_scope();
@@ -286,7 +289,14 @@ async fn test_aws_options() {
286289
let settings = make_settings();
287290
let _bound = settings.bind_to_scope();
288291

289-
let container = setup_minio_container().await;
292+
let container = match setup_minio_container().await {
293+
Ok(c) => c,
294+
Err(e) if e.contains("toomanyrequests") => {
295+
eprintln!("Skipping test: Docker pull rate limit reached: {e}");
296+
return;
297+
}
298+
e @ Err(_) => e.unwrap(),
299+
};
290300
let port = container.get_host_port_ipv4(9000).await.unwrap();
291301

292302
let input = format!(
@@ -377,7 +387,14 @@ async fn test_s3_url_fallback() {
377387
return;
378388
}
379389

380-
let container = setup_minio_container().await;
390+
let container = match setup_minio_container().await {
391+
Ok(c) => c,
392+
Err(e) if e.contains("toomanyrequests") => {
393+
eprintln!("Skipping test: Docker pull rate limit reached: {e}");
394+
return;
395+
}
396+
e @ Err(_) => e.unwrap(),
397+
};
381398

382399
let mut settings = make_settings();
383400
settings.set_snapshot_suffix("s3_url_fallback");
@@ -407,8 +424,14 @@ async fn test_object_store_profiling() {
407424
return;
408425
}
409426

410-
let container = setup_minio_container().await;
411-
427+
let container = match setup_minio_container().await {
428+
Ok(c) => c,
429+
Err(e) if e.contains("toomanyrequests") => {
430+
eprintln!("Skipping test: Docker pull rate limit reached: {e}");
431+
return;
432+
}
433+
e @ Err(_) => e.unwrap(),
434+
};
412435
let mut settings = make_settings();
413436

414437
// as the object store profiling contains timestamps and durations, we must

datafusion/core/benches/aggregate_query_sql.rs

Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -251,6 +251,39 @@ fn criterion_benchmark(c: &mut Criterion) {
251251
)
252252
})
253253
});
254+
255+
c.bench_function("array_agg_query_group_by_few_groups", |b| {
256+
b.iter(|| {
257+
query(
258+
ctx.clone(),
259+
&rt,
260+
"SELECT u64_narrow, array_agg(f64) \
261+
FROM t GROUP BY u64_narrow",
262+
)
263+
})
264+
});
265+
266+
c.bench_function("array_agg_query_group_by_mid_groups", |b| {
267+
b.iter(|| {
268+
query(
269+
ctx.clone(),
270+
&rt,
271+
"SELECT u64_mid, array_agg(f64) \
272+
FROM t GROUP BY u64_mid",
273+
)
274+
})
275+
});
276+
277+
c.bench_function("array_agg_query_group_by_many_groups", |b| {
278+
b.iter(|| {
279+
query(
280+
ctx.clone(),
281+
&rt,
282+
"SELECT u64_wide, array_agg(f64) \
283+
FROM t GROUP BY u64_wide",
284+
)
285+
})
286+
});
254287
}
255288

256289
criterion_group!(benches, criterion_benchmark);

0 commit comments

Comments
 (0)