Skip to content

Commit 5818732

Browse files
authored
[Minor] Use std::thread::available_parallelism instead of num_cpus (apache#13579)
* Use std::thread::available_parallelism * Use std::thread::available_parallelism * Use std::thread::available_parallelism * Use std::thread::available_parallelism * Use std::thread::available_parallelism * Use std::thread::available_parallelism
1 parent fc49e8d commit 5818732

File tree

18 files changed

+77
-31
lines changed

18 files changed

+77
-31
lines changed

Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,6 @@ hashbrown = { version = "0.14.5", features = ["raw"] }
129129
indexmap = "2.0.0"
130130
itertools = "0.13"
131131
log = "^0.4"
132-
num_cpus = "1.13.0"
133132
object_store = { version = "0.11.0", default-features = false }
134133
parking_lot = "0.12"
135134
parquet = { version = "53.3.0", default-features = false, features = [

benchmarks/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@ env_logger = { workspace = true }
4242
futures = { workspace = true }
4343
log = { workspace = true }
4444
mimalloc = { version = "0.1", optional = true, default-features = false }
45-
num_cpus = { workspace = true }
4645
parquet = { workspace = true, default-features = true }
4746
serde = { version = "1.0.136", features = ["derive"] }
4847
serde_json = { workspace = true }

benchmarks/src/bin/external_aggr.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,11 @@
1818
//! external_aggr binary entrypoint
1919
2020
use std::collections::HashMap;
21+
use std::num::NonZero;
2122
use std::path::PathBuf;
2223
use std::sync::Arc;
2324
use std::sync::OnceLock;
25+
use std::thread::available_parallelism;
2426
use structopt::StructOpt;
2527

2628
use arrow::record_batch::RecordBatch;
@@ -325,7 +327,11 @@ impl ExternalAggrConfig {
325327
}
326328

327329
fn partitions(&self) -> usize {
328-
self.common.partitions.unwrap_or(num_cpus::get())
330+
self.common.partitions.unwrap_or(
331+
available_parallelism()
332+
.unwrap_or(NonZero::new(1).unwrap())
333+
.get(),
334+
)
329335
}
330336

331337
/// Parse memory limit from string to number of bytes

benchmarks/src/bin/h2o.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,10 @@ use datafusion::datasource::MemTable;
2727
use datafusion::prelude::CsvReadOptions;
2828
use datafusion::{arrow::util::pretty, error::Result, prelude::SessionContext};
2929
use datafusion_benchmarks::util::BenchmarkRun;
30+
use std::num::NonZero;
3031
use std::path::PathBuf;
3132
use std::sync::Arc;
33+
use std::thread::available_parallelism;
3234
use structopt::StructOpt;
3335
use tokio::time::Instant;
3436

@@ -91,7 +93,9 @@ async fn group_by(opt: &GroupBy) -> Result<()> {
9193
.with_listing_options(ListingOptions::new(Arc::new(CsvFormat::default())))
9294
.with_schema(Arc::new(schema));
9395
let csv = ListingTable::try_new(listing_config)?;
94-
let partition_size = num_cpus::get();
96+
let partition_size = available_parallelism()
97+
.unwrap_or(NonZero::new(1).unwrap())
98+
.get();
9599
let memtable =
96100
MemTable::load(Arc::new(csv), Some(partition_size), &ctx.state()).await?;
97101
ctx.register_table("x", Arc::new(memtable))?;

benchmarks/src/imdb/run.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,10 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
use std::num::NonZero;
1819
use std::path::PathBuf;
1920
use std::sync::Arc;
21+
use std::thread::available_parallelism;
2022

2123
use super::{get_imdb_table_schema, get_query_sql, IMDB_TABLES};
2224
use crate::util::{BenchmarkRun, CommonOpt};
@@ -468,7 +470,11 @@ impl RunOpt {
468470
}
469471

470472
fn partitions(&self) -> usize {
471-
self.common.partitions.unwrap_or(num_cpus::get())
473+
self.common.partitions.unwrap_or(
474+
available_parallelism()
475+
.unwrap_or(NonZero::new(1).unwrap())
476+
.get(),
477+
)
472478
}
473479
}
474480

benchmarks/src/sort.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,10 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
use std::num::NonZero;
1819
use std::path::PathBuf;
1920
use std::sync::Arc;
21+
use std::thread::available_parallelism;
2022

2123
use crate::util::{AccessLogOpt, BenchmarkRun, CommonOpt};
2224

@@ -147,7 +149,11 @@ impl RunOpt {
147149
rundata.start_new_case(title);
148150
for i in 0..self.common.iterations {
149151
let config = SessionConfig::new().with_target_partitions(
150-
self.common.partitions.unwrap_or(num_cpus::get()),
152+
self.common.partitions.unwrap_or(
153+
available_parallelism()
154+
.unwrap_or(NonZero::new(1).unwrap())
155+
.get(),
156+
),
151157
);
152158
let ctx = SessionContext::new_with_config(config);
153159
let (rows, elapsed) =

benchmarks/src/sort_tpch.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,10 @@
2222
//! runs end-to-end sort queries and test the performance on multiple CPU cores.
2323
2424
use futures::StreamExt;
25+
use std::num::NonZero;
2526
use std::path::PathBuf;
2627
use std::sync::Arc;
28+
use std::thread::available_parallelism;
2729
use structopt::StructOpt;
2830

2931
use datafusion::datasource::file_format::parquet::ParquetFormat;
@@ -315,6 +317,10 @@ impl RunOpt {
315317
}
316318

317319
fn partitions(&self) -> usize {
318-
self.common.partitions.unwrap_or(num_cpus::get())
320+
self.common.partitions.unwrap_or(
321+
available_parallelism()
322+
.unwrap_or(NonZero::new(1).unwrap())
323+
.get(),
324+
)
319325
}
320326
}

benchmarks/src/tpch/run.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,10 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
use std::num::NonZero;
1819
use std::path::PathBuf;
1920
use std::sync::Arc;
21+
use std::thread::available_parallelism;
2022

2123
use super::{
2224
get_query_sql, get_tbl_tpch_table_schema, get_tpch_table_schema, TPCH_TABLES,
@@ -296,7 +298,11 @@ impl RunOpt {
296298
}
297299

298300
fn partitions(&self) -> usize {
299-
self.common.partitions.unwrap_or(num_cpus::get())
301+
self.common.partitions.unwrap_or(
302+
available_parallelism()
303+
.unwrap_or(NonZero::new(1).unwrap())
304+
.get(),
305+
)
300306
}
301307
}
302308

benchmarks/src/util/options.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
use std::{num::NonZero, thread::available_parallelism};
19+
1820
use datafusion::prelude::SessionConfig;
1921
use structopt::StructOpt;
2022

@@ -48,7 +50,13 @@ impl CommonOpt {
4850
/// Modify the existing config appropriately
4951
pub fn update_config(&self, config: SessionConfig) -> SessionConfig {
5052
config
51-
.with_target_partitions(self.partitions.unwrap_or(num_cpus::get()))
53+
.with_target_partitions(
54+
self.partitions.unwrap_or(
55+
available_parallelism()
56+
.unwrap_or(NonZero::new(1).unwrap())
57+
.get(),
58+
),
59+
)
5260
.with_batch_size(self.batch_size)
5361
}
5462
}

benchmarks/src/util/run.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,9 @@ use serde::{Serialize, Serializer};
2020
use serde_json::Value;
2121
use std::{
2222
collections::HashMap,
23+
num::NonZero,
2324
path::Path,
25+
thread::available_parallelism,
2426
time::{Duration, SystemTime},
2527
};
2628

@@ -68,7 +70,9 @@ impl RunContext {
6870
Self {
6971
benchmark_version: env!("CARGO_PKG_VERSION").to_owned(),
7072
datafusion_version: DATAFUSION_VERSION.to_owned(),
71-
num_cpus: num_cpus::get(),
73+
num_cpus: available_parallelism()
74+
.unwrap_or(NonZero::new(1).unwrap())
75+
.get(),
7276
start_time: SystemTime::now(),
7377
arguments: std::env::args().skip(1).collect::<Vec<String>>(),
7478
}

0 commit comments

Comments
 (0)