Skip to content

Commit 17c3ed8

Browse files
committed
Add a builder to SimplifyContext to avoid allocating default values
Signed-off-by: Adam Gutglick <adamgsal@gmail.com>
1 parent 415bd42 commit 17c3ed8

14 files changed

Lines changed: 197 additions & 58 deletions

File tree

datafusion/core/src/execution/context/mod.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1488,12 +1488,13 @@ impl SessionContext {
14881488
})?;
14891489

14901490
let state = self.state.read();
1491-
let context = SimplifyContext::default()
1491+
let context = SimplifyContext::builder()
14921492
.with_schema(Arc::clone(prepared.plan.schema()))
14931493
.with_config_options(Arc::clone(state.config_options()))
14941494
.with_query_execution_start_time(
14951495
state.execution_props().query_execution_start_time,
1496-
);
1496+
)
1497+
.build();
14971498
let simplifier = ExprSimplifier::new(context);
14981499

14991500
// Only allow literals as parameters for now.

datafusion/core/src/execution/session_state.rs

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -743,12 +743,13 @@ impl SessionState {
743743
df_schema: &DFSchema,
744744
) -> datafusion_common::Result<Arc<dyn PhysicalExpr>> {
745745
let config_options = self.config_options();
746-
let simplify_context = SimplifyContext::default()
746+
let simplify_context = SimplifyContext::builder()
747747
.with_schema(Arc::new(df_schema.clone()))
748748
.with_config_options(Arc::clone(config_options))
749749
.with_query_execution_start_time(
750750
self.execution_props().query_execution_start_time,
751-
);
751+
)
752+
.build();
752753
let simplifier = ExprSimplifier::new(simplify_context);
753754
// apply type coercion here to ensure types match
754755
let mut expr = simplifier.coerce(expr, df_schema)?;
@@ -1835,11 +1836,12 @@ impl ContextProvider for SessionContextProvider<'_> {
18351836
.get(name)
18361837
.cloned()
18371838
.ok_or_else(|| plan_datafusion_err!("table function '{name}' not found"))?;
1838-
let simplify_context = SimplifyContext::default()
1839+
let simplify_context = SimplifyContext::builder()
18391840
.with_config_options(Arc::clone(self.state.config_options()))
18401841
.with_query_execution_start_time(
18411842
self.state.execution_props().query_execution_start_time,
1842-
);
1843+
)
1844+
.build();
18431845
let simplifier = ExprSimplifier::new(simplify_context);
18441846
let schema = DFSchema::empty();
18451847
let args = args

datafusion/core/src/test_util/parquet.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,9 @@ impl TestParquetFile {
166166
let df_schema = Arc::clone(&self.schema).to_dfschema_ref()?;
167167

168168
// run coercion on the filters to coerce types etc.
169-
let context = SimplifyContext::default().with_schema(Arc::clone(&df_schema));
169+
let context = SimplifyContext::builder()
170+
.with_schema(Arc::clone(&df_schema))
171+
.build();
170172
if let Some(filter) = maybe_filter {
171173
let simplifier = ExprSimplifier::new(context);
172174
let filter = simplifier.coerce(filter, &df_schema).unwrap();

datafusion/core/tests/expr_api/mod.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -421,7 +421,9 @@ fn create_simplified_expr_test(expr: Expr, expected_expr: &str) {
421421
let df_schema = DFSchema::try_from(batch.schema()).unwrap();
422422

423423
// Simplify the expression first
424-
let simplify_context = SimplifyContext::default().with_schema(Arc::new(df_schema));
424+
let simplify_context = SimplifyContext::builder()
425+
.with_schema(Arc::new(df_schema))
426+
.build();
425427
let simplifier = ExprSimplifier::new(simplify_context).with_max_cycles(10);
426428
let simplified = simplifier.simplify(expr).unwrap();
427429
create_expr_test(simplified, expected_expr);

datafusion/core/tests/expr_api/simplification.rs

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -88,9 +88,10 @@ fn test_evaluate_with_start_time(
8888
expected_expr: Expr,
8989
date_time: &DateTime<Utc>,
9090
) {
91-
let context = SimplifyContext::default()
91+
let context = SimplifyContext::builder()
9292
.with_schema(schema())
93-
.with_query_execution_start_time(Some(*date_time));
93+
.with_query_execution_start_time(Some(*date_time))
94+
.build();
9495
let simplifier = ExprSimplifier::new(context);
9596
let simplified_expr = simplifier
9697
.simplify(input_expr.clone())
@@ -153,9 +154,10 @@ fn to_timestamp_expr(arg: impl Into<String>) -> Expr {
153154

154155
#[test]
155156
fn basic() {
156-
let context = SimplifyContext::default()
157+
let context = SimplifyContext::builder()
157158
.with_schema(schema())
158-
.with_query_execution_start_time(Some(Utc::now()));
159+
.with_query_execution_start_time(Some(Utc::now()))
160+
.build();
159161

160162
// The `Expr` is a core concept in DataFusion, and DataFusion can
161163
// help simplify it.
@@ -171,7 +173,7 @@ fn basic() {
171173

172174
#[test]
173175
fn fold_and_simplify() {
174-
let context = SimplifyContext::default().with_schema(schema());
176+
let context = SimplifyContext::builder().with_schema(schema()).build();
175177

176178
// What will it do with the expression `concat('foo', 'bar') == 'foobar')`?
177179
let expr = concat(vec![lit("foo"), lit("bar")]).eq(lit("foobar"));
@@ -565,7 +567,9 @@ fn expr_test_schema() -> DFSchemaRef {
565567
}
566568

567569
fn test_simplify(input_expr: Expr, expected_expr: Expr) {
568-
let context = SimplifyContext::default().with_schema(expr_test_schema());
570+
let context = SimplifyContext::builder()
571+
.with_schema(expr_test_schema())
572+
.build();
569573
let simplifier = ExprSimplifier::new(context);
570574
let simplified_expr = simplifier
571575
.simplify(input_expr.clone())
@@ -581,9 +585,10 @@ fn test_simplify_with_cycle_count(
581585
expected_expr: Expr,
582586
expected_count: u32,
583587
) {
584-
let context = SimplifyContext::default()
588+
let context = SimplifyContext::builder()
585589
.with_schema(expr_test_schema())
586-
.with_query_execution_start_time(Some(Utc::now()));
590+
.with_query_execution_start_time(Some(Utc::now()))
591+
.build();
587592
let simplifier = ExprSimplifier::new(context);
588593
let (simplified_expr, count) = simplifier
589594
.simplify_with_cycle_count_transformed(input_expr.clone())

datafusion/expr/src/simplify.rs

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,14 @@ pub struct SimplifyContext {
4040
config_options: Arc<ConfigOptions>,
4141
}
4242

43+
/// Builder for [`SimplifyContext`].
44+
#[derive(Debug, Default)]
45+
pub struct SimplifyContextBuilder {
46+
schema: Option<DFSchemaRef>,
47+
query_execution_start_time: Option<DateTime<Utc>>,
48+
config_options: Option<Arc<ConfigOptions>>,
49+
}
50+
4351
impl Default for SimplifyContext {
4452
fn default() -> Self {
4553
Self {
@@ -51,6 +59,11 @@ impl Default for SimplifyContext {
5159
}
5260

5361
impl SimplifyContext {
62+
/// Returns a builder for [`SimplifyContext`].
63+
pub fn builder() -> SimplifyContextBuilder {
64+
SimplifyContextBuilder::default()
65+
}
66+
5467
/// Set the [`ConfigOptions`] for this context
5568
pub fn with_config_options(mut self, config_options: Arc<ConfigOptions>) -> Self {
5669
self.config_options = config_options;
@@ -110,6 +123,46 @@ impl SimplifyContext {
110123
}
111124
}
112125

126+
impl SimplifyContextBuilder {
127+
/// Set the [`ConfigOptions`] for this context.
128+
pub fn with_config_options(mut self, config_options: Arc<ConfigOptions>) -> Self {
129+
self.config_options = Some(config_options);
130+
self
131+
}
132+
133+
/// Set the schema for this context.
134+
pub fn with_schema(mut self, schema: DFSchemaRef) -> Self {
135+
self.schema = Some(schema);
136+
self
137+
}
138+
139+
/// Set the query execution start time.
140+
pub fn with_query_execution_start_time(
141+
mut self,
142+
query_execution_start_time: Option<DateTime<Utc>>,
143+
) -> Self {
144+
self.query_execution_start_time = query_execution_start_time;
145+
self
146+
}
147+
148+
/// Set the query execution start to the current time.
149+
pub fn with_current_time(mut self) -> Self {
150+
self.query_execution_start_time = Some(Utc::now());
151+
self
152+
}
153+
154+
/// Build a [`SimplifyContext`], filling in any unspecified fields with defaults.
155+
pub fn build(self) -> SimplifyContext {
156+
SimplifyContext {
157+
schema: self.schema.unwrap_or_else(|| Arc::new(DFSchema::empty())),
158+
query_execution_start_time: self.query_execution_start_time,
159+
config_options: self
160+
.config_options
161+
.unwrap_or_else(|| Arc::new(ConfigOptions::default())),
162+
}
163+
}
164+
}
165+
113166
/// Was the expression simplified?
114167
#[derive(Debug)]
115168
pub enum ExprSimplifyResult {
@@ -119,3 +172,38 @@ pub enum ExprSimplifyResult {
119172
/// are return unmodified.
120173
Original(Vec<Expr>),
121174
}
175+
176+
#[cfg(test)]
177+
mod tests {
178+
use super::*;
179+
180+
#[test]
181+
fn simplify_context_builder_builds_default_context() {
182+
let context = SimplifyContext::builder().build();
183+
let default_options = ConfigOptions::default();
184+
185+
assert_eq!(context.schema().as_ref(), &DFSchema::empty());
186+
assert_eq!(context.query_execution_start_time(), None);
187+
assert_eq!(
188+
context.config_options().optimizer.max_passes,
189+
default_options.optimizer.max_passes
190+
);
191+
}
192+
193+
#[test]
194+
fn simplify_context_builder_uses_overrides() {
195+
let schema = Arc::new(DFSchema::empty());
196+
let config_options = Arc::new(ConfigOptions::default());
197+
let current_time = Utc::now();
198+
199+
let context = SimplifyContext::builder()
200+
.with_schema(Arc::clone(&schema))
201+
.with_config_options(Arc::clone(&config_options))
202+
.with_query_execution_start_time(Some(current_time))
203+
.build();
204+
205+
assert_eq!(context.schema().as_ref(), schema.as_ref());
206+
assert_eq!(context.query_execution_start_time(), Some(current_time));
207+
assert!(Arc::ptr_eq(context.config_options(), &config_options));
208+
}
209+
}

datafusion/functions-nested/src/array_has.rs

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -999,6 +999,7 @@ mod tests {
999999
DataFusionError, ScalarValue, config::ConfigOptions,
10001000
utils::SingleRowListArrayBuilder,
10011001
};
1002+
use datafusion_expr::simplify::SimplifyContext;
10021003
use datafusion_expr::{
10031004
ColumnarValue, Expr, ScalarFunctionArgs, ScalarUDFImpl, col, lit,
10041005
simplify::ExprSimplifyResult,
@@ -1017,7 +1018,7 @@ mod tests {
10171018
.build_list_scalar());
10181019
let needle = col("c");
10191020

1020-
let context = datafusion_expr::simplify::SimplifyContext::default();
1021+
let context = SimplifyContext::default();
10211022

10221023
let Ok(ExprSimplifyResult::Simplified(Expr::InList(in_list))) =
10231024
ArrayHas::new().simplify(vec![haystack, needle.clone()], &context)
@@ -1040,7 +1041,7 @@ mod tests {
10401041
let haystack = make_array(vec![lit(1), lit(2), lit(3)]);
10411042
let needle = col("c");
10421043

1043-
let context = datafusion_expr::simplify::SimplifyContext::default();
1044+
let context = SimplifyContext::default();
10441045

10451046
let Ok(ExprSimplifyResult::Simplified(Expr::InList(in_list))) =
10461047
ArrayHas::new().simplify(vec![haystack, needle.clone()], &context)
@@ -1063,7 +1064,7 @@ mod tests {
10631064
let haystack = Expr::Literal(ScalarValue::Null, None);
10641065
let needle = col("c");
10651066

1066-
let context = datafusion_expr::simplify::SimplifyContext::default();
1067+
let context = SimplifyContext::default();
10671068
let Ok(ExprSimplifyResult::Simplified(simplified)) =
10681069
ArrayHas::new().simplify(vec![haystack, needle], &context)
10691070
else {
@@ -1080,7 +1081,7 @@ mod tests {
10801081
let haystack = Expr::Literal(ScalarValue::List(Arc::new(haystack)), None);
10811082
let needle = col("c");
10821083

1083-
let context = datafusion_expr::simplify::SimplifyContext::default();
1084+
let context = SimplifyContext::default();
10841085
let Ok(ExprSimplifyResult::Simplified(simplified)) =
10851086
ArrayHas::new().simplify(vec![haystack, needle], &context)
10861087
else {
@@ -1095,7 +1096,7 @@ mod tests {
10951096
let haystack = col("c1");
10961097
let needle = col("c2");
10971098

1098-
let context = datafusion_expr::simplify::SimplifyContext::default();
1099+
let context = SimplifyContext::default();
10991100

11001101
let Ok(ExprSimplifyResult::Original(args)) =
11011102
ArrayHas::new().simplify(vec![haystack, needle.clone()], &context)

datafusion/functions/src/datetime/current_time.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,10 +151,11 @@ mod tests {
151151
Some(tz.to_string())
152152
};
153153
let schema = Arc::new(DFSchema::empty());
154-
SimplifyContext::default()
154+
SimplifyContext::builder()
155155
.with_schema(schema)
156156
.with_config_options(Arc::new(config))
157157
.with_query_execution_start_time(Some(start_time))
158+
.build()
158159
}
159160

160161
#[test]

datafusion/optimizer/src/decorrelate.rs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -508,7 +508,9 @@ fn agg_exprs_evaluation_result_on_empty_batch(
508508
.data()?;
509509

510510
let result_expr = result_expr.unalias();
511-
let info = SimplifyContext::default().with_schema(Arc::clone(schema));
511+
let info = SimplifyContext::builder()
512+
.with_schema(Arc::clone(schema))
513+
.build();
512514
let simplifier = ExprSimplifier::new(info);
513515
let result_expr = simplifier.simplify(result_expr)?;
514516
expr_result_map_for_count_bug.insert(e.schema_name().to_string(), result_expr);
@@ -541,7 +543,9 @@ fn proj_exprs_evaluation_result_on_empty_batch(
541543
.data()?;
542544

543545
if result_expr.ne(expr) {
544-
let info = SimplifyContext::default().with_schema(Arc::clone(schema));
546+
let info = SimplifyContext::builder()
547+
.with_schema(Arc::clone(schema))
548+
.build();
545549
let simplifier = ExprSimplifier::new(info);
546550
let result_expr = simplifier.simplify(result_expr)?;
547551
let expr_name = match expr {
@@ -581,7 +585,7 @@ fn filter_exprs_evaluation_result_on_empty_batch(
581585
.data()?;
582586

583587
let pull_up_expr = if result_expr.ne(filter_expr) {
584-
let info = SimplifyContext::default().with_schema(schema);
588+
let info = SimplifyContext::builder().with_schema(schema).build();
585589
let simplifier = ExprSimplifier::new(info);
586590
let result_expr = simplifier.simplify(result_expr)?;
587591
match &result_expr {

0 commit comments

Comments
 (0)