Skip to content

Commit de897ca

Browse files
committed
Review comments
1 parent dc95119 commit de897ca

3 files changed

Lines changed: 45 additions & 15 deletions

File tree

datafusion/physical-plan/src/joins/utils.rs

Lines changed: 9 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -977,6 +977,13 @@ pub(crate) fn apply_join_filter_to_indices(
977977
))
978978
}
979979

980+
/// Creates a [RecordBatch] with zero columns but the given row count.
981+
/// Used when a join has an empty projection (e.g. `SELECT count(1) ...`).
982+
fn new_empty_schema_batch(schema: &Schema, row_count: usize) -> Result<RecordBatch> {
983+
let options = RecordBatchOptions::new().with_row_count(Some(row_count));
984+
RecordBatch::try_new_with_options(Arc::new(schema.clone()), vec![], &options)
985+
}
986+
980987
/// Returns a new [RecordBatch] by combining the `left` and `right` according to `indices`.
981988
/// The resulting batch has [Schema] `schema`.
982989
pub(crate) fn build_batch_from_indices(
@@ -989,15 +996,7 @@ pub(crate) fn build_batch_from_indices(
989996
build_side: JoinSide,
990997
) -> Result<RecordBatch> {
991998
if schema.fields().is_empty() {
992-
let options = RecordBatchOptions::new()
993-
.with_match_field_names(true)
994-
.with_row_count(Some(build_indices.len()));
995-
996-
return Ok(RecordBatch::try_new_with_options(
997-
Arc::new(schema.clone()),
998-
vec![],
999-
&options,
1000-
)?);
999+
return new_empty_schema_batch(schema, build_indices.len());
10011000
}
10021001

10031002
// build the columns of the new [RecordBatch]:
@@ -1058,12 +1057,7 @@ pub(crate) fn build_batch_empty_build_side(
10581057
JoinType::Right | JoinType::Full | JoinType::RightAnti | JoinType::RightMark => {
10591058
let num_rows = probe_batch.num_rows();
10601059
if schema.fields().is_empty() {
1061-
let options = RecordBatchOptions::new().with_row_count(Some(num_rows));
1062-
return Ok(RecordBatch::try_new_with_options(
1063-
Arc::new(schema.clone()),
1064-
vec![],
1065-
&options,
1066-
)?);
1060+
return new_empty_schema_batch(schema, num_rows);
10671061
}
10681062
let mut columns: Vec<Arc<dyn Array>> =
10691063
Vec::with_capacity(schema.fields().len());

datafusion/physical-plan/src/projection.rs

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -553,6 +553,15 @@ impl RecordBatchStream for ProjectionStream {
553553
}
554554
}
555555

556+
/// Trait for execution plans that can embed a projection, avoiding a separate
557+
/// [`ProjectionExec`] wrapper.
558+
///
559+
/// # Empty projections
560+
///
561+
/// `Some(vec![])` is a valid projection that produces zero output columns while
562+
/// preserving the correct row count. Implementors must ensure that runtime batch
563+
/// construction still returns batches with the right number of rows even when no
564+
/// columns are selected (e.g. for `SELECT count(1) … JOIN …`).
556565
pub trait EmbeddedProjection: ExecutionPlan + Sized {
557566
fn with_projection(&self, projection: Option<Vec<usize>>) -> Result<Self>;
558567
}

datafusion/sqllogictest/test_files/joins.slt

Lines changed: 27 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5267,3 +5267,30 @@ DROP TABLE issue_19067_left;
52675267

52685268
statement count 0
52695269
DROP TABLE issue_19067_right;
5270+
5271+
# Test that empty projections pushed into joins produce correct row counts at runtime.
5272+
# When count(1) is used over a RIGHT/FULL JOIN, the optimizer embeds an empty projection
5273+
# (projection=[]) into the HashJoinExec. This validates that the runtime batch construction
5274+
# handles zero-column output correctly, preserving the correct number of rows.
5275+
5276+
statement ok
5277+
CREATE TABLE empty_proj_left AS VALUES (1, 'a'), (2, 'b'), (3, 'c');
5278+
5279+
statement ok
5280+
CREATE TABLE empty_proj_right AS VALUES (1, 'x'), (2, 'y'), (4, 'z');
5281+
5282+
query I
5283+
SELECT count(1) FROM empty_proj_left RIGHT JOIN empty_proj_right ON empty_proj_left.column1 = empty_proj_right.column1;
5284+
----
5285+
3
5286+
5287+
query I
5288+
SELECT count(1) FROM empty_proj_left FULL JOIN empty_proj_right ON empty_proj_left.column1 = empty_proj_right.column1;
5289+
----
5290+
4
5291+
5292+
statement count 0
5293+
DROP TABLE empty_proj_left;
5294+
5295+
statement count 0
5296+
DROP TABLE empty_proj_right;

0 commit comments

Comments
 (0)