Skip to content

Commit 0b5a9c9

Browse files
committed
Add truth table
1 parent 2f2c60d commit 0b5a9c9

1 file changed

Lines changed: 40 additions & 17 deletions

File tree

  • datafusion/physical-expr/src/expressions

datafusion/physical-expr/src/expressions/in_list.rs

Lines changed: 40 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,22 @@ macro_rules! primitive_static_filter {
314314
let needle_nulls = v.nulls();
315315
let needle_has_nulls = v.null_count() > 0;
316316

317+
// Truth table for `value [NOT] IN (set)` with SQL three-valued logic:
318+
// ("-" means the value doesn't affect the result; needle_null = the value being tested is NULL; haystack_null = the set contains a NULL)
319+
//
320+
// | needle_null | haystack_null | negated | in set? | result |
321+
// |-------------|---------------|---------|---------|--------|
322+
// | true | - | false | - | null |
323+
// | true | - | true | - | null |
324+
// | false | true | false | yes | true |
325+
// | false | true | false | no | null |
326+
// | false | true | true | yes | false |
327+
// | false | true | true | no | null |
328+
// | false | false | false | yes | true |
329+
// | false | false | false | no | false |
330+
// | false | false | true | yes | false |
331+
// | false | false | true | no | true |
332+
317333
// Compute the "contains" result using collect_bool (fast batched approach)
318334
// This ignores nulls - we handle them separately
319335
let contains_buffer = if negated {
@@ -340,24 +356,12 @@ macro_rules! primitive_static_filter {
340356
needle_nulls.cloned()
341357
}
342358
(false, true) => {
343-
// Only haystack has nulls - null where not-in-set
344-
// For IN: null where contains is false
345-
// For NOT IN: null where contains is true (before negation, i.e., where original contains was false)
346-
// Since we already negated contains_buffer for NOT IN, we need to handle this:
347-
// - IN (negated=false): null where !contains_buffer
348-
// - NOT IN (negated=true): null where contains_buffer (which is !original_contains)
349-
// Actually both cases: null where the "not found" condition is true
350-
// For IN: not found = !contains_buffer
351-
// For NOT IN: not found = contains_buffer (since contains_buffer = !original_contains)
352-
// So the validity mask (valid = not null) is:
353-
// - IN: contains_buffer (found = valid)
354-
// - NOT IN: !contains_buffer (found in original = valid, but contains_buffer is negated)
359+
// Only haystack has nulls - result is null when value not in set
360+
// Valid (not null) when original "in set" is true
361+
// For NOT IN: contains_buffer = !original, so validity = !contains_buffer
355362
let validity = if negated {
356-
// For NOT IN: we want valid where original contains was true
357-
// contains_buffer = !original_contains, so validity = !contains_buffer
358363
!&contains_buffer
359364
} else {
360-
// For IN: valid where contains is true
361365
contains_buffer.clone()
362366
};
363367
Some(NullBuffer::new(validity))
@@ -367,7 +371,7 @@ macro_rules! primitive_static_filter {
367371
let needle_validity = needle_nulls.map(|n| n.inner().clone())
368372
.unwrap_or_else(|| BooleanBuffer::new_set(needle_values.len()));
369373

370-
// Haystack-induced validity (same logic as above)
374+
// Valid when original "in set" is true (see above)
371375
let haystack_validity = if negated {
372376
!&contains_buffer
373377
} else {
@@ -448,6 +452,22 @@ macro_rules! float_static_filter {
448452
let needle_nulls = v.nulls();
449453
let needle_has_nulls = v.null_count() > 0;
450454

455+
// Truth table for `value [NOT] IN (set)` with SQL three-valued logic:
456+
// ("-" means the value doesn't affect the result; needle_null = the value being tested is NULL; haystack_null = the set contains a NULL)
457+
//
458+
// | needle_null | haystack_null | negated | in set? | result |
459+
// |-------------|---------------|---------|---------|--------|
460+
// | true | - | false | - | null |
461+
// | true | - | true | - | null |
462+
// | false | true | false | yes | true |
463+
// | false | true | false | no | null |
464+
// | false | true | true | yes | false |
465+
// | false | true | true | no | null |
466+
// | false | false | false | yes | true |
467+
// | false | false | false | no | false |
468+
// | false | false | true | yes | false |
469+
// | false | false | true | no | true |
470+
451471
// Compute the "contains" result using collect_bool (fast batched approach)
452472
// This ignores nulls - we handle them separately
453473
let contains_buffer = if negated {
@@ -474,7 +494,9 @@ macro_rules! float_static_filter {
474494
needle_nulls.cloned()
475495
}
476496
(false, true) => {
477-
// Only haystack has nulls - null where not-in-set
497+
// Only haystack has nulls - result is null when value not in set
498+
// Valid (not null) when original "in set" is true
499+
// For NOT IN: contains_buffer = !original, so validity = !contains_buffer
478500
let validity = if negated {
479501
!&contains_buffer
480502
} else {
@@ -487,6 +509,7 @@ macro_rules! float_static_filter {
487509
let needle_validity = needle_nulls.map(|n| n.inner().clone())
488510
.unwrap_or_else(|| BooleanBuffer::new_set(needle_values.len()));
489511

512+
// Valid when original "in set" is true (see above)
490513
let haystack_validity = if negated {
491514
!&contains_buffer
492515
} else {

0 commit comments

Comments
 (0)