@@ -314,6 +314,22 @@ macro_rules! primitive_static_filter {
314314 let needle_nulls = v. nulls( ) ;
315315 let needle_has_nulls = v. null_count( ) > 0 ;
316316
317+ // Truth table for `value [NOT] IN (set)` with SQL three-valued logic:
318+ // ("-" means the value doesn't affect the result)
319+ //
320+ // | needle_null | haystack_null | negated | in set? | result |
321+ // |-------------|---------------|---------|---------|--------|
322+ // | true | - | false | - | null |
323+ // | true | - | true | - | null |
324+ // | false | true | false | yes | true |
325+ // | false | true | false | no | null |
326+ // | false | true | true | yes | false |
327+ // | false | true | true | no | null |
328+ // | false | false | false | yes | true |
329+ // | false | false | false | no | false |
330+ // | false | false | true | yes | false |
331+ // | false | false | true | no | true |
332+
317333 // Compute the "contains" result using collect_bool (fast batched approach)
318334 // This ignores nulls - we handle them separately
319335 let contains_buffer = if negated {
@@ -340,24 +356,12 @@ macro_rules! primitive_static_filter {
340356 needle_nulls. cloned( )
341357 }
342358 ( false , true ) => {
343- // Only haystack has nulls - null where not-in-set
344- // For IN: null where contains is false
345- // For NOT IN: null where contains is true (before negation, i.e., where original contains was false)
346- // Since we already negated contains_buffer for NOT IN, we need to handle this:
347- // - IN (negated=false): null where !contains_buffer
348- // - NOT IN (negated=true): null where contains_buffer (which is !original_contains)
349- // Actually both cases: null where the "not found" condition is true
350- // For IN: not found = !contains_buffer
351- // For NOT IN: not found = contains_buffer (since contains_buffer = !original_contains)
352- // So the validity mask (valid = not null) is:
353- // - IN: contains_buffer (found = valid)
354- // - NOT IN: !contains_buffer (found in original = valid, but contains_buffer is negated)
359+ // Only haystack has nulls - result is null when value not in set
360+ // Valid (not null) when original "in set" is true
361+ // For NOT IN: contains_buffer = !original, so validity = !contains_buffer
355362 let validity = if negated {
356- // For NOT IN: we want valid where original contains was true
357- // contains_buffer = !original_contains, so validity = !contains_buffer
358363 !& contains_buffer
359364 } else {
360- // For IN: valid where contains is true
361365 contains_buffer. clone( )
362366 } ;
363367 Some ( NullBuffer :: new( validity) )
@@ -367,7 +371,7 @@ macro_rules! primitive_static_filter {
367371 let needle_validity = needle_nulls. map( |n| n. inner( ) . clone( ) )
368372 . unwrap_or_else( || BooleanBuffer :: new_set( needle_values. len( ) ) ) ;
369373
370- // Haystack-induced validity (same logic as above)
374+ // Valid when original "in set" is true (see above)
371375 let haystack_validity = if negated {
372376 !& contains_buffer
373377 } else {
@@ -448,6 +452,22 @@ macro_rules! float_static_filter {
448452 let needle_nulls = v. nulls( ) ;
449453 let needle_has_nulls = v. null_count( ) > 0 ;
450454
455+ // Truth table for `value [NOT] IN (set)` with SQL three-valued logic:
456+ // ("-" means the value doesn't affect the result)
457+ //
458+ // | needle_null | haystack_null | negated | in set? | result |
459+ // |-------------|---------------|---------|---------|--------|
460+ // | true | - | false | - | null |
461+ // | true | - | true | - | null |
462+ // | false | true | false | yes | true |
463+ // | false | true | false | no | null |
464+ // | false | true | true | yes | false |
465+ // | false | true | true | no | null |
466+ // | false | false | false | yes | true |
467+ // | false | false | false | no | false |
468+ // | false | false | true | yes | false |
469+ // | false | false | true | no | true |
470+
451471 // Compute the "contains" result using collect_bool (fast batched approach)
452472 // This ignores nulls - we handle them separately
453473 let contains_buffer = if negated {
@@ -474,7 +494,9 @@ macro_rules! float_static_filter {
474494 needle_nulls. cloned( )
475495 }
476496 ( false , true ) => {
477- // Only haystack has nulls - null where not-in-set
497+ // Only haystack has nulls - result is null when value not in set
498+ // Valid (not null) when original "in set" is true
499+ // For NOT IN: contains_buffer = !original, so validity = !contains_buffer
478500 let validity = if negated {
479501 !& contains_buffer
480502 } else {
@@ -487,6 +509,7 @@ macro_rules! float_static_filter {
487509 let needle_validity = needle_nulls. map( |n| n. inner( ) . clone( ) )
488510 . unwrap_or_else( || BooleanBuffer :: new_set( needle_values. len( ) ) ) ;
489511
512+ // Valid when original "in set" is true (see above)
490513 let haystack_validity = if negated {
491514 !& contains_buffer
492515 } else {
0 commit comments