@@ -289,6 +289,120 @@ impl CursorArray for StringViewArray {
289289 }
290290}
291291
292+ /// Todo use arrow-rs side api after: <https://github.com/apache/arrow-rs/pull/7748> and <https://github.com/apache/arrow-rs/pull/7875> released
293+ /// Builds a 128-bit composite key for an inline value:
294+ ///
295+ /// - High 96 bits: the inline data in big-endian byte order (for correct lexicographical sorting).
296+ /// - Low 32 bits: the length in big-endian byte order, acting as a tiebreaker so shorter strings
297+ /// (or those with fewer meaningful bytes) always numerically sort before longer ones.
298+ ///
299+ /// This function extracts the length and the 12-byte inline string data from the raw
300+ /// little-endian `u128` representation, converts them to big-endian ordering, and packs them
301+ /// into a single `u128` value suitable for fast, branchless comparisons.
302+ ///
303+ /// # Why include length?
304+ ///
305+ /// A pure 96-bit content comparison can’t distinguish between two values whose inline bytes
306+ /// compare equal—either because one is a true prefix of the other or because zero-padding
307+ /// hides extra bytes. By tucking the 32-bit length into the lower bits, a single `u128` compare
308+ /// handles both content and length in one go.
309+ ///
310+ /// Example: comparing "bar" (3 bytes) vs "bar\0" (4 bytes)
311+ ///
312+ /// | String | Bytes 0–4 (length LE) | Bytes 4–16 (data + padding) |
313+ /// |------------|-----------------------|---------------------------------|
314+ /// | `"bar"` | `03 00 00 00` | `62 61 72` + 9 × `00` |
315+ /// | `"bar\0"`| `04 00 00 00` | `62 61 72 00` + 8 × `00` |
316+ ///
317+ /// Both inline parts become `62 61 72 00…00`, so they tie on content. The length field
318+ /// then differentiates:
319+ ///
320+ /// ```text
321+ /// key("bar") = 0x0000000000000000000062617200000003
322+ /// key("bar\0") = 0x0000000000000000000062617200000004
323+ /// ⇒ key("bar") < key("bar\0")
324+ /// ```
325+ /// # Inlining and Endianness
326+ ///
327+ /// - We start by calling `.to_le_bytes()` on the `raw` `u128`, because Rust’s native in‑memory
328+ /// representation is little‑endian on x86/ARM.
329+ /// - We extract the low 32 bits numerically (`raw as u32`)—this step is endianness‑free.
330+ /// - We copy the 12 bytes of inline data (original order) into `buf[0..12]`.
331+ /// - We serialize `length` as big‑endian into `buf[12..16]`.
332+ /// - Finally, `u128::from_be_bytes(buf)` treats `buf[0]` as the most significant byte
333+ /// and `buf[15]` as the least significant, producing a `u128` whose integer value
334+ /// directly encodes “inline data then length” in big‑endian form.
335+ ///
336+ /// This ensures that a simple `u128` comparison is equivalent to the desired
337+ /// lexicographical comparison of the inline bytes followed by length.
338+ #[ inline( always) ]
339+ pub fn inline_key_fast ( raw : u128 ) -> u128 {
340+ // 1. Decompose `raw` into little‑endian bytes:
341+ // - raw_bytes[0..4] = length in LE
342+ // - raw_bytes[4..16] = inline string data
343+ let raw_bytes = raw. to_le_bytes ( ) ;
344+
345+ // 2. Numerically truncate to get the low 32‑bit length (endianness‑free).
346+ let length = raw as u32 ;
347+
348+ // 3. Build a 16‑byte buffer in big‑endian order:
349+ // - buf[0..12] = inline string bytes (in original order)
350+ // - buf[12..16] = length.to_be_bytes() (BE)
351+ let mut buf = [ 0u8 ; 16 ] ;
352+ buf[ 0 ..12 ] . copy_from_slice ( & raw_bytes[ 4 ..16 ] ) ; // inline data
353+
354+ // Why convert length to big-endian for comparison?
355+ //
356+ // Rust (on most platforms) stores integers in little-endian format,
357+ // meaning the least significant byte is at the lowest memory address.
358+ // For example, an u32 value like 0x22345677 is stored in memory as:
359+ //
360+ // [0x77, 0x56, 0x34, 0x22] // little-endian layout
361+ // ^ ^ ^ ^
362+ // LSB ↑↑↑ MSB
363+ //
364+ // This layout is efficient for arithmetic but *not* suitable for
365+ // lexicographic (dictionary-style) comparison of byte arrays.
366+ //
367+ // To compare values by byte order—e.g., for sorted keys or binary trees—
368+ // we must convert them to **big-endian**, where:
369+ //
370+ // - The most significant byte (MSB) comes first (index 0)
371+ // - The least significant byte (LSB) comes last (index N-1)
372+ //
373+ // In big-endian, the same u32 = 0x22345677 would be represented as:
374+ //
375+ // [0x22, 0x34, 0x56, 0x77]
376+ //
377+ // This ordering aligns with natural string/byte sorting, so calling
378+ // `.to_be_bytes()` allows us to construct
379+ // keys where standard numeric comparison (e.g., `<`, `>`) behaves
380+ // like lexicographic byte comparison.
381+ buf[ 12 ..16 ] . copy_from_slice ( & length. to_be_bytes ( ) ) ; // length in BE
382+
383+ // 4. Deserialize the buffer as a big‑endian u128:
384+ // buf[0] is MSB, buf[15] is LSB.
385+ // Details:
386+ // Note on endianness and layout:
387+ //
388+ // Although `buf[0]` is stored at the lowest memory address,
389+ // calling `u128::from_be_bytes(buf)` interprets it as the **most significant byte (MSB)**,
390+ // and `buf[15]` as the **least significant byte (LSB)**.
391+ //
392+ // This is the core principle of **big-endian decoding**:
393+ // - Byte at index 0 maps to bits 127..120 (highest)
394+ // - Byte at index 1 maps to bits 119..112
395+ // - ...
396+ // - Byte at index 15 maps to bits 7..0 (lowest)
397+ //
398+ // So even though memory layout goes from low to high (left to right),
399+ // big-endian treats the **first byte** as highest in value.
400+ //
401+ // This guarantees that comparing two `u128` keys is equivalent to lexicographically
402+ // comparing the original inline bytes, followed by length.
403+ u128:: from_be_bytes ( buf)
404+ }
405+
292406impl CursorValues for StringViewArray {
293407 fn len ( & self ) -> usize {
294408 self . views ( ) . len ( )
@@ -346,8 +460,7 @@ impl CursorValues for StringViewArray {
346460 if l. data_buffers ( ) . is_empty ( ) && r. data_buffers ( ) . is_empty ( ) {
347461 let l_view = unsafe { l. views ( ) . get_unchecked ( l_idx) } ;
348462 let r_view = unsafe { r. views ( ) . get_unchecked ( r_idx) } ;
349- return StringViewArray :: inline_key_fast ( * l_view)
350- . cmp ( & StringViewArray :: inline_key_fast ( * r_view) ) ;
463+ return inline_key_fast ( * l_view) . cmp ( & inline_key_fast ( * r_view) ) ;
351464 }
352465
353466 unsafe { GenericByteViewArray :: compare_unchecked ( l, l_idx, r, r_idx) }
@@ -442,6 +555,7 @@ impl<T: CursorValues> CursorValues for ArrayValues<T> {
442555
443556#[ cfg( test) ]
444557mod tests {
558+ use arrow:: array:: GenericBinaryArray ;
445559 use datafusion_execution:: memory_pool:: {
446560 GreedyMemoryPool , MemoryConsumer , MemoryPool ,
447561 } ;
@@ -606,4 +720,100 @@ mod tests {
606720 b. advance ( ) ;
607721 assert_eq ! ( a. cmp( & b) , Ordering :: Less ) ;
608722 }
723+
724+ /// Integration tests for `inline_key_fast` covering:
725+ ///
726+ /// 1. Monotonic ordering across increasing lengths and lexical variations.
727+ /// 2. Cross-check against `GenericBinaryArray` comparison to ensure semantic equivalence.
728+ ///
729+ /// This also includes a specific test for the “bar” vs. “bar\0” case, demonstrating why
730+ /// the length field is required even when all inline bytes fit in 12 bytes.
731+ ///
732+ /// The test includes strings that verify correct byte order (prevent reversal bugs),
733+ /// and length-based tie-breaking in the composite key.
734+ ///
735+ /// The test confirms that `inline_key_fast` produces keys which sort consistently
736+ /// with the expected lexicographical order of the raw byte arrays.
737+ #[ test]
738+ fn test_inline_key_fast_various_lengths_and_lexical ( ) {
739+ /// Helper to create a raw u128 value representing an inline ByteView:
740+ /// - `length`: number of meaningful bytes (must be ≤ 12)
741+ /// - `data`: the actual inline data bytes
742+ ///
743+ /// The first 4 bytes encode length in little-endian,
744+ /// the following 12 bytes contain the inline string data (unpadded).
745+ fn make_raw_inline ( length : u32 , data : & [ u8 ] ) -> u128 {
746+ assert ! ( length as usize <= 12 , "Inline length must be ≤ 12" ) ;
747+ assert ! (
748+ data. len( ) == length as usize ,
749+ "Data length must match `length`"
750+ ) ;
751+
752+ let mut raw_bytes = [ 0u8 ; 16 ] ;
753+ raw_bytes[ 0 ..4 ] . copy_from_slice ( & length. to_le_bytes ( ) ) ; // length stored little-endian
754+ raw_bytes[ 4 ..( 4 + data. len ( ) ) ] . copy_from_slice ( data) ; // inline data
755+ u128:: from_le_bytes ( raw_bytes)
756+ }
757+
758+ // Test inputs: various lengths and lexical orders,
759+ // plus special cases for byte order and length tie-breaking
760+ let test_inputs: Vec < & [ u8 ] > = vec ! [
761+ b"a" ,
762+ b"aa" ,
763+ b"aaa" ,
764+ b"aab" ,
765+ b"abcd" ,
766+ b"abcde" ,
767+ b"abcdef" ,
768+ b"abcdefg" ,
769+ b"abcdefgh" ,
770+ b"abcdefghi" ,
771+ b"abcdefghij" ,
772+ b"abcdefghijk" ,
773+ b"abcdefghijkl" ,
774+ // Tests for byte-order reversal bug:
775+ // Without the fix, "backend one" would compare as "eno dnekcab",
776+ // causing incorrect sort order relative to "backend two".
777+ b"backend one" ,
778+ b"backend two" ,
779+ // Tests length-tiebreaker logic:
780+ // "bar" (3 bytes) and "bar\0" (4 bytes) have identical inline data,
781+ // so only the length differentiates their ordering.
782+ b"bar" ,
783+ b"bar\0 " ,
784+ // Additional lexical and length tie-breaking cases with same prefix, in correct lex order:
785+ b"than12Byt" ,
786+ b"than12Bytes" ,
787+ b"than12Bytes\0 " ,
788+ b"than12Bytesx" ,
789+ b"than12Bytex" ,
790+ b"than12Bytez" ,
791+ // Additional lexical tests
792+ b"xyy" ,
793+ b"xyz" ,
794+ b"xza" ,
795+ ] ;
796+
797+ // Create a GenericBinaryArray for cross-comparison of lex order
798+ let array: GenericBinaryArray < i32 > = GenericBinaryArray :: from (
799+ test_inputs. iter ( ) . map ( |s| Some ( * s) ) . collect :: < Vec < _ > > ( ) ,
800+ ) ;
801+
802+ for i in 0 ..array. len ( ) - 1 {
803+ let v1 = array. value ( i) ;
804+ let v2 = array. value ( i + 1 ) ;
805+
806+ // Assert the array's natural lexical ordering is correct
807+ assert ! ( v1 < v2, "Array compare failed: {v1:?} !< {v2:?}" ) ;
808+
809+ // Assert the keys produced by inline_key_fast reflect the same ordering
810+ let key1 = inline_key_fast ( make_raw_inline ( v1. len ( ) as u32 , v1) ) ;
811+ let key2 = inline_key_fast ( make_raw_inline ( v2. len ( ) as u32 , v2) ) ;
812+
813+ assert ! (
814+ key1 < key2,
815+ "Key compare failed: key({v1:?})=0x{key1:032x} !< key({v2:?})=0x{key2:032x}" ,
816+ ) ;
817+ }
818+ }
609819}
0 commit comments