Skip to content

Commit 7b4c86b

Browse files
committed
Optimize 128-bit integer formatting
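The compiler is unaware of the restricted range of the input (callers are expected to pass a value below 10^16, i.e. at most 16 decimal digits), so it is unable to optimize out the final division and modulus by 10,000. After three rounds of dividing by 10,000 the remainder already fits in four digits, so we handle the last quad manually with a single 32-bit divmod by 100 and get a nontrivial performance gain.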
1 parent 0d8a489 commit 7b4c86b

1 file changed

Lines changed: 10 additions & 1 deletion

File tree

src/lib.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -410,7 +410,7 @@ fn enc_16lsd<const OFFSET: usize>(buf: &mut [MaybeUninit<u8>], n: u64) {
410410
let mut remain = n;
411411

412412
// Format per four digits from the lookup table.
413-
for quad_index in (0..4).rev() {
413+
for quad_index in (1..4).rev() {
414414
// pull two pairs
415415
let quad = remain % 1_00_00;
416416
remain /= 1_00_00;
@@ -426,6 +426,15 @@ fn enc_16lsd<const OFFSET: usize>(buf: &mut [MaybeUninit<u8>], n: u64) {
426426
.write(*DECIMAL_PAIRS.0.get_unchecked(pair2 as usize * 2 + 1));
427427
}
428428
}
429+
430+
// final two pairs
431+
let (pair1, pair2) = divmod100(remain as u32);
432+
unsafe {
433+
buf[OFFSET + 0].write(*DECIMAL_PAIRS.0.get_unchecked(pair1 as usize * 2 + 0));
434+
buf[OFFSET + 1].write(*DECIMAL_PAIRS.0.get_unchecked(pair1 as usize * 2 + 1));
435+
buf[OFFSET + 2].write(*DECIMAL_PAIRS.0.get_unchecked(pair2 as usize * 2 + 0));
436+
buf[OFFSET + 3].write(*DECIMAL_PAIRS.0.get_unchecked(pair2 as usize * 2 + 1));
437+
}
429438
}
430439

431440
// Euclidean division plus remainder with constant 1E16 basically consumes 16

0 commit comments

Comments
 (0)