Improve codegen of align_offset when stride == 1

Previously checking for `pmoda == 0` would get LLVM to generate branchy
code, when, for `stride = 1` the offset can be computed without such a
branch by doing effectively a `-p % a`.

For well-known (constant) alignments, with the new ordering of these
conditionals, we end up generating 2 to 3 cheap instructions on x86_64:

    movq    %rdi, %rax
    negl    %eax
    andl    $7, %eax

instead of 5+ as previously.

For unknown alignments the new code also generates just 3 instructions:

    negq    %rdi
    leaq    -1(%rsi), %rax
    andq    %rdi, %rax
This commit is contained in:
Simonas Kazlauskas 2020-08-16 17:10:54 +03:00
parent e7271da69a
commit 5d22b18bf2

View File

@ -1172,7 +1172,7 @@ pub(crate) unsafe fn align_offset<T: Sized>(p: *const T, a: usize) -> usize {
/// Calculate multiplicative modular inverse of `x` modulo `m`.
///
/// This implementation is tailored for align_offset and has following preconditions:
/// This implementation is tailored for `align_offset` and has following preconditions:
///
/// * `m` is a power-of-two;
/// * `x < m`; (if `x ≥ m`, pass in `x % m` instead)
@ -1220,23 +1220,21 @@ pub(crate) unsafe fn align_offset<T: Sized>(p: *const T, a: usize) -> usize {
}
let stride = mem::size_of::<T>();
// SAFETY: `a` is a power-of-two, hence non-zero.
// SAFETY: `a` is a power-of-two, therefore non-zero.
let a_minus_one = unsafe { unchecked_sub(a, 1) };
let pmoda = p as usize & a_minus_one;
if stride == 1 {
// `stride == 1` case can be computed more efficiently through `-p (mod a)`.
return wrapping_sub(0, p as usize) & a_minus_one;
}
let pmoda = p as usize & a_minus_one;
if pmoda == 0 {
// Already aligned. Yay!
return 0;
}
if stride <= 1 {
return if stride == 0 {
// If the pointer is not aligned, and the element is zero-sized, then no amount of
// elements will ever align the pointer.
!0
} else {
wrapping_sub(a, pmoda)
};
} else if stride == 0 {
// If the pointer is not aligned, and the element is zero-sized, then no amount of
// elements will ever align the pointer.
return usize::MAX;
}
let smoda = stride & a_minus_one;