mirror of
https://github.com/rust-lang/rust.git
synced 2024-11-23 07:14:28 +00:00
Further Implement Power of Two Optimization
This commit is contained in:
parent
971e37ff7e
commit
9dccd5dce1
@ -901,26 +901,59 @@ macro_rules! int_impl {
|
||||
#[rustc_const_stable(feature = "const_int_pow", since = "1.50.0")]
|
||||
#[must_use = "this returns the result of the operation, \
|
||||
without modifying the original"]
|
||||
#[rustc_allow_const_fn_unstable(is_val_statically_known, const_int_unchecked_arith)]
|
||||
#[inline]
|
||||
pub const fn checked_pow(self, mut exp: u32) -> Option<Self> {
|
||||
if exp == 0 {
|
||||
return Some(1);
|
||||
}
|
||||
let mut base = self;
|
||||
let mut acc: Self = 1;
|
||||
|
||||
while exp > 1 {
|
||||
if (exp & 1) == 1 {
|
||||
acc = try_opt!(acc.checked_mul(base));
|
||||
// SAFETY: This path has the same behavior as the other.
|
||||
if unsafe { intrinsics::is_val_statically_known(self) }
|
||||
&& self.unsigned_abs().is_power_of_two()
|
||||
{
|
||||
if self == 1 { // Avoid divide by zero
|
||||
return Some(1);
|
||||
}
|
||||
exp /= 2;
|
||||
base = try_opt!(base.checked_mul(base));
|
||||
if self == -1 { // Avoid divide by zero
|
||||
return Some(if exp & 1 != 0 { -1 } else { 1 });
|
||||
}
|
||||
// SAFETY: We just checked that the magnitude of `self` is a power of two, so `self.wrapping_abs()` is nonzero.
|
||||
let power_used = unsafe { intrinsics::cttz_nonzero(self.wrapping_abs()) as u32 };
|
||||
if exp > Self::BITS / power_used { return None; } // Division of constants is free
|
||||
|
||||
// SAFETY: exp <= Self::BITS / power_used
|
||||
let res = unsafe { intrinsics::unchecked_shl(
|
||||
1 as Self,
|
||||
intrinsics::unchecked_mul(power_used, exp) as Self
|
||||
)};
|
||||
// LLVM doesn't always optimize out the checks
|
||||
// at the ir level.
|
||||
|
||||
let sign = self.is_negative() && exp & 1 != 0;
|
||||
if !sign && res == Self::MIN {
|
||||
None
|
||||
} else if sign {
|
||||
Some(res.wrapping_neg())
|
||||
} else {
|
||||
Some(res)
|
||||
}
|
||||
} else {
|
||||
if exp == 0 {
|
||||
return Some(1);
|
||||
}
|
||||
let mut base = self;
|
||||
let mut acc: Self = 1;
|
||||
|
||||
while exp > 1 {
|
||||
if (exp & 1) == 1 {
|
||||
acc = try_opt!(acc.checked_mul(base));
|
||||
}
|
||||
exp /= 2;
|
||||
base = try_opt!(base.checked_mul(base));
|
||||
}
|
||||
// since exp!=0, finally the exp must be 1.
|
||||
// Deal with the final bit of the exponent separately, since
|
||||
// squaring the base afterwards is not necessary and may cause a
|
||||
// needless overflow.
|
||||
acc.checked_mul(base)
|
||||
}
|
||||
// since exp!=0, finally the exp must be 1.
|
||||
// Deal with the final bit of the exponent separately, since
|
||||
// squaring the base afterwards is not necessary and may cause a
|
||||
// needless overflow.
|
||||
acc.checked_mul(base)
|
||||
}
|
||||
|
||||
/// Returns the square root of the number, rounded down.
|
||||
@ -1537,27 +1570,58 @@ macro_rules! int_impl {
|
||||
#[rustc_const_stable(feature = "const_int_pow", since = "1.50.0")]
|
||||
#[must_use = "this returns the result of the operation, \
|
||||
without modifying the original"]
|
||||
#[rustc_allow_const_fn_unstable(is_val_statically_known, const_int_unchecked_arith)]
|
||||
#[inline]
|
||||
pub const fn wrapping_pow(self, mut exp: u32) -> Self {
|
||||
if exp == 0 {
|
||||
return 1;
|
||||
}
|
||||
let mut base = self;
|
||||
let mut acc: Self = 1;
|
||||
|
||||
while exp > 1 {
|
||||
if (exp & 1) == 1 {
|
||||
acc = acc.wrapping_mul(base);
|
||||
// SAFETY: This path has the same behavior as the other.
|
||||
if unsafe { intrinsics::is_val_statically_known(self) }
|
||||
&& self.unsigned_abs().is_power_of_two()
|
||||
{
|
||||
if self == 1 { // Avoid divide by zero
|
||||
return 1;
|
||||
}
|
||||
exp /= 2;
|
||||
base = base.wrapping_mul(base);
|
||||
}
|
||||
if self == -1 { // Avoid divide by zero
|
||||
return if exp & 1 != 0 { -1 } else { 1 };
|
||||
}
|
||||
// SAFETY: We just checked that the magnitude of `self` is a power of two, so `self.wrapping_abs()` is nonzero.
|
||||
let power_used = unsafe { intrinsics::cttz_nonzero(self.wrapping_abs()) as u32 };
|
||||
if exp > Self::BITS / power_used { return 0; } // Division of constants is free
|
||||
|
||||
// since exp!=0, finally the exp must be 1.
|
||||
// Deal with the final bit of the exponent separately, since
|
||||
// squaring the base afterwards is not necessary and may cause a
|
||||
// needless overflow.
|
||||
acc.wrapping_mul(base)
|
||||
// SAFETY: exp <= Self::BITS / power_used
|
||||
let res = unsafe { intrinsics::unchecked_shl(
|
||||
1 as Self,
|
||||
intrinsics::unchecked_mul(power_used, exp) as Self
|
||||
)};
|
||||
// LLVM doesn't always optimize out the checks
|
||||
// at the ir level.
|
||||
|
||||
let sign = self.is_negative() && exp & 1 != 0;
|
||||
if sign {
|
||||
res.wrapping_neg()
|
||||
} else {
|
||||
res
|
||||
}
|
||||
} else {
|
||||
if exp == 0 {
|
||||
return 1;
|
||||
}
|
||||
let mut base = self;
|
||||
let mut acc: Self = 1;
|
||||
|
||||
while exp > 1 {
|
||||
if (exp & 1) == 1 {
|
||||
acc = acc.wrapping_mul(base);
|
||||
}
|
||||
exp /= 2;
|
||||
base = base.wrapping_mul(base);
|
||||
}
|
||||
|
||||
// since exp!=0, finally the exp must be 1.
|
||||
// Deal with the final bit of the exponent separately, since
|
||||
// squaring the base afterwards is not necessary and may cause a
|
||||
// needless overflow.
|
||||
acc.wrapping_mul(base)
|
||||
}
|
||||
}
|
||||
|
||||
/// Calculates `self` + `rhs`
|
||||
@ -2039,36 +2103,68 @@ macro_rules! int_impl {
|
||||
#[rustc_const_stable(feature = "const_int_pow", since = "1.50.0")]
|
||||
#[must_use = "this returns the result of the operation, \
|
||||
without modifying the original"]
|
||||
#[rustc_allow_const_fn_unstable(is_val_statically_known, const_int_unchecked_arith)]
|
||||
#[inline]
|
||||
pub const fn overflowing_pow(self, mut exp: u32) -> (Self, bool) {
|
||||
if exp == 0 {
|
||||
return (1,false);
|
||||
}
|
||||
let mut base = self;
|
||||
let mut acc: Self = 1;
|
||||
let mut overflown = false;
|
||||
// Scratch space for storing results of overflowing_mul.
|
||||
let mut r;
|
||||
// SAFETY: This path has the same behavior as the other.
|
||||
if unsafe { intrinsics::is_val_statically_known(self) }
|
||||
&& self.unsigned_abs().is_power_of_two()
|
||||
{
|
||||
if self == 1 { // Avoid divide by zero
|
||||
return (1, false);
|
||||
}
|
||||
if self == -1 { // Avoid divide by zero
|
||||
return (if exp & 1 != 0 { -1 } else { 1 }, false);
|
||||
}
|
||||
// SAFETY: We just checked that the magnitude of `self` is a power of two, so `self.wrapping_abs()` is nonzero.
|
||||
let power_used = unsafe { intrinsics::cttz_nonzero(self.wrapping_abs()) as u32 };
|
||||
if exp > Self::BITS / power_used { return (0, true); } // Division of constants is free
|
||||
|
||||
while exp > 1 {
|
||||
if (exp & 1) == 1 {
|
||||
r = acc.overflowing_mul(base);
|
||||
acc = r.0;
|
||||
// SAFETY: exp <= Self::BITS / power_used
|
||||
let res = unsafe { intrinsics::unchecked_shl(
|
||||
1 as Self,
|
||||
intrinsics::unchecked_mul(power_used, exp) as Self
|
||||
)};
|
||||
// LLVM doesn't always optimize out the checks
|
||||
// at the ir level.
|
||||
|
||||
let sign = self.is_negative() && exp & 1 != 0;
|
||||
let overflow = res == Self::MIN;
|
||||
if sign {
|
||||
(res.wrapping_neg(), overflow)
|
||||
} else {
|
||||
(res, overflow)
|
||||
}
|
||||
} else {
|
||||
if exp == 0 {
|
||||
return (1,false);
|
||||
}
|
||||
let mut base = self;
|
||||
let mut acc: Self = 1;
|
||||
let mut overflown = false;
|
||||
// Scratch space for storing results of overflowing_mul.
|
||||
let mut r;
|
||||
|
||||
while exp > 1 {
|
||||
if (exp & 1) == 1 {
|
||||
r = acc.overflowing_mul(base);
|
||||
acc = r.0;
|
||||
overflown |= r.1;
|
||||
}
|
||||
exp /= 2;
|
||||
r = base.overflowing_mul(base);
|
||||
base = r.0;
|
||||
overflown |= r.1;
|
||||
}
|
||||
exp /= 2;
|
||||
r = base.overflowing_mul(base);
|
||||
base = r.0;
|
||||
overflown |= r.1;
|
||||
}
|
||||
|
||||
// since exp!=0, finally the exp must be 1.
|
||||
// Deal with the final bit of the exponent separately, since
|
||||
// squaring the base afterwards is not necessary and may cause a
|
||||
// needless overflow.
|
||||
r = acc.overflowing_mul(base);
|
||||
r.1 |= overflown;
|
||||
r
|
||||
// since exp!=0, finally the exp must be 1.
|
||||
// Deal with the final bit of the exponent separately, since
|
||||
// squaring the base afterwards is not necessary and may cause a
|
||||
// needless overflow.
|
||||
r = acc.overflowing_mul(base);
|
||||
r.1 |= overflown;
|
||||
r
|
||||
}
|
||||
}
|
||||
|
||||
/// Raises self to the power of `exp`, using exponentiation by squaring.
|
||||
@ -2086,30 +2182,47 @@ macro_rules! int_impl {
|
||||
#[rustc_const_stable(feature = "const_int_pow", since = "1.50.0")]
|
||||
#[must_use = "this returns the result of the operation, \
|
||||
without modifying the original"]
|
||||
#[rustc_allow_const_fn_unstable(is_val_statically_known, const_int_unchecked_arith)]
|
||||
#[inline]
|
||||
#[rustc_inherit_overflow_checks]
|
||||
#[rustc_allow_const_fn_unstable(is_val_statically_known)]
|
||||
#[track_caller] // Hides the hackish overflow check for powers of two.
|
||||
pub const fn pow(self, mut exp: u32) -> Self {
|
||||
// SAFETY: This path has the same behavior as the other.
|
||||
if unsafe { intrinsics::is_val_statically_known(self) }
|
||||
&& self > 0
|
||||
&& (self & (self - 1) == 0)
|
||||
&& self.unsigned_abs().is_power_of_two()
|
||||
{
|
||||
let power_used = match self.checked_ilog2() {
|
||||
Some(v) => v,
|
||||
// SAFETY: We just checked this is a power of two. and above zero.
|
||||
None => unsafe { core::hint::unreachable_unchecked() },
|
||||
};
|
||||
// So it panics. Have to use `overflowing_mul` to efficiently set the
|
||||
// result to 0 if not.
|
||||
#[cfg(debug_assertions)]
|
||||
{
|
||||
_ = power_used * exp;
|
||||
if self == 1 { // Avoid divide by zero
|
||||
return 1;
|
||||
}
|
||||
if self == -1 { // Avoid divide by zero
|
||||
return if exp & 1 != 0 { -1 } else { 1 };
|
||||
}
|
||||
// SAFETY: We just checked that the magnitude of `self` is a power of two, so `self.wrapping_abs()` is nonzero.
|
||||
let power_used = unsafe { intrinsics::cttz_nonzero(self.wrapping_abs()) as u32 };
|
||||
if exp > Self::BITS / power_used { // Division of constants is free
|
||||
#[allow(arithmetic_overflow)]
|
||||
return Self::MAX * Self::MAX * 0;
|
||||
}
|
||||
|
||||
// SAFETY: exp <= Self::BITS / power_used
|
||||
let res = unsafe { intrinsics::unchecked_shl(
|
||||
1 as Self,
|
||||
intrinsics::unchecked_mul(power_used, exp) as Self
|
||||
)};
|
||||
// LLVM doesn't always optimize out the checks
|
||||
// at the ir level.
|
||||
|
||||
let sign = self.is_negative() && exp & 1 != 0;
|
||||
#[allow(arithmetic_overflow)]
|
||||
if !sign && res == Self::MIN {
|
||||
// So it panics.
|
||||
_ = Self::MAX * Self::MAX;
|
||||
}
|
||||
if sign {
|
||||
res.wrapping_neg()
|
||||
} else {
|
||||
res
|
||||
}
|
||||
let (num_shl, overflowed) = power_used.overflowing_mul(exp);
|
||||
let fine = !overflowed
|
||||
& (num_shl < (mem::size_of::<Self>() * 8) as u32);
|
||||
(1 << num_shl) * fine as Self
|
||||
} else {
|
||||
if exp == 0 {
|
||||
return 1;
|
||||
|
@ -1005,28 +1005,49 @@ macro_rules! uint_impl {
|
||||
#[rustc_const_stable(feature = "const_int_pow", since = "1.50.0")]
|
||||
#[must_use = "this returns the result of the operation, \
|
||||
without modifying the original"]
|
||||
#[rustc_allow_const_fn_unstable(is_val_statically_known, const_int_unchecked_arith)]
|
||||
#[inline]
|
||||
pub const fn checked_pow(self, mut exp: u32) -> Option<Self> {
|
||||
if exp == 0 {
|
||||
return Some(1);
|
||||
}
|
||||
let mut base = self;
|
||||
let mut acc: Self = 1;
|
||||
|
||||
while exp > 1 {
|
||||
if (exp & 1) == 1 {
|
||||
acc = try_opt!(acc.checked_mul(base));
|
||||
// SAFETY: This path has the same behavior as the other.
|
||||
if unsafe { intrinsics::is_val_statically_known(self) }
|
||||
&& self.is_power_of_two()
|
||||
{
|
||||
if self == 1 { // Avoid divide by zero
|
||||
return Some(1);
|
||||
}
|
||||
exp /= 2;
|
||||
base = try_opt!(base.checked_mul(base));
|
||||
// SAFETY: We just checked that `self` is a power of two, so it is nonzero.
|
||||
let power_used = unsafe { intrinsics::cttz_nonzero(self) as u32 };
|
||||
if exp > Self::BITS / power_used { return None; } // Division of constants is free
|
||||
|
||||
// SAFETY: exp <= Self::BITS / power_used
|
||||
unsafe { Some(intrinsics::unchecked_shl(
|
||||
1 as Self,
|
||||
intrinsics::unchecked_mul(power_used, exp) as Self
|
||||
)) }
|
||||
// LLVM doesn't always optimize out the checks
|
||||
// at the ir level.
|
||||
} else {
|
||||
if exp == 0 {
|
||||
return Some(1);
|
||||
}
|
||||
let mut base = self;
|
||||
let mut acc: Self = 1;
|
||||
|
||||
while exp > 1 {
|
||||
if (exp & 1) == 1 {
|
||||
acc = try_opt!(acc.checked_mul(base));
|
||||
}
|
||||
exp /= 2;
|
||||
base = try_opt!(base.checked_mul(base));
|
||||
}
|
||||
|
||||
// since exp!=0, finally the exp must be 1.
|
||||
// Deal with the final bit of the exponent separately, since
|
||||
// squaring the base afterwards is not necessary and may cause a
|
||||
// needless overflow.
|
||||
|
||||
acc.checked_mul(base)
|
||||
}
|
||||
|
||||
// since exp!=0, finally the exp must be 1.
|
||||
// Deal with the final bit of the exponent separately, since
|
||||
// squaring the base afterwards is not necessary and may cause a
|
||||
// needless overflow.
|
||||
|
||||
acc.checked_mul(base)
|
||||
}
|
||||
|
||||
/// Saturating integer addition. Computes `self + rhs`, saturating at
|
||||
@ -1475,27 +1496,48 @@ macro_rules! uint_impl {
|
||||
#[rustc_const_stable(feature = "const_int_pow", since = "1.50.0")]
|
||||
#[must_use = "this returns the result of the operation, \
|
||||
without modifying the original"]
|
||||
#[rustc_allow_const_fn_unstable(is_val_statically_known, const_int_unchecked_arith)]
|
||||
#[inline]
|
||||
pub const fn wrapping_pow(self, mut exp: u32) -> Self {
|
||||
if exp == 0 {
|
||||
return 1;
|
||||
}
|
||||
let mut base = self;
|
||||
let mut acc: Self = 1;
|
||||
|
||||
while exp > 1 {
|
||||
if (exp & 1) == 1 {
|
||||
acc = acc.wrapping_mul(base);
|
||||
// SAFETY: This path has the same behavior as the other.
|
||||
if unsafe { intrinsics::is_val_statically_known(self) }
|
||||
&& self.is_power_of_two()
|
||||
{
|
||||
if self == 1 { // Avoid divide by zero
|
||||
return 1;
|
||||
}
|
||||
exp /= 2;
|
||||
base = base.wrapping_mul(base);
|
||||
}
|
||||
// SAFETY: We just checked that `self` is a power of two, so it is nonzero.
|
||||
let power_used = unsafe { intrinsics::cttz_nonzero(self) as u32 };
|
||||
if exp > Self::BITS / power_used { return 0; } // Division of constants is free
|
||||
|
||||
// since exp!=0, finally the exp must be 1.
|
||||
// Deal with the final bit of the exponent separately, since
|
||||
// squaring the base afterwards is not necessary and may cause a
|
||||
// needless overflow.
|
||||
acc.wrapping_mul(base)
|
||||
// SAFETY: exp <= Self::BITS / power_used
|
||||
unsafe { intrinsics::unchecked_shl(
|
||||
1 as Self,
|
||||
intrinsics::unchecked_mul(power_used, exp) as Self
|
||||
)}
|
||||
// LLVM doesn't always optimize out the checks
|
||||
// at the ir level.
|
||||
} else {
|
||||
if exp == 0 {
|
||||
return 1;
|
||||
}
|
||||
let mut base = self;
|
||||
let mut acc: Self = 1;
|
||||
|
||||
while exp > 1 {
|
||||
if (exp & 1) == 1 {
|
||||
acc = acc.wrapping_mul(base);
|
||||
}
|
||||
exp /= 2;
|
||||
base = base.wrapping_mul(base);
|
||||
}
|
||||
|
||||
// since exp!=0, finally the exp must be 1.
|
||||
// Deal with the final bit of the exponent separately, since
|
||||
// squaring the base afterwards is not necessary and may cause a
|
||||
// needless overflow.
|
||||
acc.wrapping_mul(base)
|
||||
}
|
||||
}
|
||||
|
||||
/// Calculates `self` + `rhs`
|
||||
@ -1925,37 +1967,58 @@ macro_rules! uint_impl {
|
||||
#[rustc_const_stable(feature = "const_int_pow", since = "1.50.0")]
|
||||
#[must_use = "this returns the result of the operation, \
|
||||
without modifying the original"]
|
||||
#[rustc_allow_const_fn_unstable(is_val_statically_known, const_int_unchecked_arith)]
|
||||
#[inline]
|
||||
pub const fn overflowing_pow(self, mut exp: u32) -> (Self, bool) {
|
||||
if exp == 0{
|
||||
return (1,false);
|
||||
}
|
||||
let mut base = self;
|
||||
let mut acc: Self = 1;
|
||||
let mut overflown = false;
|
||||
// Scratch space for storing results of overflowing_mul.
|
||||
let mut r;
|
||||
// SAFETY: This path has the same behavior as the other.
|
||||
if unsafe { intrinsics::is_val_statically_known(self) }
|
||||
&& self.is_power_of_two()
|
||||
{
|
||||
if self == 1 { // Avoid divide by zero
|
||||
return (1, false);
|
||||
}
|
||||
// SAFETY: We just checked that `self` is a power of two, so it is nonzero.
|
||||
let power_used = unsafe { intrinsics::cttz_nonzero(self) as u32 };
|
||||
if exp > Self::BITS / power_used { return (0, true); } // Division of constants is free
|
||||
|
||||
while exp > 1 {
|
||||
if (exp & 1) == 1 {
|
||||
r = acc.overflowing_mul(base);
|
||||
acc = r.0;
|
||||
// SAFETY: exp <= Self::BITS / power_used
|
||||
unsafe { (intrinsics::unchecked_shl(
|
||||
1 as Self,
|
||||
intrinsics::unchecked_mul(power_used, exp) as Self
|
||||
), false) }
|
||||
// LLVM doesn't always optimize out the checks
|
||||
// at the ir level.
|
||||
} else {
|
||||
if exp == 0{
|
||||
return (1,false);
|
||||
}
|
||||
let mut base = self;
|
||||
let mut acc: Self = 1;
|
||||
let mut overflown = false;
|
||||
// Scratch space for storing results of overflowing_mul.
|
||||
let mut r;
|
||||
|
||||
while exp > 1 {
|
||||
if (exp & 1) == 1 {
|
||||
r = acc.overflowing_mul(base);
|
||||
acc = r.0;
|
||||
overflown |= r.1;
|
||||
}
|
||||
exp /= 2;
|
||||
r = base.overflowing_mul(base);
|
||||
base = r.0;
|
||||
overflown |= r.1;
|
||||
}
|
||||
exp /= 2;
|
||||
r = base.overflowing_mul(base);
|
||||
base = r.0;
|
||||
overflown |= r.1;
|
||||
|
||||
// since exp!=0, finally the exp must be 1.
|
||||
// Deal with the final bit of the exponent separately, since
|
||||
// squaring the base afterwards is not necessary and may cause a
|
||||
// needless overflow.
|
||||
r = acc.overflowing_mul(base);
|
||||
r.1 |= overflown;
|
||||
|
||||
r
|
||||
}
|
||||
|
||||
// since exp!=0, finally the exp must be 1.
|
||||
// Deal with the final bit of the exponent separately, since
|
||||
// squaring the base afterwards is not necessary and may cause a
|
||||
// needless overflow.
|
||||
r = acc.overflowing_mul(base);
|
||||
r.1 |= overflown;
|
||||
|
||||
r
|
||||
}
|
||||
|
||||
/// Raises self to the power of `exp`, using exponentiation by squaring.
|
||||
@ -1971,9 +2034,10 @@ macro_rules! uint_impl {
|
||||
#[rustc_const_stable(feature = "const_int_pow", since = "1.50.0")]
|
||||
#[must_use = "this returns the result of the operation, \
|
||||
without modifying the original"]
|
||||
#[rustc_allow_const_fn_unstable(is_val_statically_known, const_int_unchecked_arith)]
|
||||
#[inline]
|
||||
#[rustc_inherit_overflow_checks]
|
||||
#[rustc_allow_const_fn_unstable(is_val_statically_known)]
|
||||
#[track_caller] // Hides the hackish overflow check for powers of two.
|
||||
pub const fn pow(self, mut exp: u32) -> Self {
|
||||
// LLVM now knows that `self` is a constant value, but not a
|
||||
// constant in Rust. This allows us to compute the power used at
|
||||
@ -1990,22 +2054,23 @@ macro_rules! uint_impl {
|
||||
if unsafe { intrinsics::is_val_statically_known(self) }
|
||||
&& self.is_power_of_two()
|
||||
{
|
||||
let power_used = match self.checked_ilog2() {
|
||||
Some(v) => v,
|
||||
// SAFETY: We just checked this is a power of two. `0` is not a
|
||||
// power of two.
|
||||
None => unsafe { core::hint::unreachable_unchecked() },
|
||||
};
|
||||
// So it panics. Have to use `overflowing_mul` to efficiently set the
|
||||
// result to 0 if not.
|
||||
#[cfg(debug_assertions)]
|
||||
{
|
||||
_ = power_used * exp;
|
||||
if self == 1 { // Avoid divide by zero
|
||||
return 1;
|
||||
}
|
||||
let (num_shl, overflowed) = power_used.overflowing_mul(exp);
|
||||
let fine = !overflowed
|
||||
& (num_shl < (mem::size_of::<Self>() * 8) as u32);
|
||||
(1 << num_shl) * fine as Self
|
||||
// SAFETY: We just checked that `self` is a power of two, so it is nonzero.
|
||||
let power_used = unsafe { intrinsics::cttz_nonzero(self) as u32 };
|
||||
if exp > Self::BITS / power_used { // Division of constants is free
|
||||
#[allow(arithmetic_overflow)]
|
||||
return Self::MAX * Self::MAX * 0;
|
||||
}
|
||||
|
||||
// SAFETY: exp <= Self::BITS / power_used
|
||||
unsafe { intrinsics::unchecked_shl(
|
||||
1 as Self,
|
||||
intrinsics::unchecked_mul(power_used, exp) as Self
|
||||
)}
|
||||
// LLVM doesn't always optimize out the checks
|
||||
// at the ir level.
|
||||
} else {
|
||||
if exp == 0 {
|
||||
return 1;
|
||||
|
@ -1,68 +1,55 @@
|
||||
// #[cfg(bootstrap)]
|
||||
// ignore-stage1
|
||||
// compile-flags: --crate-type=lib -Zmerge-functions=disabled
|
||||
// compile-flags: --crate-type=lib -Zmerge-functions=disabled -O -C overflow-checks=false
|
||||
|
||||
// CHECK-LABEL: @a(
|
||||
#[no_mangle]
|
||||
pub fn a(exp: u32) -> u64 {
|
||||
// CHECK: %[[R:.+]] = and i32 %exp, 63
|
||||
// CHECK: %[[R:.+]] = zext i32 %[[R:.+]] to i64
|
||||
// CHECK: %[[R:.+]] = shl nuw i64 %[[R:.+]].i, %[[R:.+]]
|
||||
// CHECK: ret i64 %[[R:.+]]
|
||||
// CHECK: %{{[^ ]+}} = icmp ugt i32 %exp, 64
|
||||
// CHECK: %{{[^ ]+}} = zext i32 %exp to i64
|
||||
// CHECK: %{{[^ ]+}} = shl nuw i64 {{[^ ]+}}, %{{[^ ]+}}
|
||||
// CHECK: ret i64 %{{[^ ]+}}
|
||||
2u64.pow(exp)
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @b(
|
||||
#[no_mangle]
|
||||
pub fn b(exp: u32) -> i64 {
|
||||
// CHECK: %[[R:.+]] = and i32 %exp, 63
|
||||
// CHECK: %[[R:.+]] = zext i32 %[[R:.+]] to i64
|
||||
// CHECK: %[[R:.+]] = shl nuw i64 %[[R:.+]].i, %[[R:.+]]
|
||||
// CHECK: ret i64 %[[R:.+]]
|
||||
// CHECK: %{{[^ ]+}} = icmp ugt i32 %exp, 64
|
||||
// CHECK: %{{[^ ]+}} = zext i32 %exp to i64
|
||||
// CHECK: %{{[^ ]+}} = shl nuw i64 {{[^ ]+}}, %{{[^ ]+}}
|
||||
// CHECK: ret i64 %{{[^ ]+}}
|
||||
2i64.pow(exp)
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @c(
|
||||
#[no_mangle]
|
||||
pub fn c(exp: u32) -> u32 {
|
||||
// CHECK: %[[R:.+]].0.i = shl i32 %exp, 1
|
||||
// CHECK: %[[R:.+]].1.i = icmp sgt i32 %exp, -1
|
||||
// CHECK: %[[R:.+]].i = icmp ult i32 %[[R:.+]].0.i, 32
|
||||
// CHECK: %fine.i = and i1 %[[R:.+]].1.i, %[[R:.+]].i
|
||||
// CHECK: %0 = and i32 %[[R:.+]].0.i, 30
|
||||
// CHECK: %[[R:.+]].i = zext i1 %fine.i to i32
|
||||
// CHECK: %[[R:.+]] = shl nuw nsw i32 %[[R:.+]].i, %0
|
||||
// CHECK: ret i32 %[[R:.+]]
|
||||
// CHECK: %{{[^ ]+}} = icmp ugt i32 %exp, 16
|
||||
// CHECK: %{{[^ ]+}} = shl nuw nsw i32 %exp, 1
|
||||
// CHECK: %{{[^ ]+}} = shl nuw i32 1, %{{[^ ]+}}
|
||||
// CHECK: %{{[^ ]+}} = select i1 %{{[^ ]+}}, i32 0, i32 %{{[^ ]+}}
|
||||
// CHECK: ret i32 %{{[^ ]+}}
|
||||
4u32.pow(exp)
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @d(
|
||||
#[no_mangle]
|
||||
pub fn d(exp: u32) -> u32 {
|
||||
// CHECK: tail call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %exp, i32 5)
|
||||
// CHECK: %[[R:.+]].0.i = extractvalue { i32, i1 } %[[R:.+]], 0
|
||||
// CHECK: %[[R:.+]].1.i = extractvalue { i32, i1 } %[[R:.+]], 1
|
||||
// CHECK: %[[R:.+]].i = xor i1 %[[R:.+]].1.i, true
|
||||
// CHECK: %[[R:.+]].i = icmp ult i32 %[[R:.+]].0.i, 32
|
||||
// CHECK: %fine.i = and i1 %[[R:.+]].i, %[[R:.+]].i
|
||||
// CHECK: %[[R:.+]] = and i32 %[[R:.+]].0.i, 31
|
||||
// CHECK: %[[R:.+]].i = zext i1 %fine.i to i32
|
||||
// CHECK: %[[R:.+]] = shl nuw i32 %[[R:.+]].i, %1
|
||||
// CHECK: ret i32 %[[R:.+]]
|
||||
// CHECK: %{{[^ ]+}} = icmp ugt i32 %exp, 6
|
||||
// CHECK: %{{[^ ]+}} = mul nuw nsw i32 %exp, 5
|
||||
// CHECK: %{{[^ ]+}} = shl nuw nsw i32 1, %{{[^ ]+}}
|
||||
// CHECK: %{{[^ ]+}} = select i1 {{[^ ]+}}, i32 0, i32 %{{[^ ]+}}
|
||||
// CHECK: ret i32 %{{[^ ]+}}
|
||||
32u32.pow(exp)
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @e(
|
||||
#[no_mangle]
|
||||
pub fn e(exp: u32) -> i32 {
|
||||
// CHECK: tail call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %exp, i32 5)
|
||||
// CHECK: %[[R:.+]].0.i = extractvalue { i32, i1 } %[[R:.+]], 0
|
||||
// CHECK: %[[R:.+]].i = icmp ult i32 %[[R:.+]].0.i, 32
|
||||
// CHECK: %[[R:.+]].1.i = extractvalue { i32, i1 } %[[R:.+]], 1
|
||||
// CHECK: %[[R:.+]].i = xor i1 %[[R:.+]].1.i, true
|
||||
// CHECK: %fine.i = and i1 %[[R:.+]].i, %[[R:.+]].i
|
||||
// CHECK: %[[R:.+]].i = zext i1 %fine.i to i32
|
||||
// CHECK: %[[R:.+]] = and i32 %[[R:.+]].0.i, 31
|
||||
// CHECK: %[[R:.+]] = shl nuw i32 %[[R:.+]].i, %1
|
||||
// CHECK: ret i32 %[[R:.+]]
|
||||
// CHECK: %{{[^ ]+}} = icmp ugt i32 %exp, 6
|
||||
// CHECK: %{{[^ ]+}} = mul nuw {{(nsw )?}}i32 %exp, 5
|
||||
// CHECK: %{{[^ ]+}} = shl nuw {{(nsw )?}}i32 1, %{{[^ ]+}}
|
||||
// CHECK: %{{[^ ]+}} = select i1 {{[^ ]+}}, i32 0, i32 %{{[^ ]+}}
|
||||
// CHECK: ret i32 %{{[^ ]+}}
|
||||
32i32.pow(exp)
|
||||
}
|
||||
// note: d and e are expected to yield the same IR
|
||||
|
Loading…
Reference in New Issue
Block a user