Further Implement Power of Two Optimization

2024-11-23 07:14:28 +00:00 · 2024-01-23 12:03:50 -05:00 · 2024-01-23 12:03:50 -05:00 · 9dccd5dce1
commit 9dccd5dce1
parent 971e37ff7e
3 changed files with 354 additions and 189 deletions
--- a/library/core/src/num/int_macros.rs
+++ b/library/core/src/num/int_macros.rs
@ -901,26 +901,59 @@ macro_rules! int_impl {
        #[rustc_const_stable(feature = "const_int_pow", since = "1.50.0")]
        #[must_use = "this returns the result of the operation, \
                      without modifying the original"]
+        #[rustc_allow_const_fn_unstable(is_val_statically_known, const_int_unchecked_arith)]
        #[inline]
        pub const fn checked_pow(self, mut exp: u32) -> Option<Self> {
-            if exp == 0 {
-                return Some(1);
-            }
-            let mut base = self;
-            let mut acc: Self = 1;
-
-            while exp > 1 {
-                if (exp & 1) == 1 {
-                    acc = try_opt!(acc.checked_mul(base));
+            // SAFETY: Both branches have the same observable behavior.
+            if unsafe { intrinsics::is_val_statically_known(self) }
+                && self.unsigned_abs().is_power_of_two()
+            {
+                if self == 1 { // Avoid divide by zero
+                    return Some(1);
                }
-                exp /= 2;
-                base = try_opt!(base.checked_mul(base));
+                if self == -1 { // Avoid divide by zero
+                    return Some(if exp & 1 != 0 { -1 } else { 1 });
+                }
+                // SAFETY: `self` is nonzero, since its magnitude is a power of two.
+                let power_used = unsafe { intrinsics::cttz_nonzero(self.wrapping_abs()) as u32 };
+                if exp > (Self::BITS - 1) / power_used { return None; } // Division folds away
+
+                // SAFETY: power_used * exp <= Self::BITS - 1, so neither operation overflows.
+                let res = unsafe { intrinsics::unchecked_shl(
+                    1 as Self,
+                    intrinsics::unchecked_mul(power_used, exp) as Self
+                )};
+                // Unchecked ops are used because LLVM doesn't always optimize out
+                // the checks at the IR level.
+
+                let sign = self.is_negative() && exp & 1 != 0;
+                if !sign && res == Self::MIN {
+                    None
+                } else if sign {
+                    Some(res.wrapping_neg())
+                } else {
+                    Some(res)
+                }
+            } else {
+                if exp == 0 {
+                    return Some(1);
+                }
+                let mut base = self;
+                let mut acc: Self = 1;
+
+                while exp > 1 {
+                    if (exp & 1) == 1 {
+                        acc = try_opt!(acc.checked_mul(base));
+                    }
+                    exp /= 2;
+                    base = try_opt!(base.checked_mul(base));
+                }
+                // since exp!=0, finally the exp must be 1.
+                // Deal with the final bit of the exponent separately, since
+                // squaring the base afterwards is not necessary and may cause a
+                // needless overflow.
+                acc.checked_mul(base)
            }
-            // since exp!=0, finally the exp must be 1.
-            // Deal with the final bit of the exponent separately, since
-            // squaring the base afterwards is not necessary and may cause a
-            // needless overflow.
-            acc.checked_mul(base)
        }

        /// Returns the square root of the number, rounded down.
@ -1537,27 +1570,58 @@ macro_rules! int_impl {
        #[rustc_const_stable(feature = "const_int_pow", since = "1.50.0")]
        #[must_use = "this returns the result of the operation, \
                      without modifying the original"]
+        #[rustc_allow_const_fn_unstable(is_val_statically_known, const_int_unchecked_arith)]
        #[inline]
        pub const fn wrapping_pow(self, mut exp: u32) -> Self {
-            if exp == 0 {
-                return 1;
-            }
-            let mut base = self;
-            let mut acc: Self = 1;
-
-            while exp > 1 {
-                if (exp & 1) == 1 {
-                    acc = acc.wrapping_mul(base);
+            // SAFETY: Both branches have the same observable behavior.
+            if unsafe { intrinsics::is_val_statically_known(self) }
+                && self.unsigned_abs().is_power_of_two()
+            {
+                if self == 1 { // Avoid divide by zero
+                    return 1;
                }
-                exp /= 2;
-                base = base.wrapping_mul(base);
-            }
+                if self == -1 { // Avoid divide by zero
+                    return if exp & 1 != 0 { -1 } else { 1 };
+                }
+                // SAFETY: `self` is nonzero, since its magnitude is a power of two.
+                let power_used = unsafe { intrinsics::cttz_nonzero(self.wrapping_abs()) as u32 };
+                if exp > (Self::BITS - 1) / power_used { return 0; } // Division folds away

-            // since exp!=0, finally the exp must be 1.
-            // Deal with the final bit of the exponent separately, since
-            // squaring the base afterwards is not necessary and may cause a
-            // needless overflow.
-            acc.wrapping_mul(base)
+                // SAFETY: power_used * exp <= Self::BITS - 1, so neither operation overflows.
+                let res = unsafe { intrinsics::unchecked_shl(
+                    1 as Self,
+                    intrinsics::unchecked_mul(power_used, exp) as Self
+                )};
+                // Unchecked ops are used because LLVM doesn't always optimize out
+                // the checks at the IR level.
+
+                let sign = self.is_negative() && exp & 1 != 0;
+                if sign {
+                    res.wrapping_neg()
+                } else {
+                    res
+                }
+            } else {
+                if exp == 0 {
+                    return 1;
+                }
+                let mut base = self;
+                let mut acc: Self = 1;
+
+                while exp > 1 {
+                    if (exp & 1) == 1 {
+                        acc = acc.wrapping_mul(base);
+                    }
+                    exp /= 2;
+                    base = base.wrapping_mul(base);
+                }
+
+                // since exp!=0, finally the exp must be 1.
+                // Deal with the final bit of the exponent separately, since
+                // squaring the base afterwards is not necessary and may cause a
+                // needless overflow.
+                acc.wrapping_mul(base)
+            }
        }

        /// Calculates `self` + `rhs`
@ -2039,36 +2103,68 @@ macro_rules! int_impl {
        #[rustc_const_stable(feature = "const_int_pow", since = "1.50.0")]
        #[must_use = "this returns the result of the operation, \
                      without modifying the original"]
+        #[rustc_allow_const_fn_unstable(is_val_statically_known, const_int_unchecked_arith)]
        #[inline]
        pub const fn overflowing_pow(self, mut exp: u32) -> (Self, bool) {
-            if exp == 0 {
-                return (1,false);
-            }
-            let mut base = self;
-            let mut acc: Self = 1;
-            let mut overflown = false;
-            // Scratch space for storing results of overflowing_mul.
-            let mut r;
+            // SAFETY: Both branches have the same observable behavior.
+            if unsafe { intrinsics::is_val_statically_known(self) }
+                && self.unsigned_abs().is_power_of_two()
+            {
+                if self == 1 { // Avoid divide by zero
+                    return (1, false);
+                }
+                if self == -1 { // Avoid divide by zero
+                    return (if exp & 1 != 0 { -1 } else { 1 }, false);
+                }
+                // SAFETY: `self` is nonzero, since its magnitude is a power of two.
+                let power_used = unsafe { intrinsics::cttz_nonzero(self.wrapping_abs()) as u32 };
+                if exp > (Self::BITS - 1) / power_used { return (0, true); } // Division folds away

-            while exp > 1 {
-                if (exp & 1) == 1 {
-                    r = acc.overflowing_mul(base);
-                    acc = r.0;
+                // SAFETY: power_used * exp <= Self::BITS - 1, so neither operation overflows.
+                let res = unsafe { intrinsics::unchecked_shl(
+                    1 as Self,
+                    intrinsics::unchecked_mul(power_used, exp) as Self
+                )};
+                // Unchecked ops are used because LLVM doesn't always optimize out
+                // the checks at the IR level.
+
+                let sign = self.is_negative() && exp & 1 != 0;
+                let overflow = !sign && res == Self::MIN; // `Self::MIN` itself is representable
+                if sign {
+                    (res.wrapping_neg(), overflow)
+                } else {
+                    (res, overflow)
+                }
+            } else {
+                if exp == 0 {
+                    return (1,false);
+                }
+                let mut base = self;
+                let mut acc: Self = 1;
+                let mut overflown = false;
+                // Scratch space for storing results of overflowing_mul.
+                let mut r;
+
+                while exp > 1 {
+                    if (exp & 1) == 1 {
+                        r = acc.overflowing_mul(base);
+                        acc = r.0;
+                        overflown |= r.1;
+                    }
+                    exp /= 2;
+                    r = base.overflowing_mul(base);
+                    base = r.0;
                    overflown |= r.1;
                }
-                exp /= 2;
-                r = base.overflowing_mul(base);
-                base = r.0;
-                overflown |= r.1;
-            }

-            // since exp!=0, finally the exp must be 1.
-            // Deal with the final bit of the exponent separately, since
-            // squaring the base afterwards is not necessary and may cause a
-            // needless overflow.
-            r = acc.overflowing_mul(base);
-            r.1 |= overflown;
-            r
+                // since exp!=0, finally the exp must be 1.
+                // Deal with the final bit of the exponent separately, since
+                // squaring the base afterwards is not necessary and may cause a
+                // needless overflow.
+                r = acc.overflowing_mul(base);
+                r.1 |= overflown;
+                r
+            }
        }

        /// Raises self to the power of `exp`, using exponentiation by squaring.
@ -2086,30 +2182,47 @@ macro_rules! int_impl {
        #[rustc_const_stable(feature = "const_int_pow", since = "1.50.0")]
        #[must_use = "this returns the result of the operation, \
                      without modifying the original"]
+        #[rustc_allow_const_fn_unstable(is_val_statically_known, const_int_unchecked_arith)]
        #[inline]
        #[rustc_inherit_overflow_checks]
-        #[rustc_allow_const_fn_unstable(is_val_statically_known)]
+        #[track_caller] // Hides the hackish overflow check for powers of two.
        pub const fn pow(self, mut exp: u32) -> Self {
            // SAFETY: This path has the same behavior as the other.
            if unsafe { intrinsics::is_val_statically_known(self) }
-                && self > 0
-                && (self & (self - 1) == 0)
+                && self.unsigned_abs().is_power_of_two()
            {
-                let power_used = match self.checked_ilog2() {
-                    Some(v) => v,
-                    // SAFETY: We just checked this is a power of two. and above zero.
-                    None => unsafe { core::hint::unreachable_unchecked() },
-                };
-                // So it panics. Have to use `overflowing_mul` to efficiently set the
-                // result to 0 if not.
-                #[cfg(debug_assertions)]
-                {
-                    _ = power_used * exp;
+                if self == 1 { // Avoid divide by zero
+                    return 1;
+                }
+                if self == -1 { // Avoid divide by zero
+                    return if exp & 1 != 0 { -1 } else { 1 };
+                }
+                // SAFETY: We just checked this is a power of two. and above zero.
+                let power_used = unsafe { intrinsics::cttz_nonzero(self.wrapping_abs()) as u32 };
+                if exp > Self::BITS / power_used { // Division of constants is free
+                    #[allow(arithmetic_overflow)]
+                    return Self::MAX * Self::MAX * 0;
+                }
+
+                // SAFETY: exp <= Self::BITS / power_used
+                let res = unsafe { intrinsics::unchecked_shl(
+                    1 as Self,
+                    intrinsics::unchecked_mul(power_used, exp) as Self
+                )};
+                // LLVM doesn't always optimize out the checks
+                // at the ir level.
+
+                let sign = self.is_negative() && exp & 1 != 0;
+                #[allow(arithmetic_overflow)]
+                if !sign && res == Self::MIN  {
+                    // So it panics.
+                    _ = Self::MAX * Self::MAX;
+                }
+                if sign {
+                    res.wrapping_neg()
+                } else {
+                    res
                }
-                let (num_shl, overflowed) = power_used.overflowing_mul(exp);
-                let fine = !overflowed
-                    & (num_shl < (mem::size_of::<Self>() * 8) as u32);
-                (1 << num_shl) * fine as Self
            } else {
                if exp == 0 {
                    return 1;
--- a/library/core/src/num/uint_macros.rs
+++ b/library/core/src/num/uint_macros.rs
@ -1005,28 +1005,49 @@ macro_rules! uint_impl {
        #[rustc_const_stable(feature = "const_int_pow", since = "1.50.0")]
        #[must_use = "this returns the result of the operation, \
                      without modifying the original"]
+        #[rustc_allow_const_fn_unstable(is_val_statically_known, const_int_unchecked_arith)]
        #[inline]
        pub const fn checked_pow(self, mut exp: u32) -> Option<Self> {
-            if exp == 0 {
-                return Some(1);
-            }
-            let mut base = self;
-            let mut acc: Self = 1;
-
-            while exp > 1 {
-                if (exp & 1) == 1 {
-                    acc = try_opt!(acc.checked_mul(base));
+            // SAFETY: Both branches have the same observable behavior.
+            if unsafe { intrinsics::is_val_statically_known(self) }
+                && self.is_power_of_two()
+            {
+                if self == 1 { // Avoid divide by zero
+                    return Some(1);
                }
-                exp /= 2;
-                base = try_opt!(base.checked_mul(base));
+                // SAFETY: A power of two is never zero.
+                let power_used = unsafe { intrinsics::cttz_nonzero(self) as u32 };
+                if exp > (Self::BITS - 1) / power_used { return None; } // Division folds away
+
+                // SAFETY: power_used * exp <= Self::BITS - 1, so neither operation overflows.
+                unsafe { Some(intrinsics::unchecked_shl(
+                    1 as Self,
+                    intrinsics::unchecked_mul(power_used, exp) as Self
+                )) }
+                // Unchecked ops are used because LLVM doesn't always optimize out
+                // the checks at the IR level.
+            } else {
+                if exp == 0 {
+                    return Some(1);
+                }
+                let mut base = self;
+                let mut acc: Self = 1;
+
+                while exp > 1 {
+                    if (exp & 1) == 1 {
+                        acc = try_opt!(acc.checked_mul(base));
+                    }
+                    exp /= 2;
+                    base = try_opt!(base.checked_mul(base));
+                }
+
+                // since exp!=0, finally the exp must be 1.
+                // Deal with the final bit of the exponent separately, since
+                // squaring the base afterwards is not necessary and may cause a
+                // needless overflow.
+
+                acc.checked_mul(base)
            }
-
-            // since exp!=0, finally the exp must be 1.
-            // Deal with the final bit of the exponent separately, since
-            // squaring the base afterwards is not necessary and may cause a
-            // needless overflow.
-
-            acc.checked_mul(base)
        }

        /// Saturating integer addition. Computes `self + rhs`, saturating at
@ -1475,27 +1496,48 @@ macro_rules! uint_impl {
        #[rustc_const_stable(feature = "const_int_pow", since = "1.50.0")]
        #[must_use = "this returns the result of the operation, \
                      without modifying the original"]
+        #[rustc_allow_const_fn_unstable(is_val_statically_known, const_int_unchecked_arith)]
        #[inline]
        pub const fn wrapping_pow(self, mut exp: u32) -> Self {
-            if exp == 0 {
-                return 1;
-            }
-            let mut base = self;
-            let mut acc: Self = 1;
-
-            while exp > 1 {
-                if (exp & 1) == 1 {
-                    acc = acc.wrapping_mul(base);
+            // SAFETY: Both branches have the same observable behavior.
+            if unsafe { intrinsics::is_val_statically_known(self) }
+                && self.is_power_of_two()
+            {
+                if self == 1 { // Avoid divide by zero
+                    return 1;
                }
-                exp /= 2;
-                base = base.wrapping_mul(base);
-            }
+                // SAFETY: A power of two is never zero.
+                let power_used = unsafe { intrinsics::cttz_nonzero(self) as u32 };
+                if exp > (Self::BITS - 1) / power_used { return 0; } // Division folds away

-            // since exp!=0, finally the exp must be 1.
-            // Deal with the final bit of the exponent separately, since
-            // squaring the base afterwards is not necessary and may cause a
-            // needless overflow.
-            acc.wrapping_mul(base)
+                // SAFETY: power_used * exp <= Self::BITS - 1, so neither operation overflows.
+                unsafe { intrinsics::unchecked_shl(
+                    1 as Self,
+                    intrinsics::unchecked_mul(power_used, exp) as Self
+                )}
+                // Unchecked ops are used because LLVM doesn't always optimize out
+                // the checks at the IR level.
+            } else {
+                if exp == 0 {
+                    return 1;
+                }
+                let mut base = self;
+                let mut acc: Self = 1;
+
+                while exp > 1 {
+                    if (exp & 1) == 1 {
+                        acc = acc.wrapping_mul(base);
+                    }
+                    exp /= 2;
+                    base = base.wrapping_mul(base);
+                }
+
+                // since exp!=0, finally the exp must be 1.
+                // Deal with the final bit of the exponent separately, since
+                // squaring the base afterwards is not necessary and may cause a
+                // needless overflow.
+                acc.wrapping_mul(base)
+            }
        }

        /// Calculates `self` + `rhs`
@ -1925,37 +1967,58 @@ macro_rules! uint_impl {
        #[rustc_const_stable(feature = "const_int_pow", since = "1.50.0")]
        #[must_use = "this returns the result of the operation, \
                      without modifying the original"]
+        #[rustc_allow_const_fn_unstable(is_val_statically_known, const_int_unchecked_arith)]
        #[inline]
        pub const fn overflowing_pow(self, mut exp: u32) -> (Self, bool) {
-            if exp == 0{
-                return (1,false);
-            }
-            let mut base = self;
-            let mut acc: Self = 1;
-            let mut overflown = false;
-            // Scratch space for storing results of overflowing_mul.
-            let mut r;
+            // SAFETY: Both branches have the same observable behavior.
+            if unsafe { intrinsics::is_val_statically_known(self) }
+                && self.is_power_of_two()
+            {
+                if self == 1 { // Avoid divide by zero
+                    return (1, false);
+                }
+                // SAFETY: A power of two is never zero.
+                let power_used = unsafe { intrinsics::cttz_nonzero(self) as u32 };
+                if exp > (Self::BITS - 1) / power_used { return (0, true); } // Division folds away

-            while exp > 1 {
-                if (exp & 1) == 1 {
-                    r = acc.overflowing_mul(base);
-                    acc = r.0;
+                // SAFETY: power_used * exp <= Self::BITS - 1, so neither operation overflows.
+                unsafe { (intrinsics::unchecked_shl(
+                    1 as Self,
+                    intrinsics::unchecked_mul(power_used, exp) as Self
+                ), false) }
+                // Unchecked ops are used because LLVM doesn't always optimize out
+                // the checks at the IR level.
+            } else {
+                if exp == 0{
+                    return (1,false);
+                }
+                let mut base = self;
+                let mut acc: Self = 1;
+                let mut overflown = false;
+                // Scratch space for storing results of overflowing_mul.
+                let mut r;
+
+                while exp > 1 {
+                    if (exp & 1) == 1 {
+                        r = acc.overflowing_mul(base);
+                        acc = r.0;
+                        overflown |= r.1;
+                    }
+                    exp /= 2;
+                    r = base.overflowing_mul(base);
+                    base = r.0;
                    overflown |= r.1;
                }
-                exp /= 2;
-                r = base.overflowing_mul(base);
-                base = r.0;
-                overflown |= r.1;
+
+                // since exp!=0, finally the exp must be 1.
+                // Deal with the final bit of the exponent separately, since
+                // squaring the base afterwards is not necessary and may cause a
+                // needless overflow.
+                r = acc.overflowing_mul(base);
+                r.1 |= overflown;
+
+                r
            }
-
-            // since exp!=0, finally the exp must be 1.
-            // Deal with the final bit of the exponent separately, since
-            // squaring the base afterwards is not necessary and may cause a
-            // needless overflow.
-            r = acc.overflowing_mul(base);
-            r.1 |= overflown;
-
-            r
        }

        /// Raises self to the power of `exp`, using exponentiation by squaring.
@ -1971,9 +2034,10 @@ macro_rules! uint_impl {
        #[rustc_const_stable(feature = "const_int_pow", since = "1.50.0")]
        #[must_use = "this returns the result of the operation, \
                      without modifying the original"]
+        #[rustc_allow_const_fn_unstable(is_val_statically_known, const_int_unchecked_arith)]
        #[inline]
        #[rustc_inherit_overflow_checks]
-        #[rustc_allow_const_fn_unstable(is_val_statically_known)]
+        #[track_caller] // Hides the hackish overflow check for powers of two.
        pub const fn pow(self, mut exp: u32) -> Self {
            // LLVM now knows that `self` is a constant value, but not a
            // constant in Rust. This allows us to compute the power used at
@ -1990,22 +2054,23 @@ macro_rules! uint_impl {
            if unsafe { intrinsics::is_val_statically_known(self) }
                && self.is_power_of_two()
            {
-                let power_used = match self.checked_ilog2() {
-                    Some(v) => v,
-                    // SAFETY: We just checked this is a power of two. `0` is not a
-                    // power of two.
-                    None => unsafe { core::hint::unreachable_unchecked() },
-                };
-                // So it panics. Have to use `overflowing_mul` to efficiently set the
-                // result to 0 if not.
-                #[cfg(debug_assertions)]
-                {
-                    _ = power_used * exp;
+                if self == 1 { // Avoid divide by zero
+                    return 1;
                }
-                let (num_shl, overflowed) = power_used.overflowing_mul(exp);
-                let fine = !overflowed
-                    & (num_shl < (mem::size_of::<Self>() * 8) as u32);
-                (1 << num_shl) * fine as Self
+                // SAFETY: We just checked this is a power of two. and above zero.
+                let power_used = unsafe { intrinsics::cttz_nonzero(self) as u32 };
+                if exp > Self::BITS / power_used { // Division of constants is free
+                    #[allow(arithmetic_overflow)]
+                    return Self::MAX * Self::MAX * 0;
+                }
+
+                // SAFETY: exp <= Self::BITS / power_used
+                unsafe { intrinsics::unchecked_shl(
+                    1 as Self,
+                    intrinsics::unchecked_mul(power_used, exp) as Self
+                )}
+                // LLVM doesn't always optimize out the checks
+                // at the ir level.
            } else {
                if exp == 0 {
                    return 1;
--- a/tests/codegen/pow_of_two.rs
+++ b/tests/codegen/pow_of_two.rs
@ -1,68 +1,55 @@
-// #[cfg(bootstrap)]
-// ignore-stage1
-// compile-flags: --crate-type=lib -Zmerge-functions=disabled
+// compile-flags: --crate-type=lib -Zmerge-functions=disabled -O -C overflow-checks=false

 // CHECK-LABEL: @a(
 #[no_mangle]
 pub fn a(exp: u32) -> u64 {
-    // CHECK: %[[R:.+]] = and i32 %exp, 63
-    // CHECK: %[[R:.+]] = zext i32 %[[R:.+]] to i64
-    // CHECK: %[[R:.+]] = shl nuw i64 %[[R:.+]].i, %[[R:.+]]
-    // CHECK: ret i64 %[[R:.+]]
+    // Expects the IR for `2u64.pow(exp)` to contain a compare of `exp` against 64,
+    // a `zext` of `exp`, and an `shl` on `i64`.
+    // NOTE(review): `ugt 64` lets `exp == 64` through to a 64-bit `shl` -- confirm intended.
+    // CHECK: %{{[^ ]+}} = icmp ugt i32 %exp, 64
+    // CHECK: %{{[^ ]+}} = zext i32 %exp to i64
+    // CHECK: %{{[^ ]+}} = shl nuw i64 {{[^ ]+}}, %{{[^ ]+}}
+    // CHECK: ret i64 %{{[^ ]+}}
    2u64.pow(exp)
 }

+// CHECK-LABEL: @b(
 #[no_mangle]
 pub fn b(exp: u32) -> i64 {
-    // CHECK: %[[R:.+]] = and i32 %exp, 63
-    // CHECK: %[[R:.+]] = zext i32 %[[R:.+]] to i64
-    // CHECK: %[[R:.+]] = shl nuw i64 %[[R:.+]].i, %[[R:.+]]
-    // CHECK: ret i64 %[[R:.+]]
+    // Signed counterpart of `a`: expects the IR for `2i64.pow(exp)` to contain a compare
+    // of `exp` against 64, a `zext` of `exp`, and an `shl` on `i64`.
+    // NOTE(review): `ugt 64` lets `exp == 64` through to a 64-bit `shl` -- confirm intended.
+    // CHECK: %{{[^ ]+}} = icmp ugt i32 %exp, 64
+    // CHECK: %{{[^ ]+}} = zext i32 %exp to i64
+    // CHECK: %{{[^ ]+}} = shl nuw i64 {{[^ ]+}}, %{{[^ ]+}}
+    // CHECK: ret i64 %{{[^ ]+}}
    2i64.pow(exp)
 }

 // CHECK-LABEL: @c(
 #[no_mangle]
 pub fn c(exp: u32) -> u32 {
-    // CHECK: %[[R:.+]].0.i = shl i32 %exp, 1
-    // CHECK: %[[R:.+]].1.i = icmp sgt i32 %exp, -1
-    // CHECK: %[[R:.+]].i = icmp ult i32 %[[R:.+]].0.i, 32
-    // CHECK: %fine.i = and i1 %[[R:.+]].1.i, %[[R:.+]].i
-    // CHECK: %0 = and i32 %[[R:.+]].0.i, 30
-    // CHECK: %[[R:.+]].i = zext i1 %fine.i to i32
-    // CHECK: %[[R:.+]] = shl nuw nsw i32 %[[R:.+]].i, %0
-    // CHECK: ret i32 %[[R:.+]]
+    // 4 == 2^2: expects a compare of `exp` against 16, `exp << 1` as the shift amount,
+    // an `shl` of 1, and a `select` that yields 0 when the compare is true.
+    // NOTE(review): `ugt 16` lets `exp == 16` through, i.e. a shift amount of 32 on `i32`
+    // -- confirm intended.
+    // CHECK: %{{[^ ]+}} = icmp ugt i32 %exp, 16
+    // CHECK: %{{[^ ]+}} = shl nuw nsw i32 %exp, 1
+    // CHECK: %{{[^ ]+}} = shl nuw i32 1, %{{[^ ]+}}
+    // CHECK: %{{[^ ]+}} = select i1 %{{[^ ]+}}, i32 0, i32 %{{[^ ]+}}
+    // CHECK: ret i32 %{{[^ ]+}}
    4u32.pow(exp)
 }

 // CHECK-LABEL: @d(
 #[no_mangle]
 pub fn d(exp: u32) -> u32 {
-    // CHECK: tail call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %exp, i32 5)
-    // CHECK: %[[R:.+]].0.i = extractvalue { i32, i1 } %[[R:.+]], 0
-    // CHECK: %[[R:.+]].1.i = extractvalue { i32, i1 } %[[R:.+]], 1
-    // CHECK: %[[R:.+]].i = xor i1 %[[R:.+]].1.i, true
-    // CHECK: %[[R:.+]].i = icmp ult i32 %[[R:.+]].0.i, 32
-    // CHECK: %fine.i = and i1 %[[R:.+]].i, %[[R:.+]].i
-    // CHECK: %[[R:.+]] = and i32 %[[R:.+]].0.i, 31
-    // CHECK: %[[R:.+]].i = zext i1 %fine.i to i32
-    // CHECK: %[[R:.+]] = shl nuw i32 %[[R:.+]].i, %1
-    // CHECK: ret i32 %[[R:.+]]
+    // 32 == 2^5: expects a compare of `exp` against 6, a multiply of `exp` by 5,
+    // an `shl` of 1, and a `select` that yields 0 when the compare is true.
+    // CHECK: %{{[^ ]+}} = icmp ugt i32 %exp, 6
+    // CHECK: %{{[^ ]+}} = mul nuw nsw i32 %exp, 5
+    // CHECK: %{{[^ ]+}} = shl nuw nsw i32 1, %{{[^ ]+}}
+    // CHECK: %{{[^ ]+}} = select i1 {{[^ ]+}}, i32 0, i32 %{{[^ ]+}}
+    // CHECK: ret i32 %{{[^ ]+}}
    32u32.pow(exp)
 }

 // CHECK-LABEL: @e(
 #[no_mangle]
 pub fn e(exp: u32) -> i32 {
-    // CHECK: tail call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %exp, i32 5)
-    // CHECK: %[[R:.+]].0.i = extractvalue { i32, i1 } %[[R:.+]], 0
-    // CHECK: %[[R:.+]].i = icmp ult i32 %[[R:.+]].0.i, 32
-    // CHECK: %[[R:.+]].1.i = extractvalue { i32, i1 } %[[R:.+]], 1
-    // CHECK: %[[R:.+]].i = xor i1 %[[R:.+]].1.i, true
-    // CHECK: %fine.i = and i1 %[[R:.+]].i, %[[R:.+]].i
-    // CHECK: %[[R:.+]].i = zext i1 %fine.i to i32
-    // CHECK: %[[R:.+]] = and i32 %[[R:.+]].0.i, 31
-    // CHECK: %[[R:.+]] = shl nuw i32 %[[R:.+]].i, %1
-    // CHECK: ret i32 %[[R:.+]]
+    // Signed 32 == 2^5: expects a compare of `exp` against 6, a multiply of `exp` by 5,
+    // an `shl` of 1, and a `select` that yields 0; the `nsw` flags are optional here.
+    // CHECK: %{{[^ ]+}} = icmp ugt i32 %exp, 6
+    // CHECK: %{{[^ ]+}} = mul nuw {{(nsw )?}}i32 %exp, 5
+    // CHECK: %{{[^ ]+}} = shl nuw {{(nsw )?}}i32 1, %{{[^ ]+}}
+    // CHECK: %{{[^ ]+}} = select i1 {{[^ ]+}}, i32 0, i32 %{{[^ ]+}}
+    // CHECK: ret i32 %{{[^ ]+}}
    32i32.pow(exp)
 }
+// note: d and e are expected to yield the same IR