Avoid superfluous UB checks in IndexRange

The unchecked subtraction in `IndexRange::len` is justified by the type's
overall invariant, and the unchecked arithmetic in `take_prefix` and
`take_suffix` is justified by local branch conditions. A few more
UB-checked calls remain in cases that are only supported locally by
`debug_assert!`, which won't do anything in distributed builds, so those
UB checks may still be useful.

We generally expect the UB checks preserved by core's
`#![rustc_preserve_ub_checks]` to be optimized away in users' release
builds, but the mere presence of that extra code can sometimes inhibit
optimization, as seen in #131563.
This commit is contained in:
Josh Stone 2024-10-11 14:41:02 -07:00
parent f4966590d8
commit 5365b3f7be
2 changed files with 12 additions and 4 deletions

View File

@ -45,7 +45,8 @@ impl IndexRange {
#[inline]
pub const fn len(&self) -> usize {
// SAFETY: By invariant, this cannot wrap
unsafe { self.end.unchecked_sub(self.start) }
// Using the intrinsic because a UB check here impedes LLVM optimization. (#131563)
unsafe { crate::intrinsics::unchecked_sub(self.end, self.start) }
}
/// # Safety
@ -82,7 +83,8 @@ impl IndexRange {
let mid = if n <= self.len() {
// SAFETY: We just checked that this will be between start and end,
// and thus the addition cannot overflow.
unsafe { self.start.unchecked_add(n) }
// Using the intrinsic avoids a superfluous UB check.
unsafe { crate::intrinsics::unchecked_add(self.start, n) }
} else {
self.end
};
@ -100,8 +102,9 @@ impl IndexRange {
pub fn take_suffix(&mut self, n: usize) -> Self {
let mid = if n <= self.len() {
// SAFETY: We just checked that this will be between start and end,
// and thus the addition cannot overflow.
unsafe { self.end.unchecked_sub(n) }
// and thus the subtraction cannot overflow.
// Using the intrinsic avoids a superfluous UB check.
unsafe { crate::intrinsics::unchecked_sub(self.end, n) }
} else {
self.start
};

View File

@ -1,4 +1,9 @@
//@ compile-flags: -O
//@ revisions: host x86-64-v3
// This particular CPU regressed in #131563
//@[x86-64-v3] only-x86_64
//@[x86-64-v3] compile-flags: -Ctarget-cpu=x86-64-v3
#![crate_type = "lib"]