use indexed loop instead of ptr bumping

this seems to produce less IR
This commit is contained in:
The 8472 2023-06-12 14:06:24 +02:00
parent ba5b2f0b4b
commit d90508f761
2 changed files with 20 additions and 18 deletions

View File

@ -192,23 +192,33 @@ macro_rules! iterator {
}
#[inline]
fn fold<B, F>(mut self, init: B, mut f: F) -> B
fn fold<B, F>(self, init: B, mut f: F) -> B
where
F: FnMut(B, Self::Item) -> B,
{
// Handling the 0-len case explicitly and then using a do-while style loop
// helps the optimizer. See issue #106288
// this implementation consists of the following optimizations compared to the
// default implementation:
// - do-while loop, as is llvm's preferred loop shape,
// see https://releases.llvm.org/16.0.0/docs/LoopTerminology.html#more-canonical-loops
// - bumps an index instead of a pointer since the latter case inhibits
// some optimizations, see #111603
// - avoids Option wrapping/matching
if is_empty!(self) {
return init;
}
let mut acc = init;
// SAFETY: The 0-len case was handled above so one loop iteration is guaranteed.
unsafe {
loop {
acc = f(acc, next_unchecked!(self));
if is_empty!(self) {
break;
}
let mut i = 0;
let len = len!(self);
loop {
// SAFETY: the loop iterates `i in 0..len`, which always is in bounds of
// the slice allocation
acc = f(acc, unsafe { & $( $mut_ )? *self.ptr.add(i).as_ptr() });
// SAFETY: `i` can't overflow since it'll only reach usize::MAX if the
// slice had that length, in which case we'll break out of the loop
// after the increment
i = unsafe { i.unchecked_add(1) };
if i == len {
break;
}
}
acc

View File

@ -37,14 +37,6 @@ pub fn issue71861(vec: Vec<u32>) -> Box<[u32]> {
// CHECK-LABEL: @issue75636
#[no_mangle]
pub fn issue75636<'a>(iter: &[&'a str]) -> Box<[&'a str]> {
// CHECK-NOT: panic
// Call to panic_cannot_unwind in case of double-panic is expected,
// on LLVM 16 and older, but other panics are not.
// old: filter
// old-NEXT: ; call core::panicking::panic_cannot_unwind
// old-NEXT: panic_cannot_unwind
// CHECK-NOT: panic
iter.iter().copied().collect()
}