From 42655ff03b5c3bb5bd8484e7b1b65d5990822d5b Mon Sep 17 00:00:00 2001 From: Lukas Bergdoll Date: Tue, 16 May 2023 18:38:32 +0200 Subject: [PATCH] Use code with reliable branchless code-gen for slice::sort merge The recent LLVM 16 update changes code-gen to be not branchless anymore, in the slice::sort implementation merge function. This improves performance by 30% for random patterns, restoring the performance to the state with LLVM 15. --- library/core/src/slice/sort.rs | 38 +++++++++++----------------------- 1 file changed, 12 insertions(+), 26 deletions(-) diff --git a/library/core/src/slice/sort.rs b/library/core/src/slice/sort.rs index e6e3b55efa9..eb8595ca90d 100644 --- a/library/core/src/slice/sort.rs +++ b/library/core/src/slice/sort.rs @@ -1085,12 +1085,12 @@ where // SAFETY: left and right must be valid and part of v same for out. unsafe { - let to_copy = if is_less(&*right, &**left) { - get_and_increment(&mut right) - } else { - get_and_increment(left) - }; - ptr::copy_nonoverlapping(to_copy, get_and_increment(out), 1); + let is_l = is_less(&*right, &**left); + let to_copy = if is_l { right } else { *left }; + ptr::copy_nonoverlapping(to_copy, *out, 1); + *out = out.add(1); + right = right.add(is_l as usize); + *left = left.add(!is_l as usize); } } } else { @@ -1113,32 +1113,18 @@ where // SAFETY: left and right must be valid and part of v same for out. unsafe { - let to_copy = if is_less(&*right.sub(1), &*left.sub(1)) { - decrement_and_get(left) - } else { - decrement_and_get(right) - }; - ptr::copy_nonoverlapping(to_copy, decrement_and_get(&mut out), 1); + let is_l = is_less(&*right.sub(1), &*left.sub(1)); + *left = left.sub(is_l as usize); + *right = right.sub(!is_l as usize); + let to_copy = if is_l { *left } else { *right }; + out = out.sub(1); + ptr::copy_nonoverlapping(to_copy, out, 1); } } } // Finally, `hole` gets dropped. If the shorter run was not fully consumed, whatever remains of // it will now be copied into the hole in `v`. - unsafe fn get_and_increment(ptr: &mut *mut T) -> *mut T { - let old = *ptr; - - // SAFETY: ptr.add(1) must still be a valid pointer and part of `v`. - *ptr = unsafe { ptr.add(1) }; - old - } - - unsafe fn decrement_and_get(ptr: &mut *mut T) -> *mut T { - // SAFETY: ptr.sub(1) must still be a valid pointer and part of `v`. - *ptr = unsafe { ptr.sub(1) }; - *ptr - } - // When dropped, copies the range `start..end` into `dest..`. struct MergeHole { start: *mut T,