Reduce code duplication by moving partition_lomuto_branchless_simple into quicksort module

Lukas Bergdoll 2024-08-26 11:17:38 +02:00
parent 13d7b546da
commit 89c9a2908f
3 changed files with 39 additions and 79 deletions

View File

@@ -6,13 +6,11 @@
 //! for pivot selection. Using this as a fallback ensures O(n) worst case running time with
 //! better performance than one would get using heapsort as fallback.

-use crate::intrinsics;
 use crate::mem::{self, SizedTypeProperties};
 #[cfg(not(feature = "optimize_for_size"))]
 use crate::slice::sort::shared::pivot::choose_pivot;
 #[cfg(not(feature = "optimize_for_size"))]
 use crate::slice::sort::shared::smallsort::insertion_sort_shift_left;
-#[cfg(not(feature = "optimize_for_size"))]
 use crate::slice::sort::unstable::quicksort::partition;

 /// Reorders the slice such that the element at `index` is at its final sorted position.
@@ -252,15 +250,7 @@ fn median_of_ninthers<T, F: FnMut(&T, &T) -> bool>(v: &mut [T], is_less: &mut F)
     median_of_medians(&mut v[lo..lo + frac], is_less, pivot);

-    #[cfg(not(feature = "optimize_for_size"))]
-    {
-        partition(v, lo + pivot, is_less)
-    }
-
-    #[cfg(feature = "optimize_for_size")]
-    {
-        partition_size_opt(v, lo + pivot, is_less)
-    }
+    partition(v, lo + pivot, is_less)
 }

 /// Moves around the 9 elements at the indices a..i, such that
@@ -351,69 +341,3 @@ fn bubble_sort<T, F: FnMut(&T, &T) -> bool>(v: &mut [T], is_less: &mut F) {
         n -= 1;
     }
 }
-
-#[cfg(feature = "optimize_for_size")]
-fn partition_size_opt<T, F>(v: &mut [T], pivot: usize, is_less: &mut F) -> usize
-where
-    F: FnMut(&T, &T) -> bool,
-{
-    let len = v.len();
-
-    // Allows for panic-free code-gen by proving this property to the compiler.
-    if len == 0 {
-        return 0;
-    }
-
-    if pivot >= len {
-        intrinsics::abort();
-    }
-
-    // SAFETY: We checked that `pivot` is in-bounds.
-    unsafe {
-        // Place the pivot at the beginning of slice.
-        v.swap_unchecked(0, pivot);
-    }
-
-    let (pivot, v_without_pivot) = v.split_at_mut(1);
-
-    // Assuming that Rust generates noalias LLVM IR we can be sure that a partition function
-    // signature of the form `(v: &mut [T], pivot: &T)` guarantees that pivot and v can't alias.
-    // Having this guarantee is crucial for optimizations. It's possible to copy the pivot value
-    // into a stack value, but this creates issues for types with interior mutability mandating
-    // a drop guard.
-    let pivot = &mut pivot[0];
-
-    let num_lt = partition_lomuto_branchless_simple(v_without_pivot, pivot, is_less);
-
-    if num_lt >= len {
-        intrinsics::abort();
-    }
-
-    // SAFETY: We checked that `num_lt` is in-bounds.
-    unsafe {
-        // Place the pivot between the two partitions.
-        v.swap_unchecked(0, num_lt);
-    }
-
-    num_lt
-}
-
-#[cfg(feature = "optimize_for_size")]
-fn partition_lomuto_branchless_simple<T, F: FnMut(&T, &T) -> bool>(
-    v: &mut [T],
-    pivot: &T,
-    is_less: &mut F,
-) -> usize {
-    let mut left = 0;
-
-    for right in 0..v.len() {
-        // SAFETY: `left` can at max be incremented by 1 each loop iteration, which implies that
-        // left <= right and that both are in-bounds.
-        unsafe {
-            let right_is_lt = is_less(v.get_unchecked(right), pivot);
-            v.swap_unchecked(left, right);
-            left += right_is_lt as usize;
-        }
-    }
-
-    left
-}
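
An aside on the technique being moved: partition_lomuto_branchless_simple is a branchless variant of Lomuto partitioning. Instead of branching on the comparison, every iteration swaps unconditionally and advances the partition boundary by the comparison result. A minimal standalone sketch in safe Rust (illustrative names, checked indexing in place of the get_unchecked/swap_unchecked calls above):

fn lomuto_branchless<T, F: FnMut(&T, &T) -> bool>(v: &mut [T], pivot: &T, is_less: &mut F) -> usize {
    let mut left = 0;
    for right in 0..v.len() {
        // Compare first, then swap unconditionally; the boundary advances only
        // when the element was less than the pivot, so no branch is needed.
        let right_is_lt = is_less(&v[right], pivot);
        v.swap(left, right);
        left += right_is_lt as usize;
    }
    left
}

fn main() {
    let mut data = [5, 1, 9, 3, 7];
    let pivot = 4;
    let num_lt = lomuto_branchless(&mut data, &pivot, &mut |a, b| a < b);
    // Elements smaller than the pivot now occupy data[..num_lt].
    assert_eq!(num_lt, 2);
    assert!(data[..num_lt].iter().all(|&x| x < pivot));
    assert!(data[num_lt..].iter().all(|&x| x >= pivot));
}
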

View File

@@ -8,7 +8,6 @@ use crate::slice::sort::shared::find_existing_run
 use crate::slice::sort::shared::smallsort::insertion_sort_shift_left;

 pub(crate) mod heapsort;
-#[cfg(not(feature = "optimize_for_size"))]
 pub(crate) mod quicksort;

 /// Unstable sort called ipnsort by Lukas Bergdoll and Orson Peters.

View File

@@ -1,7 +1,9 @@
 //! This module contains an unstable quicksort and two partition implementations.

 use crate::mem::{self, ManuallyDrop};
+#[cfg(not(feature = "optimize_for_size"))]
 use crate::slice::sort::shared::pivot::choose_pivot;
+#[cfg(not(feature = "optimize_for_size"))]
 use crate::slice::sort::shared::smallsort::UnstableSmallSortTypeImpl;
 use crate::{intrinsics, ptr};

@@ -11,6 +13,7 @@ use crate::{intrinsics, ptr};
 ///
 /// `limit` is the number of allowed imbalanced partitions before switching to `heapsort`. If zero,
 /// this function will immediately switch to heapsort.
+#[cfg(not(feature = "optimize_for_size"))]
 pub(crate) fn quicksort<'a, T, F>(
     mut v: &'a mut [T],
     mut ancestor_pivot: Option<&'a T>,
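
The doc comment above describes ipnsort's introsort-style safety net: quicksort runs with a budget of allowed imbalanced partitions, and once `limit` hits zero it switches to heapsort so the worst case stays O(n log n). A rough standalone sketch of that control flow (hypothetical names, recursion instead of the loop used in the real code, a library sort standing in for heapsort, and, unlike the real code, the budget is spent on every level rather than only on imbalanced partitions):

fn introsort_sketch<T: Ord>(v: &mut [T], limit: u32) {
    if v.len() <= 1 {
        return;
    }
    if limit == 0 {
        // Out of budget: fall back to an O(n log n) sort (heapsort in the real code).
        v.sort_unstable();
        return;
    }
    // Partition around the first element (a deliberately naive pivot choice).
    let mid = first_element_partition(v);
    let (lt, rest) = v.split_at_mut(mid);
    introsort_sketch(lt, limit - 1);
    introsort_sketch(&mut rest[1..], limit - 1);
}

// Places v[0] at its final sorted position and returns that index.
fn first_element_partition<T: Ord>(v: &mut [T]) -> usize {
    let mut left = 1;
    for right in 1..v.len() {
        if v[right] < v[0] {
            v.swap(left, right);
            left += 1;
        }
    }
    v.swap(0, left - 1);
    left - 1
}

fn main() {
    let mut data = [9, 3, 7, 1, 8, 2];
    introsort_sketch(&mut data, 4);
    assert_eq!(data, [1, 2, 3, 7, 8, 9]);
}
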
@@ -138,7 +141,16 @@ const fn inst_partition<T, F: FnMut(&T, &T) -> bool>() -> fn(&mut [T], &T, &mut
     if mem::size_of::<T>() <= MAX_BRANCHLESS_PARTITION_SIZE {
         // Specialize for types that are relatively cheap to copy, where branchless optimizations
        // have large leverage e.g. `u64` and `String`.
-        partition_lomuto_branchless_cyclic::<T, F>
+
+        #[cfg(not(feature = "optimize_for_size"))]
+        {
+            partition_lomuto_branchless_cyclic::<T, F>
+        }
+
+        #[cfg(feature = "optimize_for_size")]
+        {
+            partition_lomuto_branchless_simple::<T, F>
+        }
     } else {
         partition_hoare_branchy_cyclic::<T, F>
     }
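
The hunk above is the heart of the change: inst_partition returns a function pointer, and the two cfg blocks are expressions, so after cfg evaluation exactly one of them remains as the value of the if branch. Combined with the size_of check, this picks a partition strategy once, at compile time. A simplified, self-contained sketch of the same pattern (hypothetical names and threshold, stand-in partition bodies that are deliberately naive, and an optimize_for_size cargo feature assumed to exist as it does for core):

use core::mem;

// One naive partition shared by all three stand-ins; the point of the sketch is the
// selection logic, not the partition bodies.
fn naive_partition<T: Ord>(v: &mut [T], pivot: &T) -> usize {
    let mut left = 0;
    for right in 0..v.len() {
        if v[right] < *pivot {
            v.swap(left, right);
            left += 1;
        }
    }
    left
}

fn partition_branchless<T: Ord>(v: &mut [T], pivot: &T) -> usize { naive_partition(v, pivot) }
fn partition_simple<T: Ord>(v: &mut [T], pivot: &T) -> usize { naive_partition(v, pivot) }
fn partition_branchy<T: Ord>(v: &mut [T], pivot: &T) -> usize { naive_partition(v, pivot) }

// Returning a function pointer lets the strategy be chosen once, outside the hot loop.
const fn select_partition<T: Ord>() -> fn(&mut [T], &T) -> usize {
    // Assumed threshold for illustration; the real code uses MAX_BRANCHLESS_PARTITION_SIZE.
    const MAX_CHEAP_COPY_SIZE: usize = 64;

    if mem::size_of::<T>() <= MAX_CHEAP_COPY_SIZE {
        // cfg on block expressions: exactly one arm survives compilation.
        #[cfg(not(feature = "optimize_for_size"))]
        {
            partition_branchless::<T>
        }
        #[cfg(feature = "optimize_for_size")]
        {
            partition_simple::<T>
        }
    } else {
        partition_branchy::<T>
    }
}

fn main() {
    let partition = select_partition::<u32>();
    let mut data = [4_u32, 8, 1, 7, 2];
    let num_lt = partition(&mut data, &5);
    assert_eq!(num_lt, 3);
}
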
@@ -224,6 +236,7 @@ where
     }
 }

+#[cfg(not(feature = "optimize_for_size"))]
 struct PartitionState<T> {
     // The current element that is being looked at, scans left to right through slice.
     right: *mut T,
@@ -234,6 +247,7 @@ struct PartitionState<T> {
     gap: GapGuardRaw<T>,
 }

+#[cfg(not(feature = "optimize_for_size"))]
 fn partition_lomuto_branchless_cyclic<T, F>(v: &mut [T], pivot: &T, is_less: &mut F) -> usize
 where
     F: FnMut(&T, &T) -> bool,
@@ -325,6 +339,27 @@ where
     }
 }

+#[cfg(feature = "optimize_for_size")]
+fn partition_lomuto_branchless_simple<T, F: FnMut(&T, &T) -> bool>(
+    v: &mut [T],
+    pivot: &T,
+    is_less: &mut F,
+) -> usize {
+    let mut left = 0;
+
+    for right in 0..v.len() {
+        // SAFETY: `left` can at max be incremented by 1 each loop iteration, which implies that
+        // left <= right and that both are in-bounds.
+        unsafe {
+            let right_is_lt = is_less(v.get_unchecked(right), pivot);
+            v.swap_unchecked(left, right);
+            left += right_is_lt as usize;
+        }
+    }
+
+    left
+}
+
 struct GapGuard<T> {
     pos: *mut T,
     value: ManuallyDrop<T>,
@@ -342,11 +377,13 @@ impl<T> Drop for GapGuard<T> {
 /// Ideally this wouldn't be needed and we could just use the regular GapGuard.
 /// See comment in [`partition_lomuto_branchless_cyclic`].
+#[cfg(not(feature = "optimize_for_size"))]
 struct GapGuardRaw<T> {
     pos: *mut T,
     value: *mut T,
 }

+#[cfg(not(feature = "optimize_for_size"))]
 impl<T> Drop for GapGuardRaw<T> {
     fn drop(&mut self) {
         // SAFETY: `self` MUST be constructed in a way that makes copying the gap value into