rust/library/alloc/tests/sort/patterns.rs
2025-02-13 12:20:55 -08:00

212 lines
5.4 KiB
Rust

use std::env;
use std::str::FromStr;
use std::sync::OnceLock;
use rand::distr::Uniform;
use rand::prelude::*;
use rand_xorshift::XorShiftRng;
use crate::sort::zipf::ZipfDistribution;
/// Provides a set of patterns useful for testing and benchmarking sorting algorithms.
/// Currently limited to i32 values.
// --- Public ---
pub fn random(len: usize) -> Vec<i32> {
// .
// : . : :
// :.:::.::
random_vec(len)
}
pub fn random_uniform<R>(len: usize, range: R) -> Vec<i32>
where
Uniform<i32>: TryFrom<R, Error: std::fmt::Debug>,
{
// :.:.:.::
let mut rng: XorShiftRng = rand::SeedableRng::seed_from_u64(get_or_init_rand_seed());
// Abstracting over ranges in Rust :(
let dist = Uniform::try_from(range).unwrap();
(0..len).map(|_| dist.sample(&mut rng)).collect()
}
pub fn random_zipf(len: usize, exponent: f64) -> Vec<i32> {
// https://en.wikipedia.org/wiki/Zipf's_law
let mut rng: XorShiftRng = rand::SeedableRng::seed_from_u64(get_or_init_rand_seed());
// Abstracting over ranges in Rust :(
let dist = ZipfDistribution::new(len, exponent).unwrap();
(0..len).map(|_| dist.sample(&mut rng) as i32).collect()
}
pub fn random_sorted(len: usize, sorted_percent: f64) -> Vec<i32> {
// .:
// .:::. :
// .::::::.::
// [----][--]
// ^ ^
// | |
// sorted |
// unsorted
// Simulate pre-existing sorted slice, where len - sorted_percent are the new unsorted values
// and part of the overall distribution.
let mut v = random_vec(len);
let sorted_len = ((len as f64) * (sorted_percent / 100.0)).round() as usize;
v[0..sorted_len].sort_unstable();
v
}
pub fn all_equal(len: usize) -> Vec<i32> {
// ......
// ::::::
(0..len).map(|_| 66).collect::<Vec<_>>()
}
pub fn ascending(len: usize) -> Vec<i32> {
// .:
// .:::
// .:::::
(0..len as i32).collect::<Vec<_>>()
}
pub fn descending(len: usize) -> Vec<i32> {
// :.
// :::.
// :::::.
(0..len as i32).rev().collect::<Vec<_>>()
}
pub fn saw_mixed(len: usize, saw_count: usize) -> Vec<i32> {
// :. :. .::. .:
// :::.:::..::::::..:::
if len == 0 {
return Vec::new();
}
let mut vals = random_vec(len);
let chunks_size = len / saw_count.max(1);
let saw_directions = random_uniform((len / chunks_size) + 1, 0..=1);
for (i, chunk) in vals.chunks_mut(chunks_size).enumerate() {
if saw_directions[i] == 0 {
chunk.sort_unstable();
} else if saw_directions[i] == 1 {
chunk.sort_unstable_by_key(|&e| std::cmp::Reverse(e));
} else {
unreachable!();
}
}
vals
}
pub fn saw_mixed_range(len: usize, range: std::ops::Range<usize>) -> Vec<i32> {
// :.
// :. :::. .::. .:
// :::.:::::..::::::..:.:::
// ascending and descending randomly picked, with length in `range`.
if len == 0 {
return Vec::new();
}
let mut vals = random_vec(len);
let max_chunks = len / range.start;
let saw_directions = random_uniform(max_chunks + 1, 0..=1);
let chunk_sizes = random_uniform(max_chunks + 1, (range.start as i32)..(range.end as i32));
let mut i = 0;
let mut l = 0;
while l < len {
let chunk_size = chunk_sizes[i] as usize;
let chunk_end = std::cmp::min(l + chunk_size, len);
let chunk = &mut vals[l..chunk_end];
if saw_directions[i] == 0 {
chunk.sort_unstable();
} else if saw_directions[i] == 1 {
chunk.sort_unstable_by_key(|&e| std::cmp::Reverse(e));
} else {
unreachable!();
}
i += 1;
l += chunk_size;
}
vals
}
pub fn pipe_organ(len: usize) -> Vec<i32> {
// .:.
// .:::::.
let mut vals = random_vec(len);
let first_half = &mut vals[0..(len / 2)];
first_half.sort_unstable();
let second_half = &mut vals[(len / 2)..len];
second_half.sort_unstable_by_key(|&e| std::cmp::Reverse(e));
vals
}
pub fn get_or_init_rand_seed() -> u64 {
*SEED_VALUE.get_or_init(|| {
env::var("OVERRIDE_SEED")
.ok()
.map(|seed| u64::from_str(&seed).unwrap())
.unwrap_or_else(rand_root_seed)
})
}
// --- Private ---
static SEED_VALUE: OnceLock<u64> = OnceLock::new();
#[cfg(not(miri))]
fn rand_root_seed() -> u64 {
// Other test code hashes `panic::Location::caller()` and constructs a seed from that, in these
// tests we want to have a fuzzer like exploration of the test space, if we used the same caller
// based construction we would always test the same.
//
// Instead we use the seconds since UNIX epoch / 10, given CI log output this value should be
// reasonably easy to re-construct.
use std::time::{SystemTime, UNIX_EPOCH};
let epoch_seconds = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs();
epoch_seconds / 10
}
#[cfg(miri)]
fn rand_root_seed() -> u64 {
// Miri is usually run with isolation with gives us repeatability but also permutations based on
// other code that runs before.
use core::hash::{BuildHasher, Hash, Hasher};
let mut hasher = std::hash::RandomState::new().build_hasher();
core::panic::Location::caller().hash(&mut hasher);
hasher.finish()
}
fn random_vec(len: usize) -> Vec<i32> {
let mut rng: XorShiftRng = rand::SeedableRng::seed_from_u64(get_or_init_rand_seed());
(0..len).map(|_| rng.random::<i32>()).collect()
}