mirror of
https://github.com/rust-lang/rust.git
synced 2024-11-25 16:24:46 +00:00
Auto merge of #107634 - scottmcm:array-drain, r=thomcc
Improve the `array::map` codegen
The `map` method on arrays [is documented as sometimes performing poorly](https://doc.rust-lang.org/std/primitive.array.html#note-on-performance-and-stack-usage), and after [a question on URLO](https://users.rust-lang.org/t/try-trait-residual-o-trait-and-try-collect-into-array/88510?u=scottmcm) prompted me to take another look at the core [`try_collect_into_array`](https://github.com/rust-lang/rust/blob/7c46fb2111/library/core/src/array/mod.rs#L865-L912) function, I had some ideas that ended up working better than I'd expected.
There are three main ideas in here, split over three commits:
1. Don't use `array::IntoIter` when we can avoid it, since that seems to not get SRoA'd, meaning that every step writes things like loop counters into the stack unnecessarily
2. Don't return arrays in `Result`s unnecessarily, as that doesn't seem to optimize away even with `unwrap_unchecked` (perhaps because it needs to get moved into a new LLVM type to account for the discriminant)
3. Don't distract LLVM with all the `Option` dances when we know for sure we have enough items (like in `map` and `zip`). This one's a larger commit as to do it I ended up adding a new `pub(crate)` trait, but hopefully those changes are still straight-forward.
(No libs-api changes; everything should be completely implementation-detail-internal.)
It's still not completely fixed -- I think it needs pcwalton's `memcpy` optimizations still (#103830) to get further -- but this seems to go much better than before. And the remaining `memcpy`s are just `transmute`-equivalent (`[T; N] -> ManuallyDrop<[T; N]>` and `[MaybeUninit<T>; N] -> [T; N]`), so hopefully those will be easier to remove with LLVM16 than the previous subobject copies 🤞
r? `@thomcc`
As a simple example, this test
```rust
pub fn long_integer_map(x: [u32; 64]) -> [u32; 64] {
x.map(|x| 13 * x + 7)
}
```
On nightly <https://rust.godbolt.org/z/xK7548TGj> takes `sub rsp, 808`
```llvm
start:
%array.i.i.i.i = alloca [64 x i32], align 4
%_3.sroa.5.i.i.i = alloca [65 x i32], align 4
%_5.i = alloca %"core::iter::adapters::map::Map<core::array::iter::IntoIter<u32, 64>, [closure@/app/example.rs:2:11: 2:14]>", align 8
```
(and yes, that's a 6**5**-element array `alloca` despite 6**4**-element input and output)
But with this PR it's only `sub rsp, 520`
```llvm
start:
%array.i.i.i.i.i.i = alloca [64 x i32], align 4
%array1.i.i.i = alloca %"core::mem::manually_drop::ManuallyDrop<[u32; 64]>", align 4
```
Similarly, the loop it emits on nightly is scalar-only and horrifying
```nasm
.LBB0_1:
mov esi, 64
mov edi, 0
cmp rdx, 64
je .LBB0_3
lea rsi, [rdx + 1]
mov qword ptr [rsp + 784], rsi
mov r8d, dword ptr [rsp + 4*rdx + 528]
mov edi, 1
lea edx, [r8 + 2*r8]
lea r8d, [r8 + 4*rdx]
add r8d, 7
.LBB0_3:
test edi, edi
je .LBB0_11
mov dword ptr [rsp + 4*rcx + 272], r8d
cmp rsi, 64
jne .LBB0_6
xor r8d, r8d
mov edx, 64
test r8d, r8d
jne .LBB0_8
jmp .LBB0_11
.LBB0_6:
lea rdx, [rsi + 1]
mov qword ptr [rsp + 784], rdx
mov edi, dword ptr [rsp + 4*rsi + 528]
mov r8d, 1
lea esi, [rdi + 2*rdi]
lea edi, [rdi + 4*rsi]
add edi, 7
test r8d, r8d
je .LBB0_11
.LBB0_8:
mov dword ptr [rsp + 4*rcx + 276], edi
add rcx, 2
cmp rcx, 64
jne .LBB0_1
```
whereas with this PR it's unrolled and vectorized
```nasm
vpmulld ymm1, ymm0, ymmword ptr [rsp + 64]
vpaddd ymm1, ymm1, ymm2
vmovdqu ymmword ptr [rsp + 328], ymm1
vpmulld ymm1, ymm0, ymmword ptr [rsp + 96]
vpaddd ymm1, ymm1, ymm2
vmovdqu ymmword ptr [rsp + 360], ymm1
```
(though sadly still stack-to-stack)
This commit is contained in:
commit
2d91939bb7
76
library/core/src/array/drain.rs
Normal file
76
library/core/src/array/drain.rs
Normal file
@ -0,0 +1,76 @@
|
||||
use crate::iter::{TrustedLen, UncheckedIterator};
|
||||
use crate::mem::ManuallyDrop;
|
||||
use crate::ptr::drop_in_place;
|
||||
use crate::slice;
|
||||
|
||||
/// A situationally-optimized version of `array.into_iter().for_each(func)`.
|
||||
///
|
||||
/// [`crate::array::IntoIter`]s are great when you need an owned iterator, but
|
||||
/// storing the entire array *inside* the iterator like that can sometimes
|
||||
/// pessimize code. Notably, it can be more bytes than you really want to move
|
||||
/// around, and because the array accesses index into it SRoA has a harder time
|
||||
/// optimizing away the type than it does iterators that just hold a couple pointers.
|
||||
///
|
||||
/// Thus this function exists, which gives a way to get *moved* access to the
|
||||
/// elements of an array using a small iterator -- no bigger than a slice iterator.
|
||||
///
|
||||
/// The function-taking-a-closure structure makes it safe, as it keeps callers
|
||||
/// from looking at already-dropped elements.
|
||||
pub(crate) fn drain_array_with<T, R, const N: usize>(
|
||||
array: [T; N],
|
||||
func: impl for<'a> FnOnce(Drain<'a, T>) -> R,
|
||||
) -> R {
|
||||
let mut array = ManuallyDrop::new(array);
|
||||
// SAFETY: Now that the local won't drop it, it's ok to construct the `Drain` which will.
|
||||
let drain = Drain(array.iter_mut());
|
||||
func(drain)
|
||||
}
|
||||
|
||||
/// See [`drain_array_with`] -- this is `pub(crate)` only so it's allowed to be
|
||||
/// mentioned in the signature of that method. (Otherwise it hits `E0446`.)
|
||||
// INVARIANT: It's ok to drop the remainder of the inner iterator.
|
||||
pub(crate) struct Drain<'a, T>(slice::IterMut<'a, T>);
|
||||
|
||||
impl<T> Drop for Drain<'_, T> {
|
||||
fn drop(&mut self) {
|
||||
// SAFETY: By the type invariant, we're allowed to drop all these.
|
||||
unsafe { drop_in_place(self.0.as_mut_slice()) }
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Iterator for Drain<'_, T> {
|
||||
type Item = T;
|
||||
|
||||
#[inline]
|
||||
fn next(&mut self) -> Option<T> {
|
||||
let p: *const T = self.0.next()?;
|
||||
// SAFETY: The iterator was already advanced, so we won't drop this later.
|
||||
Some(unsafe { p.read() })
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||
let n = self.len();
|
||||
(n, Some(n))
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> ExactSizeIterator for Drain<'_, T> {
|
||||
#[inline]
|
||||
fn len(&self) -> usize {
|
||||
self.0.len()
|
||||
}
|
||||
}
|
||||
|
||||
// SAFETY: This is a 1:1 wrapper for a slice iterator, which is also `TrustedLen`.
|
||||
unsafe impl<T> TrustedLen for Drain<'_, T> {}
|
||||
|
||||
impl<T> UncheckedIterator for Drain<'_, T> {
|
||||
unsafe fn next_unchecked(&mut self) -> T {
|
||||
// SAFETY: `Drain` is 1:1 with the inner iterator, so if the caller promised
|
||||
// that there's an element left, the inner iterator has one too.
|
||||
let p: *const T = unsafe { self.0.next_unchecked() };
|
||||
// SAFETY: The iterator was already advanced, so we won't drop this later.
|
||||
unsafe { p.read() }
|
||||
}
|
||||
}
|
@ -10,16 +10,19 @@ use crate::convert::{Infallible, TryFrom};
|
||||
use crate::error::Error;
|
||||
use crate::fmt;
|
||||
use crate::hash::{self, Hash};
|
||||
use crate::iter::TrustedLen;
|
||||
use crate::iter::UncheckedIterator;
|
||||
use crate::mem::{self, MaybeUninit};
|
||||
use crate::ops::{
|
||||
ChangeOutputType, ControlFlow, FromResidual, Index, IndexMut, NeverShortCircuit, Residual, Try,
|
||||
};
|
||||
use crate::slice::{Iter, IterMut};
|
||||
|
||||
mod drain;
|
||||
mod equality;
|
||||
mod iter;
|
||||
|
||||
pub(crate) use drain::drain_array_with;
|
||||
|
||||
#[stable(feature = "array_value_iter", since = "1.51.0")]
|
||||
pub use iter::IntoIter;
|
||||
|
||||
@ -52,16 +55,11 @@ pub use iter::IntoIter;
|
||||
/// ```
|
||||
#[inline]
|
||||
#[stable(feature = "array_from_fn", since = "1.63.0")]
|
||||
pub fn from_fn<T, const N: usize, F>(mut cb: F) -> [T; N]
|
||||
pub fn from_fn<T, const N: usize, F>(cb: F) -> [T; N]
|
||||
where
|
||||
F: FnMut(usize) -> T,
|
||||
{
|
||||
let mut idx = 0;
|
||||
[(); N].map(|_| {
|
||||
let res = cb(idx);
|
||||
idx += 1;
|
||||
res
|
||||
})
|
||||
try_from_fn(NeverShortCircuit::wrap_mut_1(cb)).0
|
||||
}
|
||||
|
||||
/// Creates an array `[T; N]` where each fallible array element `T` is returned by the `cb` call.
|
||||
@ -101,9 +99,14 @@ where
|
||||
R: Try,
|
||||
R::Residual: Residual<[R::Output; N]>,
|
||||
{
|
||||
// SAFETY: we know for certain that this iterator will yield exactly `N`
|
||||
// items.
|
||||
unsafe { try_collect_into_array_unchecked(&mut (0..N).map(cb)) }
|
||||
let mut array = MaybeUninit::uninit_array::<N>();
|
||||
match try_from_fn_erased(&mut array, cb) {
|
||||
ControlFlow::Break(r) => FromResidual::from_residual(r),
|
||||
ControlFlow::Continue(()) => {
|
||||
// SAFETY: All elements of the array were populated.
|
||||
try { unsafe { MaybeUninit::array_assume_init(array) } }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts a reference to `T` into a reference to an array of length 1 (without copying).
|
||||
@ -414,9 +417,7 @@ trait SpecArrayClone: Clone {
|
||||
impl<T: Clone> SpecArrayClone for T {
|
||||
#[inline]
|
||||
default fn clone<const N: usize>(array: &[T; N]) -> [T; N] {
|
||||
// SAFETY: we know for certain that this iterator will yield exactly `N`
|
||||
// items.
|
||||
unsafe { collect_into_array_unchecked(&mut array.iter().cloned()) }
|
||||
from_trusted_iterator(array.iter().cloned())
|
||||
}
|
||||
}
|
||||
|
||||
@ -500,9 +501,7 @@ impl<T, const N: usize> [T; N] {
|
||||
where
|
||||
F: FnMut(T) -> U,
|
||||
{
|
||||
// SAFETY: we know for certain that this iterator will yield exactly `N`
|
||||
// items.
|
||||
unsafe { collect_into_array_unchecked(&mut IntoIterator::into_iter(self).map(f)) }
|
||||
self.try_map(NeverShortCircuit::wrap_mut_1(f)).0
|
||||
}
|
||||
|
||||
/// A fallible function `f` applied to each element on array `self` in order to
|
||||
@ -539,9 +538,7 @@ impl<T, const N: usize> [T; N] {
|
||||
R: Try,
|
||||
R::Residual: Residual<[R::Output; N]>,
|
||||
{
|
||||
// SAFETY: we know for certain that this iterator will yield exactly `N`
|
||||
// items.
|
||||
unsafe { try_collect_into_array_unchecked(&mut IntoIterator::into_iter(self).map(f)) }
|
||||
drain_array_with(self, |iter| try_from_trusted_iterator(iter.map(f)))
|
||||
}
|
||||
|
||||
/// 'Zips up' two arrays into a single array of pairs.
|
||||
@ -562,11 +559,9 @@ impl<T, const N: usize> [T; N] {
|
||||
/// ```
|
||||
#[unstable(feature = "array_zip", issue = "80094")]
|
||||
pub fn zip<U>(self, rhs: [U; N]) -> [(T, U); N] {
|
||||
let mut iter = IntoIterator::into_iter(self).zip(rhs);
|
||||
|
||||
// SAFETY: we know for certain that this iterator will yield exactly `N`
|
||||
// items.
|
||||
unsafe { collect_into_array_unchecked(&mut iter) }
|
||||
drain_array_with(self, |lhs| {
|
||||
drain_array_with(rhs, |rhs| from_trusted_iterator(crate::iter::zip(lhs, rhs)))
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns a slice containing the entire array. Equivalent to `&s[..]`.
|
||||
@ -613,9 +608,7 @@ impl<T, const N: usize> [T; N] {
|
||||
/// ```
|
||||
#[unstable(feature = "array_methods", issue = "76118")]
|
||||
pub fn each_ref(&self) -> [&T; N] {
|
||||
// SAFETY: we know for certain that this iterator will yield exactly `N`
|
||||
// items.
|
||||
unsafe { collect_into_array_unchecked(&mut self.iter()) }
|
||||
from_trusted_iterator(self.iter())
|
||||
}
|
||||
|
||||
/// Borrows each element mutably and returns an array of mutable references
|
||||
@ -635,9 +628,7 @@ impl<T, const N: usize> [T; N] {
|
||||
/// ```
|
||||
#[unstable(feature = "array_methods", issue = "76118")]
|
||||
pub fn each_mut(&mut self) -> [&mut T; N] {
|
||||
// SAFETY: we know for certain that this iterator will yield exactly `N`
|
||||
// items.
|
||||
unsafe { collect_into_array_unchecked(&mut self.iter_mut()) }
|
||||
from_trusted_iterator(self.iter_mut())
|
||||
}
|
||||
|
||||
/// Divides one array reference into two at an index.
|
||||
@ -797,105 +788,71 @@ impl<T, const N: usize> [T; N] {
|
||||
}
|
||||
}
|
||||
|
||||
/// Pulls `N` items from `iter` and returns them as an array. If the iterator
|
||||
/// yields fewer than `N` items, this function exhibits undefined behavior.
|
||||
/// Populate an array from the first `N` elements of `iter`
|
||||
///
|
||||
/// See [`try_collect_into_array`] for more information.
|
||||
/// # Panics
|
||||
///
|
||||
/// If the iterator doesn't actually have enough items.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// It is up to the caller to guarantee that `iter` yields at least `N` items.
|
||||
/// Violating this condition causes undefined behavior.
|
||||
unsafe fn try_collect_into_array_unchecked<I, T, R, const N: usize>(iter: &mut I) -> R::TryType
|
||||
where
|
||||
// Note: `TrustedLen` here is somewhat of an experiment. This is just an
|
||||
// internal function, so feel free to remove if this bound turns out to be a
|
||||
// bad idea. In that case, remember to also remove the lower bound
|
||||
// `debug_assert!` below!
|
||||
I: Iterator + TrustedLen,
|
||||
I::Item: Try<Output = T, Residual = R>,
|
||||
R: Residual<[T; N]>,
|
||||
{
|
||||
debug_assert!(N <= iter.size_hint().1.unwrap_or(usize::MAX));
|
||||
debug_assert!(N <= iter.size_hint().0);
|
||||
|
||||
// SAFETY: covered by the function contract.
|
||||
unsafe { try_collect_into_array(iter).unwrap_unchecked() }
|
||||
}
|
||||
|
||||
// Infallible version of `try_collect_into_array_unchecked`.
|
||||
unsafe fn collect_into_array_unchecked<I, const N: usize>(iter: &mut I) -> [I::Item; N]
|
||||
where
|
||||
I: Iterator + TrustedLen,
|
||||
{
|
||||
let mut map = iter.map(NeverShortCircuit);
|
||||
|
||||
// SAFETY: The same safety considerations w.r.t. the iterator length
|
||||
// apply for `try_collect_into_array_unchecked` as for
|
||||
// `collect_into_array_unchecked`
|
||||
match unsafe { try_collect_into_array_unchecked(&mut map) } {
|
||||
NeverShortCircuit(array) => array,
|
||||
}
|
||||
}
|
||||
|
||||
/// Pulls `N` items from `iter` and returns them as an array. If the iterator
|
||||
/// yields fewer than `N` items, `Err` is returned containing an iterator over
|
||||
/// the already yielded items.
|
||||
///
|
||||
/// Since the iterator is passed as a mutable reference and this function calls
|
||||
/// `next` at most `N` times, the iterator can still be used afterwards to
|
||||
/// retrieve the remaining items.
|
||||
///
|
||||
/// If `iter.next()` panics, all items already yielded by the iterator are
|
||||
/// dropped.
|
||||
/// By depending on `TrustedLen`, however, we can do that check up-front (where
|
||||
/// it easily optimizes away) so it doesn't impact the loop that fills the array.
|
||||
#[inline]
|
||||
fn try_collect_into_array<I, T, R, const N: usize>(
|
||||
iter: &mut I,
|
||||
) -> Result<R::TryType, IntoIter<T, N>>
|
||||
fn from_trusted_iterator<T, const N: usize>(iter: impl UncheckedIterator<Item = T>) -> [T; N] {
|
||||
try_from_trusted_iterator(iter.map(NeverShortCircuit)).0
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn try_from_trusted_iterator<T, R, const N: usize>(
|
||||
iter: impl UncheckedIterator<Item = R>,
|
||||
) -> ChangeOutputType<R, [T; N]>
|
||||
where
|
||||
I: Iterator,
|
||||
I::Item: Try<Output = T, Residual = R>,
|
||||
R: Residual<[T; N]>,
|
||||
R: Try<Output = T>,
|
||||
R::Residual: Residual<[T; N]>,
|
||||
{
|
||||
if N == 0 {
|
||||
// SAFETY: An empty array is always inhabited and has no validity invariants.
|
||||
return Ok(Try::from_output(unsafe { mem::zeroed() }));
|
||||
}
|
||||
|
||||
let mut array = MaybeUninit::uninit_array::<N>();
|
||||
let mut guard = Guard { array_mut: &mut array, initialized: 0 };
|
||||
|
||||
for _ in 0..N {
|
||||
match iter.next() {
|
||||
Some(item_rslt) => {
|
||||
let item = match item_rslt.branch() {
|
||||
ControlFlow::Break(r) => {
|
||||
return Ok(FromResidual::from_residual(r));
|
||||
}
|
||||
ControlFlow::Continue(elem) => elem,
|
||||
};
|
||||
|
||||
// SAFETY: `guard.initialized` starts at 0, which means push can be called
|
||||
// at most N times, which this loop does.
|
||||
unsafe {
|
||||
guard.push_unchecked(item);
|
||||
}
|
||||
}
|
||||
None => {
|
||||
let alive = 0..guard.initialized;
|
||||
mem::forget(guard);
|
||||
// SAFETY: `array` was initialized with exactly `initialized`
|
||||
// number of elements.
|
||||
return Err(unsafe { IntoIter::new_unchecked(array, alive) });
|
||||
}
|
||||
assert!(iter.size_hint().0 >= N);
|
||||
fn next<T>(mut iter: impl UncheckedIterator<Item = T>) -> impl FnMut(usize) -> T {
|
||||
move |_| {
|
||||
// SAFETY: We know that `from_fn` will call this at most N times,
|
||||
// and we checked to ensure that we have at least that many items.
|
||||
unsafe { iter.next_unchecked() }
|
||||
}
|
||||
}
|
||||
|
||||
try_from_fn(next(iter))
|
||||
}
|
||||
|
||||
/// Version of [`try_from_fn`] using a passed-in slice in order to avoid
|
||||
/// needing to monomorphize for every array length.
|
||||
///
|
||||
/// This takes a generator rather than an iterator so that *at the type level*
|
||||
/// it never needs to worry about running out of items. When combined with
|
||||
/// an infallible `Try` type, that means the loop canonicalizes easily, allowing
|
||||
/// it to optimize well.
|
||||
///
|
||||
/// It would be *possible* to unify this and [`iter_next_chunk_erased`] into one
|
||||
/// function that does the union of both things, but last time it was that way
|
||||
/// it resulted in poor codegen from the "are there enough source items?" checks
|
||||
/// not optimizing away. So if you give it a shot, make sure to watch what
|
||||
/// happens in the codegen tests.
|
||||
#[inline]
|
||||
fn try_from_fn_erased<T, R>(
|
||||
buffer: &mut [MaybeUninit<T>],
|
||||
mut generator: impl FnMut(usize) -> R,
|
||||
) -> ControlFlow<R::Residual>
|
||||
where
|
||||
R: Try<Output = T>,
|
||||
{
|
||||
let mut guard = Guard { array_mut: buffer, initialized: 0 };
|
||||
|
||||
while guard.initialized < guard.array_mut.len() {
|
||||
let item = generator(guard.initialized).branch()?;
|
||||
|
||||
// SAFETY: The loop condition ensures we have space to push the item
|
||||
unsafe { guard.push_unchecked(item) };
|
||||
}
|
||||
|
||||
mem::forget(guard);
|
||||
// SAFETY: All elements of the array were populated in the loop above.
|
||||
let output = unsafe { array.transpose().assume_init() };
|
||||
Ok(Try::from_output(output))
|
||||
ControlFlow::Continue(())
|
||||
}
|
||||
|
||||
/// Panic guard for incremental initialization of arrays.
|
||||
@ -909,14 +866,14 @@ where
|
||||
///
|
||||
/// To minimize indirection fields are still pub but callers should at least use
|
||||
/// `push_unchecked` to signal that something unsafe is going on.
|
||||
pub(crate) struct Guard<'a, T, const N: usize> {
|
||||
struct Guard<'a, T> {
|
||||
/// The array to be initialized.
|
||||
pub array_mut: &'a mut [MaybeUninit<T>; N],
|
||||
pub array_mut: &'a mut [MaybeUninit<T>],
|
||||
/// The number of items that have been initialized so far.
|
||||
pub initialized: usize,
|
||||
}
|
||||
|
||||
impl<T, const N: usize> Guard<'_, T, N> {
|
||||
impl<T> Guard<'_, T> {
|
||||
/// Adds an item to the array and updates the initialized item counter.
|
||||
///
|
||||
/// # Safety
|
||||
@ -934,28 +891,73 @@ impl<T, const N: usize> Guard<'_, T, N> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<T, const N: usize> Drop for Guard<'_, T, N> {
|
||||
impl<T> Drop for Guard<'_, T> {
|
||||
fn drop(&mut self) {
|
||||
debug_assert!(self.initialized <= N);
|
||||
debug_assert!(self.initialized <= self.array_mut.len());
|
||||
|
||||
// SAFETY: this slice will contain only initialized objects.
|
||||
unsafe {
|
||||
crate::ptr::drop_in_place(MaybeUninit::slice_assume_init_mut(
|
||||
&mut self.array_mut.get_unchecked_mut(..self.initialized),
|
||||
self.array_mut.get_unchecked_mut(..self.initialized),
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the next chunk of `N` items from the iterator or errors with an
|
||||
/// iterator over the remainder. Used for `Iterator::next_chunk`.
|
||||
/// Pulls `N` items from `iter` and returns them as an array. If the iterator
|
||||
/// yields fewer than `N` items, `Err` is returned containing an iterator over
|
||||
/// the already yielded items.
|
||||
///
|
||||
/// Since the iterator is passed as a mutable reference and this function calls
|
||||
/// `next` at most `N` times, the iterator can still be used afterwards to
|
||||
/// retrieve the remaining items.
|
||||
///
|
||||
/// If `iter.next()` panics, all items already yielded by the iterator are
|
||||
/// dropped.
|
||||
///
|
||||
/// Used for [`Iterator::next_chunk`].
|
||||
#[inline]
|
||||
pub(crate) fn iter_next_chunk<I, const N: usize>(
|
||||
iter: &mut I,
|
||||
) -> Result<[I::Item; N], IntoIter<I::Item, N>>
|
||||
where
|
||||
I: Iterator,
|
||||
{
|
||||
let mut map = iter.map(NeverShortCircuit);
|
||||
try_collect_into_array(&mut map).map(|NeverShortCircuit(arr)| arr)
|
||||
pub(crate) fn iter_next_chunk<T, const N: usize>(
|
||||
iter: &mut impl Iterator<Item = T>,
|
||||
) -> Result<[T; N], IntoIter<T, N>> {
|
||||
let mut array = MaybeUninit::uninit_array::<N>();
|
||||
let r = iter_next_chunk_erased(&mut array, iter);
|
||||
match r {
|
||||
Ok(()) => {
|
||||
// SAFETY: All elements of `array` were populated.
|
||||
Ok(unsafe { MaybeUninit::array_assume_init(array) })
|
||||
}
|
||||
Err(initialized) => {
|
||||
// SAFETY: Only the first `initialized` elements were populated
|
||||
Err(unsafe { IntoIter::new_unchecked(array, 0..initialized) })
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Version of [`iter_next_chunk`] using a passed-in slice in order to avoid
|
||||
/// needing to monomorphize for every array length.
|
||||
///
|
||||
/// Unfortunately this loop has two exit conditions, the buffer filling up
|
||||
/// or the iterator running out of items, making it tend to optimize poorly.
|
||||
#[inline]
|
||||
fn iter_next_chunk_erased<T>(
|
||||
buffer: &mut [MaybeUninit<T>],
|
||||
iter: &mut impl Iterator<Item = T>,
|
||||
) -> Result<(), usize> {
|
||||
let mut guard = Guard { array_mut: buffer, initialized: 0 };
|
||||
while guard.initialized < guard.array_mut.len() {
|
||||
let Some(item) = iter.next() else {
|
||||
// Unlike `try_from_fn_erased`, we want to keep the partial results,
|
||||
// so we need to defuse the guard instead of using `?`.
|
||||
let initialized = guard.initialized;
|
||||
mem::forget(guard);
|
||||
return Err(initialized)
|
||||
};
|
||||
|
||||
// SAFETY: The loop condition ensures we have space to push the item
|
||||
unsafe { guard.push_unchecked(item) };
|
||||
}
|
||||
|
||||
mem::forget(guard);
|
||||
Ok(())
|
||||
}
|
||||
|
@ -1,6 +1,5 @@
|
||||
use crate::array;
|
||||
use crate::iter::{ByRefSized, FusedIterator, Iterator, TrustedRandomAccessNoCoerce};
|
||||
use crate::mem::{self, MaybeUninit};
|
||||
use crate::ops::{ControlFlow, NeverShortCircuit, Try};
|
||||
|
||||
/// An iterator over `N` elements of the iterator at a time.
|
||||
@ -212,19 +211,14 @@ where
|
||||
let mut i = 0;
|
||||
// Use a while loop because (0..len).step_by(N) doesn't optimize well.
|
||||
while inner_len - i >= N {
|
||||
let mut chunk = MaybeUninit::uninit_array();
|
||||
let mut guard = array::Guard { array_mut: &mut chunk, initialized: 0 };
|
||||
while guard.initialized < N {
|
||||
let chunk = crate::array::from_fn(|local| {
|
||||
// SAFETY: The method consumes the iterator and the loop condition ensures that
|
||||
// all accesses are in bounds and only happen once.
|
||||
unsafe {
|
||||
let idx = i + guard.initialized;
|
||||
guard.push_unchecked(self.iter.__iterator_get_unchecked(idx));
|
||||
let idx = i + local;
|
||||
self.iter.__iterator_get_unchecked(idx)
|
||||
}
|
||||
}
|
||||
mem::forget(guard);
|
||||
// SAFETY: The loop above initialized all elements
|
||||
let chunk = unsafe { MaybeUninit::array_assume_init(chunk) };
|
||||
});
|
||||
accum = f(accum, chunk);
|
||||
i += N;
|
||||
}
|
||||
|
@ -1,7 +1,7 @@
|
||||
use crate::iter::adapters::{
|
||||
zip::try_get_unchecked, TrustedRandomAccess, TrustedRandomAccessNoCoerce,
|
||||
};
|
||||
use crate::iter::{FusedIterator, TrustedLen};
|
||||
use crate::iter::{FusedIterator, TrustedLen, UncheckedIterator};
|
||||
use crate::ops::Try;
|
||||
|
||||
/// An iterator that clones the elements of an underlying iterator.
|
||||
@ -140,3 +140,16 @@ where
|
||||
T: Clone,
|
||||
{
|
||||
}
|
||||
|
||||
impl<'a, I, T: 'a> UncheckedIterator for Cloned<I>
|
||||
where
|
||||
I: UncheckedIterator<Item = &'a T>,
|
||||
T: Clone,
|
||||
{
|
||||
unsafe fn next_unchecked(&mut self) -> T {
|
||||
// SAFETY: `Cloned` is 1:1 with the inner iterator, so if the caller promised
|
||||
// that there's an element left, the inner iterator has one too.
|
||||
let item = unsafe { self.it.next_unchecked() };
|
||||
item.clone()
|
||||
}
|
||||
}
|
||||
|
@ -2,7 +2,7 @@ use crate::fmt;
|
||||
use crate::iter::adapters::{
|
||||
zip::try_get_unchecked, SourceIter, TrustedRandomAccess, TrustedRandomAccessNoCoerce,
|
||||
};
|
||||
use crate::iter::{FusedIterator, InPlaceIterable, TrustedLen};
|
||||
use crate::iter::{FusedIterator, InPlaceIterable, TrustedLen, UncheckedIterator};
|
||||
use crate::ops::Try;
|
||||
|
||||
/// An iterator that maps the values of `iter` with `f`.
|
||||
@ -187,6 +187,19 @@ where
|
||||
{
|
||||
}
|
||||
|
||||
impl<B, I, F> UncheckedIterator for Map<I, F>
|
||||
where
|
||||
I: UncheckedIterator,
|
||||
F: FnMut(I::Item) -> B,
|
||||
{
|
||||
unsafe fn next_unchecked(&mut self) -> B {
|
||||
// SAFETY: `Map` is 1:1 with the inner iterator, so if the caller promised
|
||||
// that there's an element left, the inner iterator has one too.
|
||||
let item = unsafe { self.iter.next_unchecked() };
|
||||
(self.f)(item)
|
||||
}
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
#[unstable(feature = "trusted_random_access", issue = "none")]
|
||||
unsafe impl<I, F> TrustedRandomAccess for Map<I, F> where I: TrustedRandomAccess {}
|
||||
|
@ -1,7 +1,7 @@
|
||||
use crate::cmp;
|
||||
use crate::fmt::{self, Debug};
|
||||
use crate::iter::{DoubleEndedIterator, ExactSizeIterator, FusedIterator, Iterator};
|
||||
use crate::iter::{InPlaceIterable, SourceIter, TrustedLen};
|
||||
use crate::iter::{InPlaceIterable, SourceIter, TrustedLen, UncheckedIterator};
|
||||
|
||||
/// An iterator that iterates two other iterators simultaneously.
|
||||
///
|
||||
@ -417,6 +417,13 @@ where
|
||||
{
|
||||
}
|
||||
|
||||
impl<A, B> UncheckedIterator for Zip<A, B>
|
||||
where
|
||||
A: UncheckedIterator,
|
||||
B: UncheckedIterator,
|
||||
{
|
||||
}
|
||||
|
||||
// Arbitrarily selects the left side of the zip iteration as extractable "source"
|
||||
// it would require negative trait bounds to be able to try both
|
||||
#[unstable(issue = "none", feature = "inplace_iteration")]
|
||||
|
@ -450,6 +450,7 @@ pub use self::adapters::{
|
||||
pub use self::adapters::{Intersperse, IntersperseWith};
|
||||
|
||||
pub(crate) use self::adapters::try_process;
|
||||
pub(crate) use self::traits::UncheckedIterator;
|
||||
|
||||
mod adapters;
|
||||
mod range;
|
||||
|
@ -4,6 +4,7 @@ mod double_ended;
|
||||
mod exact_size;
|
||||
mod iterator;
|
||||
mod marker;
|
||||
mod unchecked_iterator;
|
||||
|
||||
#[stable(feature = "rust1", since = "1.0.0")]
|
||||
pub use self::{
|
||||
@ -19,3 +20,5 @@ pub use self::{
|
||||
pub use self::marker::InPlaceIterable;
|
||||
#[unstable(feature = "trusted_step", issue = "85731")]
|
||||
pub use self::marker::TrustedStep;
|
||||
|
||||
pub(crate) use self::unchecked_iterator::UncheckedIterator;
|
||||
|
36
library/core/src/iter/traits/unchecked_iterator.rs
Normal file
36
library/core/src/iter/traits/unchecked_iterator.rs
Normal file
@ -0,0 +1,36 @@
|
||||
use crate::iter::TrustedLen;
|
||||
|
||||
/// [`TrustedLen`] cannot have methods, so this allows augmenting it.
|
||||
///
|
||||
/// It currently requires `TrustedLen` because it's unclear whether it's
|
||||
/// reasonably possible to depend on the `size_hint` of anything else.
|
||||
pub(crate) trait UncheckedIterator: TrustedLen {
|
||||
/// Gets the next item from a non-empty iterator.
|
||||
///
|
||||
/// Because there's always a value to return, that means it can return
|
||||
/// the `Item` type directly, without wrapping it in an `Option`.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// This can only be called if `size_hint().0 != 0`, guaranteeing that
|
||||
/// there's at least one item available.
|
||||
///
|
||||
/// Otherwise (aka when `size_hint().1 == Some(0)`), this is UB.
|
||||
///
|
||||
/// # Note to Implementers
|
||||
///
|
||||
/// This has a default implementation using [`Option::unwrap_unchecked`].
|
||||
/// That's probably sufficient if your `next` *always* returns `Some`,
|
||||
/// such as for infinite iterators. In more complicated situations, however,
|
||||
/// sometimes there can still be `insertvalue`/`assume`/`extractvalue`
|
||||
/// instructions remaining in the IR from the `Option` handling, at which
|
||||
/// point you might want to implement this manually instead.
|
||||
#[unstable(feature = "trusted_len_next_unchecked", issue = "37572")]
|
||||
#[inline]
|
||||
unsafe fn next_unchecked(&mut self) -> Self::Item {
|
||||
let opt = self.next();
|
||||
// SAFETY: The caller promised that we're not empty, and
|
||||
// `Self: TrustedLen` so we can actually trust the `size_hint`.
|
||||
unsafe { opt.unwrap_unchecked() }
|
||||
}
|
||||
}
|
@ -379,6 +379,15 @@ pub(crate) type ChangeOutputType<T, V> = <<T as Try>::Residual as Residual<V>>::
|
||||
pub(crate) struct NeverShortCircuit<T>(pub T);
|
||||
|
||||
impl<T> NeverShortCircuit<T> {
|
||||
/// Wraps a unary function to produce one that wraps the output into a `NeverShortCircuit`.
|
||||
///
|
||||
/// This is useful for implementing infallible functions in terms of the `try_` ones,
|
||||
/// without accidentally capturing extra generic parameters in a closure.
|
||||
#[inline]
|
||||
pub fn wrap_mut_1<A>(mut f: impl FnMut(A) -> T) -> impl FnMut(A) -> NeverShortCircuit<T> {
|
||||
move |a| NeverShortCircuit(f(a))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn wrap_mut_2<A, B>(
|
||||
mut f: impl ~const FnMut(A, B) -> T,
|
||||
|
@ -7,7 +7,9 @@ use crate::cmp;
|
||||
use crate::cmp::Ordering;
|
||||
use crate::fmt;
|
||||
use crate::intrinsics::assume;
|
||||
use crate::iter::{FusedIterator, TrustedLen, TrustedRandomAccess, TrustedRandomAccessNoCoerce};
|
||||
use crate::iter::{
|
||||
FusedIterator, TrustedLen, TrustedRandomAccess, TrustedRandomAccessNoCoerce, UncheckedIterator,
|
||||
};
|
||||
use crate::marker::{PhantomData, Send, Sized, Sync};
|
||||
use crate::mem::{self, SizedTypeProperties};
|
||||
use crate::num::NonZeroUsize;
|
||||
|
@ -384,6 +384,15 @@ macro_rules! iterator {
|
||||
|
||||
#[unstable(feature = "trusted_len", issue = "37572")]
|
||||
unsafe impl<T> TrustedLen for $name<'_, T> {}
|
||||
|
||||
impl<'a, T> UncheckedIterator for $name<'a, T> {
|
||||
unsafe fn next_unchecked(&mut self) -> $elem {
|
||||
// SAFETY: The caller promised there's at least one more item.
|
||||
unsafe {
|
||||
next_unchecked!(self)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -700,3 +700,28 @@ fn array_into_iter_rfold() {
|
||||
let s = it.rfold(10, |a, b| 10 * a + b);
|
||||
assert_eq!(s, 10432);
|
||||
}
|
||||
|
||||
#[cfg(not(panic = "abort"))]
|
||||
#[test]
|
||||
fn array_map_drops_unmapped_elements_on_panic() {
|
||||
struct DropCounter<'a>(usize, &'a AtomicUsize);
|
||||
impl Drop for DropCounter<'_> {
|
||||
fn drop(&mut self) {
|
||||
self.1.fetch_add(1, Ordering::SeqCst);
|
||||
}
|
||||
}
|
||||
|
||||
const MAX: usize = 11;
|
||||
for panic_after in 0..MAX {
|
||||
let counter = AtomicUsize::new(0);
|
||||
let a = array::from_fn::<_, 11, _>(|i| DropCounter(i, &counter));
|
||||
let success = std::panic::catch_unwind(|| {
|
||||
let _ = a.map(|x| {
|
||||
assert!(x.0 < panic_after);
|
||||
assert_eq!(counter.load(Ordering::SeqCst), x.0);
|
||||
});
|
||||
});
|
||||
assert!(success.is_err());
|
||||
assert_eq!(counter.load(Ordering::SeqCst), MAX);
|
||||
}
|
||||
}
|
||||
|
@ -582,6 +582,9 @@ fn test_next_chunk() {
|
||||
assert_eq!(it.next_chunk().unwrap(), []);
|
||||
assert_eq!(it.next_chunk().unwrap(), [4, 5, 6, 7, 8, 9]);
|
||||
assert_eq!(it.next_chunk::<4>().unwrap_err().as_slice(), &[10, 11]);
|
||||
|
||||
let mut it = std::iter::repeat_with(|| panic!());
|
||||
assert_eq!(it.next_chunk::<0>().unwrap(), []);
|
||||
}
|
||||
|
||||
// just tests by whether or not this compiles
|
||||
|
49
tests/codegen/array-map.rs
Normal file
49
tests/codegen/array-map.rs
Normal file
@ -0,0 +1,49 @@
|
||||
// compile-flags: -C opt-level=3 -C target-cpu=x86-64-v3
|
||||
// no-system-llvm
|
||||
// only-x86_64
|
||||
// ignore-debug (the extra assertions get in the way)
|
||||
|
||||
#![crate_type = "lib"]
|
||||
#![feature(array_zip)]
|
||||
|
||||
// CHECK-LABEL: @short_integer_map
|
||||
#[no_mangle]
|
||||
pub fn short_integer_map(x: [u32; 8]) -> [u32; 8] {
|
||||
// CHECK: load <8 x i32>
|
||||
// CHECK: shl <8 x i32>
|
||||
// CHECK: or <8 x i32>
|
||||
// CHECK: store <8 x i32>
|
||||
x.map(|x| 2 * x + 1)
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @short_integer_zip_map
|
||||
#[no_mangle]
|
||||
pub fn short_integer_zip_map(x: [u32; 8], y: [u32; 8]) -> [u32; 8] {
|
||||
// CHECK: %[[A:.+]] = load <8 x i32>
|
||||
// CHECK: %[[B:.+]] = load <8 x i32>
|
||||
// CHECK: sub <8 x i32> %[[A]], %[[B]]
|
||||
// CHECK: store <8 x i32>
|
||||
x.zip(y).map(|(x, y)| x - y)
|
||||
}
|
||||
|
||||
// This test is checking that LLVM can SRoA away a bunch of the overhead,
|
||||
// like fully moving the iterators to registers. Notably, previous implementations
|
||||
// of `map` ended up `alloca`ing the whole `array::IntoIter`, meaning both a
|
||||
// hard-to-eliminate `memcpy` and that the iteration counts needed to be written
|
||||
// out to stack every iteration, even for infallible operations on `Copy` types.
|
||||
//
|
||||
// This is still imperfect, as there's more copies than would be ideal,
|
||||
// but hopefully work like #103830 will improve that in future,
|
||||
// at which point this test should be updated to be stricter.
|
||||
//
|
||||
// CHECK-LABEL: @long_integer_map
|
||||
#[no_mangle]
|
||||
pub fn long_integer_map(x: [u32; 64]) -> [u32; 64] {
|
||||
// CHECK: start:
|
||||
// CHECK-NEXT: alloca [64 x i32]
|
||||
// CHECK-NEXT: alloca %"core::mem::manually_drop::ManuallyDrop<[u32; 64]>"
|
||||
// CHECK-NOT: alloca
|
||||
// CHECK: mul <{{[0-9]+}} x i32>
|
||||
// CHECK: add <{{[0-9]+}} x i32>
|
||||
x.map(|x| 13 * x + 7)
|
||||
}
|
@ -1,6 +1,7 @@
|
||||
// compile-flags: -C opt-level=3
|
||||
// compile-flags: -C opt-level=3 -Z merge-functions=disabled
|
||||
// only-x86_64
|
||||
#![crate_type = "lib"]
|
||||
#![feature(array_zip)]
|
||||
|
||||
// CHECK-LABEL: @auto_vectorize_direct
|
||||
#[no_mangle]
|
||||
@ -30,3 +31,13 @@ pub fn auto_vectorize_loop(a: [f32; 4], b: [f32; 4]) -> [f32; 4] {
|
||||
}
|
||||
c
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @auto_vectorize_array_zip_map
|
||||
#[no_mangle]
|
||||
pub fn auto_vectorize_array_zip_map(a: [f32; 4], b: [f32; 4]) -> [f32; 4] {
|
||||
// CHECK: load <4 x float>
|
||||
// CHECK: load <4 x float>
|
||||
// CHECK: fadd <4 x float>
|
||||
// CHECK: store <4 x float>
|
||||
a.zip(b).map(|(a, b)| a + b)
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user