Auto merge of #79846 - the8472:inplace-tra, r=m-ou-se

Use TrustedRandomAccess for in-place iterators where possible

This can speed up in-place iterators containing simple casts and transmutes from `Copy` types to any type of the same size. `!Copy` types can't be optimized this way, since `TrustedRandomAccess` isn't implemented for iterators over them.
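For illustration, a pipeline of the shape this targets (a hypothetical example, not taken from the PR): a `Copy`-to-same-size cast that collects back into a `Vec` and can therefore reuse the source allocation.

```rust
// u32 is Copy and i32 has the same size and alignment, so each converted
// element can be written back into the slot it was read from and the
// collect reuses the original allocation.
fn cast_all(v: Vec<u32>) -> Vec<i32> {
    v.into_iter().map(|e| e as i32).collect()
}
```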

```
 name                  on.b ns/iter     o1.b ns/iter     diff ns/iter   diff %  speedup
 vec::bench_transmute  20 (40000 MB/s)  12 (66666 MB/s)            -8  -40.00%   x 1.67
```
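(For scale: each iteration moves `b.bytes = 800` bytes, so 20 ns works out to 800 B / 20 ns = 40 GB/s, which libtest reports as 40000 MB/s; 12 ns gives the 66666 MB/s figure.)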
Merged by bors on 2021-03-22 01:16:29 +00:00 as commit 35385770ae.
3 changed files with 83 additions and 16 deletions.

Benchmark changes:

```diff
@@ -548,6 +548,22 @@ fn bench_in_place_zip_iter_mut(b: &mut Bencher) {
     black_box(data);
 }
 
+pub fn vec_cast<T, U>(input: Vec<T>) -> Vec<U> {
+    input.into_iter().map(|e| unsafe { std::mem::transmute_copy(&e) }).collect()
+}
+
+#[bench]
+fn bench_transmute(b: &mut Bencher) {
+    let mut vec = vec![10u32; 100];
+    b.bytes = 800; // 2 casts x 4 bytes x 100
+    b.iter(|| {
+        let v = std::mem::take(&mut vec);
+        let v = black_box(vec_cast::<u32, i32>(v));
+        let v = black_box(vec_cast::<i32, u32>(v));
+        vec = v;
+    });
+}
+
 #[derive(Clone)]
 struct Droppable(usize);
```

Library changes, replacing the hand-rolled `try_fold` loop with a call into a new helper trait:

```diff
@@ -1,4 +1,4 @@
-use core::iter::{InPlaceIterable, SourceIter};
+use core::iter::{InPlaceIterable, SourceIter, TrustedRandomAccess};
 use core::mem::{self, ManuallyDrop};
 use core::ptr::{self};
@@ -52,16 +52,7 @@ where
         )
     };
 
-    // use try-fold since
-    // - it vectorizes better for some iterator adapters
-    // - unlike most internal iteration methods, it only takes a &mut self
-    // - it lets us thread the write pointer through its innards and get it back in the end
-    let sink = InPlaceDrop { inner: dst_buf, dst: dst_buf };
-    let sink = iterator
-        .try_fold::<_, _, Result<_, !>>(sink, write_in_place_with_drop(dst_end))
-        .unwrap();
-    // iteration succeeded, don't drop head
-    let dst = ManuallyDrop::new(sink).dst;
+    let len = SpecInPlaceCollect::collect_in_place(&mut iterator, dst_buf, dst_end);
 
     let src = unsafe { iterator.as_inner().as_into_iter() };
     // check if SourceIter contract was upheld
@@ -72,7 +63,7 @@ where
     // then the source pointer will stay in its initial position and we can't use it as reference
     if src.ptr != src_ptr {
         debug_assert!(
-            dst as *const _ <= src.ptr,
+            unsafe { dst_buf.add(len) as *const _ } <= src.ptr,
             "InPlaceIterable contract violation, write pointer advanced beyond read pointer"
         );
     }
@@ -82,10 +73,7 @@ where
     // but prevent drop of the allocation itself once IntoIter goes out of scope
     src.forget_allocation();
 
-    let vec = unsafe {
-        let len = dst.offset_from(dst_buf) as usize;
-        Vec::from_raw_parts(dst_buf, len, cap)
-    };
+    let vec = unsafe { Vec::from_raw_parts(dst_buf, len, cap) };
 
     vec
 }
```
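In both collect paths, `InPlaceDrop` acts as the panic guard: if the iterator panics mid-collect, the guard drops the prefix of elements already written to the destination. A minimal sketch of such a guard, using the field names visible in the diff (`inner`, `dst`); the real `InPlaceDrop` is defined elsewhere in this module and may differ:

```rust
use core::ptr;

// Illustrative guard: `inner` is the start of the written region, `dst` is
// one past the last element written so far.
struct PrefixDropGuard<T> {
    inner: *mut T,
    dst: *mut T,
}

impl<T> Drop for PrefixDropGuard<T> {
    fn drop(&mut self) {
        // On unwind, drop everything written so far as one slice.
        unsafe {
            let len = self.dst.offset_from(self.inner) as usize;
            ptr::drop_in_place(ptr::slice_from_raw_parts_mut(self.inner, len));
        }
    }
}
```

On the success path the guard is defused (`ManuallyDrop` / `mem::forget`), so the written elements are handed to the resulting `Vec` instead of being dropped.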
The new helper trait and its two implementations:

```diff
@@ -106,3 +94,52 @@ fn write_in_place_with_drop<T>(
         Ok(sink)
     }
 }
+
+/// Helper trait to hold specialized implementations of the in-place iterate-collect loop
+trait SpecInPlaceCollect<T, I>: Iterator<Item = T> {
+    /// Collects an iterator (`self`) into the destination buffer (`dst`) and returns the number of items
+    /// collected. `end` is the last writable element of the allocation and used for bounds checks.
+    fn collect_in_place(&mut self, dst: *mut T, end: *const T) -> usize;
+}
+
+impl<T, I> SpecInPlaceCollect<T, I> for I
+where
+    I: Iterator<Item = T>,
+{
+    #[inline]
+    default fn collect_in_place(&mut self, dst_buf: *mut T, end: *const T) -> usize {
+        // use try-fold since
+        // - it vectorizes better for some iterator adapters
+        // - unlike most internal iteration methods, it only takes a &mut self
+        // - it lets us thread the write pointer through its innards and get it back in the end
+        let sink = InPlaceDrop { inner: dst_buf, dst: dst_buf };
+        let sink =
+            self.try_fold::<_, _, Result<_, !>>(sink, write_in_place_with_drop(end)).unwrap();
+        // iteration succeeded, don't drop head
+        unsafe { ManuallyDrop::new(sink).dst.offset_from(dst_buf) as usize }
+    }
+}
+
+impl<T, I> SpecInPlaceCollect<T, I> for I
+where
+    I: Iterator<Item = T> + TrustedRandomAccess,
+{
+    #[inline]
+    fn collect_in_place(&mut self, dst_buf: *mut T, end: *const T) -> usize {
+        let len = self.size();
+        let mut drop_guard = InPlaceDrop { inner: dst_buf, dst: dst_buf };
+        for i in 0..len {
+            // Safety: the InPlaceIterable contract guarantees that for every element we read
+            // one slot in the underlying storage will have been freed up and we can immediately
+            // write back the result.
+            unsafe {
+                let dst = dst_buf.offset(i as isize);
+                debug_assert!(dst as *const _ <= end, "InPlaceIterable contract violation");
+                ptr::write(dst, self.__iterator_get_unchecked(i));
+                drop_guard.dst = dst.add(1);
+            }
+        }
+        mem::forget(drop_guard);
+        len
+    }
+}
```
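Both impls apply to the same `I`, which is only legal under the unstable specialization machinery: the blanket impl marks its method `default`, and the impl with the extra `TrustedRandomAccess` bound overrides it. A minimal self-contained sketch of that pattern, assuming nightly Rust and the broad `feature(specialization)` (the standard library uses its own internal setup):

```rust
#![feature(specialization)]
#![allow(incomplete_features)]

/// Hypothetical stand-in for TrustedRandomAccess: a marker trait that opts a
/// type into the faster strategy.
trait TrustedAccess {}
impl TrustedAccess for u32 {}

trait SpecCollect {
    fn strategy(&self) -> &'static str;
}

// Blanket fallback: applies to everything, so the method is `default` and
// may be overridden by a more specific impl.
impl<T> SpecCollect for T {
    default fn strategy(&self) -> &'static str {
        "try_fold loop"
    }
}

// Specialization: anything implementing the marker gets the counted loop.
impl<T: TrustedAccess> SpecCollect for T {
    fn strategy(&self) -> &'static str {
        "counted loop with get_unchecked"
    }
}

fn main() {
    assert_eq!('x'.strategy(), "try_fold loop");
    assert_eq!(1u32.strategy(), "counted loop with get_unchecked");
}
```

The counted loop matters because `TrustedRandomAccess` promises an exact `size()` and unchecked element access, which gives LLVM a loop with a known trip count and no per-element branching to optimize away.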

New codegen test:

```diff
@@ -0,0 +1,14 @@
+// ignore-debug: the debug assertions get in the way
+// compile-flags: -O
+// min-llvm-version: 11.0
+
+#![crate_type = "lib"]
+
+// Ensure that trivial casts of vec elements are O(1)
+// CHECK-LABEL: @vec_iterator_cast
+#[no_mangle]
+pub fn vec_iterator_cast(vec: Vec<isize>) -> Vec<usize> {
+    // CHECK-NOT: loop
+    // CHECK-NOT: call
+    vec.into_iter().map(|e| e as usize).collect()
+}
```
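When the `CHECK-NOT`s hold, no loop or library call survives in the optimized IR, so the function behaves like a pointer-preserving rebuild of the `Vec`. A hand-written equivalent for illustration (an assumption about what the optimizer achieves, not actual compiler output):

```rust
use core::mem::ManuallyDrop;

// Illustrative only: what the optimized cast amounts to once the per-element
// work is gone. SAFETY: isize and usize have identical size and alignment,
// and every isize bit pattern is a valid usize.
pub fn vec_iterator_cast_by_hand(vec: Vec<isize>) -> Vec<usize> {
    let mut vec = ManuallyDrop::new(vec);
    let (ptr, len, cap) = (vec.as_mut_ptr(), vec.len(), vec.capacity());
    unsafe { Vec::from_raw_parts(ptr as *mut usize, len, cap) }
}
```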