Auto merge of #81126 - oxalica:retain-early-drop, r=m-ou-se

Optimize Vec::retain

Use `copy_nonoverlapping` instead of `swap` to reduce memory writes, as was already done in #44355 and `String::retain`.
#48065 already attempted this optimization, but it was reverted in #67300 due to bad codegen of `DrainFilter::drop`.

This PR re-implements the drop-then-move approach. I ran a [benchmark](https://gist.github.com/oxalica/3360eec9376f22533fcecff02798b698) on small-no-drop, small-need-drop, and large-no-drop elements with different predicate functions. It turns out that the new implementation is >20% faster on average in almost all cases. Only 2/24 cases are slower, by 3% and 5%. See the link above for more detail.

I think the regression in the may-panic cases is due to the drop guard preventing some optimizations. If it were permitted to leak elements when the predicate function or an element's `drop` panics, the new implementation should almost always be faster than the current one.
I'm not sure whether we should leak on panic, since there is indeed an issue (#52267) that complained about it before.
This commit is contained in:
bors 2021-02-11 04:40:57 +00:00
commit 1efd804983
2 changed files with 125 additions and 11 deletions

View File

@ -1399,22 +1399,75 @@ impl<T, A: Allocator> Vec<T, A> {
where
F: FnMut(&T) -> bool,
{
let len = self.len();
let mut del = 0;
{
let v = &mut **self;
let original_len = self.len();
// Avoid double drop if the drop guard is not executed,
// since we may make some holes during the process.
unsafe { self.set_len(0) };
for i in 0..len {
if !f(&v[i]) {
del += 1;
} else if del > 0 {
v.swap(i - del, i);
// Vec: [Kept, Kept, Hole, Hole, Hole, Hole, Unchecked, Unchecked]
// |<- processed len ->| ^- next to check
// |<- deleted cnt ->|
// |<- original_len ->|
// Kept: Elements which predicate returns true on.
// Hole: Moved or dropped element slot.
// Unchecked: Unchecked valid elements.
//
// This drop guard will be invoked when predicate or `drop` of element panicked.
// It shifts unchecked elements to cover holes and `set_len` to the correct length.
// In cases where the predicate and `drop` never panic, it will be optimized out.
// Drop guard that restores the vector to a valid, hole-free state.
//
// While the guard is alive the vector's length has been set to 0, so the
// guard is the only thing tracking which slots still hold live elements.
struct BackshiftOnDrop<'a, T, A: Allocator> {
// The vector being filtered.
v: &'a mut Vec<T, A>,
// Number of leading slots of the original contents that were already examined.
processed_len: usize,
// Number of elements removed so far, i.e. the width of the hole that sits
// right before the slot at `processed_len`.
deleted_cnt: usize,
// Length of the vector before filtering started.
original_len: usize,
}
impl<T, A: Allocator> Drop for BackshiftOnDrop<'_, T, A> {
// Closes the hole (if any) by shifting the not-yet-examined tail down over
// it, then publishes the final length.
fn drop(&mut self) {
// Without deletions the kept prefix and the unchecked tail are already adjacent.
if self.deleted_cnt > 0 {
// SAFETY: Trailing unchecked items must be valid since we never touch them.
unsafe {
// `ptr::copy` rather than `copy_nonoverlapping`: source and destination
// ranges overlap whenever the tail is longer than the hole.
ptr::copy(
self.v.as_ptr().add(self.processed_len),
self.v.as_mut_ptr().add(self.processed_len - self.deleted_cnt),
self.original_len - self.processed_len,
);
}
}
// SAFETY: After filling holes, all items are in contiguous memory.
unsafe {
// Kept elements plus any shifted tail: everything except the deleted ones.
self.v.set_len(self.original_len - self.deleted_cnt);
}
}
}
if del > 0 {
self.truncate(len - del);
let mut g = BackshiftOnDrop { v: self, processed_len: 0, deleted_cnt: 0, original_len };
while g.processed_len < original_len {
// SAFETY: Unchecked element must be valid.
let cur = unsafe { &mut *g.v.as_mut_ptr().add(g.processed_len) };
if !f(cur) {
// Advance early to avoid double drop if `drop_in_place` panicked.
g.processed_len += 1;
g.deleted_cnt += 1;
// SAFETY: We never touch this element again after dropped.
unsafe { ptr::drop_in_place(cur) };
// We already advanced the counter.
continue;
}
if g.deleted_cnt > 0 {
// SAFETY: `deleted_cnt` > 0, so the hole slot must not overlap with current element.
// We use copy for move, and never touch this element again.
unsafe {
let hole_slot = g.v.as_mut_ptr().add(g.processed_len - g.deleted_cnt);
ptr::copy_nonoverlapping(cur, hole_slot, 1);
}
}
g.processed_len += 1;
}
// All items are processed. This can be optimized to `set_len` by LLVM.
drop(g);
}
/// Removes all but the first of consecutive elements in the vector that resolve to the same

View File

@ -287,6 +287,67 @@ fn test_retain() {
assert_eq!(vec, [2, 4]);
}
// A predicate that panics after an element has already been removed (so a
// hole exists in the buffer) must neither leak nor double-drop anything.
#[test]
fn test_retain_pred_panic_with_hole() {
    let originals: Vec<Rc<i32>> = (0..5).map(Rc::new).collect();

    let outcome = catch_unwind(AssertUnwindSafe(|| {
        let mut working = originals.clone();
        working.retain(|item| match **item {
            0 | 2 => true,
            1 => false,
            _ => panic!(),
        });
    }));
    outcome.unwrap_err();

    // The cloned vector was torn down during unwinding, so every `Rc` is
    // back to being solely owned by `originals`.
    let solely_owned = originals.iter().all(|rc| Rc::strong_count(rc) == 1);
    assert!(solely_owned);
}
// A predicate that panics before anything was removed (no hole in the
// buffer yet) must neither leak nor double-drop anything.
#[test]
fn test_retain_pred_panic_no_hole() {
    let originals: Vec<Rc<i32>> = (0..5).map(Rc::new).collect();

    let outcome = catch_unwind(AssertUnwindSafe(|| {
        let mut working = originals.clone();
        working.retain(|item| {
            if **item > 2 {
                panic!();
            }
            true
        });
    }));
    outcome.unwrap_err();

    // The cloned vector was torn down during unwinding, so every `Rc` is
    // back to being solely owned by `originals`.
    let solely_owned = originals.iter().all(|rc| Rc::strong_count(rc) == 1);
    assert!(solely_owned);
}
// An element whose destructor panics while being removed must not prevent
// the remaining elements from being dropped exactly once.
#[test]
fn test_retain_drop_panic() {
    // Wrapper whose destructor panics for the element holding `3`.
    struct Wrap(Rc<i32>);

    impl Drop for Wrap {
        fn drop(&mut self) {
            if *self.0 == 3 {
                panic!();
            }
        }
    }

    let originals: Vec<Rc<i32>> = (0..5).map(Rc::new).collect();

    let outcome = catch_unwind(AssertUnwindSafe(|| {
        let mut wrapped: Vec<Wrap> = originals.iter().cloned().map(Wrap).collect();
        wrapped.retain(|w| match *w.0 {
            // Removing `1` opens a hole; removing `3` triggers the drop panic.
            1 | 3 => false,
            _ => true,
        });
    }));
    outcome.unwrap_err();

    // Other elements are dropped when `drop` of one element panicked.
    // The panicked wrapper also has its Rc dropped.
    let solely_owned = originals.iter().all(|rc| Rc::strong_count(rc) == 1);
    assert!(solely_owned);
}
#[test]
fn test_dedup() {
fn case(a: Vec<i32>, b: Vec<i32>) {