mirror of
https://github.com/rust-lang/rust.git
synced 2024-11-27 01:04:03 +00:00
Auto merge of #81126 - oxalica:retain-early-drop, r=m-ou-se
Optimize Vec::retain. Use `copy_nonoverlapping` instead of `swap` to reduce memory writes, like what we've done in #44355 and `String::retain`. #48065 already tried this optimization, but it was reverted in #67300 due to bad codegen of `DrainFilter::drop`. This PR re-implements the drop-then-move approach. I did a [benchmark](https://gist.github.com/oxalica/3360eec9376f22533fcecff02798b698) on small-no-drop, small-need-drop, and large-no-drop elements with different predicate functions. It turns out that the new implementation is >20% faster on average for almost all cases. Only 2/24 cases are slower, by 3% and 5%. See the link above for more detail. I think the regression in the may-panic cases is due to the drop guard preventing some optimizations. If it were permitted to leak elements when the predicate function or an element's `drop` panics, the new implementation should almost always be faster than the current one. I'm not sure if we should leak on panic, since there is indeed an issue (#52267) that complained about it before.
This commit is contained in:
commit
1efd804983
@ -1399,22 +1399,75 @@ impl<T, A: Allocator> Vec<T, A> {
|
||||
where
|
||||
F: FnMut(&T) -> bool,
|
||||
{
|
||||
let len = self.len();
|
||||
let mut del = 0;
|
||||
{
|
||||
let v = &mut **self;
|
||||
let original_len = self.len();
|
||||
// Avoid double drop if the drop guard is not executed,
|
||||
// since we may make some holes during the process.
|
||||
unsafe { self.set_len(0) };
|
||||
|
||||
for i in 0..len {
|
||||
if !f(&v[i]) {
|
||||
del += 1;
|
||||
} else if del > 0 {
|
||||
v.swap(i - del, i);
|
||||
// Vec: [Kept, Kept, Hole, Hole, Hole, Hole, Unchecked, Unchecked]
|
||||
// |<- processed len ->| ^- next to check
|
||||
// |<- deleted cnt ->|
|
||||
// |<- original_len ->|
|
||||
// Kept: Elements which predicate returns true on.
|
||||
// Hole: Moved or dropped element slot.
|
||||
// Unchecked: Unchecked valid elements.
|
||||
//
|
||||
// This drop guard will be invoked when predicate or `drop` of element panicked.
|
||||
// It shifts unchecked elements to cover holes and `set_len` to the correct length.
|
||||
// In cases when predicate and `drop` never panic, it will be optimized out.
|
||||
struct BackshiftOnDrop<'a, T, A: Allocator> {
|
||||
v: &'a mut Vec<T, A>,
|
||||
processed_len: usize,
|
||||
deleted_cnt: usize,
|
||||
original_len: usize,
|
||||
}
|
||||
|
||||
impl<T, A: Allocator> Drop for BackshiftOnDrop<'_, T, A> {
|
||||
fn drop(&mut self) {
|
||||
if self.deleted_cnt > 0 {
|
||||
// SAFETY: Trailing unchecked items must be valid since we never touch them.
|
||||
unsafe {
|
||||
ptr::copy(
|
||||
self.v.as_ptr().add(self.processed_len),
|
||||
self.v.as_mut_ptr().add(self.processed_len - self.deleted_cnt),
|
||||
self.original_len - self.processed_len,
|
||||
);
|
||||
}
|
||||
}
|
||||
// SAFETY: After filling holes, all items are in contiguous memory.
|
||||
unsafe {
|
||||
self.v.set_len(self.original_len - self.deleted_cnt);
|
||||
}
|
||||
}
|
||||
}
|
||||
if del > 0 {
|
||||
self.truncate(len - del);
|
||||
|
||||
let mut g = BackshiftOnDrop { v: self, processed_len: 0, deleted_cnt: 0, original_len };
|
||||
|
||||
while g.processed_len < original_len {
|
||||
// SAFETY: Unchecked element must be valid.
|
||||
let cur = unsafe { &mut *g.v.as_mut_ptr().add(g.processed_len) };
|
||||
if !f(cur) {
|
||||
// Advance early to avoid double drop if `drop_in_place` panicked.
|
||||
g.processed_len += 1;
|
||||
g.deleted_cnt += 1;
|
||||
// SAFETY: We never touch this element again after dropped.
|
||||
unsafe { ptr::drop_in_place(cur) };
|
||||
// We already advanced the counter.
|
||||
continue;
|
||||
}
|
||||
if g.deleted_cnt > 0 {
|
||||
// SAFETY: `deleted_cnt` > 0, so the hole slot must not overlap with current element.
|
||||
// We use copy for move, and never touch this element again.
|
||||
unsafe {
|
||||
let hole_slot = g.v.as_mut_ptr().add(g.processed_len - g.deleted_cnt);
|
||||
ptr::copy_nonoverlapping(cur, hole_slot, 1);
|
||||
}
|
||||
}
|
||||
g.processed_len += 1;
|
||||
}
|
||||
|
||||
// All items are processed. This can be optimized to `set_len` by LLVM.
|
||||
drop(g);
|
||||
}
|
||||
|
||||
/// Removes all but the first of consecutive elements in the vector that resolve to the same
|
||||
|
@ -287,6 +287,67 @@ fn test_retain() {
|
||||
assert_eq!(vec, [2, 4]);
|
||||
}
|
||||
|
||||
// Regression test for `Vec::retain`: the predicate panics *after* an earlier
// element has already been rejected, so the vector contains a hole at the
// moment of the panic. No element may be leaked or dropped twice.
#[test]
fn test_retain_pred_panic_with_hole() {
    let v = (0..5).map(Rc::new).collect::<Vec<_>>();
    catch_unwind(AssertUnwindSafe(|| {
        // The clone shares every `Rc` with `v`, so each strong count is 2 here.
        let mut v = v.clone();
        v.retain(|r| match **r {
            0 => true,
            1 => false,
            2 => true,
            _ => panic!(),
        });
    }))
    .unwrap_err();
    // Everything in the clone is dropped once the predicate has panicked, so
    // only the handles held by the outer `v` remain.
    assert!(v.iter().all(|r| Rc::strong_count(r) == 1));
}
|
||||
|
||||
// Regression test for `Vec::retain`: the predicate panics while every element
// checked so far has been kept, so there is no hole at the moment of the
// panic. No element may be leaked or dropped twice.
#[test]
fn test_retain_pred_panic_no_hole() {
    let v = (0..5).map(Rc::new).collect::<Vec<_>>();
    catch_unwind(AssertUnwindSafe(|| {
        // The clone shares every `Rc` with `v`, so each strong count is 2 here.
        let mut v = v.clone();
        v.retain(|r| match **r {
            0 | 1 | 2 => true,
            _ => panic!(),
        });
    }))
    .unwrap_err();
    // Everything in the clone is dropped once the predicate has panicked, so
    // only the handles held by the outer `v` remain.
    assert!(v.iter().all(|r| Rc::strong_count(r) == 1));
}
|
||||
|
||||
// Regression test for `Vec::retain`: the `drop` of a rejected element panics.
// The remaining elements must still be dropped exactly once.
#[test]
fn test_retain_drop_panic() {
    // Wrapper whose destructor panics for the element holding `3`.
    struct Wrap(Rc<i32>);

    impl Drop for Wrap {
        fn drop(&mut self) {
            if *self.0 == 3 {
                panic!();
            }
        }
    }

    let v = (0..5).map(Rc::new).collect::<Vec<_>>();
    catch_unwind(AssertUnwindSafe(|| {
        // Each `Wrap` holds a second handle to the matching `Rc` in the outer `v`.
        let mut v = v.iter().map(|r| Wrap(r.clone())).collect::<Vec<_>>();
        v.retain(|w| match *w.0 {
            0 => true,
            1 => false,
            2 => true,
            3 => false, // Drop panic.
            _ => true,
        });
    }))
    .unwrap_err();
    // Other elements are dropped when `drop` of one element panicked.
    // The panicked wrapper also has its Rc dropped.
    assert!(v.iter().all(|r| Rc::strong_count(r) == 1));
}
|
||||
|
||||
#[test]
|
||||
fn test_dedup() {
|
||||
fn case(a: Vec<i32>, b: Vec<i32>) {
|
||||
|
Loading…
Reference in New Issue
Block a user