Skip to content

Commit bd33b83

Browse files
committed
Auto merge of #149784 - fereidani:retain_mut, r=joboet
Improve alloc `Vec::retain_mut` performance Hi, While reading the rustc source code, I noticed it uses `smallvec` and `thin-vec` in many places. I started reviewing those crates, optimized their `retain_mut` implementation, and then realized they were using the exact same algorithm as `alloc::vec::Vec` with less unsafe So now I’m back here with a PR for the standard library 😂. In my benchmarks, this version is noticeably faster when `retain_mut` actually removes elements (thanks to fewer pointer operations, it just advances `write_index`), while performing identically to the current implementation when nothing is removed. Let’s see if bors likes this change or not.
2 parents b7bcaa5 + bd79ea1 commit bd33b83

File tree

2 files changed

+57
-61
lines changed

2 files changed

+57
-61
lines changed

library/alloc/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@
126126
#![feature(iter_next_chunk)]
127127
#![feature(layout_for_ptr)]
128128
#![feature(legacy_receiver_trait)]
129+
#![feature(likely_unlikely)]
129130
#![feature(local_waker)]
130131
#![feature(maybe_uninit_uninit_array_transpose)]
131132
#![feature(panic_internals)]

library/alloc/src/vec/mod.rs

Lines changed: 56 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ use core::mem::{self, Assume, ManuallyDrop, MaybeUninit, SizedTypeProperties, Tr
8686
use core::ops::{self, Index, IndexMut, Range, RangeBounds};
8787
use core::ptr::{self, NonNull};
8888
use core::slice::{self, SliceIndex};
89-
use core::{fmt, intrinsics, ub_checks};
89+
use core::{fmt, hint, intrinsics, ub_checks};
9090

9191
#[stable(feature = "extract_if", since = "1.87.0")]
9292
pub use self::extract_if::ExtractIf;
@@ -2292,13 +2292,8 @@ impl<T, A: Allocator> Vec<T, A> {
22922292
return;
22932293
}
22942294

2295-
// Avoid double drop if the drop guard is not executed,
2296-
// since we may make some holes during the process.
2297-
unsafe { self.set_len(0) };
2298-
22992295
// Vec: [Kept, Kept, Hole, Hole, Hole, Hole, Unchecked, Unchecked]
2300-
// |<- processed len ->| ^- next to check
2301-
// |<- deleted cnt ->|
2296+
// | ^- write ^- read |
23022297
// |<- original_len ->|
23032298
// Kept: Elements which predicate returns true on.
23042299
// Hole: Moved or dropped element slot.
@@ -2307,77 +2302,77 @@ impl<T, A: Allocator> Vec<T, A> {
23072302
// This drop guard will be invoked when predicate or `drop` of element panicked.
23082303
// It shifts unchecked elements to cover holes and `set_len` to the correct length.
23092304
// In cases when predicate and `drop` never panick, it will be optimized out.
2310-
struct BackshiftOnDrop<'a, T, A: Allocator> {
2305+
struct PanicGuard<'a, T, A: Allocator> {
23112306
v: &'a mut Vec<T, A>,
2312-
processed_len: usize,
2313-
deleted_cnt: usize,
2307+
read: usize,
2308+
write: usize,
23142309
original_len: usize,
23152310
}
23162311

2317-
impl<T, A: Allocator> Drop for BackshiftOnDrop<'_, T, A> {
2312+
impl<T, A: Allocator> Drop for PanicGuard<'_, T, A> {
2313+
#[cold]
23182314
fn drop(&mut self) {
2319-
if self.deleted_cnt > 0 {
2320-
// SAFETY: Trailing unchecked items must be valid since we never touch them.
2321-
unsafe {
2322-
ptr::copy(
2323-
self.v.as_ptr().add(self.processed_len),
2324-
self.v.as_mut_ptr().add(self.processed_len - self.deleted_cnt),
2325-
self.original_len - self.processed_len,
2326-
);
2327-
}
2315+
let remaining = self.original_len - self.read;
2316+
// SAFETY: Trailing unchecked items must be valid since we never touch them.
2317+
unsafe {
2318+
ptr::copy(
2319+
self.v.as_ptr().add(self.read),
2320+
self.v.as_mut_ptr().add(self.write),
2321+
remaining,
2322+
);
23282323
}
23292324
// SAFETY: After filling holes, all items are in contiguous memory.
23302325
unsafe {
2331-
self.v.set_len(self.original_len - self.deleted_cnt);
2326+
self.v.set_len(self.write + remaining);
23322327
}
23332328
}
23342329
}
23352330

2336-
let mut g = BackshiftOnDrop { v: self, processed_len: 0, deleted_cnt: 0, original_len };
2331+
let mut read = 0;
2332+
loop {
2333+
// SAFETY: read < original_len
2334+
let cur = unsafe { self.get_unchecked_mut(read) };
2335+
if hint::unlikely(!f(cur)) {
2336+
break;
2337+
}
2338+
read += 1;
2339+
if read == original_len {
2340+
// All elements are kept, return early.
2341+
return;
2342+
}
2343+
}
23372344

2338-
fn process_loop<F, T, A: Allocator, const DELETED: bool>(
2339-
original_len: usize,
2340-
f: &mut F,
2341-
g: &mut BackshiftOnDrop<'_, T, A>,
2342-
) where
2343-
F: FnMut(&mut T) -> bool,
2344-
{
2345-
while g.processed_len != original_len {
2346-
// SAFETY: Unchecked element must be valid.
2347-
let cur = unsafe { &mut *g.v.as_mut_ptr().add(g.processed_len) };
2348-
if !f(cur) {
2349-
// Advance early to avoid double drop if `drop_in_place` panicked.
2350-
g.processed_len += 1;
2351-
g.deleted_cnt += 1;
2352-
// SAFETY: We never touch this element again after dropped.
2353-
unsafe { ptr::drop_in_place(cur) };
2354-
// We already advanced the counter.
2355-
if DELETED {
2356-
continue;
2357-
} else {
2358-
break;
2359-
}
2360-
}
2361-
if DELETED {
2362-
// SAFETY: `deleted_cnt` > 0, so the hole slot must not overlap with current element.
2363-
// We use copy for move, and never touch this element again.
2364-
unsafe {
2365-
let hole_slot = g.v.as_mut_ptr().add(g.processed_len - g.deleted_cnt);
2366-
ptr::copy_nonoverlapping(cur, hole_slot, 1);
2367-
}
2345+
// Critical section starts here and at least one element is going to be removed.
2346+
// Advance `g.read` early to avoid double drop if `drop_in_place` panicked.
2347+
let mut g = PanicGuard { v: self, read: read + 1, write: read, original_len };
2348+
// SAFETY: previous `read` is always less than original_len.
2349+
unsafe { ptr::drop_in_place(&mut *g.v.as_mut_ptr().add(read)) };
2350+
2351+
while g.read < g.original_len {
2352+
// SAFETY: `read` is always less than original_len.
2353+
let cur = unsafe { &mut *g.v.as_mut_ptr().add(g.read) };
2354+
if !f(cur) {
2355+
// Advance `read` early to avoid double drop if `drop_in_place` panicked.
2356+
g.read += 1;
2357+
// SAFETY: We never touch this element again after dropped.
2358+
unsafe { ptr::drop_in_place(cur) };
2359+
} else {
2360+
// SAFETY: `read` > `write`, so the slots don't overlap.
2361+
// We use copy for move, and never touch the source element again.
2362+
unsafe {
2363+
let hole = g.v.as_mut_ptr().add(g.write);
2364+
ptr::copy_nonoverlapping(cur, hole, 1);
23682365
}
2369-
g.processed_len += 1;
2366+
g.write += 1;
2367+
g.read += 1;
23702368
}
23712369
}
23722370

2373-
// Stage 1: Nothing was deleted.
2374-
process_loop::<F, T, A, false>(original_len, &mut f, &mut g);
2375-
2376-
// Stage 2: Some elements were deleted.
2377-
process_loop::<F, T, A, true>(original_len, &mut f, &mut g);
2378-
2379-
// All item are processed. This can be optimized to `set_len` by LLVM.
2380-
drop(g);
2371+
// We are leaving the critical section and no panic happened,
2372+
// Commit the length change and forget the guard.
2373+
// SAFETY: `write` is always less than or equal to original_len.
2374+
unsafe { g.v.set_len(g.write) };
2375+
mem::forget(g);
23812376
}
23822377

23832378
/// Removes all but the first of consecutive elements in the vector that resolve to the same

0 commit comments

Comments
 (0)