oxalica/bench-R5-2400G.txt

## bench-R5-2400G.txt
retain_std u64 almost_kept
                        time:   [123.44 us 127.97 us 134.19 us]
Found 14 outliers among 100 measurements (14.00%)
  2 (2.00%) low mild
  5 (5.00%) high mild
  7 (7.00%) high severe

retain_early_drop u64 almost_kept
                        time:   [98.345 us 100.49 us 103.22 us]
Found 15 outliers among 100 measurements (15.00%)
  1 (1.00%) low severe
  1 (1.00%) low mild
  3 (3.00%) high mild
  10 (10.00%) high severe

retain_std_black u64 almost_kept
                        time:   [215.45 us 221.39 us 229.35 us]
Found 16 outliers among 100 measurements (16.00%)
  7 (7.00%) high mild
  9 (9.00%) high severe

retain_early_drop_black u64 almost_kept
                        time:   [211.25 us 215.49 us 221.38 us]
Found 9 outliers among 100 measurements (9.00%)
  3 (3.00%) high mild
  6 (6.00%) high severe

retain_std u64 almost_drop
                        time:   [109.32 us 113.41 us 118.71 us]
Found 13 outliers among 100 measurements (13.00%)
  2 (2.00%) low severe
  3 (3.00%) high mild
  8 (8.00%) high severe

retain_early_drop u64 almost_drop
                        time:   [54.413 us 56.216 us 58.747 us]
Found 11 outliers among 100 measurements (11.00%)
  3 (3.00%) high mild
  8 (8.00%) high severe

retain_std_black u64 almost_drop
                        time:   [201.42 us 204.76 us 210.15 us]
Found 8 outliers among 100 measurements (8.00%)
  3 (3.00%) low mild
  1 (1.00%) high mild
  4 (4.00%) high severe

retain_early_drop_black u64 almost_drop
                        time:   [208.82 us 212.38 us 216.96 us]
Found 9 outliers among 100 measurements (9.00%)
  1 (1.00%) high mild
  8 (8.00%) high severe

retain_std u64 drop_head
                        time:   [120.17 us 125.81 us 133.32 us]
Found 16 outliers among 100 measurements (16.00%)
  6 (6.00%) high mild
  10 (10.00%) high severe

retain_early_drop u64 drop_head
                        time:   [100.77 us 104.96 us 110.49 us]
Found 22 outliers among 100 measurements (22.00%)
  3 (3.00%) low severe
  2 (2.00%) low mild
  2 (2.00%) high mild
  15 (15.00%) high severe

retain_std_black u64 drop_head
                        time:   [217.34 us 223.07 us 229.90 us]
Found 9 outliers among 100 measurements (9.00%)
  4 (4.00%) high mild
  5 (5.00%) high severe

retain_early_drop_black u64 drop_head
                        time:   [229.91 us 235.38 us 242.05 us]
Found 6 outliers among 100 measurements (6.00%)
  1 (1.00%) high mild
  5 (5.00%) high severe

retain_std u64 drop_tail
                        time:   [108.65 us 111.25 us 115.37 us]
Found 16 outliers among 100 measurements (16.00%)
  3 (3.00%) high mild
  13 (13.00%) high severe

retain_early_drop u64 drop_tail
                        time:   [60.681 us 61.226 us 62.334 us]
Found 11 outliers among 100 measurements (11.00%)
  2 (2.00%) high mild
  9 (9.00%) high severe

retain_std_black u64 drop_tail
                        time:   [188.84 us 191.98 us 196.50 us]
Found 16 outliers among 100 measurements (16.00%)
  5 (5.00%) high mild
  11 (11.00%) high severe

retain_early_drop_black u64 drop_tail
                        time:   [191.12 us 193.76 us 197.41 us]
Found 10 outliers among 100 measurements (10.00%)
  3 (3.00%) high mild
  7 (7.00%) high severe

retain_std box_u64 almost_kept
                        time:   [522.42 us 536.95 us 556.43 us]
Found 11 outliers among 100 measurements (11.00%)
  3 (3.00%) high mild
  8 (8.00%) high severe

retain_early_drop box_u64 almost_kept
                        time:   [336.46 us 345.96 us 358.06 us]
Found 9 outliers among 100 measurements (9.00%)
  4 (4.00%) high mild
  5 (5.00%) high severe

retain_std_black box_u64 almost_kept
                        time:   [551.75 us 567.97 us 589.73 us]
Found 11 outliers among 100 measurements (11.00%)
  5 (5.00%) high mild
  6 (6.00%) high severe

retain_early_drop_black box_u64 almost_kept
                        time:   [350.19 us 355.36 us 361.61 us]
Found 8 outliers among 100 measurements (8.00%)
  3 (3.00%) high mild
  5 (5.00%) high severe

retain_std box_u64 almost_drop
                        time:   [1.3071 ms 1.3337 ms 1.3697 ms]
Found 9 outliers among 100 measurements (9.00%)
  2 (2.00%) high mild
  7 (7.00%) high severe

retain_early_drop box_u64 almost_drop
                        time:   [1.0219 ms 1.0687 ms 1.1472 ms]
Found 10 outliers among 100 measurements (10.00%)
  3 (3.00%) high mild
  7 (7.00%) high severe

retain_std_black box_u64 almost_drop
                        time:   [1.3579 ms 1.3644 ms 1.3754 ms]
Found 11 outliers among 100 measurements (11.00%)
  1 (1.00%) low mild
  3 (3.00%) high mild
  7 (7.00%) high severe

retain_early_drop_black box_u64 almost_drop
                        time:   [1.1125 ms 1.1491 ms 1.1969 ms]
Found 5 outliers among 100 measurements (5.00%)
  1 (1.00%) high mild
  4 (4.00%) high severe

retain_std box_u64 drop_head
                        time:   [375.30 us 378.73 us 383.46 us]
Found 7 outliers among 100 measurements (7.00%)
  1 (1.00%) low mild
  2 (2.00%) high mild
  4 (4.00%) high severe

retain_early_drop box_u64 drop_head
                        time:   [338.84 us 346.42 us 357.24 us]
Found 6 outliers among 100 measurements (6.00%)
  2 (2.00%) high mild
  4 (4.00%) high severe

retain_std_black box_u64 drop_head
                        time:   [416.23 us 429.75 us 449.05 us]
Found 11 outliers among 100 measurements (11.00%)
  3 (3.00%) high mild
  8 (8.00%) high severe

retain_early_drop_black box_u64 drop_head
                        time:   [374.24 us 387.10 us 403.51 us]
Found 6 outliers among 100 measurements (6.00%)
  1 (1.00%) high mild
  5 (5.00%) high severe

retain_std box_u64 drop_tail
                        time:   [318.67 us 322.47 us 328.94 us]
Found 7 outliers among 100 measurements (7.00%)
  3 (3.00%) low mild
  1 (1.00%) high mild
  3 (3.00%) high severe

retain_early_drop box_u64 drop_tail
                        time:   [299.08 us 305.63 us 315.34 us]
Found 9 outliers among 100 measurements (9.00%)
  3 (3.00%) high mild
  6 (6.00%) high severe

retain_std_black box_u64 drop_tail
                        time:   [350.63 us 360.51 us 375.18 us]
Found 5 outliers among 100 measurements (5.00%)
  5 (5.00%) high severe

retain_early_drop_black box_u64 drop_tail
                        time:   [331.23 us 335.57 us 342.68 us]
Found 9 outliers among 100 measurements (9.00%)
  2 (2.00%) high mild
  7 (7.00%) high severe

retain_std 64byte almost_kept
                        time:   [784.09 us 792.22 us 802.28 us]
Found 14 outliers among 100 measurements (14.00%)
  3 (3.00%) high mild
  11 (11.00%) high severe

retain_early_drop 64byte almost_kept
                        time:   [668.95 us 677.15 us 686.92 us]
Found 13 outliers among 100 measurements (13.00%)
  2 (2.00%) high mild
  11 (11.00%) high severe

retain_std_black 64byte almost_kept
                        time:   [784.50 us 793.33 us 804.71 us]
Found 11 outliers among 100 measurements (11.00%)
  2 (2.00%) high mild
  9 (9.00%) high severe

retain_early_drop_black 64byte almost_kept
                        time:   [695.69 us 703.90 us 714.98 us]
Found 13 outliers among 100 measurements (13.00%)
  5 (5.00%) high mild
  8 (8.00%) high severe

retain_std 64byte almost_drop
                        time:   [457.75 us 465.60 us 475.24 us]
Found 11 outliers among 100 measurements (11.00%)
  11 (11.00%) high severe

retain_early_drop 64byte almost_drop
                        time:   [337.76 us 341.31 us 346.20 us]
Found 10 outliers among 100 measurements (10.00%)
  3 (3.00%) high mild
  7 (7.00%) high severe

retain_std_black 64byte almost_drop
                        time:   [493.88 us 501.65 us 511.77 us]
Found 8 outliers among 100 measurements (8.00%)
  1 (1.00%) high mild
  7 (7.00%) high severe

retain_early_drop_black 64byte almost_drop
                        time:   [455.96 us 472.64 us 490.19 us]
Found 2 outliers among 100 measurements (2.00%)
  2 (2.00%) high mild

retain_std 64byte drop_head
                        time:   [831.50 us 850.35 us 871.02 us]
Found 5 outliers among 100 measurements (5.00%)
  3 (3.00%) high mild
  2 (2.00%) high severe

retain_early_drop 64byte drop_head
                        time:   [689.15 us 696.21 us 704.81 us]
Found 15 outliers among 100 measurements (15.00%)
  2 (2.00%) high mild
  13 (13.00%) high severe

retain_std_black 64byte drop_head
                        time:   [777.10 us 787.47 us 800.61 us]
Found 17 outliers among 100 measurements (17.00%)
  4 (4.00%) high mild
  13 (13.00%) high severe

retain_early_drop_black 64byte drop_head
                        time:   [708.77 us 722.93 us 742.72 us]
Found 15 outliers among 100 measurements (15.00%)
  6 (6.00%) high mild
  9 (9.00%) high severe

retain_std 64byte drop_tail
                        time:   [266.76 us 273.20 us 281.11 us]
Found 13 outliers among 100 measurements (13.00%)
  3 (3.00%) high mild
  10 (10.00%) high severe

retain_early_drop 64byte drop_tail
                        time:   [267.17 us 271.99 us 278.05 us]
Found 7 outliers among 100 measurements (7.00%)
  1 (1.00%) high mild
  6 (6.00%) high severe

retain_std_black 64byte drop_tail
                        time:   [313.70 us 319.00 us 326.10 us]
Found 10 outliers among 100 measurements (10.00%)
  2 (2.00%) high mild
  8 (8.00%) high severe

retain_early_drop_black 64byte drop_tail
                        time:   [314.71 us 322.80 us 333.55 us]
Found 8 outliers among 100 measurements (8.00%)
  3 (3.00%) high mild
  5 (5.00%) high severe

## bench.rs
use criterion::{black_box, criterion_group, criterion_main, BatchSize, Criterion};
use testt::retain;

#[rustfmt::skip]
fn bench_retain(c: &mut Criterion) {
    const N: usize = 100_000;

    fn black_fn<T>(f: fn(&T) -> bool) -> fn(&T) -> bool {
        black_box(f)
    }

    macro_rules! bench {
        ($name:literal, $gen:expr, $f:expr) => {
            c.bench_function(concat!("retain_std ", $name), move |b| {
                b.iter_batched($gen, move |mut v| {
                    v.retain($f);
                    v
                }, BatchSize::NumBatches(1));
            });
            c.bench_function(concat!("retain_early_drop ", $name), move |b| {
                b.iter_batched($gen, move |mut v| {
                    retain(&mut v, $f);
                    v
                }, BatchSize::NumBatches(1));
            });
            c.bench_function(concat!("retain_std_black ", $name), move |b| {
                b.iter_batched($gen, move |mut v| {
                    v.retain(black_fn($f));
                    v
                }, BatchSize::NumBatches(1));
            });
            c.bench_function(concat!("retain_early_drop_black ", $name), move |b| {
                b.iter_batched($gen, move |mut v| {
                    retain(&mut v, black_fn($f));
                    v
                }, BatchSize::NumBatches(1));
            });
        };
    }

    let gen_u64_seq = || (0..N as u64).collect::<Vec<_>>();
    bench!("u64 almost_kept", gen_u64_seq, |x| *x & 7 != 0);
    bench!("u64 almost_drop", gen_u64_seq, |x| *x & 7 == 0);
    bench!("u64 drop_head", gen_u64_seq, |x| *x > 10000);
    bench!("u64 drop_tail", gen_u64_seq, |x| *x < N as u64 - 10000);

    let gen_boxed_u64 = || (0..N as u64).map(Box::new).collect::<Vec<_>>();
    bench!("box_u64 almost_kept", gen_boxed_u64, |x: &Box<u64>| **x & 7 != 0);
    bench!("box_u64 almost_drop", gen_boxed_u64, |x: &Box<u64>| **x & 7 == 0);
    bench!("box_u64 drop_head", gen_boxed_u64, |x: &Box<u64>| **x > 10000);
    bench!("box_u64 drop_tail", gen_boxed_u64, |x: &Box<u64>| **x < N as u64 - 10000);

    let gen_arr = || (0..N as u64).map(|x| [x; 8]).collect::<Vec<_>>();
    bench!("64byte almost_kept", gen_arr, |x: &[u64; 8]| x[0] & 7 != 0);
    bench!("64byte almost_drop", gen_arr, |x: &[u64; 8]| x[0] & 7 == 0);
    bench!("64byte drop_head", gen_arr, |x: &[u64; 8]| x[0] > 10000);
    bench!("64byte drop_tail", gen_arr, |x: &[u64; 8]| x[0] < N as u64 - 10000);
}

criterion_group!(benches, bench_retain);
criterion_main!(benches);

## lib.rs
use std::ptr;

pub fn retain<T, F>(self_: &mut Vec<T>, mut f: F)
where
    F: FnMut(&T) -> bool,
{
    let original_len = self_.len();
    // Avoid double drop if the drop guard is not executed,
    // since we may make some holes during the process.
    unsafe { self_.set_len(0) };

    // Vec: [Kept, Kept, Hole, Hole, Hole, Hole, Unchecked, Unchecked]
    //      |<-              processed len   ->| ^- next to check
    //                  |<-  deleted cnt     ->|
    //      |<-              original_len                          ->|
    // Kept: Elements which predicate returns true on.
    // Hole: Moved or dropped element slot.
    // Unchecked: Unchecked valid elements.
    //
    // This drop guard will be invoked when predicate or `drop` of element panicked.
    // It shifts unchecked elements to cover holes and `set_len` to the correct length.
    // In cases when predicate and `drop` never panick, it will be optimized out.
    struct BackshiftOnDrop<'a, T> {
        v: &'a mut Vec<T>,
        processed_len: usize,
        deleted_cnt: usize,
        original_len: usize,
    }

    impl<T> Drop for BackshiftOnDrop<'_, T> {
        fn drop(&mut self) {
            if self.deleted_cnt > 0 {
                // SAFETY: Trailing unchecked items must be valid since we never touch them.
                unsafe {
                    ptr::copy(
                        self.v.as_ptr().add(self.processed_len),
                        self.v
                            .as_mut_ptr()
                            .add(self.processed_len - self.deleted_cnt),
                        self.original_len - self.processed_len,
                    );
                }
            }
            // SAFETY: After filling holes, all items are in contiguous memory.
            unsafe {
                self.v.set_len(self.original_len - self.deleted_cnt);
            }
        }
    }

    let mut g = BackshiftOnDrop {
        v: self_,
        processed_len: 0,
        deleted_cnt: 0,
        original_len,
    };

    while g.processed_len < original_len {
        // SAFETY: Unchecked element must be valid.
        let cur = unsafe { &mut *g.v.as_mut_ptr().add(g.processed_len) };
        if !f(cur) {
            // Advance early to avoid double drop if `drop_in_place` panicked.
            g.processed_len += 1;
            g.deleted_cnt += 1;
            // SAFETY: We never touch this element again after dropped.
            unsafe { ptr::drop_in_place(cur) };
            // We already advanced the counter.
            continue;
        }
        if g.deleted_cnt > 0 {
            // SAFETY: `deleted_cnt` > 0, so the hole slot must not overlap with current element.
            // We use copy for move, and never touch this element again.
            unsafe {
                let hole_slot = g.v.as_mut_ptr().add(g.processed_len - g.deleted_cnt);
                ptr::copy_nonoverlapping(cur, hole_slot, 1);
            }
        }
        g.processed_len += 1;
    }

    // All item are processed. This can be optimized to `set_len` by LLVM.
    drop(g);
}
	retain_std u64 almost_kept
	time: [123.44 us 127.97 us 134.19 us]
	Found 14 outliers among 100 measurements (14.00%)
	2 (2.00%) low mild
	5 (5.00%) high mild
	7 (7.00%) high severe

	retain_early_drop u64 almost_kept
	time: [98.345 us 100.49 us 103.22 us]
	Found 15 outliers among 100 measurements (15.00%)
	1 (1.00%) low severe
	1 (1.00%) low mild
	3 (3.00%) high mild
	10 (10.00%) high severe

	retain_std_black u64 almost_kept
	time: [215.45 us 221.39 us 229.35 us]
	Found 16 outliers among 100 measurements (16.00%)
	7 (7.00%) high mild
	9 (9.00%) high severe

	retain_early_drop_black u64 almost_kept
	time: [211.25 us 215.49 us 221.38 us]
	Found 9 outliers among 100 measurements (9.00%)
	3 (3.00%) high mild
	6 (6.00%) high severe

	retain_std u64 almost_drop
	time: [109.32 us 113.41 us 118.71 us]
	Found 13 outliers among 100 measurements (13.00%)
	2 (2.00%) low severe
	3 (3.00%) high mild
	8 (8.00%) high severe

	retain_early_drop u64 almost_drop
	time: [54.413 us 56.216 us 58.747 us]
	Found 11 outliers among 100 measurements (11.00%)
	3 (3.00%) high mild
	8 (8.00%) high severe

	retain_std_black u64 almost_drop
	time: [201.42 us 204.76 us 210.15 us]
	Found 8 outliers among 100 measurements (8.00%)
	3 (3.00%) low mild
	1 (1.00%) high mild
	4 (4.00%) high severe

	retain_early_drop_black u64 almost_drop
	time: [208.82 us 212.38 us 216.96 us]
	Found 9 outliers among 100 measurements (9.00%)
	1 (1.00%) high mild
	8 (8.00%) high severe

	retain_std u64 drop_head
	time: [120.17 us 125.81 us 133.32 us]
	Found 16 outliers among 100 measurements (16.00%)
	6 (6.00%) high mild
	10 (10.00%) high severe

	retain_early_drop u64 drop_head
	time: [100.77 us 104.96 us 110.49 us]
	Found 22 outliers among 100 measurements (22.00%)
	3 (3.00%) low severe
	2 (2.00%) low mild
	2 (2.00%) high mild
	15 (15.00%) high severe

	retain_std_black u64 drop_head
	time: [217.34 us 223.07 us 229.90 us]
	Found 9 outliers among 100 measurements (9.00%)
	4 (4.00%) high mild
	5 (5.00%) high severe

	retain_early_drop_black u64 drop_head
	time: [229.91 us 235.38 us 242.05 us]
	Found 6 outliers among 100 measurements (6.00%)
	1 (1.00%) high mild
	5 (5.00%) high severe

	retain_std u64 drop_tail
	time: [108.65 us 111.25 us 115.37 us]
	Found 16 outliers among 100 measurements (16.00%)
	3 (3.00%) high mild
	13 (13.00%) high severe

	retain_early_drop u64 drop_tail
	time: [60.681 us 61.226 us 62.334 us]
	Found 11 outliers among 100 measurements (11.00%)
	2 (2.00%) high mild
	9 (9.00%) high severe

	retain_std_black u64 drop_tail
	time: [188.84 us 191.98 us 196.50 us]
	Found 16 outliers among 100 measurements (16.00%)
	5 (5.00%) high mild
	11 (11.00%) high severe

	retain_early_drop_black u64 drop_tail
	time: [191.12 us 193.76 us 197.41 us]
	Found 10 outliers among 100 measurements (10.00%)
	3 (3.00%) high mild
	7 (7.00%) high severe

	retain_std box_u64 almost_kept
	time: [522.42 us 536.95 us 556.43 us]
	Found 11 outliers among 100 measurements (11.00%)
	3 (3.00%) high mild
	8 (8.00%) high severe

	retain_early_drop box_u64 almost_kept
	time: [336.46 us 345.96 us 358.06 us]
	Found 9 outliers among 100 measurements (9.00%)
	4 (4.00%) high mild
	5 (5.00%) high severe

	retain_std_black box_u64 almost_kept
	time: [551.75 us 567.97 us 589.73 us]
	Found 11 outliers among 100 measurements (11.00%)
	5 (5.00%) high mild
	6 (6.00%) high severe

	retain_early_drop_black box_u64 almost_kept
	time: [350.19 us 355.36 us 361.61 us]
	Found 8 outliers among 100 measurements (8.00%)
	3 (3.00%) high mild
	5 (5.00%) high severe

	retain_std box_u64 almost_drop
	time: [1.3071 ms 1.3337 ms 1.3697 ms]
	Found 9 outliers among 100 measurements (9.00%)
	2 (2.00%) high mild
	7 (7.00%) high severe

	retain_early_drop box_u64 almost_drop
	time: [1.0219 ms 1.0687 ms 1.1472 ms]
	Found 10 outliers among 100 measurements (10.00%)
	3 (3.00%) high mild
	7 (7.00%) high severe

	retain_std_black box_u64 almost_drop
	time: [1.3579 ms 1.3644 ms 1.3754 ms]
	Found 11 outliers among 100 measurements (11.00%)
	1 (1.00%) low mild
	3 (3.00%) high mild
	7 (7.00%) high severe

	retain_early_drop_black box_u64 almost_drop
	time: [1.1125 ms 1.1491 ms 1.1969 ms]
	Found 5 outliers among 100 measurements (5.00%)
	1 (1.00%) high mild
	4 (4.00%) high severe

	retain_std box_u64 drop_head
	time: [375.30 us 378.73 us 383.46 us]
	Found 7 outliers among 100 measurements (7.00%)
	1 (1.00%) low mild
	2 (2.00%) high mild
	4 (4.00%) high severe

	retain_early_drop box_u64 drop_head
	time: [338.84 us 346.42 us 357.24 us]
	Found 6 outliers among 100 measurements (6.00%)
	2 (2.00%) high mild
	4 (4.00%) high severe

	retain_std_black box_u64 drop_head
	time: [416.23 us 429.75 us 449.05 us]
	Found 11 outliers among 100 measurements (11.00%)
	3 (3.00%) high mild
	8 (8.00%) high severe

	retain_early_drop_black box_u64 drop_head
	time: [374.24 us 387.10 us 403.51 us]
	Found 6 outliers among 100 measurements (6.00%)
	1 (1.00%) high mild
	5 (5.00%) high severe

	retain_std box_u64 drop_tail
	time: [318.67 us 322.47 us 328.94 us]
	Found 7 outliers among 100 measurements (7.00%)
	3 (3.00%) low mild
	1 (1.00%) high mild
	3 (3.00%) high severe

	retain_early_drop box_u64 drop_tail
	time: [299.08 us 305.63 us 315.34 us]
	Found 9 outliers among 100 measurements (9.00%)
	3 (3.00%) high mild
	6 (6.00%) high severe

	retain_std_black box_u64 drop_tail
	time: [350.63 us 360.51 us 375.18 us]
	Found 5 outliers among 100 measurements (5.00%)
	5 (5.00%) high severe

	retain_early_drop_black box_u64 drop_tail
	time: [331.23 us 335.57 us 342.68 us]
	Found 9 outliers among 100 measurements (9.00%)
	2 (2.00%) high mild
	7 (7.00%) high severe

	retain_std 64byte almost_kept
	time: [784.09 us 792.22 us 802.28 us]
	Found 14 outliers among 100 measurements (14.00%)
	3 (3.00%) high mild
	11 (11.00%) high severe

	retain_early_drop 64byte almost_kept
	time: [668.95 us 677.15 us 686.92 us]
	Found 13 outliers among 100 measurements (13.00%)
	2 (2.00%) high mild
	11 (11.00%) high severe

	retain_std_black 64byte almost_kept
	time: [784.50 us 793.33 us 804.71 us]
	Found 11 outliers among 100 measurements (11.00%)
	2 (2.00%) high mild
	9 (9.00%) high severe

	retain_early_drop_black 64byte almost_kept
	time: [695.69 us 703.90 us 714.98 us]
	Found 13 outliers among 100 measurements (13.00%)
	5 (5.00%) high mild
	8 (8.00%) high severe

	retain_std 64byte almost_drop
	time: [457.75 us 465.60 us 475.24 us]
	Found 11 outliers among 100 measurements (11.00%)
	11 (11.00%) high severe

	retain_early_drop 64byte almost_drop
	time: [337.76 us 341.31 us 346.20 us]
	Found 10 outliers among 100 measurements (10.00%)
	3 (3.00%) high mild
	7 (7.00%) high severe

	retain_std_black 64byte almost_drop
	time: [493.88 us 501.65 us 511.77 us]
	Found 8 outliers among 100 measurements (8.00%)
	1 (1.00%) high mild
	7 (7.00%) high severe

	retain_early_drop_black 64byte almost_drop
	time: [455.96 us 472.64 us 490.19 us]
	Found 2 outliers among 100 measurements (2.00%)
	2 (2.00%) high mild

	retain_std 64byte drop_head
	time: [831.50 us 850.35 us 871.02 us]
	Found 5 outliers among 100 measurements (5.00%)
	3 (3.00%) high mild
	2 (2.00%) high severe

	retain_early_drop 64byte drop_head
	time: [689.15 us 696.21 us 704.81 us]
	Found 15 outliers among 100 measurements (15.00%)
	2 (2.00%) high mild
	13 (13.00%) high severe

	retain_std_black 64byte drop_head
	time: [777.10 us 787.47 us 800.61 us]
	Found 17 outliers among 100 measurements (17.00%)
	4 (4.00%) high mild
	13 (13.00%) high severe

	retain_early_drop_black 64byte drop_head
	time: [708.77 us 722.93 us 742.72 us]
	Found 15 outliers among 100 measurements (15.00%)
	6 (6.00%) high mild
	9 (9.00%) high severe

	retain_std 64byte drop_tail
	time: [266.76 us 273.20 us 281.11 us]
	Found 13 outliers among 100 measurements (13.00%)
	3 (3.00%) high mild
	10 (10.00%) high severe

	retain_early_drop 64byte drop_tail
	time: [267.17 us 271.99 us 278.05 us]
	Found 7 outliers among 100 measurements (7.00%)
	1 (1.00%) high mild
	6 (6.00%) high severe

	retain_std_black 64byte drop_tail
	time: [313.70 us 319.00 us 326.10 us]
	Found 10 outliers among 100 measurements (10.00%)
	2 (2.00%) high mild
	8 (8.00%) high severe

	retain_early_drop_black 64byte drop_tail
	time: [314.71 us 322.80 us 333.55 us]
	Found 8 outliers among 100 measurements (8.00%)
	3 (3.00%) high mild
	5 (5.00%) high severe
	use criterion::{black_box, criterion_group, criterion_main, BatchSize, Criterion};
	use testt::retain;

	#[rustfmt::skip]
	fn bench_retain(c: &mut Criterion) {
	const N: usize = 100_000;

	fn black_fn<T>(f: fn(&T) -> bool) -> fn(&T) -> bool {
	black_box(f)
	}

	macro_rules! bench {
	($name:literal, $gen:expr, $f:expr) => {
	c.bench_function(concat!("retain_std ", $name), move \|b\| {
	b.iter_batched($gen, move \|mut v\| {
	v.retain($f);
	v
	}, BatchSize::NumBatches(1));
	});
	c.bench_function(concat!("retain_early_drop ", $name), move \|b\| {
	b.iter_batched($gen, move \|mut v\| {
	retain(&mut v, $f);
	v
	}, BatchSize::NumBatches(1));
	});
	c.bench_function(concat!("retain_std_black ", $name), move \|b\| {
	b.iter_batched($gen, move \|mut v\| {
	v.retain(black_fn($f));
	v
	}, BatchSize::NumBatches(1));
	});
	c.bench_function(concat!("retain_early_drop_black ", $name), move \|b\| {
	b.iter_batched($gen, move \|mut v\| {
	retain(&mut v, black_fn($f));
	v
	}, BatchSize::NumBatches(1));
	});
	};
	}

	let gen_u64_seq = \|\| (0..N as u64).collect::<Vec<_>>();
	bench!("u64 almost_kept", gen_u64_seq, \|x\| *x & 7 != 0);
	bench!("u64 almost_drop", gen_u64_seq, \|x\| *x & 7 == 0);
	bench!("u64 drop_head", gen_u64_seq, \|x\| *x > 10000);
	bench!("u64 drop_tail", gen_u64_seq, \|x\| *x < N as u64 - 10000);

	let gen_boxed_u64 = \|\| (0..N as u64).map(Box::new).collect::<Vec<_>>();
	bench!("box_u64 almost_kept", gen_boxed_u64, \|x: &Box<u64>\| **x & 7 != 0);
	bench!("box_u64 almost_drop", gen_boxed_u64, \|x: &Box<u64>\| **x & 7 == 0);
	bench!("box_u64 drop_head", gen_boxed_u64, \|x: &Box<u64>\| **x > 10000);
	bench!("box_u64 drop_tail", gen_boxed_u64, \|x: &Box<u64>\| **x < N as u64 - 10000);

	let gen_arr = \|\| (0..N as u64).map(\|x\| [x; 8]).collect::<Vec<_>>();
	bench!("64byte almost_kept", gen_arr, \|x: &[u64; 8]\| x[0] & 7 != 0);
	bench!("64byte almost_drop", gen_arr, \|x: &[u64; 8]\| x[0] & 7 == 0);
	bench!("64byte drop_head", gen_arr, \|x: &[u64; 8]\| x[0] > 10000);
	bench!("64byte drop_tail", gen_arr, \|x: &[u64; 8]\| x[0] < N as u64 - 10000);
	}

	criterion_group!(benches, bench_retain);
	criterion_main!(benches);
	use std::ptr;

	pub fn retain<T, F>(self_: &mut Vec<T>, mut f: F)
	where
	F: FnMut(&T) -> bool,
	{
	let original_len = self_.len();
	// Avoid double drop if the drop guard is not executed,
	// since we may make some holes during the process.
	unsafe { self_.set_len(0) };

	// Vec: [Kept, Kept, Hole, Hole, Hole, Hole, Unchecked, Unchecked]
	// \|<- processed len ->\| ^- next to check
	// \|<- deleted cnt ->\|
	// \|<- original_len ->\|
	// Kept: Elements which predicate returns true on.
	// Hole: Moved or dropped element slot.
	// Unchecked: Unchecked valid elements.
	//
	// This drop guard will be invoked when predicate or `drop` of element panicked.
	// It shifts unchecked elements to cover holes and `set_len` to the correct length.
	// In cases when predicate and `drop` never panick, it will be optimized out.
	struct BackshiftOnDrop<'a, T> {
	v: &'a mut Vec<T>,
	processed_len: usize,
	deleted_cnt: usize,
	original_len: usize,
	}

	impl<T> Drop for BackshiftOnDrop<'_, T> {
	fn drop(&mut self) {
	if self.deleted_cnt > 0 {
	// SAFETY: Trailing unchecked items must be valid since we never touch them.
	unsafe {
	ptr::copy(
	self.v.as_ptr().add(self.processed_len),
	self.v
	.as_mut_ptr()
	.add(self.processed_len - self.deleted_cnt),
	self.original_len - self.processed_len,
	);
	}
	}
	// SAFETY: After filling holes, all items are in contiguous memory.
	unsafe {
	self.v.set_len(self.original_len - self.deleted_cnt);
	}
	}
	}

	let mut g = BackshiftOnDrop {
	v: self_,
	processed_len: 0,
	deleted_cnt: 0,
	original_len,
	};

	while g.processed_len < original_len {
	// SAFETY: Unchecked element must be valid.
	let cur = unsafe { &mut *g.v.as_mut_ptr().add(g.processed_len) };
	if !f(cur) {
	// Advance early to avoid double drop if `drop_in_place` panicked.
	g.processed_len += 1;
	g.deleted_cnt += 1;
	// SAFETY: We never touch this element again after dropped.
	unsafe { ptr::drop_in_place(cur) };
	// We already advanced the counter.
	continue;
	}
	if g.deleted_cnt > 0 {
	// SAFETY: `deleted_cnt` > 0, so the hole slot must not overlap with current element.
	// We use copy for move, and never touch this element again.
	unsafe {
	let hole_slot = g.v.as_mut_ptr().add(g.processed_len - g.deleted_cnt);
	ptr::copy_nonoverlapping(cur, hole_slot, 1);
	}
	}
	g.processed_len += 1;
	}

	// All item are processed. This can be optimized to `set_len` by LLVM.
	drop(g);
	}