Skip to content

Instantly share code, notes, and snippets.

@KirillLykov
Created February 22, 2023 18:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save KirillLykov/506596d1d777e11b3771930fc79dda0d to your computer and use it in GitHub Desktop.
Save KirillLykov/506596d1d777e11b3771930fc79dda0d to your computer and use it in GitHub Desktop.
use criterion::{criterion_group, criterion_main, Criterion, BenchmarkId};
use itertools::Itertools;
use rand::Rng;
/// Element type of the input data and of the three running sums in `sum_1` / `sum_2`.
type T = u64;
fn sum_1(data: &Vec<T>) -> (T, T, T)
{
let mut sum: (T, T, T) = (T::default(), T::default(), T::default());
for (x,y,z) in data.iter().tuples() {
sum.0 += x;
sum.1 += y;
sum.2 += z;
}
sum
}
fn sum_2(data: &Vec<T>) -> (T, T, T) {
let mut sum: (T, T, T) = (T::default(), T::default(), T::default());
let mut chunks = data.chunks(3);
while let Some([x,y, z]) = chunks.next() {
sum.0 += x;
sum.1 += y;
sum.2 += z;
}
sum
}
/// Criterion benchmark comparing the two triple-summing implementations.
///
/// For every input size, a vector of random values drawn from `0..10000` is
/// built once and then both `sum_1` and `sum_2` are timed on that same
/// vector inside the "Sum" benchmark group.
fn bench_fibs(c: &mut Criterion) {
    let mut rng = rand::thread_rng();
    let mut group = c.benchmark_group("Sum");

    // The step equals the width of the range, so the only size produced is 100_000.
    let sizes = (100_000..200_000).step_by(100_000);
    for n in sizes {
        let vals = (0..n)
            .map(|_| rng.gen_range(0..10000))
            .collect::<Vec<u64>>();

        let tuple_id = BenchmarkId::new(" using tuple", n);
        group.bench_with_input(tuple_id, &vals, |bencher, input| {
            bencher.iter(|| sum_1(input))
        });

        let chunk_id = BenchmarkId::new(" using chunk(3)", n);
        group.bench_with_input(chunk_id, &vals, |bencher, input| {
            bencher.iter(|| sum_2(input))
        });
    }

    group.finish();
}
// Collect `bench_fibs` into a criterion group named `benches`, then let
// criterion generate the benchmark binary's entry point for that group.
criterion_group!(benches, bench_fibs);
criterion_main!(benches);
@KirillLykov
Copy link
Author

Benchmarking Sum/ using tuple/100000: Collecting 100 samples in Sum/ using tuple/100000 time:   [21.238 µs 21.249 µs 21.261 µs]
Found 6 outliers among 100 measurements (6.00%)
  1 (1.00%) low mild
  1 (1.00%) high mild
  4 (4.00%) high severe
Benchmarking Sum/ using chunk(3)/100000: Collecting 100 samples Sum/ using chunk(3)/100000
                        time:   [9.1937 µs 9.2042 µs 9.2170 µs]
Found 5 outliers among 100 measurements (5.00%)
  3 (3.00%) high mild
  2 (2.00%) high severe

@KirillLykov
Copy link
Author

section __TEXT,__text,regular,pure_instructions
	.build_version macos, 11, 0
	; Compiled AArch64 (macOS) code for sum_1: a scalar loop that handles one
	; (x, y, z) triple per iteration. The input header is read through x0 and
	; the three sums are written through x8.
	.globl	try_bml::sum_1
	.p2align	2
try_bml::sum_1:
Lfunc_begin0:
	.cfi_startproc
	; Prologue: reserve 32 bytes of stack, save frame pointer and link register.
	sub sp, sp, #32
	.cfi_def_cfa_offset 32
	stp x29, x30, [sp, #16]
	add x29, sp, #16
	.cfi_def_cfa w29, 16
	.cfi_offset w30, -8
	.cfi_offset w29, -16
	; x9 = current byte offset into the data; x10, x11, x12 = the three running sums.
	mov x9, #0
	mov x10, #0
	mov x11, #0
	mov x12, #0
	; Zero the 24-byte result at [x8].
	stp xzr, xzr, [x8]
	str xzr, [x8, #16]
	; x13 = word at [x0], used below as the data base address.
	; x14 = word at [x0, #16], used below as the element count.
	ldr x13, [x0]
	ldr x14, [x0, #16]
	; The base address is also stored to the stack; nothing in this listing reads it back.
	str x13, [sp, #8]
	; x14 = count * 8 = end byte offset.
	; x15 / x16 = end offset minus one / two elements.
	lsl x14, x14, #3
	sub x15, x14, #8
	sub x16, x14, #16
	; Empty input: skip the loop entirely.
	cmp x14, x9
	b.eq LBB0_4
LBB0_1:
	; Leave the loop if exactly one or two elements remain (no complete triple).
	cmp x15, x9
	b.eq LBB0_4
	cmp x16, x9
	b.eq LBB0_4
	; Load the three elements at the current offset and add each to its sum.
	; The updated sums are written back to [x8] on every iteration.
	add x17, x13, x9
	ldr x0, [x13, x9]
	add x10, x10, x0
	ldp x0, x17, [x17, #8]
	add x11, x11, x0
	stp x10, x11, [x8]
	add x12, x12, x17
	str x12, [x8, #16]
	; Advance by one triple (3 * 8 bytes) and repeat until the end offset is reached.
	add x9, x9, #24
	cmp x14, x9
	b.ne LBB0_1
LBB0_4:
	; Epilogue: restore frame pointer and link register, release the stack, return.
	.cfi_def_cfa wsp, 32
	ldp x29, x30, [sp, #16]
	add sp, sp, #32
	.cfi_def_cfa_offset 0
	.cfi_restore w30
	.cfi_restore w29
	ret

@KirillLykov
Copy link
Author

	; Compiled AArch64 (macOS) code for sum_2: a NEON loop that handles four
	; triples (96 bytes) per iteration, followed by a scalar loop for the
	; remaining triples. The input header is read through x0 and the three
	; sums are written through x8.
	.globl	try_bml::sum_2
	.p2align	2
try_bml::sum_2:
Lfunc_begin1:
	.cfi_startproc
	; Prologue: push frame pointer and link register.
	stp x29, x30, [sp, #-16]!
	.cfi_def_cfa_offset 16
	mov x29, sp
	.cfi_def_cfa w29, 16
	.cfi_offset w30, -8
	.cfi_offset w29, -16
	; Zero the 24-byte result at [x8].
	stp xzr, xzr, [x8]
	str xzr, [x8, #16]
	; x9 = word at [x0, #16], used below as the element count.
	ldr x9, [x0, #16]
	; Fewer than 3 elements: return with the zeroed result.
	subs x10, x9, #3
	b.lo LBB1_8
	; x15 = word at [x0], used below as the data base address.
	ldr x15, [x0]
	; Take the vector path only when count - 3 > 8, i.e. at least four triples.
	cmp x10, #8
	b.hi LBB1_3
	; Small input: clear the scalar sums (x16, x12, x11), point x10 at the
	; start of the data and go straight to the scalar loop.
	mov x16, #0
	mov x12, #0
	mov x11, #0
	mov x10, x15
	b LBB1_6
LBB1_3:
	; x11 = 0xAAAAAAAAAAAAAAAB; the high half of the product shifted right by
	; one is an unsigned divide by 3, so x10 = (count - 3) / 3.
	mov x11, #-6148914691236517206
	movk x11, #43691
	umulh x10, x10, x11
	lsr x10, x10, #1
	; x13 = number of complete triples; x14 = that number rounded down to a multiple of 4.
	add x13, x10, #1
	and x14, x13, #0x7ffffffffffffffc
	; x10 = address just past the vectorized part (base + x14 * 24 bytes).
	mov w10, #24
	madd x10, x14, x10, x15
	; x9 = count - 3 * x14 = elements left for the scalar loop.
	sub x11, x14, x14, lsl #2
	add x9, x9, x11
	; Clear six vector accumulators (two per column); x11 = triples left to vectorize.
	movi.2d v0, #0000000000000000
	mov x11, x14
	movi.2d v1, #0000000000000000
	movi.2d v2, #0000000000000000
	movi.2d v3, #0000000000000000
	movi.2d v4, #0000000000000000
	movi.2d v5, #0000000000000000
LBB1_4:
	; Each ld3 de-interleaves two triples into three 2-lane registers (one per
	; column). Two ld3 loads cover four triples, accumulated into v0/v1 (first
	; column), v2/v3 (second column) and v4/v5 (third column).
	mov x12, x15
	add x15, x15, #96
	ld3.2d { v16, v17, v18 }, [x12], #48
	add.2d v0, v0, v16
	ld3.2d { v19, v20, v21 }, [x12]
	add.2d v1, v1, v19
	add.2d v2, v2, v17
	add.2d v3, v3, v20
	add.2d v4, v4, v18
	add.2d v5, v5, v21
	subs x11, x11, #4
	b.ne LBB1_4
	; Reduce each accumulator pair and its two lanes to a scalar:
	; x11 = third-column sum, x12 = second-column sum, x16 = first-column sum.
	add.2d v4, v5, v4
	addp.2d d4, v4
	fmov x11, d4
	add.2d v2, v3, v2
	addp.2d d2, v2
	fmov x12, d2
	add.2d v0, v1, v0
	addp.2d d0, v0
	fmov x16, d0
	; If the triple count was already a multiple of 4 there is no scalar tail.
	cmp x13, x14
	b.eq LBB1_7
LBB1_6:
	; Scalar loop: add one triple per iteration while at least 3 elements remain.
	sub x9, x9, #3
	ldp x13, x14, [x10]
	add x16, x16, x13
	add x12, x12, x14
	ldr x13, [x10, #16]
	add x11, x11, x13
	add x10, x10, #24
	cmp x9, #3
	b.hs LBB1_6
LBB1_7:
	; Write the three sums to the result at [x8].
	stp x16, x12, [x8]
	str x11, [x8, #16]
LBB1_8:
	; Epilogue: pop frame pointer and link register, return.
	.cfi_def_cfa wsp, 16
	ldp x29, x30, [sp], #16
	.cfi_def_cfa_offset 0
	.cfi_restore w30
	.cfi_restore w29
	ret

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment