Skip to content

Instantly share code, notes, and snippets.

@cormacrelf
Last active January 26, 2022 15:28
Show Gist options
  • Save cormacrelf/e5da002fd8855f82fa0cf1473206e027 to your computer and use it in GitHub Desktop.
Save cormacrelf/e5da002fd8855f82fa0cf1473206e027 to your computer and use it in GitHub Desktop.
loops

This is a deliberately trivial test, but the `try_for_each` desugaring is roughly 3.4x faster than the labeled-loop original at this aimless looping exercise on an Apple M1 (about 96 µs vs. 329 µs per iteration in the run below). However, if you put some non-trivial code in place of `black_box`, or you use a much more complex iterator, then you might get a more meaningful comparison. You could think of this as a benchmark template.

rustc +nightly --edition 2021 optimised.rs -O --test
./optimised --test --bench

running 2 tests
test bench_desugared ... bench:      95,768 ns/iter (+/- 361)
test bench_original  ... bench:     328,977 ns/iter (+/- 9,256)

test result: ok. 0 passed; 0 failed; 0 ignored; 2 measured; 0 filtered out; finished in 0.49s
#![feature(bench_black_box)]
#![feature(test)]
extern crate test;
use std::hint::black_box;
use test::Bencher;
use std::ops::ControlFlow::*;
/// Selects which labeled control-flow exit the benchmarked loops take.
///
/// Every exit requires `x` to have reached `MAX_X`; before that, both `original` and
/// `desugared` simply keep iterating.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum Scenario {
    /// Leave via `break 'outer` on the very last `(x, y)` pair.
    BreakOuter,
    /// Leave via `return` as soon as `x` reaches `MAX_X`.
    Return,
    /// Leave the `y` loop via `continue 'middle` as soon as `x` reaches `MAX_X`.
    ContinueMiddle,
}
/// Benchmarks `original`, calling it once per `Scenario` variant on every iteration.
#[bench]
fn bench_original(b: &mut Bencher) {
    b.iter(|| {
        // Each scenario goes through `black_box` so the argument stays opaque to the optimiser.
        for scenario in [
            Scenario::BreakOuter,
            Scenario::Return,
            Scenario::ContinueMiddle,
        ] {
            original(black_box(scenario));
        }
    });
}
/// Benchmarks `desugared`, calling it once per `Scenario` variant on every iteration.
#[bench]
fn bench_desugared(b: &mut Bencher) {
    b.iter(|| {
        // Each scenario goes through `black_box` so the argument stays opaque to the optimiser.
        for scenario in [
            Scenario::BreakOuter,
            Scenario::Return,
            Scenario::ContinueMiddle,
        ] {
            desugared(black_box(scenario));
        }
    });
}
/// Inclusive upper bound of the `x` (middle loop) range in `original` and `desugared`.
const MAX_X: i32 = 100;
/// Inclusive upper bound of the `y` (innermost loop) range in `original` and `desugared`.
const MAX_Y: i32 = 1000;
/// Reference implementation: nested labeled loops that exit through `break 'outer`,
/// `return`, or `continue 'middle`, depending on `scenario`.
///
/// Every `(x, y)` pair is fed to `black_box` before any exit condition is evaluated,
/// and no exit can fire until `x` has reached `MAX_X`.
#[inline(never)]
pub fn original(scenario: Scenario) {
    'outer: loop {
        'middle: for x in 1..=MAX_X {
            for y in 1..=MAX_Y {
                black_box((x, y));

                // All three exits are gated on the final `x`; earlier rows just keep going.
                if x != MAX_X {
                    continue;
                }

                match scenario {
                    // Only the very last pair breaks out; other `y` values fall through.
                    Scenario::BreakOuter if y == MAX_Y => break 'outer,
                    Scenario::BreakOuter => {}
                    Scenario::Return => return,
                    Scenario::ContinueMiddle => continue 'middle,
                }
            }
        }
        // The `loop` only provides a target for `break 'outer`; it never runs a second pass.
        break;
    }
}
/// The same traversal as `original`, with both `for` loops hand-desugared into nested
/// `try_for_each` calls.
///
/// A closure cannot `break`/`continue` a label outside itself or `return` from the enclosing
/// function, so each exit is encoded as a `ControlFlow` value and decoded one level up:
///
/// * `return`           -> `Break(Break(0))`, turned back into `return` in the function body;
/// * `break 'outer`     -> `Break(Break(1))`, turned back into `break 'outer` in the function body;
/// * `continue 'middle` -> `Break(Continue(2))`, turned into `Continue(())` by the `x` closure
///   so that the `x` iteration carries on.
///
/// The commented-out lines show the statement of `original` that each piece replaces.
#[inline(never)]
pub fn desugared(scenario: Scenario) {
    'outer: loop {
        // 'middle: for x in 1..=MAX_X {
        let control_flow = (1..=MAX_X).try_for_each(|x| {
            // for y in 1..=MAX_Y {
            let control_flow = (1..=MAX_Y).try_for_each(|y| {
                black_box((x, y));
                if scenario == Scenario::BreakOuter && x == MAX_X && y == MAX_Y {
                    // break 'outer;
                    return Break(Break(1));
                } else if scenario == Scenario::Return && x == MAX_X {
                    // return;
                    return Break(Break(0));
                } else if scenario == Scenario::ContinueMiddle && x == MAX_X {
                    // continue 'middle;
                    return Break(Continue(2));
                }
                Continue(())
            });
            // `continue 'middle` has already stopped the `y` loop; the `x` loop must go on.
            // Everything else (including a normal finish) is passed up unchanged.
            match control_flow {
                Break(Continue(2)) => Continue(()),
                _ => control_flow,
            }
        });
        // Decode the exits that have to act on the function body itself.
        match control_flow {
            Break(Break(0)) => return,
            Break(Break(1)) => break 'outer,
            _ => {}
        }
        // The `loop` only provides a target for `break 'outer`; it never runs a second pass.
        break;
    }
}
; AArch64 assembly for `original` and `desugared`.
; note: std::hint::black_box((x, y)) is rendered as an empty InlineAsm block
; `original` compiled for AArch64. w0 carries `scenario`; the comparisons below treat it as
; 0 = BreakOuter, 1 = Return, 2 = ContinueMiddle.
original:
sub sp, sp, #16
; w9 = next x, starting at 1; x8 = address of the stack slot that receives (x, y).
mov w9, #1
add x8, sp, #8
; ---- per-x setup: w10 = current x ----
LBB9_1:
; w14 = "y range finished" flag, cleared for every new x.
mov w14, #0
mov x10, x9
; Scenario tests, redone for every x: w11 = BreakOuter, w13 = ContinueMiddle, w12 = Return.
ands w9, w0, #0xff
cset w11, eq
cmp w9, #2
cset w13, eq
cmp w9, #1
cset w12, eq
; w15 = (x == 100); w9 = x + 1 unless x is already 100.
cmp w10, #100
cset w15, eq
cinc w9, w10, ne
; Each scenario flag stays set only on the last x.
and w11, w11, w15
and w12, w12, w15
and w13, w13, w15
; y = 1
mov w15, #1
; ---- inner (y) loop body ----
LBB9_2:
; Leave the y loop once the finished flag is set.
tst w14, #0xff
b.ne LBB9_5
cmp w15, #1000
cset w14, eq
; Store (x, y) in the stack slot ahead of the black_box marker.
stp w10, w15, [sp, #8]
; InlineAsm Start
; InlineAsm End
; Leave the function if (BreakOuter && last x && y == 1000) || (Return && last x).
and w14, w11, w14
orr w14, w14, w12
cmp w14, #1
b.eq LBB9_7
; Advance y (it stops at 1000) and record whether this was the final y.
cmp w15, #1000
cset w14, eq
cinc w15, w15, ne
; Keep looping over y unless ContinueMiddle applies on the last x.
tbz w13, #0, LBB9_2
; ---- end of one x iteration ----
LBB9_5:
; Finished once x == 100 has been processed; otherwise go on to the next x.
cmp w10, #100
b.eq LBB9_7
cmp w9, #100
b.le LBB9_1
; ---- epilogue ----
LBB9_7:
add sp, sp, #16
ret
; `desugared` compiled for AArch64. The scenario (w0) is only tested in the final
; x == 100 pass; the loops for smaller x contain no scenario checks.
desugared:
sub sp, sp, #16
; w10 = x, starting at 1; w9 = 1000 (the last y).
mov w10, #1
add x8, sp, #8
mov w9, #1000
; ---- x loop for the x values before 100 ----
LBB10_1:
; y = 1
mov w11, #1
; y loop: store (x, y) and pass the black_box marker for the y values before 1000.
LBB10_2:
stp w10, w11, [sp, #8]
add w12, w11, #1
; InlineAsm Start
; InlineAsm End
mov x11, x12
cmp w12, #1000
b.ne LBB10_2
; The final y is emitted separately: store (x, 1000) and pass the marker once more.
add w11, w10, #1
stp w10, w9, [sp, #8]
; InlineAsm Start
; InlineAsm End
mov x10, x11
cmp w11, #100
b.ne LBB10_1
; ---- last pass: x == 100 (w9), y in w8 starting at 1 ----
mov w8, #1
mov w9, #100
add x10, sp, #8
LBB10_5:
stp w9, w8, [sp, #8]
; InlineAsm Start
; InlineAsm End
; A scenario with either low bit set (1 = Return, 2 = ContinueMiddle) exits right here.
tst w0, #0x3
b.ne LBB10_8
add w8, w8, #1
cmp w8, #1000
b.ne LBB10_5
; Scenario 0 (BreakOuter): the last pair (100, 1000) is stored as one 64-bit constant.
mov x8, #100
movk x8, #1000, lsl #32
str x8, [sp, #8]
add x8, sp, #8
; InlineAsm Start
; InlineAsm End
; ---- epilogue ----
LBB10_8:
add sp, sp, #16
ret
; x86-64 assembly for `original` and `desugared`.
; note: std::hint::black_box((x, y)) is rendered as an empty InlineAsm block
## `original` compiled for x86-64. dil carries `scenario`; the comparisons below treat it as
## 0 = BreakOuter, 1 = Return, 2 = ContinueMiddle.
original:
push rbp
mov rbp, rsp
push rbx
push rax
## r8d = next x, starting at 1; r10 = address of the stack slot that receives (x, y).
mov r8d, 1
lea r10, [rbp - 16]
## ---- per-x setup: edx = current x ----
LBB9_4:
mov edx, r8d
## Scenario tests, redone for every x: r9b = ContinueMiddle, r11b = Return, cl = BreakOuter.
cmp dil, 2
sete r9b
cmp dil, 1
sete r11b
test dil, dil
sete cl
## al = (x == 100); r8d = x + 1 unless x is already 100.
lea r8d, [rdx + 1]
cmp edx, 100
sete al
cmove r8d, edx
## Each scenario flag stays set only on the last x.
and cl, al
and r11b, al
and r9b, al
## al = "y range finished" flag (cleared); esi = y = 1.
xor eax, eax
mov esi, 1
## ---- inner (y) loop body ----
LBB9_5:
## Leave the y loop once the finished flag is set.
test al, al
jne LBB9_2
cmp esi, 1000
sete al
## Store (x, y) in the stack slot ahead of the black_box marker.
mov dword ptr [rbp - 16], edx
mov dword ptr [rbp - 12], esi
## InlineAsm Start
## InlineAsm End
## Leave the function if (BreakOuter && last x && y == 1000) || (Return && last x).
and al, cl
or al, r11b
cmp al, 1
je LBB9_7
## Advance y (it stops at 1000) and record whether this was the final y.
lea ebx, [rsi + 1]
cmp esi, 1000
sete al
cmove ebx, esi
mov esi, ebx
## Keep looping over y unless ContinueMiddle applies on the last x.
test r9b, r9b
je LBB9_5
## ---- end of one x iteration ----
LBB9_2:
## Finished once x == 100 has been processed; otherwise go on to the next x.
cmp edx, 100
je LBB9_7
cmp r8d, 100
jle LBB9_4
## ---- epilogue ----
LBB9_7:
add rsp, 8
pop rbx
pop rbp
ret
## `desugared` compiled for x86-64. The scenario (dil) is only tested in the final
## x == 100 pass; the loops for smaller x contain no scenario checks.
desugared:
push rbp
mov rbp, rsp
push rax
## ecx = x, starting at 1; rax = address of the stack slot that receives (x, y).
mov ecx, 1
lea rax, [rbp - 8]
## ---- x loop for the x values before 100 ----
LBB10_1:
## y = 1
mov edx, 1
## y loop: store (x, y) and pass the black_box marker for the y values before 1000.
LBB10_2:
mov dword ptr [rbp - 8], ecx
mov dword ptr [rbp - 4], edx
inc edx
## InlineAsm Start
## InlineAsm End
cmp edx, 1000
jne LBB10_2
## The final y is emitted separately: store (x, 1000) and pass the marker once more.
mov dword ptr [rbp - 8], ecx
inc ecx
mov dword ptr [rbp - 4], 1000
## InlineAsm Start
## InlineAsm End
cmp ecx, 100
jne LBB10_1
## ---- last pass: x == 100, y in ecx starting at 1 ----
mov ecx, 1
LBB10_5:
mov dword ptr [rbp - 8], 100
mov dword ptr [rbp - 4], ecx
## InlineAsm Start
## InlineAsm End
## Any nonzero scenario (1 = Return, 2 = ContinueMiddle) exits right here.
test dil, dil
jne LBB10_8
inc ecx
cmp ecx, 1000
jne LBB10_5
## Scenario 0 (BreakOuter): the last pair is stored as one 64-bit constant,
## 4294967296100 = (1000 << 32) | 100, i.e. (x, y) = (100, 1000).
movabs rcx, 4294967296100
mov qword ptr [rbp - 8], rcx
## InlineAsm Start
## InlineAsm End
## ---- epilogue ----
LBB10_8:
add rsp, 8
pop rbp
ret
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment