Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save CensoredUsername/d9667fe33e162b18af06001f8eb2c73a to your computer and use it in GitHub Desktop.
Save CensoredUsername/d9667fe33e162b18af06001f8eb2c73a to your computer and use it in GitHub Desktop.
timings
; excerpt from loop_notrash as it got inlined in here
; Walks a counter in rdx upward from 0 while it is <= 1_000_000 (signed compare).
; Whenever bit 5 (20h) of the counter is clear, control goes back through
; loc_140001054, at which point rax == previous rcx + 2*counter; counters with
; bit 5 set leave the snapshot in rcx unchanged.
; NOTE(review): the code at loc_140001080 is outside this excerpt, so which
; register is consumed after the loop is not visible here.
loc_140001050:
xor esi, esi ; rsi = 0: counter value the next inner run starts from
xor eax, eax ; rax = 0: running value
loc_140001054: ; outer header: reached on entry and after every counter whose tested bit was clear
mov rcx, rax ; snapshot the running value
lea rax, [rcx+rsi*2-2] ; rax = rcx + 2*rsi - 2; the -2 is added back by the first "add rax, 2" below
mov rdx, rsi ; rdx = current counter
nop ; padding; the label that follows sits at an address ending in 60h
loc_140001060: ; inner loop
cmp rdx, 0F4240h ; 1_000_000
jg short loc_140001080 ; signed: leave once the counter exceeds 1_000_000
lea rsi, [rdx+1] ; rsi = counter + 1
add rax, 2
test dl, 20h ; bit 5 of the counter, before it is advanced
mov rdx, rsi ; advance the counter; mov leaves the flags from test intact
jnz short loc_140001060 ; bit set: stay in the inner loop
jmp short loc_140001054 ; bit clear: go back through the outer header
loc_140001080:
; end of excerpt
; rust_iter_notrash
; In:    rcx = inclusive upper bound for the counter
;        NOTE(review): presumably the first integer argument under the
;        Microsoft x64 convention - confirm against the caller.
; Out:   rax = snapshot taken at the last pass through loc_140001009
; Clobb: rax, rcx, rdx, r8, r9, flags
; Walks a counter in rdx upward from 0 while it is < rcx+1 (signed compare).
; Whenever bit 5 (20h) of the counter is clear, control goes back through
; loc_140001009, at which point r9 == previous rax + 2*counter; counters with
; bit 5 set leave the snapshot in rax unchanged.
rust_iter_notrash:
inc rcx ; turn the inclusive bound into an exclusive one
xor r8d, r8d ; r8 = 0: counter value the next inner run starts from
xor r9d, r9d ; r9 = 0: running value
loc_140001009: ; outer header: reached on entry and after every counter whose tested bit was clear
mov rax, r9 ; snapshot the running value; this is what rax holds at retn
lea r9, [rax+r8*2-2] ; r9 = rax + 2*r8 - 2; the -2 is added back by the first "add r9, 2" below
mov rdx, r8 ; rdx = current counter
db 66h, 66h, 2Eh ; prefix bytes belonging to the nop on the next line
nop word ptr [rax+rax+00000000h] ; padding only; the label that follows sits at an address ending in 20h
loc_140001020: ; inner loop
cmp rdx, rcx
jge short locret_140001037 ; signed: leave once the counter reaches bound + 1
lea r8, [rdx+1] ; r8 = counter + 1
add r9, 2
test dl, 20h ; bit 5 of the counter, before it is advanced
mov rdx, r8 ; advance the counter; mov leaves the flags from test intact
jnz short loc_140001020 ; bit set: stay in the inner loop
jmp short loc_140001009 ; bit clear: go back through the outer header
locret_140001037:
retn
; interesting thing here: the compiler tried REALLY hard to align the second label there on an alignment boundary.
; the weird garbage at line 10/11 is actually one instruction, a 12-byte noop constructed using multiple redundant prefixes
; excerpt from loop_notrash as it got inlined in here
; Walks a counter in rdx upward from 0 while it is <= 1_000_000 (signed compare).
; Whenever bit 5 (20h) of the counter is clear, control goes back through
; loc_140001034, at which point rax == previous rcx + 2*counter; counters with
; bit 5 set leave the snapshot in rcx unchanged.
; NOTE(review): the code at loc_140001060 is outside this excerpt, so which
; register is consumed after the loop is not visible here.
xor esi, esi ; rsi = 0: counter value the next inner run starts from
xor eax, eax ; rax = 0: running value
loc_140001034: ; outer header: reached on entry and after every counter whose tested bit was clear
mov rcx, rax ; snapshot the running value
lea rax, [rcx+rsi*2-2] ; rax = rcx + 2*rsi - 2; the -2 is added back by the first "add rax, 2" below
mov rdx, rsi ; rdx = current counter
nop ; padding; the label that follows sits at an address ending in 40h
loc_140001040: ; inner loop
cmp rdx, 0F4240h ; 1_000_000
jg short loc_140001060 ; signed: leave once the counter exceeds 1_000_000
lea rsi, [rdx+1] ; rsi = counter + 1
add rax, 2
test dl, 20h ; bit 5 of the counter, before it is advanced
mov rdx, rsi ; advance the counter; mov leaves the flags from test intact
jnz short loc_140001040 ; bit set: stay in the inner loop
jmp short loc_140001034 ; bit clear: go back through the outer header
loc_140001060:
; end of excerpt
; excerpt from loop_trash as it got inlined in here
; Walks a counter in rdx upward from 0 while it is <= 1_000_000 (signed compare).
; Whenever bit 0 of the counter is clear (even counter), control goes back through
; loc_140001124, at which point rax == previous rcx + 2*counter; odd counters
; leave the snapshot in rcx unchanged.
; NOTE(review): the code at loc_140001150 is outside this excerpt, so which
; register is consumed after the loop is not visible here.
loc_140001120:
xor esi, esi ; rsi = 0: counter value the next inner run starts from
xor eax, eax ; rax = 0: running value
loc_140001124: ; outer header: reached on entry and after every even counter
mov rcx, rax ; snapshot the running value
lea rax, [rcx+rsi*2-2] ; rax = rcx + 2*rsi - 2; the -2 is added back by the first "add rax, 2" below
mov rdx, rsi ; rdx = current counter
nop ; padding; the label that follows sits at an address ending in 30h
loc_140001130: ; inner loop
cmp rdx, 0F4240h ; 1_000_000
jg short loc_140001150 ; signed: leave once the counter exceeds 1_000_000
lea rsi, [rdx+1] ; rsi = counter + 1
add rax, 2
test dl, 1 ; bit 0 of the counter, before it is advanced
mov rdx, rsi ; advance the counter; mov leaves the flags from test intact
jnz short loc_140001130 ; odd counter: stay in the inner loop
jmp short loc_140001124 ; even counter: go back through the outer header
loc_140001150:
; end of excerpt
; so, this is slow for some reason. very slow even. AND IT IS EXACTLY THE SAME. I have no idea what happened here.
; rust_iter_trash
; In:    rcx = inclusive upper bound for the counter
;        NOTE(review): presumably the first integer argument under the
;        Microsoft x64 convention - confirm against the caller.
; Out:   rax = snapshot taken at the last pass through loc_140001049
; Clobb: rax, rcx, rdx, r8, r9, flags
; Walks a counter in rdx upward from 0 while it is < rcx+1 (signed compare).
; Whenever bit 0 of the counter is clear (even counter), control goes back through
; loc_140001049, at which point r9 == previous rax + 2*counter; odd counters
; leave the snapshot in rax unchanged.
rust_iter_trash:
inc rcx ; turn the inclusive bound into an exclusive one
xor r8d, r8d ; r8 = 0: counter value the next inner run starts from
xor r9d, r9d ; r9 = 0: running value
loc_140001049: ; outer header: reached on entry and after every even counter
mov rax, r9 ; snapshot the running value; this is what rax holds at retn
lea r9, [rax+r8*2-2] ; r9 = rax + 2*r8 - 2; the -2 is added back by the first "add r9, 2" below
mov rdx, r8 ; rdx = current counter
db 66h, 66h, 2Eh ; prefix bytes belonging to the nop on the next line
nop word ptr [rax+rax+00000000h] ; padding only; the label that follows sits at an address ending in 60h
loc_140001060: ; inner loop
cmp rdx, rcx
jge short locret_140001077 ; signed: leave once the counter reaches bound + 1
lea r8, [rdx+1] ; r8 = counter + 1
add r9, 2
test dl, 1 ; bit 0 of the counter, before it is advanced
mov rdx, r8 ; advance the counter; mov leaves the flags from test intact
jnz short loc_140001060 ; odd counter: stay in the inner loop
jmp short loc_140001049 ; even counter: go back through the outer header
locret_140001077:
retn
; interesting thing here: the compiler tried REALLY hard to align the second label there on an alignment boundary.
; the weird garbage at line 10/11 is actually one instruction, a 12-byte noop constructed using multiple redundant prefixes
; excerpt from loop_trash as it got inlined in here
; Walks a counter in rdx upward from 0 while it is <= 1_000_000 (signed compare).
; Whenever bit 0 of the counter is clear (even counter), control goes back through
; loc_1400010F4, at which point rax == previous rcx + 2*counter; odd counters
; leave the snapshot in rcx unchanged.
; NOTE(review): the code at loc_140001120 is outside this excerpt, so which
; register is consumed after the loop is not visible here.
xor esi, esi ; rsi = 0: counter value the next inner run starts from
xor eax, eax ; rax = 0: running value
loc_1400010F4: ; outer header: reached on entry and after every even counter
mov rcx, rax ; snapshot the running value
lea rax, [rcx+rsi*2-2] ; rax = rcx + 2*rsi - 2; the -2 is added back by the first "add rax, 2" below
mov rdx, rsi ; rdx = current counter
nop ; padding; the label that follows sits at an address ending in 00h
loc_140001100: ; inner loop
cmp rdx, 0F4240h ; 1_000_000
jg short loc_140001120 ; signed: leave once the counter exceeds 1_000_000
lea rsi, [rdx+1] ; rsi = counter + 1
add rax, 2
test dl, 1 ; bit 0 of the counter, before it is advanced
mov rdx, rsi ; advance the counter; mov leaves the flags from test intact
jnz short loc_140001100 ; odd counter: stay in the inner loop
jmp short loc_1400010F4 ; even counter: go back through the outer header
loc_140001120:
; end of excerpt
; so, this is fast for some reason. significantly faster. AND IT IS EXACTLY THE SAME. I have no idea what happened here.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment