Last active
October 31, 2022 16:14
-
-
Save blueskythlikesclouds/fc50fb882dd5698510869e1a3ad9d84d to your computer and use it in GitHub Desktop.
Vectorization & unrolling test among compilers through Compiler Explorer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; -O3 -mavx2 -ffast-math | |
; VectorizationTest(a, b): evaluates, on all four float lanes at once,
;   2(a+b) - (a+b)^2/(a-b) + (a-b) - (a-b)^2/(a*b) + (a*b) - (a*b)^2/(a/b)
;   + (a/b) - (a/b)^2
; NOTE(review): register roles below assume the System V AMD64 convention
; (rdi = &a, rsi = &b, result in xmm0/xmm1) -- confirm against the build target.
; Every division is vrcpps (approximate reciprocal r of the divisor d) plus
; one refinement step: q0 = n*r, q = q0 + r*(n - d*q0).
VectorizationTest(Vector<float, 3> const&, Vector<float, 3> const&): # @VectorizationTest(Vector<float, 3> const&, Vector<float, 3> const&) | |
; xmm0 = a, xmm1 = b. The aligned 16-byte loads cover all four stored lanes.
vmovaps xmm0, xmmword ptr [rdi] | |
vmovaps xmm1, xmmword ptr [rsi] | |
; xmm2 = a / b (reciprocal estimate, then the refinement step).
vrcpps xmm2, xmm1 | |
vmulps xmm3, xmm0, xmm2 | |
vmulps xmm4, xmm1, xmm3 | |
vsubps xmm4, xmm0, xmm4 | |
vmulps xmm2, xmm2, xmm4 | |
vaddps xmm2, xmm3, xmm2 | |
; xmm8 = a + b, xmm4 = a - b.
vaddps xmm8, xmm1, xmm0 | |
vsubps xmm4, xmm0, xmm1 | |
; xmm3 = -((a+b)^2 / (a-b)). The residual is formed as d*q0 - n, so the
; refined quotient comes out negated and is later added instead of subtracted.
vrcpps xmm5, xmm4 | |
vmulps xmm6, xmm8, xmm8 | |
vmulps xmm7, xmm6, xmm5 | |
vmulps xmm3, xmm4, xmm7 | |
vsubps xmm3, xmm3, xmm6 | |
vmulps xmm3, xmm5, xmm3 | |
vsubps xmm3, xmm3, xmm7 | |
; xmm0 = a * b (a itself is no longer needed after this point).
vmulps xmm0, xmm1, xmm0 | |
; xmm1 = -((a-b)^2 / (a*b)), same negated-quotient scheme.
vrcpps xmm1, xmm0 | |
vmulps xmm5, xmm4, xmm4 | |
vmulps xmm6, xmm5, xmm1 | |
vmulps xmm7, xmm0, xmm6 | |
vsubps xmm5, xmm7, xmm5 | |
vmulps xmm1, xmm1, xmm5 | |
vsubps xmm1, xmm1, xmm6 | |
; xmm1 = sum of the two negated quotients computed so far.
vaddps xmm1, xmm1, xmm3 | |
; xmm3 = -((a*b)^2 / (a/b)).
vrcpps xmm3, xmm2 | |
vmulps xmm5, xmm0, xmm0 | |
vmulps xmm6, xmm5, xmm3 | |
vmulps xmm7, xmm2, xmm6 | |
vsubps xmm5, xmm7, xmm5 | |
vmulps xmm3, xmm3, xmm5 | |
vsubps xmm3, xmm3, xmm6 | |
; Combine: xmm0 = a*b + (a-b) + 2(a+b) + a/b - (a/b)^2, then add the three
; negated quotients held in xmm1 and xmm3.
vaddps xmm5, xmm8, xmm8 | |
vaddps xmm0, xmm0, xmm4 | |
vaddps xmm0, xmm0, xmm5 | |
vaddps xmm0, xmm2, xmm0 | |
vmulps xmm2, xmm2, xmm2 | |
vsubps xmm0, xmm0, xmm2 | |
vaddps xmm0, xmm0, xmm1 | |
vaddps xmm0, xmm3, xmm0 | |
; Copy the upper 64 bits of the result into the low half of xmm1, so lanes
; 0-1 are returned in xmm0 and lanes 2-3 in xmm1.
vpermilpd xmm1, xmm0, 1 # xmm1 = xmm0[1,0] | |
ret |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; -O3 -mavx2 -ffast-math | |
; VectorizationTest(a, b): evaluates, on all four float lanes at once,
;   2(a+b) - (a+b)^2/(a-b) + (a-b) - (a-b)^2/(a*b) + (a*b) - (a*b)^2/(a/b)
;   + (a/b) - (a/b)^2
; NOTE(review): register roles below assume the System V AMD64 convention
; (rdi = &a, rsi = &b, result in xmm0/xmm1) -- confirm against the build target.
; Each division n/d is done as: r = vrcpps(d); r' = 2r - r*r*d; n * r'.
; The refinement steps of independent divisions are interleaved with other
; arithmetic, so related instructions are not always adjacent.
VectorizationTest(Vector<float, 3> const&, Vector<float, 3> const&): | |
; xmm1 = b, xmm2 = a (aligned 16-byte loads, all four stored lanes).
vmovaps xmm1, XMMWORD PTR [rsi] | |
vmovaps xmm2, XMMWORD PTR [rdi] | |
; xmm0 ~ 1/b, xmm4 = a - b, xmm5 ~ 1/(a-b).
vrcpps xmm0, xmm1 | |
vsubps xmm4, xmm2, xmm1 | |
vrcpps xmm5, xmm4 | |
; Refine both reciprocals: xmm3/xmm6 hold r*r*d, xmm0/xmm5 hold 2r.
; xmm0 is finished by the vsubps below; xmm5 is finished a few lines later.
vmulps xmm3, xmm0, xmm1 | |
vmulps xmm6, xmm5, xmm4 | |
vmulps xmm3, xmm0, xmm3 | |
vaddps xmm0, xmm0, xmm0 | |
vmulps xmm6, xmm5, xmm6 | |
vaddps xmm5, xmm5, xmm5 | |
vsubps xmm0, xmm0, xmm3 | |
; xmm3 = a*b, xmm0 = a/b, xmm2 = a+b, xmm5 = refined 1/(a-b).
vmulps xmm3, xmm2, xmm1 | |
vmulps xmm0, xmm2, xmm0 | |
vaddps xmm2, xmm2, xmm1 | |
vsubps xmm5, xmm5, xmm6 | |
; xmm1 becomes the running result: 2(a+b) - (a+b)^2/(a-b).
vaddps xmm1, xmm2, xmm2 | |
vmulps xmm2, xmm2, xmm2 | |
vmulps xmm2, xmm2, xmm5 | |
vsubps xmm1, xmm1, xmm2 | |
; xmm2 ~ 1/(a*b) and its refinement; meanwhile xmm1 += (a-b) and
; xmm4 = (a-b)^2.
vrcpps xmm2, xmm3 | |
vaddps xmm1, xmm1, xmm4 | |
vmulps xmm4, xmm4, xmm4 | |
vmulps xmm5, xmm2, xmm3 | |
vmulps xmm5, xmm2, xmm5 | |
vaddps xmm2, xmm2, xmm2 | |
vsubps xmm2, xmm2, xmm5 | |
; xmm4 = (a-b)^2/(a*b), subtracted from xmm1 two instructions further down.
vmulps xmm4, xmm4, xmm2 | |
; xmm2 ~ 1/(a/b) and its refinement; meanwhile xmm1 -= xmm4, xmm1 += a*b and
; xmm3 = (a*b)^2.
vrcpps xmm2, xmm0 | |
vsubps xmm1, xmm1, xmm4 | |
vmulps xmm4, xmm2, xmm0 | |
vaddps xmm1, xmm1, xmm3 | |
vmulps xmm3, xmm3, xmm3 | |
vmulps xmm4, xmm2, xmm4 | |
vaddps xmm2, xmm2, xmm2 | |
vsubps xmm2, xmm2, xmm4 | |
; xmm1 -= (a*b)^2/(a/b); xmm1 += a/b; xmm7 = xmm1 - (a/b)^2 (final value).
vmulps xmm3, xmm3, xmm2 | |
vsubps xmm1, xmm1, xmm3 | |
vaddps xmm1, xmm1, xmm0 | |
vmulps xmm0, xmm0, xmm0 | |
vsubps xmm7, xmm1, xmm0 | |
; Split the 16-byte result into two 8-byte halves for the return: it is
; stored below rsp (rsp is never adjusted in this function), lanes 0-1 are
; reloaded into xmm0 and lanes 2-3 travel through rax into xmm1.
vmovaps XMMWORD PTR [rsp-24], xmm7 | |
mov rax, QWORD PTR [rsp-16] | |
vmovq xmm0, QWORD PTR [rsp-24] | |
vmovq xmm1, rax | |
ret |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; /O2 /arch:AVX2 /fp:fast | |
; VectorizationTest(a, b): evaluates
;   2(a+b) - (a+b)^2/(a-b) + (a-b) - (a-b)^2/(a*b) + (a*b) - (a*b)^2/(a/b)
;   + (a/b) - (a/b)^2
; Register roles as used by the code below (consistent with the Microsoft x64
; convention -- confirm against the build target):
;   rcx = address the result is written to (kept in rdi, returned in rax)
;   rdx = &a, r8 = &b (kept in r10)
;   eax = 0 for almost the whole body; it seeds every loop counter/offset
; Structure: every operator call appears as its own loop pair writing a
; 16-byte stack temporary ($Tn):
;   * a vector loop that adds 4 to its counter and exits once the counter is
;     no longer below 4, so it executes exactly once;
;   * a scalar tail loop that is jumped over by the preceding jae, because the
;     counter is already 4 when it is tested.
; Vector divisions use vrcpps plus one refinement step (r' = 2r - r*r*d)
; followed by a multiply; the never-entered scalar tails use vdivss.
Vector<float,3> VectorizationTest(Vector<float,3> const &,Vector<float,3> const &) PROC ; VectorizationTest, COMDAT | |
$LN407: | |
; Prologue: rbx and rdi are saved in the slots above the return address,
; rbp is pushed and 176 bytes of locals are reserved.
mov QWORD PTR [rsp+8], rbx | |
mov QWORD PTR [rsp+16], rdi | |
push rbp | |
lea rbp, QWORD PTR [rsp-87] | |
sub rsp, 176 ; 000000b0H | |
; Stack cookie: __security_cookie xor rsp is stored here and re-checked
; before returning.
mov rax, QWORD PTR __security_cookie | |
xor rax, rsp | |
mov QWORD PTR __$ArrayPad$[rbp-89], rax | |
; r10 = &b, r9 = &b - &a, rdi = result address, eax = 0.
; $T21 = a / b: rbx walks a, [r9+rbx] addresses b, [r11+rbx] addresses $T21.
mov r10, r8 | |
lea r11, QWORD PTR $T21[rbp-89] | |
xor eax, eax | |
mov r9, r10 | |
sub r9, rdx | |
mov rdi, rcx | |
sub r11, rdx | |
mov r8d, eax | |
mov rbx, rdx | |
$LL6@Vectorizat: | |
vrcpps xmm0, XMMWORD PTR [r9+rbx] | |
vaddps xmm2, xmm0, xmm0 | |
vmulps xmm0, xmm0, xmm0 | |
vfnmadd231ps xmm2, xmm0, XMMWORD PTR [r9+rbx] | |
vmulps xmm1, xmm2, XMMWORD PTR [rbx] | |
add r8d, 4 | |
vmovups XMMWORD PTR [r11+rbx], xmm1 | |
lea rbx, QWORD PTR [rbx+16] | |
cmp r8d, 4 | |
jb SHORT $LL6@Vectorizat | |
movsxd rcx, r8d | |
cmp rcx, 4 | |
jae SHORT $LN5@Vectorizat | |
lea rcx, QWORD PTR [rdx+rcx*4] | |
npad 7 | |
$LL220@Vectorizat: | |
vmovss xmm0, DWORD PTR [rcx] | |
vdivss xmm1, xmm0, DWORD PTR [rcx+r9] | |
inc r8d | |
vmovss DWORD PTR [rcx+r11], xmm1 | |
lea rcx, QWORD PTR [rcx+4] | |
cmp r8d, 4 | |
jb SHORT $LL220@Vectorizat | |
; $T8 = a / b
$LN5@Vectorizat: | |
lea r11, QWORD PTR $T8[rbp-89] | |
mov r8d, eax | |
sub r11, rdx | |
mov rcx, rdx | |
npad 6 | |
$LL11@Vectorizat: | |
vrcpps xmm0, XMMWORD PTR [r9+rcx] | |
vaddps xmm2, xmm0, xmm0 | |
vmulps xmm0, xmm0, xmm0 | |
vfnmadd231ps xmm2, xmm0, XMMWORD PTR [r9+rcx] | |
vmulps xmm1, xmm2, XMMWORD PTR [rcx] | |
add r8d, 4 | |
vmovups XMMWORD PTR [r11+rcx], xmm1 | |
lea rcx, QWORD PTR [rcx+16] | |
cmp r8d, 4 | |
jb SHORT $LL11@Vectorizat | |
movsxd rcx, r8d | |
cmp rcx, 4 | |
jae SHORT $LN10@Vectorizat | |
lea rcx, QWORD PTR [rdx+rcx*4] | |
npad 7 | |
$LL222@Vectorizat: | |
vmovss xmm0, DWORD PTR [rcx] | |
vdivss xmm1, xmm0, DWORD PTR [r9+rcx] | |
inc r8d | |
vmovss DWORD PTR [r11+rcx], xmm1 | |
lea rcx, QWORD PTR [rcx+4] | |
cmp r8d, 4 | |
jb SHORT $LL222@Vectorizat | |
; $T29 = $T21 * $T8 = (a/b)^2
$LN10@Vectorizat: | |
mov ecx, eax | |
mov r8, rax | |
$LL16@Vectorizat: | |
vmovups xmm1, XMMWORD PTR $T21[rbp+r8-89] | |
vmulps xmm1, xmm1, XMMWORD PTR $T8[rbp+r8-89] | |
add ecx, 4 | |
vmovups XMMWORD PTR $T29[rbp+r8-89], xmm1 | |
lea r8, QWORD PTR [r8+16] | |
cmp ecx, 4 | |
jb SHORT $LL16@Vectorizat | |
movsxd r8, ecx | |
cmp r8, 4 | |
jae SHORT $LN15@Vectorizat | |
lea r8, QWORD PTR [r8*4] | |
npad 12 | |
$LL224@Vectorizat: | |
vmovss xmm0, DWORD PTR $T21[rbp+r8-89] | |
vmulss xmm1, xmm0, DWORD PTR $T8[rbp+r8-89] | |
inc ecx | |
vmovss DWORD PTR $T29[rbp+r8-89], xmm1 | |
lea r8, QWORD PTR [r8+4] | |
cmp ecx, 4 | |
jb SHORT $LL224@Vectorizat | |
; $T28 = a / b
$LN15@Vectorizat: | |
lea r11, QWORD PTR $T28[rbp-89] | |
mov r8d, eax | |
sub r11, rdx | |
mov rcx, rdx | |
npad 3 | |
$LL21@Vectorizat: | |
vrcpps xmm0, XMMWORD PTR [r9+rcx] | |
vaddps xmm2, xmm0, xmm0 | |
vmulps xmm0, xmm0, xmm0 | |
vfnmadd231ps xmm2, xmm0, XMMWORD PTR [r9+rcx] | |
vmulps xmm1, xmm2, XMMWORD PTR [rcx] | |
add r8d, 4 | |
vmovups XMMWORD PTR [r11+rcx], xmm1 | |
lea rcx, QWORD PTR [rcx+16] | |
cmp r8d, 4 | |
jb SHORT $LL21@Vectorizat | |
movsxd rcx, r8d | |
cmp rcx, 4 | |
jae SHORT $LN20@Vectorizat | |
lea rcx, QWORD PTR [rdx+rcx*4] | |
npad 7 | |
$LL226@Vectorizat: | |
vmovss xmm0, DWORD PTR [rcx] | |
vdivss xmm1, xmm0, DWORD PTR [r9+rcx] | |
inc r8d | |
vmovss DWORD PTR [r11+rcx], xmm1 | |
lea rcx, QWORD PTR [rcx+4] | |
cmp r8d, 4 | |
jb SHORT $LL226@Vectorizat | |
; $T2 = a / b
$LN20@Vectorizat: | |
lea r11, QWORD PTR $T2[rbp-89] | |
mov r8d, eax | |
sub r11, rdx | |
mov rcx, rdx | |
npad 6 | |
$LL26@Vectorizat: | |
vrcpps xmm0, XMMWORD PTR [r9+rcx] | |
vaddps xmm2, xmm0, xmm0 | |
vmulps xmm0, xmm0, xmm0 | |
vfnmadd231ps xmm2, xmm0, XMMWORD PTR [r9+rcx] | |
vmulps xmm1, xmm2, XMMWORD PTR [rcx] | |
add r8d, 4 | |
vmovups XMMWORD PTR [r11+rcx], xmm1 | |
lea rcx, QWORD PTR [rcx+16] | |
cmp r8d, 4 | |
jb SHORT $LL26@Vectorizat | |
movsxd rcx, r8d | |
cmp rcx, 4 | |
jae SHORT $LN25@Vectorizat | |
lea rcx, QWORD PTR [rdx+rcx*4] | |
npad 7 | |
$LL228@Vectorizat: | |
vmovss xmm0, DWORD PTR [rcx] | |
vdivss xmm1, xmm0, DWORD PTR [r9+rcx] | |
inc r8d | |
vmovss DWORD PTR [r11+rcx], xmm1 | |
lea rcx, QWORD PTR [rcx+4] | |
cmp r8d, 4 | |
jb SHORT $LL228@Vectorizat | |
; From here on rdx = &a - &b, so with r8 walking b, [rdx+r8] addresses a and
; [r9+r8] addresses the destination temporary.
; $T24 = a * b
$LN25@Vectorizat: | |
sub rdx, r10 | |
lea r9, QWORD PTR $T24[rbp-89] | |
sub r9, r10 | |
mov ecx, eax | |
mov r8, r10 | |
npad 4 | |
$LL31@Vectorizat: | |
vmovups xmm1, XMMWORD PTR [rdx+r8] | |
vmulps xmm1, xmm1, XMMWORD PTR [r8] | |
add ecx, 4 | |
vmovups XMMWORD PTR [r9+r8], xmm1 | |
lea r8, QWORD PTR [r8+16] | |
cmp ecx, 4 | |
jb SHORT $LL31@Vectorizat | |
movsxd r8, ecx | |
cmp r8, 4 | |
jae SHORT $LN30@Vectorizat | |
lea r8, QWORD PTR [r10+r8*4] | |
npad 6 | |
$LL230@Vectorizat: | |
vmovss xmm0, DWORD PTR [rdx+r8] | |
vmulss xmm1, xmm0, DWORD PTR [r8] | |
inc ecx | |
vmovss DWORD PTR [r9+r8], xmm1 | |
lea r8, QWORD PTR [r8+4] | |
cmp ecx, 4 | |
jb SHORT $LL230@Vectorizat | |
; $T9 = a * b
$LN30@Vectorizat: | |
lea r9, QWORD PTR $T9[rbp-89] | |
mov ecx, eax | |
sub r9, r10 | |
mov r8, r10 | |
npad 8 | |
$LL36@Vectorizat: | |
vmovups xmm1, XMMWORD PTR [rdx+r8] | |
vmulps xmm1, xmm1, XMMWORD PTR [r8] | |
add ecx, 4 | |
vmovups XMMWORD PTR [r9+r8], xmm1 | |
lea r8, QWORD PTR [r8+16] | |
cmp ecx, 4 | |
jb SHORT $LL36@Vectorizat | |
movsxd r8, ecx | |
cmp r8, 4 | |
jae SHORT $LN35@Vectorizat | |
lea r8, QWORD PTR [r10+r8*4] | |
npad 6 | |
$LL232@Vectorizat: | |
vmovss xmm0, DWORD PTR [r8+rdx] | |
vmulss xmm1, xmm0, DWORD PTR [r8] | |
inc ecx | |
vmovss DWORD PTR [r8+r9], xmm1 | |
lea r8, QWORD PTR [r8+4] | |
cmp ecx, 4 | |
jb SHORT $LL232@Vectorizat | |
; $T13 = $T24 * $T9 = (a*b)^2
$LN35@Vectorizat: | |
mov ecx, eax | |
mov r8, rax | |
$LL41@Vectorizat: | |
vmovups xmm1, XMMWORD PTR $T24[rbp+r8-89] | |
vmulps xmm1, xmm1, XMMWORD PTR $T9[rbp+r8-89] | |
add ecx, 4 | |
vmovups XMMWORD PTR $T13[rbp+r8-89], xmm1 | |
lea r8, QWORD PTR [r8+16] | |
cmp ecx, 4 | |
jb SHORT $LL41@Vectorizat | |
movsxd r8, ecx | |
cmp r8, 4 | |
jae SHORT $LN40@Vectorizat | |
lea r8, QWORD PTR [r8*4] | |
npad 13 | |
$LL234@Vectorizat: | |
vmovss xmm0, DWORD PTR $T24[rbp+r8-89] | |
vmulss xmm1, xmm0, DWORD PTR $T9[rbp+r8-89] | |
inc ecx | |
vmovss DWORD PTR $T13[rbp+r8-89], xmm1 | |
lea r8, QWORD PTR [r8+4] | |
cmp ecx, 4 | |
jb SHORT $LL234@Vectorizat | |
; $T23 = $T13 / $T2 = (a*b)^2 / (a/b)
$LN40@Vectorizat: | |
mov r8d, eax | |
mov rcx, rax | |
npad 10 | |
$LL46@Vectorizat: | |
vmovups xmm1, XMMWORD PTR $T2[rbp+rcx-89] | |
vrcpps xmm0, xmm1 | |
vaddps xmm2, xmm0, xmm0 | |
vmulps xmm0, xmm0, xmm0 | |
vfnmadd231ps xmm2, xmm0, xmm1 | |
vmulps xmm1, xmm2, XMMWORD PTR $T13[rbp+rcx-89] | |
add r8d, 4 | |
vmovups XMMWORD PTR $T23[rbp+rcx-89], xmm1 | |
lea rcx, QWORD PTR [rcx+16] | |
cmp r8d, 4 | |
jb SHORT $LL46@Vectorizat | |
movsxd rcx, r8d | |
cmp rcx, 4 | |
jae SHORT $LN45@Vectorizat | |
lea rcx, QWORD PTR [rcx*4] | |
npad 14 | |
$LL236@Vectorizat: | |
vmovss xmm0, DWORD PTR $T13[rbp+rcx-89] | |
vdivss xmm1, xmm0, DWORD PTR $T2[rbp+rcx-89] | |
inc r8d | |
vmovss DWORD PTR $T23[rbp+rcx-89], xmm1 | |
lea rcx, QWORD PTR [rcx+4] | |
cmp r8d, 4 | |
jb SHORT $LL236@Vectorizat | |
; $T31 = a * b
$LN45@Vectorizat: | |
lea r9, QWORD PTR $T31[rbp-89] | |
mov ecx, eax | |
sub r9, r10 | |
mov r8, r10 | |
npad 5 | |
$LL51@Vectorizat: | |
vmovups xmm1, XMMWORD PTR [r8+rdx] | |
vmulps xmm1, xmm1, XMMWORD PTR [r8] | |
add ecx, 4 | |
vmovups XMMWORD PTR [r9+r8], xmm1 | |
lea r8, QWORD PTR [r8+16] | |
cmp ecx, 4 | |
jb SHORT $LL51@Vectorizat | |
movsxd r8, ecx | |
cmp r8, 4 | |
jae SHORT $LN50@Vectorizat | |
lea r8, QWORD PTR [r10+r8*4] | |
npad 6 | |
$LL238@Vectorizat: | |
vmovss xmm0, DWORD PTR [r8+rdx] | |
vmulss xmm1, xmm0, DWORD PTR [r8] | |
inc ecx | |
vmovss DWORD PTR [r8+r9], xmm1 | |
lea r8, QWORD PTR [r8+4] | |
cmp ecx, 4 | |
jb SHORT $LL238@Vectorizat | |
; $T17 = a * b
$LN50@Vectorizat: | |
lea r9, QWORD PTR $T17[rbp-89] | |
mov ecx, eax | |
sub r9, r10 | |
mov r8, r10 | |
npad 8 | |
$LL56@Vectorizat: | |
vmovups xmm1, XMMWORD PTR [r8+rdx] | |
vmulps xmm1, xmm1, XMMWORD PTR [r8] | |
add ecx, 4 | |
vmovups XMMWORD PTR [r9+r8], xmm1 | |
lea r8, QWORD PTR [r8+16] | |
cmp ecx, 4 | |
jb SHORT $LL56@Vectorizat | |
movsxd r8, ecx | |
cmp r8, 4 | |
jae SHORT $LN55@Vectorizat | |
lea r8, QWORD PTR [r10+r8*4] | |
npad 6 | |
$LL240@Vectorizat: | |
vmovss xmm0, DWORD PTR [rdx+r8] | |
vmulss xmm1, xmm0, DWORD PTR [r8] | |
inc ecx | |
vmovss DWORD PTR [r9+r8], xmm1 | |
lea r8, QWORD PTR [r8+4] | |
cmp ecx, 4 | |
jb SHORT $LL240@Vectorizat | |
; $T12 = a - b
$LN55@Vectorizat: | |
lea r9, QWORD PTR $T12[rbp-89] | |
mov ecx, eax | |
sub r9, r10 | |
mov r8, r10 | |
npad 8 | |
$LL61@Vectorizat: | |
vmovups xmm1, XMMWORD PTR [rdx+r8] | |
vsubps xmm1, xmm1, XMMWORD PTR [r8] | |
add ecx, 4 | |
vmovups XMMWORD PTR [r9+r8], xmm1 | |
lea r8, QWORD PTR [r8+16] | |
cmp ecx, 4 | |
jb SHORT $LL61@Vectorizat | |
movsxd r8, ecx | |
cmp r8, 4 | |
jae SHORT $LN60@Vectorizat | |
lea r8, QWORD PTR [r10+r8*4] | |
npad 6 | |
$LL242@Vectorizat: | |
vmovss xmm0, DWORD PTR [rdx+r8] | |
vsubss xmm1, xmm0, DWORD PTR [r8] | |
inc ecx | |
vmovss DWORD PTR [r9+r8], xmm1 | |
lea r8, QWORD PTR [r8+4] | |
cmp ecx, 4 | |
jb SHORT $LL242@Vectorizat | |
; $T4 = a - b
$LN60@Vectorizat: | |
lea r9, QWORD PTR $T4[rbp-89] | |
mov ecx, eax | |
sub r9, r10 | |
mov r8, r10 | |
npad 8 | |
$LL66@Vectorizat: | |
vmovups xmm1, XMMWORD PTR [rdx+r8] | |
vsubps xmm1, xmm1, XMMWORD PTR [r8] | |
add ecx, 4 | |
vmovups XMMWORD PTR [r9+r8], xmm1 | |
lea r8, QWORD PTR [r8+16] | |
cmp ecx, 4 | |
jb SHORT $LL66@Vectorizat | |
movsxd r8, ecx | |
cmp r8, 4 | |
jae SHORT $LN65@Vectorizat | |
lea r8, QWORD PTR [r10+r8*4] | |
npad 6 | |
$LL244@Vectorizat: | |
vmovss xmm0, DWORD PTR [rdx+r8] | |
vsubss xmm1, xmm0, DWORD PTR [r8] | |
inc ecx | |
vmovss DWORD PTR [r9+r8], xmm1 | |
lea r8, QWORD PTR [r8+4] | |
cmp ecx, 4 | |
jb SHORT $LL244@Vectorizat | |
; $T14 = $T12 * $T4 = (a-b)^2
$LN65@Vectorizat: | |
mov ecx, eax | |
mov r8, rax | |
$LL71@Vectorizat: | |
vmovups xmm1, XMMWORD PTR $T12[rbp+r8-89] | |
vmulps xmm1, xmm1, XMMWORD PTR $T4[rbp+r8-89] | |
add ecx, 4 | |
vmovups XMMWORD PTR $T14[rbp+r8-89], xmm1 | |
lea r8, QWORD PTR [r8+16] | |
cmp ecx, 4 | |
jb SHORT $LL71@Vectorizat | |
movsxd r8, ecx | |
cmp r8, 4 | |
jae SHORT $LN70@Vectorizat | |
lea r8, QWORD PTR [r8*4] | |
npad 13 | |
$LL246@Vectorizat: | |
vmovss xmm0, DWORD PTR $T12[rbp+r8-89] | |
vmulss xmm1, xmm0, DWORD PTR $T4[rbp+r8-89] | |
inc ecx | |
vmovss DWORD PTR $T14[rbp+r8-89], xmm1 | |
lea r8, QWORD PTR [r8+4] | |
cmp ecx, 4 | |
jb SHORT $LL246@Vectorizat | |
; $T11 = $T14 / $T17 = (a-b)^2 / (a*b)
$LN70@Vectorizat: | |
mov r8d, eax | |
mov rcx, rax | |
npad 10 | |
$LL76@Vectorizat: | |
vmovups xmm1, XMMWORD PTR $T17[rbp+rcx-89] | |
vrcpps xmm0, xmm1 | |
vaddps xmm2, xmm0, xmm0 | |
vmulps xmm0, xmm0, xmm0 | |
vfnmadd231ps xmm2, xmm0, xmm1 | |
vmulps xmm1, xmm2, XMMWORD PTR $T14[rbp+rcx-89] | |
add r8d, 4 | |
vmovups XMMWORD PTR $T11[rbp+rcx-89], xmm1 | |
lea rcx, QWORD PTR [rcx+16] | |
cmp r8d, 4 | |
jb SHORT $LL76@Vectorizat | |
movsxd rcx, r8d | |
cmp rcx, 4 | |
jae SHORT $LN75@Vectorizat | |
lea rcx, QWORD PTR [rcx*4] | |
npad 14 | |
$LL248@Vectorizat: | |
vmovss xmm0, DWORD PTR $T14[rbp+rcx-89] | |
vdivss xmm1, xmm0, DWORD PTR $T17[rbp+rcx-89] | |
inc r8d | |
vmovss DWORD PTR $T11[rbp+rcx-89], xmm1 | |
lea rcx, QWORD PTR [rcx+4] | |
cmp r8d, 4 | |
jb SHORT $LL248@Vectorizat | |
; $T30 = a - b
$LN75@Vectorizat: | |
lea r9, QWORD PTR $T30[rbp-89] | |
mov ecx, eax | |
sub r9, r10 | |
mov r8, r10 | |
npad 5 | |
$LL81@Vectorizat: | |
vmovups xmm1, XMMWORD PTR [rdx+r8] | |
vsubps xmm1, xmm1, XMMWORD PTR [r8] | |
add ecx, 4 | |
vmovups XMMWORD PTR [r9+r8], xmm1 | |
lea r8, QWORD PTR [r8+16] | |
cmp ecx, 4 | |
jb SHORT $LL81@Vectorizat | |
movsxd r8, ecx | |
cmp r8, 4 | |
jae SHORT $LN80@Vectorizat | |
lea r8, QWORD PTR [r10+r8*4] | |
npad 6 | |
$LL250@Vectorizat: | |
vmovss xmm0, DWORD PTR [rdx+r8] | |
vsubss xmm1, xmm0, DWORD PTR [r8] | |
inc ecx | |
vmovss DWORD PTR [r9+r8], xmm1 | |
lea r8, QWORD PTR [r8+4] | |
cmp ecx, 4 | |
jb SHORT $LL250@Vectorizat | |
; $T25 = a - b
$LN80@Vectorizat: | |
lea r9, QWORD PTR $T25[rbp-89] | |
mov ecx, eax | |
sub r9, r10 | |
mov r8, r10 | |
npad 8 | |
$LL86@Vectorizat: | |
vmovups xmm1, XMMWORD PTR [rdx+r8] | |
vsubps xmm1, xmm1, XMMWORD PTR [r8] | |
add ecx, 4 | |
vmovups XMMWORD PTR [r9+r8], xmm1 | |
lea r8, QWORD PTR [r8+16] | |
cmp ecx, 4 | |
jb SHORT $LL86@Vectorizat | |
movsxd r8, ecx | |
cmp r8, 4 | |
jae SHORT $LN85@Vectorizat | |
lea r8, QWORD PTR [r10+r8*4] | |
npad 6 | |
$LL252@Vectorizat: | |
vmovss xmm0, DWORD PTR [r8+rdx] | |
vsubss xmm1, xmm0, DWORD PTR [r8] | |
inc ecx | |
vmovss DWORD PTR [r9+r8], xmm1 | |
lea r8, QWORD PTR [r8+4] | |
cmp ecx, 4 | |
jb SHORT $LL252@Vectorizat | |
; $T6 = a + b
$LN85@Vectorizat: | |
lea r9, QWORD PTR $T6[rbp-89] | |
mov ecx, eax | |
sub r9, r10 | |
mov r8, r10 | |
npad 8 | |
$LL91@Vectorizat: | |
vmovups xmm1, XMMWORD PTR [r8+rdx] | |
vaddps xmm1, xmm1, XMMWORD PTR [r8] | |
add ecx, 4 | |
vmovups XMMWORD PTR [r9+r8], xmm1 | |
lea r8, QWORD PTR [r8+16] | |
cmp ecx, 4 | |
jb SHORT $LL91@Vectorizat | |
movsxd r8, ecx | |
cmp r8, 4 | |
jae SHORT $LN90@Vectorizat | |
lea r8, QWORD PTR [r10+r8*4] | |
npad 6 | |
$LL254@Vectorizat: | |
vmovss xmm0, DWORD PTR [r8+rdx] | |
vaddss xmm1, xmm0, DWORD PTR [r8] | |
inc ecx | |
vmovss DWORD PTR [r8+r9], xmm1 | |
lea r8, QWORD PTR [r8+4] | |
cmp ecx, 4 | |
jb SHORT $LL254@Vectorizat | |
; $T20 = a + b
$LN90@Vectorizat: | |
lea r9, QWORD PTR $T20[rbp-89] | |
mov ecx, eax | |
sub r9, r10 | |
mov r8, r10 | |
npad 8 | |
$LL96@Vectorizat: | |
vmovups xmm1, XMMWORD PTR [r8+rdx] | |
vaddps xmm1, xmm1, XMMWORD PTR [r8] | |
add ecx, 4 | |
vmovups XMMWORD PTR [r9+r8], xmm1 | |
lea r8, QWORD PTR [r8+16] | |
cmp ecx, 4 | |
jb SHORT $LL96@Vectorizat | |
movsxd r8, ecx | |
cmp r8, 4 | |
jae SHORT $LN95@Vectorizat | |
lea r8, QWORD PTR [r10+r8*4] | |
npad 6 | |
$LL256@Vectorizat: | |
vmovss xmm0, DWORD PTR [r8+rdx] | |
vaddss xmm1, xmm0, DWORD PTR [r8] | |
inc ecx | |
vmovss DWORD PTR [r8+r9], xmm1 | |
lea r8, QWORD PTR [r8+4] | |
cmp ecx, 4 | |
jb SHORT $LL256@Vectorizat | |
; $T15 = $T6 * $T20 = (a+b)^2
$LN95@Vectorizat: | |
mov ecx, eax | |
mov r8, rax | |
$LL101@Vectorizat: | |
vmovups xmm1, XMMWORD PTR $T6[rbp+r8-89] | |
vmulps xmm1, xmm1, XMMWORD PTR $T20[rbp+r8-89] | |
add ecx, 4 | |
vmovups XMMWORD PTR $T15[rbp+r8-89], xmm1 | |
lea r8, QWORD PTR [r8+16] | |
cmp ecx, 4 | |
jb SHORT $LL101@Vectorizat | |
movsxd r8, ecx | |
cmp r8, 4 | |
jae SHORT $LN100@Vectorizat | |
lea r8, QWORD PTR [r8*4] | |
npad 13 | |
$LL258@Vectorizat: | |
vmovss xmm0, DWORD PTR $T6[rbp+r8-89] | |
vmulss xmm1, xmm0, DWORD PTR $T20[rbp+r8-89] | |
inc ecx | |
vmovss DWORD PTR $T15[rbp+r8-89], xmm1 | |
lea r8, QWORD PTR [r8+4] | |
cmp ecx, 4 | |
jb SHORT $LL258@Vectorizat | |
; $T19 = $T15 / $T25 = (a+b)^2 / (a-b)
$LN100@Vectorizat: | |
mov r8d, eax | |
mov rcx, rax | |
npad 10 | |
$LL106@Vectorizat: | |
vmovups xmm1, XMMWORD PTR $T25[rbp+rcx-89] | |
vrcpps xmm0, xmm1 | |
vaddps xmm2, xmm0, xmm0 | |
vmulps xmm0, xmm0, xmm0 | |
vfnmadd231ps xmm2, xmm0, xmm1 | |
vmulps xmm1, xmm2, XMMWORD PTR $T15[rbp+rcx-89] | |
add r8d, 4 | |
vmovups XMMWORD PTR $T19[rbp+rcx-89], xmm1 | |
lea rcx, QWORD PTR [rcx+16] | |
cmp r8d, 4 | |
jb SHORT $LL106@Vectorizat | |
movsxd rcx, r8d | |
cmp rcx, 4 | |
jae SHORT $LN105@Vectorizat | |
lea rcx, QWORD PTR [rcx*4] | |
npad 14 | |
$LL260@Vectorizat: | |
vmovss xmm0, DWORD PTR $T15[rbp+rcx-89] | |
vdivss xmm1, xmm0, DWORD PTR $T25[rbp+rcx-89] | |
inc r8d | |
vmovss DWORD PTR $T19[rbp+rcx-89], xmm1 | |
lea rcx, QWORD PTR [rcx+4] | |
cmp r8d, 4 | |
jb SHORT $LL260@Vectorizat | |
; $T26 = a + b
$LN105@Vectorizat: | |
lea r9, QWORD PTR $T26[rbp-89] | |
mov ecx, eax | |
sub r9, r10 | |
mov r8, r10 | |
npad 5 | |
$LL111@Vectorizat: | |
vmovups xmm1, XMMWORD PTR [rdx+r8] | |
vaddps xmm1, xmm1, XMMWORD PTR [r8] | |
add ecx, 4 | |
vmovups XMMWORD PTR [r9+r8], xmm1 | |
lea r8, QWORD PTR [r8+16] | |
cmp ecx, 4 | |
jb SHORT $LL111@Vectorizat | |
movsxd r8, ecx | |
cmp r8, 4 | |
jae SHORT $LN110@Vectorizat | |
lea r8, QWORD PTR [r10+r8*4] | |
npad 6 | |
$LL262@Vectorizat: | |
vmovss xmm0, DWORD PTR [rdx+r8] | |
vaddss xmm1, xmm0, DWORD PTR [r8] | |
inc ecx | |
vmovss DWORD PTR [r9+r8], xmm1 | |
lea r8, QWORD PTR [r8+4] | |
cmp ecx, 4 | |
jb SHORT $LL262@Vectorizat | |
; $T7 = a + b
$LN110@Vectorizat: | |
lea r9, QWORD PTR $T7[rbp-89] | |
mov ecx, eax | |
sub r9, r10 | |
mov r8, r10 | |
npad 8 | |
$LL116@Vectorizat: | |
vmovups xmm1, XMMWORD PTR [rdx+r8] | |
vaddps xmm1, xmm1, XMMWORD PTR [r8] | |
add ecx, 4 | |
vmovups XMMWORD PTR [r9+r8], xmm1 | |
lea r8, QWORD PTR [r8+16] | |
cmp ecx, 4 | |
jb SHORT $LL116@Vectorizat | |
movsxd r8, ecx | |
cmp r8, 4 | |
jae SHORT $LN115@Vectorizat | |
lea r8, QWORD PTR [r10+r8*4] | |
npad 6 | |
$LL264@Vectorizat: | |
vmovss xmm0, DWORD PTR [rdx+r8] | |
vaddss xmm1, xmm0, DWORD PTR [r8] | |
inc ecx | |
vmovss DWORD PTR [r9+r8], xmm1 | |
lea r8, QWORD PTR [r8+4] | |
cmp ecx, 4 | |
jb SHORT $LL264@Vectorizat | |
; The remaining loops only combine stack temporaries; rdx is reused as the
; byte offset into them.
; $T16 = $T26 + $T7 = 2(a+b)
$LN115@Vectorizat: | |
mov ecx, eax | |
mov rdx, rax | |
$LL121@Vectorizat: | |
vmovups xmm1, XMMWORD PTR $T26[rbp+rdx-89] | |
vaddps xmm1, xmm1, XMMWORD PTR $T7[rbp+rdx-89] | |
add ecx, 4 | |
vmovups XMMWORD PTR $T16[rbp+rdx-89], xmm1 | |
lea rdx, QWORD PTR [rdx+16] | |
cmp ecx, 4 | |
jb SHORT $LL121@Vectorizat | |
movsxd rdx, ecx | |
cmp rdx, 4 | |
jae SHORT $LN120@Vectorizat | |
lea rdx, QWORD PTR [rdx*4] | |
$LL266@Vectorizat: | |
vmovss xmm0, DWORD PTR $T26[rbp+rdx-89] | |
vaddss xmm1, xmm0, DWORD PTR $T7[rbp+rdx-89] | |
inc ecx | |
vmovss DWORD PTR $T16[rbp+rdx-89], xmm1 | |
lea rdx, QWORD PTR [rdx+4] | |
cmp ecx, 4 | |
jb SHORT $LL266@Vectorizat | |
; $T5 = $T16 - $T19   (subtract (a+b)^2/(a-b))
$LN120@Vectorizat: | |
mov ecx, eax | |
mov rdx, rax | |
npad 14 | |
$LL126@Vectorizat: | |
vmovups xmm0, XMMWORD PTR $T16[rbp+rdx-89] | |
vsubps xmm1, xmm0, XMMWORD PTR $T19[rbp+rdx-89] | |
add ecx, 4 | |
vmovups XMMWORD PTR $T5[rbp+rdx-89], xmm1 | |
lea rdx, QWORD PTR [rdx+16] | |
cmp ecx, 4 | |
jb SHORT $LL126@Vectorizat | |
movsxd rdx, ecx | |
cmp rdx, 4 | |
jae SHORT $LN125@Vectorizat | |
lea rdx, QWORD PTR [rdx*4] | |
npad 1 | |
$LL268@Vectorizat: | |
vmovss xmm0, DWORD PTR $T16[rbp+rdx-89] | |
vsubss xmm1, xmm0, DWORD PTR $T19[rbp+rdx-89] | |
inc ecx | |
vmovss DWORD PTR $T5[rbp+rdx-89], xmm1 | |
lea rdx, QWORD PTR [rdx+4] | |
cmp ecx, 4 | |
jb SHORT $LL268@Vectorizat | |
; $T18 = $T30 + $T5   (add (a-b))
$LN125@Vectorizat: | |
mov ecx, eax | |
mov rdx, rax | |
npad 14 | |
$LL131@Vectorizat: | |
vmovups xmm1, XMMWORD PTR $T30[rbp+rdx-89] | |
vaddps xmm1, xmm1, XMMWORD PTR $T5[rbp+rdx-89] | |
add ecx, 4 | |
vmovups XMMWORD PTR $T18[rbp+rdx-89], xmm1 | |
lea rdx, QWORD PTR [rdx+16] | |
cmp ecx, 4 | |
jb SHORT $LL131@Vectorizat | |
movsxd rdx, ecx | |
cmp rdx, 4 | |
jae SHORT $LN130@Vectorizat | |
lea rdx, QWORD PTR [rdx*4] | |
npad 1 | |
$LL270@Vectorizat: | |
vmovss xmm0, DWORD PTR $T30[rbp+rdx-89] | |
vaddss xmm1, xmm0, DWORD PTR $T5[rbp+rdx-89] | |
inc ecx | |
vmovss DWORD PTR $T18[rbp+rdx-89], xmm1 | |
lea rdx, QWORD PTR [rdx+4] | |
cmp ecx, 4 | |
jb SHORT $LL270@Vectorizat | |
; $T3 = $T18 - $T11   (subtract (a-b)^2/(a*b))
$LN130@Vectorizat: | |
mov ecx, eax | |
mov rdx, rax | |
npad 14 | |
$LL136@Vectorizat: | |
vmovups xmm0, XMMWORD PTR $T18[rbp+rdx-89] | |
vsubps xmm1, xmm0, XMMWORD PTR $T11[rbp+rdx-89] | |
add ecx, 4 | |
vmovups XMMWORD PTR $T3[rbp+rdx-89], xmm1 | |
lea rdx, QWORD PTR [rdx+16] | |
cmp ecx, 4 | |
jb SHORT $LL136@Vectorizat | |
movsxd rdx, ecx | |
cmp rdx, 4 | |
jae SHORT $LN135@Vectorizat | |
lea rdx, QWORD PTR [rdx*4] | |
npad 1 | |
$LL272@Vectorizat: | |
vmovss xmm0, DWORD PTR $T18[rbp+rdx-89] | |
vsubss xmm1, xmm0, DWORD PTR $T11[rbp+rdx-89] | |
inc ecx | |
vmovss DWORD PTR $T3[rbp+rdx-89], xmm1 | |
lea rdx, QWORD PTR [rdx+4] | |
cmp ecx, 4 | |
jb SHORT $LL272@Vectorizat | |
; $T10 = $T31 + $T3   (add (a*b))
$LN135@Vectorizat: | |
mov ecx, eax | |
mov rdx, rax | |
npad 14 | |
$LL141@Vectorizat: | |
vmovups xmm1, XMMWORD PTR $T31[rbp+rdx-89] | |
vaddps xmm1, xmm1, XMMWORD PTR $T3[rbp+rdx-89] | |
add ecx, 4 | |
vmovups XMMWORD PTR $T10[rbp+rdx-89], xmm1 | |
lea rdx, QWORD PTR [rdx+16] | |
cmp ecx, 4 | |
jb SHORT $LL141@Vectorizat | |
movsxd rdx, ecx | |
cmp rdx, 4 | |
jae SHORT $LN140@Vectorizat | |
lea rdx, QWORD PTR [rdx*4] | |
npad 1 | |
$LL274@Vectorizat: | |
vmovss xmm0, DWORD PTR $T31[rbp+rdx-89] | |
vaddss xmm1, xmm0, DWORD PTR $T3[rbp+rdx-89] | |
inc ecx | |
vmovss DWORD PTR $T10[rbp+rdx-89], xmm1 | |
lea rdx, QWORD PTR [rdx+4] | |
cmp ecx, 4 | |
jb SHORT $LL274@Vectorizat | |
; $T1 = $T10 - $T23   (subtract (a*b)^2/(a/b))
$LN140@Vectorizat: | |
mov ecx, eax | |
mov rdx, rax | |
npad 14 | |
$LL146@Vectorizat: | |
vmovups xmm0, XMMWORD PTR $T10[rbp+rdx-89] | |
vsubps xmm1, xmm0, XMMWORD PTR $T23[rbp+rdx-89] | |
add ecx, 4 | |
vmovups XMMWORD PTR $T1[rbp+rdx-89], xmm1 | |
lea rdx, QWORD PTR [rdx+16] | |
cmp ecx, 4 | |
jb SHORT $LL146@Vectorizat | |
movsxd rdx, ecx | |
cmp rdx, 4 | |
jae SHORT $LN145@Vectorizat | |
lea rdx, QWORD PTR [rdx*4] | |
npad 1 | |
$LL276@Vectorizat: | |
vmovss xmm0, DWORD PTR $T10[rbp+rdx-89] | |
vsubss xmm1, xmm0, DWORD PTR $T23[rbp+rdx-89] | |
inc ecx | |
vmovss DWORD PTR $T1[rbp+rdx-89], xmm1 | |
lea rdx, QWORD PTR [rdx+4] | |
cmp ecx, 4 | |
jb SHORT $LL276@Vectorizat | |
; $T22 = $T28 + $T1   (add (a/b))
$LN145@Vectorizat: | |
mov ecx, eax | |
mov rdx, rax | |
npad 14 | |
$LL151@Vectorizat: | |
vmovups xmm1, XMMWORD PTR $T28[rbp+rdx-89] | |
vaddps xmm1, xmm1, XMMWORD PTR $T1[rbp+rdx-89] | |
add ecx, 4 | |
vmovups XMMWORD PTR $T22[rbp+rdx-89], xmm1 | |
lea rdx, QWORD PTR [rdx+16] | |
cmp ecx, 4 | |
jb SHORT $LL151@Vectorizat | |
movsxd rdx, ecx | |
cmp rdx, 4 | |
jae SHORT $LN150@Vectorizat | |
lea rdx, QWORD PTR [rdx*4] | |
npad 1 | |
$LL278@Vectorizat: | |
vmovss xmm0, DWORD PTR $T28[rbp+rdx-89] | |
vaddss xmm1, xmm0, DWORD PTR $T1[rbp+rdx-89] | |
inc ecx | |
vmovss DWORD PTR $T22[rbp+rdx-89], xmm1 | |
lea rdx, QWORD PTR [rdx+4] | |
cmp ecx, 4 | |
jb SHORT $LL278@Vectorizat | |
; $T27 = $T22 - $T29   (subtract (a/b)^2) -- the final value. This last loop
; counts in eax itself, since the zero it held is not needed afterwards.
$LN150@Vectorizat: | |
mov rcx, rax | |
$LL156@Vectorizat: | |
vmovups xmm0, XMMWORD PTR $T22[rbp+rcx-89] | |
vsubps xmm1, xmm0, XMMWORD PTR $T29[rbp+rcx-89] | |
add eax, 4 | |
vmovups XMMWORD PTR $T27[rbp+rcx-89], xmm1 | |
lea rcx, QWORD PTR [rcx+16] | |
cmp eax, 4 | |
jb SHORT $LL156@Vectorizat | |
movsxd rcx, eax | |
cmp rcx, 4 | |
jae SHORT $LN155@Vectorizat | |
lea rcx, QWORD PTR [rcx*4] | |
npad 1 | |
$LL280@Vectorizat: | |
vmovss xmm0, DWORD PTR $T22[rbp+rcx-89] | |
vsubss xmm1, xmm0, DWORD PTR $T29[rbp+rcx-89] | |
inc eax | |
vmovss DWORD PTR $T27[rbp+rcx-89], xmm1 | |
lea rcx, QWORD PTR [rcx+4] | |
cmp eax, 4 | |
jb SHORT $LL280@Vectorizat | |
; Epilogue: copy $T27 to the result address saved in rdi, return that address
; in rax, verify the stack cookie, then restore rbx, rdi, rsp and rbp.
$LN155@Vectorizat: | |
vmovups xmm0, XMMWORD PTR $T27[rbp-89] | |
vmovups XMMWORD PTR [rdi], xmm0 | |
mov rax, rdi | |
mov rcx, QWORD PTR __$ArrayPad$[rbp-89] | |
xor rcx, rsp | |
call __security_check_cookie | |
lea r11, QWORD PTR [rsp+176] | |
mov rbx, QWORD PTR [r11+16] | |
mov rdi, QWORD PTR [r11+24] | |
mov rsp, r11 | |
pop rbp | |
ret 0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <cmath> | |
#include <cstdio> | |
// Fixed-size numeric vector with element-wise arithmetic.
//
// Storage is padded up to a multiple of four lanes ((N + 3) & ~3), and every
// operator processes ALL stored lanes, padding included, so each loop has a
// compile-time trip count that is a multiple of four. Padding lanes are never
// initialised by this type; whatever they hold takes part in the arithmetic
// (including division), so callers should only read the first N lanes.
//
// Alignment is the payload size sizeof(T) * N rounded up to a multiple of 8.
// That expression is a valid alignment only when it yields a power of two
// (e.g. 8 or 16 for a 4-byte T with N <= 4); other combinations, such as a
// 4-byte T with N == 5 (24), are rejected by the compiler.
//
// For each operator `op` in + - * / the macro below generates:
//   Vector  operator op (const Vector&) const   element-wise, returns a new value
//   Vector& operator op=(const Vector&)         element-wise in place, returns *this
//   Vector  operator op (T) const               same scalar applied to every lane
//   Vector& operator op=(T)                     in place, returns *this
template<typename T, int N>
struct alignas((sizeof(T) * N + 7) & ~7) Vector
{
    T data[(N + 3) & ~3];

#define MAKE_VECTOR_OPERATOR(type) \
    Vector operator type(const Vector& other) const \
    { \
        Vector result; \
        for (std::size_t i = 0; i < sizeof(data) / sizeof(T); ++i) \
        { \
            result.data[i] = data[i] type other.data[i]; \
        } \
        return result; \
    } \
    \
    Vector& operator type##=(const Vector& other) \
    { \
        for (std::size_t i = 0; i < sizeof(data) / sizeof(T); ++i) \
        { \
            data[i] type##= other.data[i]; \
        } \
        return *this; \
    } \
    \
    Vector operator type(const T other) const \
    { \
        Vector result; \
        for (std::size_t i = 0; i < sizeof(data) / sizeof(T); ++i) \
        { \
            result.data[i] = data[i] type other; \
        } \
        return result; \
    } \
    \
    Vector& operator type##=(const T other) \
    { \
        for (std::size_t i = 0; i < sizeof(data) / sizeof(T); ++i) \
        { \
            data[i] type##= other; \
        } \
        return *this; \
    }

    MAKE_VECTOR_OPERATOR(+)
    MAKE_VECTOR_OPERATOR(-)
    MAKE_VECTOR_OPERATOR(*)
    MAKE_VECTOR_OPERATOR(/)

// The generator macro is an implementation detail; do not leak it to the
// rest of the translation unit.
#undef MAKE_VECTOR_OPERATOR
};
using Vector2 = Vector<float, 2>; | |
using Vector3 = Vector<float, 3>; | |
using Vector4 = Vector<float, 4>; | |
// Kernel whose compiled output is shown in the assembly listings.
//
// Because * and / bind tighter than + and -, the expression below is, per lane:
//   (a+b) + (a+b) - (a+b)*(a+b)/(a-b)
//         + (a-b) - (a-b)*(a-b)/(a*b)
//         + (a*b) - (a*b)*(a*b)/(a/b)
//         + (a/b) - (a/b)*(a/b)
// with the additive terms combined left to right. Every parenthesised group
// is a separate call to a Vector operator and therefore a separate temporary.
//
// NOTE(review): the repeated sub-expressions are presumably intentional, to
// show how each compiler copes with the redundancy -- keep the expression in
// this exact form when regenerating the listings.
Vector3 VectorizationTest(const Vector3& a, const Vector3& b) | |
{ | |
return | |
(a + b) + | |
(a + b) - | |
(a + b) * | |
(a + b) / | |
(a - b) + | |
(a - b) - | |
(a - b) * | |
(a - b) / | |
(a * b) + | |
(a * b) - | |
(a * b) * | |
(a * b) / | |
(a / b) + | |
(a / b) - | |
(a / b) * | |
(a / b); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment