Skip to content

Instantly share code, notes, and snippets.

@bitshifter
Created December 5, 2017 00:35
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save bitshifter/71f0c5f3146db43a223ce08cdd0247c2 to your computer and use it in GitHub Desktop.
benchmarking f32 vs f32x4
# bench_f32 loop:
#
# Inner loop of the scalar (f32) benchmark, AT&T syntax, AVX-encoded.
# Each iteration consumes 24 bytes at %rdi, read as six consecutive f32
# values a0,a1,a2 (offsets 0,4,8) and b0,b1,b2 (offsets 12,16,20), and adds
#     (a0*b0 + a1*b1) + a2*b2
# to the running sum in lane 0 of %xmm0.
# NOTE(review): this looks like a dot product of two packed 3-float vectors
# stored back to back -- confirm against the benchmark source.
#
# Registers as used in the visible code:
#   %rdi  = current read pointer, advanced by 24 per iteration
#   %rbx  = end pointer; the loop exits when %rdi == %rbx
#   %xmm0 = loop-carried accumulator (its initialisation is outside this view)
#   %xmm1, %xmm2, %xmm3 = per-iteration temporaries
.LBB23_30:
# xmm1[0] = a0; the memory form of vmovss zeroes the upper lanes.
vmovss (%rdi), %xmm1
# xmm2[0..1] = a1, a2 -- one 64-bit load fetches two adjacent floats; the
# upper two lanes are zeroed.
vmovsd 4(%rdi), %xmm2
# xmm1[0] = a0 * b0
vmulss 12(%rdi), %xmm1, %xmm1
# xmm3[0..1] = b1, b2 (upper lanes zeroed)
vmovsd 16(%rdi), %xmm3
# Packed multiply: xmm2[0] = a1*b1, xmm2[1] = a2*b2. The upper lanes hold
# 0*0 and are never read.
vmulps %xmm3, %xmm2, %xmm2
# xmm1[0] = a0*b0 + a1*b1
vaddss %xmm2, %xmm1, %xmm1
# Copy odd lanes onto even lanes so that a2*b2 lands in lane 0.
vmovshdup %xmm2, %xmm2
# xmm1[0] = (a0*b0 + a1*b1) + a2*b2
vaddss %xmm2, %xmm1, %xmm1
# Fold this iteration's result into the accumulator. This add depends on the
# previous iteration's %xmm0, forming a serial dependency chain.
vaddss %xmm1, %xmm0, %xmm0
# Step to the next 24-byte record and loop until the end pointer is reached.
addq $24, %rdi
cmpq %rbx, %rdi
jne .LBB23_30
# bench_f32_sse loop:
#
# Inner loop of the SSE (f32x4) benchmark, AT&T syntax, AVX-encoded,
# unrolled 8x. Each iteration consumes 256 bytes at %rdx as eight pairs of
# 16-byte vectors (pair k at offsets 32*k and 32*k+16). For every pair it
# computes a dot product with vdpps and adds the scalar result to the running
# sum in lane 0 of %xmm0.
#
# vdpps immediate $113 = 0x71:
#   high nibble 0111 -> only lanes 0, 1 and 2 are multiplied and summed
#                       (lane 3 is ignored),
#   low nibble  0001 -> the sum is written to lane 0 and the other lanes are
#                       zeroed.
# NOTE(review): lane 3 being masked out suggests 3-component vectors padded
# to 16 bytes -- confirm against the benchmark source.
#
# Registers as used in the visible code:
#   %rdx  = current read pointer, advanced by 256 per iteration; the vmovaps
#           loads fault unless it is 16-byte aligned
#   %rsi  = end pointer; the loop exits when %rdx == %rsi
#   %xmm0 = loop-carried accumulator (its initialisation is outside this view)
#   %xmm1-%xmm4 = per-pair temporaries
#
# The eight vaddss instructions all target %xmm0 and therefore execute as one
# serial chain, in pair order 0..7; the loads and vdpps of different pairs are
# independent of each other.
.LBB24_11:
# Pair 0: xmm1[0] = dot3([rdx+0], [rdx+16])
vmovaps (%rdx), %xmm1
vdpps $113, 16(%rdx), %xmm1, %xmm1
# Pair 1: xmm2[0] = dot3([rdx+32], [rdx+48])
vmovaps 32(%rdx), %xmm2
vdpps $113, 48(%rdx), %xmm2, %xmm2
# Pair 2: load the first operand; the load for pair 3 is issued early, ahead
# of pair 2's vdpps.
vmovaps 64(%rdx), %xmm3
vmovaps 96(%rdx), %xmm4
# Pair 2: xmm3[0] = dot3([rdx+64], [rdx+80])
vdpps $113, 80(%rdx), %xmm3, %xmm3
# sum += pair 0; sum += pair 1
vaddss %xmm1, %xmm0, %xmm0
vaddss %xmm2, %xmm0, %xmm0
# Pair 3: xmm1[0] = dot3([rdx+96], [rdx+112]); %xmm1 is reused because
# pair 0 has already been accumulated.
vdpps $113, 112(%rdx), %xmm4, %xmm1
# sum += pair 2
vaddss %xmm3, %xmm0, %xmm0
# Pair 4: xmm2[0] = dot3([rdx+128], [rdx+144])
vmovaps 128(%rdx), %xmm2
vdpps $113, 144(%rdx), %xmm2, %xmm2
# sum += pair 3
vaddss %xmm1, %xmm0, %xmm0
# Pair 5: xmm1[0] = dot3([rdx+160], [rdx+176])
vmovaps 160(%rdx), %xmm1
vdpps $113, 176(%rdx), %xmm1, %xmm1
# sum += pair 4
vaddss %xmm2, %xmm0, %xmm0
# Pair 6: xmm2[0] = dot3([rdx+192], [rdx+208])
vmovaps 192(%rdx), %xmm2
vdpps $113, 208(%rdx), %xmm2, %xmm2
# sum += pair 5
vaddss %xmm1, %xmm0, %xmm0
# Pair 7: xmm1[0] = dot3([rdx+224], [rdx+240])
vmovaps 224(%rdx), %xmm1
vdpps $113, 240(%rdx), %xmm1, %xmm1
# sum += pair 6; sum += pair 7
vaddss %xmm2, %xmm0, %xmm0
vaddss %xmm1, %xmm0, %xmm0
# Step to the next 256-byte group and loop until the end pointer is reached.
addq $256, %rdx
cmpq %rsi, %rdx
jne .LBB24_11
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment