Skip to content

Instantly share code, notes, and snippets.

@jeremyong
Last active August 19, 2020 07:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save jeremyong/9a6983a282bf3da6b2e1327949bd999d to your computer and use it in GitHub Desktop.
Save jeremyong/9a6983a282bf3da6b2e1327949bd999d to your computer and use it in GitHub Desktop.

Assembly generated from code implemented here

AVX enabled

Motor (dual-quat) applied to a point:

 # Motor (dual-quat) applied to a point -- AVX (VEX-encoded) listing.
 #
 # Memory touched by this routine:
 #   [rsi], [rsi + 16]  two 4-float vectors, called m1 and m2 in the notes below
 #                      (presumably the two halves of the motor -- confirm against the source)
 #   [rdx]              one 4-float vector, called p below (presumably the point -- confirm)
 #   [rdi]              4-float result is stored here; rdi is also returned in rax
 # Register roles look like the System V AMD64 convention with a hidden result
 # pointer in rdi -- TODO confirm against the build target.
 # Every load/store through rsi, rdx and rdi uses vmovaps, so those addresses must be
 # 16-byte aligned.
 #
 # Lane notation follows the existing comments: v[a,b,c,d] is the vector whose
 # lanes 0..3 are lanes a, b, c, d of v.
 mov rax, rdi
  vmovaps xmm0, xmmword ptr [rsi]
  vmovaps xmm1, xmmword ptr [rsi + 16]
  # xmm8 = (m1 * m1[0,2,3,1] - m1[0,0,0,0] * m1[0,3,1,2]) * [0,2,2,2]
  vpermilps xmm2, xmm0, 120 # xmm2 = xmm0[0,2,3,1]
  vmulps xmm3, xmm0, xmm2
  vpermilps xmm4, xmm0, 0 # xmm4 = xmm0[0,0,0,0]
  vpermilps xmm5, xmm0, 156 # xmm5 = xmm0[0,3,1,2]
  vmulps xmm6, xmm4, xmm5
  vsubps xmm3, xmm3, xmm6
  # xmm6 keeps this constant for the rest of the routine; lane 0 of every product with it becomes 0
  vmovaps xmm6, xmmword ptr [rip + .LCPI1_0] # xmm6 = [0.0E+0,2.0E+0,2.0E+0,2.0E+0]
  vmulps xmm8, xmm3, xmm6
  # xmm9 = (m1[0,0,0,0] * m1[0,2,3,1] + m1 * m1[0,3,1,2]) * [0,2,2,2]
  vmulps xmm7, xmm4, xmm2
  vmulps xmm3, xmm0, xmm5
  vaddps xmm3, xmm7, xmm3
  vmulps xmm9, xmm3, xmm6
  # From here xmm0 holds s = m1 * m1 (lane-wise squares); m1 itself is no longer needed.
  # xmm0 = (s[0,0,0,0] + s[1,1,2,3]) + (s[2,2,1,1] + s[3,3,3,2]) * .LCPI1_1
  # (the contents of .LCPI1_1 are not part of this listing)
  vmulps xmm0, xmm0, xmm0
  vpermilps xmm7, xmm0, 0 # xmm7 = xmm0[0,0,0,0]
  vpermilps xmm3, xmm0, 229 # xmm3 = xmm0[1,1,2,3]
  vaddps xmm3, xmm7, xmm3
  vpermilps xmm7, xmm0, 90 # xmm7 = xmm0[2,2,1,1]
  vpermilps xmm0, xmm0, 191 # xmm0 = xmm0[3,3,3,2]
  vaddps xmm0, xmm7, xmm0
  vmulps xmm0, xmm0, xmmword ptr [rip + .LCPI1_1]
  vaddps xmm0, xmm3, xmm0
  # xmm1 = (m1[0,3,1,2] * m2[0,2,1,3] - m1[0,0,0,0] * m2[0,3,2,1]
  #         - m1[0,2,3,1] * m2[0,1,3,2]) * [0,2,2,2]
  # xmm5, xmm4 and xmm2 still hold the m1 shuffles computed at the top.
  vpermilps xmm3, xmm1, 216 # xmm3 = xmm1[0,2,1,3]
  vmulps xmm3, xmm5, xmm3
  vpermilps xmm5, xmm1, 108 # xmm5 = xmm1[0,3,2,1]
  vmulps xmm4, xmm4, xmm5
  vsubps xmm3, xmm3, xmm4
  vpermilps xmm1, xmm1, 180 # xmm1 = xmm1[0,1,3,2]
  vmulps xmm1, xmm2, xmm1
  vsubps xmm1, xmm3, xmm1
  vmulps xmm1, xmm1, xmm6
  # Combine with p = [rdx]:
  # result = p[0,2,3,1] * xmm8 + p[0,3,1,2] * xmm9 + p * xmm0 + p[0,0,0,0] * xmm1
  vmovaps xmm2, xmmword ptr [rdx]
  vpermilps xmm3, xmm2, 120 # xmm3 = xmm2[0,2,3,1]
  vmulps xmm3, xmm3, xmm8
  vpermilps xmm4, xmm2, 156 # xmm4 = xmm2[0,3,1,2]
  vmulps xmm4, xmm4, xmm9
  vaddps xmm3, xmm3, xmm4
  vmulps xmm0, xmm2, xmm0
  vaddps xmm0, xmm3, xmm0
  vpermilps xmm2, xmm2, 0 # xmm2 = xmm2[0,0,0,0]
  vmulps xmm1, xmm2, xmm1
  vaddps xmm0, xmm0, xmm1
  vmovaps xmmword ptr [rdi], xmm0
  ret

Motor applied to a plane:

 # Motor applied to a plane -- AVX (VEX-encoded) listing.
 #
 # Memory touched by this routine:
 #   [rsi], [rsi + 16]  two 4-float vectors, called m1 and m2 in the notes below
 #                      (presumably the two halves of the motor -- confirm against the source)
 #   [rdx]              one 4-float vector, called p below (presumably the plane -- confirm)
 #   [rdi]              4-float result is stored here; rdi is also returned in rax
 # Register roles look like the System V AMD64 convention with a hidden result
 # pointer in rdi -- TODO confirm against the build target.
 # Every load/store through rsi, rdx and rdi uses vmovaps, so those addresses must be
 # 16-byte aligned.
 #
 # Lane notation follows the existing comments: v[a,b,c,d] is the vector whose
 # lanes 0..3 are lanes a, b, c, d of v.
 mov rax, rdi
vmovaps xmm2, xmmword ptr [rsi]
vmovaps xmm0, xmmword ptr [rsi + 16]
# xmm8 = (m1[0,0,0,0] * m1[2,3,1,0] + m1[1,1,2,1] * m1[3,2,3,1]) * [2,2,2,1]
# xmm3 (broadcast of m1 lane 0) and xmm1 (m1[2,3,1,0]) are reused further down.
vpermilps xmm3, xmm2, 0 # xmm3 = xmm2[0,0,0,0]
vpermilps xmm1, xmm2, 30 # xmm1 = xmm2[2,3,1,0]
vmulps xmm4, xmm3, xmm1
vpermilps xmm5, xmm2, 101 # xmm5 = xmm2[1,1,2,1]
vpermilps xmm6, xmm2, 123 # xmm6 = xmm2[3,2,3,1]
vmulps xmm5, xmm5, xmm6
vaddps xmm4, xmm4, xmm5
# xmm9 keeps this constant for the rest of the routine.
vmovaps xmm9, xmmword ptr [rip + .LCPI0_0] # xmm9 = [2.0E+0,2.0E+0,2.0E+0,1.0E+0]
vmulps xmm8, xmm4, xmm9
# xmm4 = (m1[1,2,1,2] * m1[2,3,3,2] + m1[0,0,0,3] * m1[3,1,2,3] * .LCPI0_1) * [2,2,2,1]
# (the contents of .LCPI0_1 are not part of this listing)
vpermilps xmm6, xmm2, 153 # xmm6 = xmm2[1,2,1,2]
vpermilps xmm7, xmm2, 190 # xmm7 = xmm2[2,3,3,2]
vmulps xmm6, xmm6, xmm7
vpermilps xmm7, xmm2, 192 # xmm7 = xmm2[0,0,0,3]
vpermilps xmm4, xmm2, 231 # xmm4 = xmm2[3,1,2,3]
vmulps xmm4, xmm7, xmm4
vmulps xmm4, xmm4, xmmword ptr [rip + .LCPI0_1]
vaddps xmm4, xmm6, xmm4
vmulps xmm4, xmm4, xmm9
# With s = m1 * m1 (lane-wise squares):
# xmm5 = (s[0,0,0,0] + s[1,2,3,1] - s[2,1,1,2] - s[3,3,2,3]) * .LCPI0_2
# (the contents of .LCPI0_2 are not part of this listing)
vmulps xmm6, xmm2, xmm2
vpermilps xmm7, xmm6, 0 # xmm7 = xmm6[0,0,0,0]
vpermilps xmm5, xmm6, 121 # xmm5 = xmm6[1,2,3,1]
vaddps xmm5, xmm7, xmm5
vpermilps xmm7, xmm6, 150 # xmm7 = xmm6[2,1,1,2]
vsubps xmm5, xmm5, xmm7
vpermilps xmm6, xmm6, 239 # xmm6 = xmm6[3,3,2,3]
vsubps xmm5, xmm5, xmm6
vmulps xmm5, xmm5, xmmword ptr [rip + .LCPI0_2]
# xmm0 = (m1[0,0,0,0] * m2[3,2,1,0] + m1[3,1,2,0] * m2[2,1,3,0]
#         - m1[2,3,1,0] * m2[1,3,2,0]) * [2,2,2,1]
vpermilps xmm6, xmm0, 27 # xmm6 = xmm0[3,2,1,0]
vmulps xmm3, xmm3, xmm6
vpermilps xmm2, xmm2, 39 # xmm2 = xmm2[3,1,2,0]
vpermilps xmm6, xmm0, 54 # xmm6 = xmm0[2,1,3,0]
vmulps xmm2, xmm2, xmm6
vaddps xmm2, xmm3, xmm2
vpermilps xmm0, xmm0, 45 # xmm0 = xmm0[1,3,2,0]
vmulps xmm0, xmm1, xmm0
vsubps xmm0, xmm2, xmm0
vmulps xmm0, xmm0, xmm9
# Combine with p = [rdx]:
# xmm2 = p[2,0,1,3] * xmm8 + p[1,2,0,3] * xmm4
vmovaps xmm1, xmmword ptr [rdx]
vpermilps xmm2, xmm1, 210 # xmm2 = xmm1[2,0,1,3]
vmulps xmm2, xmm2, xmm8
vpermilps xmm3, xmm1, 201 # xmm3 = xmm1[1,2,0,3]
vmulps xmm3, xmm3, xmm4
vaddps xmm2, xmm2, xmm3
# imm8 = 120 (0x78): sum the products of lanes 0..2 only and write the sum to
# lane 3; lanes 0..2 of xmm0 become zero.
vdpps xmm0, xmm0, xmm1, 120
# result = xmm0 (dot product in lane 3) + xmm2 + p * xmm5
vmulps xmm1, xmm1, xmm5
vaddps xmm1, xmm2, xmm1
vaddps xmm0, xmm0, xmm1
vmovaps xmmword ptr [rdi], xmm0
# Return to the caller; without this the routine would run off its end.
ret

AVX disabled

Motor-point

# Motor applied to a point -- legacy SSE encoding (AVX disabled).
#
# Memory touched by this routine:
#   [rsi], [rsi + 16]  two 4-float vectors, called m1 and m2 in the notes below
#                      (presumably the two halves of the motor -- confirm against the source)
#   [rdx]              one 4-float vector, called p below (presumably the point -- confirm)
#   [rdi]              4-float result is stored here; rdi is also returned in rax
# Register roles look like the System V AMD64 convention with a hidden result
# pointer in rdi -- TODO confirm against the build target.
# Every load/store through rsi, rdx and rdi uses movaps, so those addresses must be
# 16-byte aligned.
#
# SSE arithmetic overwrites its first operand, so each shuffle is preceded by a
# movaps copy; in the shufps comments both named registers hold the same value.
# Lane notation: v[a,b,c,d] is the vector whose lanes 0..3 are lanes a, b, c, d of v.
mov rax, rdi
movaps xmm0, xmmword ptr [rsi]
movaps xmm2, xmmword ptr [rsi + 16]
# xmm9 = m1[0,2,3,1], xmm5 = m1[0,0,0,0], xmm4 = m1[0,3,1,2]
movaps xmm9, xmm0
shufps xmm9, xmm0, 120 # xmm9 = xmm9[0,2],xmm0[3,1]
movaps xmm1, xmm0
mulps xmm1, xmm9
movaps xmm5, xmm0
shufps xmm5, xmm0, 0 # xmm5 = xmm5[0,0],xmm0[0,0]
movaps xmm4, xmm0
shufps xmm4, xmm0, 156 # xmm4 = xmm4[0,3],xmm0[1,2]
# Keep a second copy of the m1[0] broadcast in xmm6 because xmm5 is overwritten below.
movaps xmm6, xmm5
# xmm7 = m2[0,3,2,1] * m1[0,0,0,0], computed now while xmm5 still holds the broadcast
movaps xmm7, xmm2
shufps xmm7, xmm2, 108 # xmm7 = xmm7[0,3],xmm2[2,1]
mulps xmm7, xmm5
# xmm1 = (m1 * m1[0,2,3,1] - m1[0,0,0,0] * m1[0,3,1,2]) * [0,2,2,2]
mulps xmm5, xmm4
subps xmm1, xmm5
# xmm8 keeps this constant for the rest of the routine; lane 0 of every product with it becomes 0
movaps xmm8, xmmword ptr [rip + .LCPI1_0] # xmm8 = [0.0E+0,2.0E+0,2.0E+0,2.0E+0]
mulps xmm1, xmm8
# xmm5 = (m1 * m1[0,3,1,2] + m1[0,0,0,0] * m1[0,2,3,1]) * [0,2,2,2]
mulps xmm6, xmm9
movaps xmm5, xmm0
mulps xmm5, xmm4
addps xmm5, xmm6
mulps xmm5, xmm8
# From here xmm0 holds s = m1 * m1 (lane-wise squares); m1 itself is no longer needed.
# xmm0 = (s[3,3,3,2] + s[2,2,1,1]) * .LCPI1_1 + (s[1,1,2,3] + s[0,0,0,0])
# (the contents of .LCPI1_1 are not part of this listing)
mulps xmm0, xmm0
movaps xmm6, xmm0
shufps xmm6, xmm0, 0 # xmm6 = xmm6[0,0],xmm0[0,0]
movaps xmm3, xmm0
shufps xmm3, xmm0, 229 # xmm3 = xmm3[1,1],xmm0[2,3]
addps xmm3, xmm6
movaps xmm6, xmm0
shufps xmm6, xmm0, 90 # xmm6 = xmm6[2,2],xmm0[1,1]
shufps xmm0, xmm0, 191 # xmm0 = xmm0[3,3,3,2]
addps xmm0, xmm6
mulps xmm0, xmmword ptr [rip + .LCPI1_1]
addps xmm0, xmm3
# xmm3 = (m2[0,2,1,3] * m1[0,3,1,2] - xmm7 - m2[0,1,3,2] * m1[0,2,3,1]) * [0,2,2,2]
movaps xmm3, xmm2
shufps xmm3, xmm2, 216 # xmm3 = xmm3[0,2],xmm2[1,3]
mulps xmm3, xmm4
subps xmm3, xmm7
shufps xmm2, xmm2, 180 # xmm2 = xmm2[0,1,3,2]
mulps xmm2, xmm9
subps xmm3, xmm2
mulps xmm3, xmm8
# Combine with p = [rdx]:
# result = p[0,0,0,0] * xmm3 + (xmm0 * p + p[0,3,1,2] * xmm5 + p[0,2,3,1] * xmm1)
movaps xmm2, xmmword ptr [rdx]
movaps xmm4, xmm2
shufps xmm4, xmm2, 120 # xmm4 = xmm4[0,2],xmm2[3,1]
mulps xmm4, xmm1
movaps xmm1, xmm2
shufps xmm1, xmm2, 156 # xmm1 = xmm1[0,3],xmm2[1,2]
mulps xmm1, xmm5
addps xmm1, xmm4
mulps xmm0, xmm2
addps xmm0, xmm1
shufps xmm2, xmm2, 0 # xmm2 = xmm2[0,0,0,0]
mulps xmm2, xmm3
addps xmm2, xmm0
movaps xmmword ptr [rdi], xmm2
ret

Motor-plane

# Motor applied to a plane -- legacy SSE encoding (AVX disabled).
#
# Memory touched by this routine:
#   [rsi], [rsi + 16]  two 4-float vectors, called m1 and m2 in the notes below
#                      (presumably the two halves of the motor -- confirm against the source)
#   [rdx]              one 4-float vector, called p below (presumably the plane -- confirm)
#   [rdi]              4-float result is stored here; rdi is also returned in rax
# Register roles look like the System V AMD64 convention with a hidden result
# pointer in rdi -- TODO confirm against the build target.
# Every load/store through rsi, rdx and rdi uses movaps, so those addresses must be
# 16-byte aligned.
#
# SSE arithmetic overwrites its first operand, so each shuffle is preceded by a
# movaps copy; in the shufps comments both named registers hold the same value.
# Lane notation: v[a,b,c,d] is the vector whose lanes 0..3 are lanes a, b, c, d of v.
mov rax, rdi
movaps xmm1, xmmword ptr [rsi]
movaps xmm0, xmmword ptr [rsi + 16]
# xmm3 = m1[0,0,0,0], xmm9 = m1[2,3,1,0] (xmm9 is reused near the end)
movaps xmm3, xmm1
shufps xmm3, xmm1, 0 # xmm3 = xmm3[0,0],xmm1[0,0]
movaps xmm9, xmm1
shufps xmm9, xmm1, 30 # xmm9 = xmm9[2,3],xmm1[1,0]
# xmm4 = m2[3,2,1,0] * m1[0,0,0,0], computed now while xmm3 still holds the broadcast
movaps xmm4, xmm0
shufps xmm4, xmm0, 27 # xmm4 = xmm4[3,2],xmm0[1,0]
mulps xmm4, xmm3
# xmm6 = (m1[3,2,3,1] * m1[1,1,2,1] + m1[0,0,0,0] * m1[2,3,1,0]) * [2,2,2,1]
mulps xmm3, xmm9
movaps xmm5, xmm1
shufps xmm5, xmm1, 101 # xmm5 = xmm5[1,1],xmm1[2,1]
movaps xmm6, xmm1
shufps xmm6, xmm1, 123 # xmm6 = xmm6[3,2],xmm1[3,1]
mulps xmm6, xmm5
addps xmm6, xmm3
# xmm8 keeps this constant for the rest of the routine.
movaps xmm8, xmmword ptr [rip + .LCPI0_0] # xmm8 = [2.0E+0,2.0E+0,2.0E+0,1.0E+0]
mulps xmm6, xmm8
# xmm5 = (m1[3,0,0,3] * m1 * .LCPI0_1 + m1[2,3,3,2] * m1[1,2,1,2]) * [2,2,2,1]
# (the contents of .LCPI0_1 are not part of this listing)
movaps xmm5, xmm1
shufps xmm5, xmm1, 153 # xmm5 = xmm5[1,2],xmm1[1,2]
movaps xmm7, xmm1
shufps xmm7, xmm1, 190 # xmm7 = xmm7[2,3],xmm1[3,2]
mulps xmm7, xmm5
movaps xmm5, xmm1
shufps xmm5, xmm1, 195 # xmm5 = xmm5[3,0],xmm1[0,3]
mulps xmm5, xmm1
mulps xmm5, xmmword ptr [rip + .LCPI0_1]
addps xmm5, xmm7
mulps xmm5, xmm8
# With s = m1 * m1 (lane-wise squares) in xmm3:
# xmm7 = (s[1,2,3,1] + s[0,0,0,0] - s[2,1,1,2] - s[3,3,2,3]) * .LCPI0_2
# (the contents of .LCPI0_2 are not part of this listing)
movaps xmm3, xmm1
mulps xmm3, xmm1
movaps xmm2, xmm3
shufps xmm2, xmm3, 0 # xmm2 = xmm2[0,0],xmm3[0,0]
movaps xmm7, xmm3
shufps xmm7, xmm3, 121 # xmm7 = xmm7[1,2],xmm3[3,1]
addps xmm7, xmm2
movaps xmm2, xmm3
shufps xmm2, xmm3, 150 # xmm2 = xmm2[2,1],xmm3[1,2]
subps xmm7, xmm2
shufps xmm3, xmm3, 239 # xmm3 = xmm3[3,3,2,3]
subps xmm7, xmm3
mulps xmm7, xmmword ptr [rip + .LCPI0_2]
# xmm2 = (m2[2,1,3,0] * m1[3,1,2,0] + xmm4 - m2[1,3,2,0] * m1[2,3,1,0]) * [2,2,2,1]
# m1 is shuffled in place here; it is not read again afterwards.
shufps xmm1, xmm1, 39 # xmm1 = xmm1[3,1,2,0]
movaps xmm2, xmm0
shufps xmm2, xmm0, 54 # xmm2 = xmm2[2,1],xmm0[3,0]
mulps xmm2, xmm1
addps xmm2, xmm4
shufps xmm0, xmm0, 45 # xmm0 = xmm0[1,3,2,0]
mulps xmm0, xmm9
subps xmm2, xmm0
mulps xmm2, xmm8
# Combine with p = [rdx] (xmm0 and xmm1 both start as copies of p):
movaps xmm0, xmmword ptr [rdx]
movaps xmm1, xmm0
mulps xmm7, xmm0
# imm8 = 120 (0x78): sum the products of lanes 0..2 only and write the sum to
# lane 3; lanes 0..2 of xmm2 become zero.
dpps xmm2, xmm0, 120
# result = xmm2 (dot product in lane 3) + xmm7 * p + p[1,2,0,3] * xmm5 + p[2,0,1,3] * xmm6
shufps xmm0, xmm0, 210 # xmm0 = xmm0[2,0,1,3]
mulps xmm0, xmm6
shufps xmm1, xmm1, 201 # xmm1 = xmm1[1,2,0,3]
mulps xmm1, xmm5
addps xmm1, xmm0
addps xmm7, xmm1
addps xmm2, xmm7
movaps xmmword ptr [rdi], xmm2
ret
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment