Skip to content

Instantly share code, notes, and snippets.

@goldshtn
Created August 15, 2015 07:12
Show Gist options
  • Save goldshtn/2c12eb8fb16b1698f5b3 to your computer and use it in GitHub Desktop.
Save goldshtn/2c12eb8fb16b1698f5b3 to your computer and use it in GitHub Desktop.
Dot product of shorts and floats: C# vs. C++
; C#, RyuJIT, release, x64
; Inner loop of the short-by-float dot product; handles ONE element per iteration.
; Register roles as used in this listing:
;   eax  = loop index i (32-bit)
;   rcx  = base used for the 16-bit loads  (array a in the original annotations)
;   rdx  = base used for the 32-bit loads  (array b in the original annotations)
;   r8d  = loop bound compared against i   (a.Length in the original annotations)
;   xmm0 = running sum, xmm1 = per-element temporary
; NOTE(review): the +10h displacement on both loads presumably skips the managed
; array header -- confirm against the CLR array layout.
00007ffe`fc6808ec 4c63c8 movsxd r9,eax ; loop: r9 = (long)i (sign-extend the 32-bit index for addressing)
00007ffe`fc6808ef 4e0fbf4c4910 movsx r9,word ptr [rcx+r9*2+10h] ; r9 = a[i], 16-bit load sign-extended (overwrites the index copy in r9)
00007ffe`fc6808f5 f3410f2ac9 cvtsi2ss xmm1,r9d ; xmm1 = (float)a[i], converted from the low 32 bits of r9
00007ffe`fc6808fa 4c63c8 movsxd r9,eax ; r9 = (long)i again; needed because r9 was just reused for a[i]
00007ffe`fc6808fd f3420f594c8a10 mulss xmm1,dword ptr [rdx+r9*4+10h] ; xmm1 *= b[i] (32-bit float load)
00007ffe`fc680904 f30f58c1 addss xmm0,xmm1 ; sum += xmm1
00007ffe`fc680908 ffc0 inc eax ; ++i
00007ffe`fc68090a 443bc0 cmp r8d,eax ; compare bound (r8d) with i
00007ffe`fc68090d 7fdd jg 00007ffe`fc6808ec ; signed: if bound > i, goto loop
00007ffe`fc68090f eb2c jmp 00007ffe`fc68093d ; otherwise goto done (target lies outside this listing)
; VC++ 2015, release, x64
; Same dot-product loop, unrolled 4x: each iteration consumes four 16-bit values
; and four 32-bit floats. The arithmetic is still scalar (mulss/addss), and the
; adds are applied in element order: sum = (((sum + p0) + p1) + p2) + p3.
; Register roles as used in this listing:
;   rbx  = pointer into the 16-bit data, called pa below (short*); advanced 8 bytes = 4 shorts
;   r11  = pointer into the float data,  called pb below (float*); advanced 16 bytes = 4 floats
;   r8   = remaining iteration count (counts down to zero)
;   xmm2 = running sum; xmm0/xmm1 = alternating per-element temporaries
; The loads are VALUES, not addresses: pa[-2..1] and pb[-2..1] are relative to
; the current pointers, which sit two elements into the group of four.
; cvtdq2ps is a packed int32->float conversion; only the low lane is used here,
; since movd writes eax to the low dword and zeroes the rest of the register.
00007fff`4f501090 0fbf43fc movsx eax,word ptr [rbx-4] ; loop: eax = (int)pa[-2] (element 0 of the group)
00007fff`4f501094 660f6ec8 movd xmm1,eax ; xmm1 low dword = eax
00007fff`4f501098 0fbf43fe movsx eax,word ptr [rbx-2] ; eax = (int)pa[-1] (element 1), loaded early
00007fff`4f50109c 0f5bc9 cvtdq2ps xmm1,xmm1 ; xmm1 = (float)pa[-2]
00007fff`4f50109f 660f6ec0 movd xmm0,eax ; xmm0 low dword = eax
00007fff`4f5010a3 f3410f594bf8 mulss xmm1,dword ptr [r11-8] ; xmm1 *= pb[-2] -> p0
00007fff`4f5010a9 0fbf03 movsx eax,word ptr [rbx] ; eax = (int)pa[0] (element 2)
00007fff`4f5010ac 0f5bc0 cvtdq2ps xmm0,xmm0 ; xmm0 = (float)pa[-1]
00007fff`4f5010af f30f58ca addss xmm1,xmm2 ; xmm1 = p0 + sum
00007fff`4f5010b3 f3410f5943fc mulss xmm0,dword ptr [r11-4] ; xmm0 *= pb[-1] -> p1
00007fff`4f5010b9 0f28d1 movaps xmm2,xmm1 ; sum = xmm1 (i.e. sum + p0); frees xmm1 for reuse
00007fff`4f5010bc 660f6ec8 movd xmm1,eax ; xmm1 low dword = eax (element 2)
00007fff`4f5010c0 0fbf4302 movsx eax,word ptr [rbx+2] ; eax = (int)pa[1] (element 3)
00007fff`4f5010c4 f30f58d0 addss xmm2,xmm0 ; sum += p1
00007fff`4f5010c8 0f5bc9 cvtdq2ps xmm1,xmm1 ; xmm1 = (float)pa[0]
00007fff`4f5010cb 4883c308 add rbx,8 ; pa += 4 shorts (8 bytes); later pb offsets are unaffected
00007fff`4f5010cf 660f6ec0 movd xmm0,eax ; xmm0 low dword = eax (element 3)
00007fff`4f5010d3 f3410f590b mulss xmm1,dword ptr [r11] ; xmm1 *= pb[0] -> p2
00007fff`4f5010d8 0f5bc0 cvtdq2ps xmm0,xmm0 ; xmm0 = (float)pa[1] (value loaded before pa was advanced)
00007fff`4f5010db f30f58d1 addss xmm2,xmm1 ; sum += p2
00007fff`4f5010df f3410f594304 mulss xmm0,dword ptr [r11+4] ; xmm0 *= pb[1] -> p3
00007fff`4f5010e5 4983c310 add r11,10h ; pb += 4 floats (16 bytes)
00007fff`4f5010e9 f30f58d0 addss xmm2,xmm0 ; sum += p3
00007fff`4f5010ed 4983e801 sub r8,1 ; --r8 (remaining iterations)
00007fff`4f5010f1 759d jne 00007fff`4f501090 ; if r8 != 0 goto loop
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment