Skip to content

Instantly share code, notes, and snippets.

@rygorous
Created January 23, 2015 00:02
Embed
What would you like to do?
Code output by VC++2012 for both versions in https://gist.github.com/rygorous/c6831e60f5366569d2e9
; ---- std::complex version
;; Excerpt of a compiler listing: the butterfly loop inside ffts_rec (name taken
;; from the @ffts_rec label suffix). 32-bit x86, scalar SSE. Lines of the form
;; "; NNN : ..." are the compiler's echo of the C++ source.
;;
;; Roles established by the code below:
;;   ecx          byte address of out0[k]; reloaded from / saved to _out$
;;   _twiddle$1$, _out1$1$, _out2$1$, _out3$1$
;;                each holds (incoming pointer - ecx), i.e. an offset that is
;;                added to ecx to reach element k of that array
;;   tv1343       incoming edx - incoming eax (the values that become the
;;                out2 and out3 offsets), so out3-address + tv1343 = out2-address
;;   _N$          counter decremented once per iteration; loop ends at zero
;; In the notes, "re"/"im" mean the float at offset 0 / offset 4 of a complexf
;; (assumed layout -- TODO confirm against the complexf typedef).
; 301 :
; 302 : for (size_t k = 0; k < N1; k++)
;; Skip the loop when edi is zero (presumably edi holds N1 here).
test edi, edi
je $LN1@ffts_rec
;; tv1343 = edx - eax, taken before both registers are rebased below.
mov edi, edx
sub edi, eax
mov DWORD PTR tv1343[esp+112], edi
;; Rebase every array pointer against ecx and park the offsets in stack slots.
;; edi keeps the out1 offset in a register for the first loads of the body.
mov edi, DWORD PTR _out1$1$[esp+108]
sub esi, ecx
sub edi, ecx
sub edx, ecx
sub eax, ecx
mov DWORD PTR _twiddle$1$[esp+112], esi
mov DWORD PTR _out1$1$[esp+108], edi
mov DWORD PTR _out2$1$[esp+112], edx
mov DWORD PTR _out3$1$[esp+112], eax
$LL3@ffts_rec:
; 303 : {
; 304 : #if 1
; 305 : complexf Uk = out0[k];
;; Uk is read with integer loads: edx = Uk.im here, esi = Uk.re a few lines
;; down. Both are spilled to the stack later (_Uk$2$, _Uk$1).
mov edx, DWORD PTR [ecx+4]
; 306 : complexf Uk_N1 = out1[k];
;; eax = Uk_N1.re, written to two distinct stack slots.
mov eax, DWORD PTR [edi+ecx]
mov DWORD PTR _Uk_N1$2$[esp+112], eax
mov DWORD PTR _Uk_N1$2[esp+112], eax
mov DWORD PTR _Uk$2$[esp+112], edx
; 307 : complexf w = twiddle[k];
;; w is copied dword by dword through eax into the stack temporary _w$5.
mov edx, DWORD PTR _twiddle$1$[esp+112]
mov esi, DWORD PTR [ecx]
mov eax, DWORD PTR [edx+ecx]
mov DWORD PTR _w$5[esp+112], eax
mov eax, DWORD PTR [edx+ecx+4]
mov DWORD PTR _w$5[esp+116], eax
; 308 :
; 309 : // Twiddle Zk, Z'k then butterfly
; 310 : complexf Zk = w * out2[k];
;; The same two twiddle dwords are read from memory a second time and copied
;; into __Tmp$10; xmm3 (w.re) and xmm1 (w.im) are then loaded from that copy.
mov eax, DWORD PTR [edx+ecx]
; 311 : complexf Zpk = std::conj(w) * out3[k];
;; xmm5 = -w.im: the xorps operand is named as 0x80000000 in every lane,
;; i.e. the float sign bit, so the xor flips the sign.
movss xmm5, DWORD PTR _w$5[esp+116]
xorps xmm5, DWORD PTR __xmm@80000000800000008000000080000000
mov DWORD PTR __Tmp$10[esp+112], eax
mov eax, DWORD PTR [edx+ecx+4]
movss xmm3, DWORD PTR __Tmp$10[esp+112]
;; edx = tv1343, so [eax+edx+4] below (eax = out3 offset + ecx) addresses
;; out2[k].im.
mov edx, DWORD PTR tv1343[esp+112]
mov DWORD PTR __Tmp$10[esp+116], eax
mov eax, DWORD PTR _out2$1$[esp+112]
movss xmm1, DWORD PTR __Tmp$10[esp+116]
movss xmm4, DWORD PTR [eax+ecx]
mov eax, DWORD PTR _out3$1$[esp+112]
add eax, ecx
;; Zk = w * out2[k]:
;;   xmm7 = w.re*out2.re - w.im*out2.im   (Zk.re)
;;   xmm3 = w.re*out2.im + w.im*out2.re   (Zk.im)
movaps xmm0, xmm1
mulss xmm0, DWORD PTR [eax+edx+4]
mulss xmm1, xmm4
movaps xmm7, xmm3
mulss xmm3, DWORD PTR [eax+edx+4]
mov edx, DWORD PTR _out3$1$[esp+112]
mulss xmm7, xmm4
addss xmm3, xmm1
movss xmm1, DWORD PTR _w$5[esp+112]
subss xmm7, xmm0
;; edi is reused for Uk_N1.im; the out1 offset it held is reloaded into edi
;; from _out1$1$ near the end of the body.
mov edi, DWORD PTR [edi+ecx+4]
;; Zpk = conj(w) * out3[k], with xmm5 = -w.im and xmm1 = w.re:
;;   xmm4 = w.re*out3.re - (-w.im)*out3.im   (Zpk.re)
;;   xmm5 = (-w.im)*out3.re + w.re*out3.im   (Zpk.im, completed by the addss
;;                                            after the next source line)
movaps xmm4, xmm1
mulss xmm4, DWORD PTR [edx+ecx]
mulss xmm1, DWORD PTR [eax+4]
movaps xmm0, xmm5
mulss xmm0, DWORD PTR [eax+4]
mulss xmm5, DWORD PTR [edx+ecx]
subss xmm4, xmm0
;; Zk is written back to __Tmp$10 and then copied through eax/ecx into
;; __Tmp$3 and __Tmp$4. ecx stops being the out0[k] address here and is
;; reloaded from _out$ further down.
movss DWORD PTR __Tmp$10[esp+116], xmm3
; 312 :
; 313 : complexf Zsum = Zk + Zpk;
mov eax, DWORD PTR __Tmp$10[esp+116]
addss xmm5, xmm1
movss DWORD PTR __Tmp$10[esp+112], xmm7
mov ecx, DWORD PTR __Tmp$10[esp+112]
; 314 : complexf Zdif = complexf(0.0f, -1.0f) * (Zk - Zpk);
mov DWORD PTR __Tmp$3[esp+116], eax
;; xmm1 = 0.0f, used as the real part of the (0,-1) factor.
xorps xmm1, xmm1
mov DWORD PTR __Tmp$4[esp+112], ecx
mov DWORD PTR __Tmp$4[esp+116], eax
;; xmm6 = Zk.im reloaded from the stack copy (becomes Zsum.im once xmm5 is added).
movss xmm6, DWORD PTR __Tmp$4[esp+116]
;; xmm3 = Zpk.re + Zk.re (Zsum.re); xmm7 = Zk.re - Zpk.re.
movaps xmm3, xmm4
addss xmm3, xmm7
subss xmm7, xmm4
;; xmm4 = Zk.im - Zpk.im, with Zk.im again reloaded from a stack copy.
movss xmm4, DWORD PTR __Tmp$3[esp+116]
subss xmm4, xmm5
mov DWORD PTR __Tmp$3[esp+112], ecx
; 315 :
; 316 : out0[k] = Uk + Zsum;
;; ecx = out0[k] address again.
mov ecx, DWORD PTR _out$[esp+108]
;; The (0,-1) multiply is carried out with real multiplications. With
;; d = Zk - Zpk, __real@3f800000 / __real@bf800000 being the bit patterns of
;; 1.0f / -1.0f, and xmm1 = 0.0f:
;;   xmm2 = 0.0f*d.re - (-1.0f)*d.im   (Zdif.re)
;;   xmm4 = 0.0f*d.im -   1.0f *d.re   (Zdif.im)
movaps xmm2, xmm7
;; out0[k] is read from memory here (integer loads) and bounced through
;; __Tmp$9 before the adds; the Uk copy taken earlier is not used for this.
mov eax, DWORD PTR [ecx]
mulss xmm7, DWORD PTR __real@3f800000
movaps xmm0, xmm4
mulss xmm0, DWORD PTR __real@bf800000
mulss xmm2, xmm1
mov DWORD PTR __Tmp$9[esp+112], eax
mov eax, DWORD PTR [ecx+4]
subss xmm2, xmm0
movss xmm0, DWORD PTR __Tmp$9[esp+112]
addss xmm0, xmm3
;; xmm6 = Zk.im + Zpk.im (Zsum.im).
addss xmm6, xmm5
mov DWORD PTR __Tmp$9[esp+116], eax
mulss xmm4, xmm1
movss DWORD PTR __Tmp$9[esp+112], xmm0
movss xmm0, DWORD PTR __Tmp$9[esp+116]
mov eax, DWORD PTR __Tmp$9[esp+112]
addss xmm0, xmm6
;; Uk.re (held in esi since the top of the body) is spilled to _Uk$1.
mov DWORD PTR _Uk$1[esp+112], esi
subss xmm4, xmm7
;; Results go xmm -> __Tmp$9 -> eax -> memory: out0[k].re here, .im below.
mov DWORD PTR [ecx], eax
movss DWORD PTR __Tmp$9[esp+116], xmm0
; 317 : out1[k] = Uk_N1 + Zdif;
;; xmm1 = Uk_N1.re from its stack copy; edx = out1 offset.
movss xmm1, DWORD PTR _Uk_N1$2[esp+112]
mov eax, DWORD PTR __Tmp$9[esp+116]
mov edx, DWORD PTR _out1$1$[esp+108]
mov DWORD PTR [ecx+4], eax
mov ecx, DWORD PTR _Uk_N1$2$[esp+112]
movaps xmm0, xmm1
addss xmm0, xmm2
;; __Tmp$8 receives Uk_N1 as integers (ecx, edi); its first dword is then
;; overwritten by the movss a few lines below before being read.
mov DWORD PTR __Tmp$8[esp+112], ecx
mov ecx, DWORD PTR _out$[esp+108]
mov DWORD PTR __Tmp$8[esp+116], edi
; 318 : out2[k] = Uk - Zsum;
mov DWORD PTR __Tmp$7[esp+112], esi
movss DWORD PTR __Tmp$8[esp+112], xmm0
movss xmm0, DWORD PTR __Tmp$8[esp+116]
mov eax, DWORD PTR __Tmp$8[esp+112]
addss xmm0, xmm4
;; out1[k].re stored ([edx+ecx] = out1 offset + out0[k] address).
mov DWORD PTR [edx+ecx], eax
; 319 : out3[k] = Uk_N1 - Zdif;
mov DWORD PTR __Tmp$6[esp+116], edi
;; edi gets the out1 offset back for the next iteration's loads.
mov edi, DWORD PTR _out1$1$[esp+108]
;; xmm1 = Uk_N1.re - Zdif.re (out3[k].re, stored near the end).
subss xmm1, xmm2
movss DWORD PTR __Tmp$8[esp+116], xmm0
movss xmm0, DWORD PTR _Uk$1[esp+112]
mov eax, DWORD PTR __Tmp$8[esp+116]
;; out1[k].im stored.
mov DWORD PTR [edx+ecx+4], eax
mov edx, DWORD PTR _Uk$2$[esp+112]
;; xmm0 = Uk.re - Zsum.re.
subss xmm0, xmm3
mov DWORD PTR __Tmp$7[esp+116], edx
;; The out0[k] address moves to edx while ecx temporarily holds the out2 offset.
mov edx, ecx
mov ecx, DWORD PTR _out2$1$[esp+112]
movss DWORD PTR __Tmp$7[esp+112], xmm0
movss xmm0, DWORD PTR __Tmp$7[esp+116]
mov eax, DWORD PTR __Tmp$7[esp+112]
;; xmm0 = Uk.im - Zsum.im.
subss xmm0, xmm6
;; out2[k].re stored, .im four lines below.
mov DWORD PTR [ecx+edx], eax
movss DWORD PTR __Tmp$7[esp+116], xmm0
mov eax, DWORD PTR __Tmp$7[esp+116]
;; xmm0 = Uk_N1.im, from the copy placed in __Tmp$6 above.
movss xmm0, DWORD PTR __Tmp$6[esp+116]
mov DWORD PTR [ecx+edx+4], eax
;; This integer store to __Tmp$6 is overwritten by the movss of xmm1 below
;; before the slot is read.
mov ecx, DWORD PTR _Uk_N1$2$[esp+112]
mov DWORD PTR __Tmp$6[esp+112], ecx
;; ecx = out0[k] address again; edx = out3 offset.
mov ecx, edx
mov edx, DWORD PTR _out3$1$[esp+112]
;; xmm0 = Uk_N1.im - Zdif.im.
subss xmm0, xmm4
movss DWORD PTR __Tmp$6[esp+112], xmm1
mov eax, DWORD PTR __Tmp$6[esp+112]
;; out3[k].re stored.
mov DWORD PTR [edx+ecx], eax
;; Advance to the next element (8 bytes), then decrement the in-memory
;; counter. The instructions between the dec and the jne are movss/mov only,
;; so the jne consumes the zero flag produced by the dec.
add ecx, 8
dec DWORD PTR _N$[esp+108]
movss DWORD PTR __Tmp$6[esp+116], xmm0
mov eax, DWORD PTR __Tmp$6[esp+116]
;; out3[k].im stored; the -4 accounts for ecx already having been advanced by 8.
mov DWORD PTR [edx+ecx-4], eax
;; Save the advanced out0 address for the reloads at the top of the next pass.
mov DWORD PTR _out$[esp+108], ecx
jne $LL3@ffts_rec
$LN1@ffts_rec:
;; Loop exit; the listing shows only the first two register restores.
pop edi
pop esi
; ---------------------
; ---- struct version:
;; Excerpt of a compiler listing: the same ffts_rec butterfly loop, built from
;; the "#else" branch of the source (plain re/im float members). 32-bit x86,
;; scalar SSE. Lines of the form "; NNN : ..." are the compiler's echo of the
;; C++ source.
;;
;; Roles established by the code below (array names obtained by matching the
;; arithmetic against the echoed source lines; "tw" is the value eax has on
;; entry to this excerpt, which the loads identify as the twiddle base):
;;   esi          byte address of out0[k]          (advanced by 8 per pass)
;;   ecx          byte address of twiddle[k].im    (tw + 4, advanced by 8)
;;   ebx, ebp     out2 - out0, out3 - out0         (added to esi -> .re)
;;   _out1$1$     out1 - out0                      (added to esi -> .re)
;;   tv840, tv836, tv834, tv838
;;                out2 - tw, out3 - tw, out1 - tw, out0 - tw
;;                                                 (added to ecx -> .im)
;;   _N$          counter decremented once per iteration; loop ends at zero
; 301 :
; 302 : for (size_t k = 0; k < N1; k++)
;; Skip the loop when edi is zero (presumably edi holds N1 here).
test edi, edi
je $LN1@ffts_rec
;; tv840 = ebx - eax.
mov esi, ebx
sub esi, eax
mov DWORD PTR tv840[esp+28], esi
;; tv838 = _out$ - eax (the subtraction is done directly on the stack slot).
mov esi, DWORD PTR _out$[esp+24]
mov DWORD PTR tv838[esp+24], esi
sub DWORD PTR tv838[esp+24], eax
;; tv836 = ebp - eax.
mov esi, ebp
sub esi, eax
mov DWORD PTR tv836[esp+28], esi
;; tv834 = edx - eax.
mov esi, edx
sub esi, eax
mov DWORD PTR tv834[esp+28], esi
;; esi = out0 base; ebx/ebp/edx become offsets relative to it.
mov esi, DWORD PTR _out$[esp+24]
sub ebx, esi
sub ebp, esi
sub edx, esi
;; ecx = eax + 4, so [ecx-4] / [ecx] are the two floats of twiddle[k].
lea ecx, DWORD PTR [eax+4]
mov eax, DWORD PTR tv838[esp+24]
mov DWORD PTR _out1$1$[esp+24], edx
;; npad: listing macro that emits 13 bytes of padding ahead of the loop label
;; (presumably to align the loop entry -- the resulting address is not visible here).
npad 13
$LL3@ffts_rec:
; 303 : {
; 304 : #if 0
; 305 : complexf Uk = out0[k];
; 306 : complexf Uk_N1 = out1[k];
; 307 : complexf w = twiddle[k];
; 308 :
; 309 : // Twiddle Zk, Z'k then butterfly
; 310 : complexf Zk = w * out2[k];
; 311 : complexf Zpk = std::conj(w) * out3[k];
; 312 :
; 313 : complexf Zsum = Zk + Zpk;
; 314 : complexf Zdif = complexf(0.0f, -1.0f) * (Zk - Zpk);
; 315 :
; 316 : out0[k] = Uk + Zsum;
; 317 : out1[k] = Uk_N1 + Zdif;
; 318 : out2[k] = Uk - Zsum;
; 319 : out3[k] = Uk_N1 - Zdif;
; 320 : #else
; 321 : complexf const &w = twiddle[k];
; 322 : complexf const &in2 = out2[k];
; 323 : complexf const &in3 = out3[k];
; 324 :
; 325 : float Zkr = w.re*in2.re - w.im*in2.im;
;; xmm5 = w.re, xmm2 = in2.re, xmm3 = w.im; every operand is loaded straight
;; into an xmm register, with no stack temporaries for the float values.
movss xmm5, DWORD PTR [ecx-4]
movss xmm2, DWORD PTR [ebx+esi]
movss xmm3, DWORD PTR [ecx]
;; edx = out2 - tw, so [edx+ecx] is in2.im.
mov edx, DWORD PTR tv840[esp+28]
; 326 : float Zki = w.re*in2.im + w.im*in2.re;
; 327 : float Zpkr = w.re*in3.re + w.im*in3.im;
;; edi = out3 - tw, so [edi+ecx] is in3.im.
mov edi, DWORD PTR tv836[esp+28]
;; xmm4 = w.re*in2.re - w.im*in2.im   (Zkr)
;; xmm7 = w.re*in2.im + w.im*in2.re   (Zki)
movss xmm0, DWORD PTR [edx+ecx]
mulss xmm0, xmm3
movaps xmm4, xmm2
mulss xmm2, xmm3
movaps xmm7, xmm5
mulss xmm7, DWORD PTR [edx+ecx]
mulss xmm4, xmm5
addss xmm7, xmm2
;; xmm2 is reused for in3.re.
movss xmm2, DWORD PTR [esi+ebp]
subss xmm4, xmm0
;; xmm6 = w.re*in3.re + w.im*in3.im   (Zpkr)
movss xmm0, DWORD PTR [edi+ecx]
mulss xmm0, xmm3
movaps xmm6, xmm2
mulss xmm6, xmm5
; 328 : float Zpki = w.re*in3.im - w.im*in3.re;
;; xmm5 = w.re*in3.im here; w.im*in3.re is subtracted from it a few lines down.
mulss xmm5, DWORD PTR [edi+ecx]
addss xmm6, xmm0
; 329 :
; 330 : float Zsumr = Zkr + Zpkr;
; 331 : float Zsumi = Zki + Zpki;
; 332 : float Zdifr = Zki - Zpki;
; 333 : float Zdifi = Zpkr - Zkr;
; 334 :
; 335 : out2[k].re = out0[k].re - Zsumr;
;; xmm0 = out0[k].re.
movss xmm0, DWORD PTR [esi]
mulss xmm2, xmm3
;; Both running pointers are advanced here, in the middle of the body, so
;; every later address in this pass carries a -8 displacement.
add ecx, 8
;; xmm1 = Zpkr + Zkr (Zsumr); xmm5 = Zpki; xmm6 = Zpkr - Zkr (Zdifi).
movaps xmm1, xmm6
subss xmm5, xmm2
addss xmm1, xmm4
subss xmm6, xmm4
add esi, 8
;; Counter decrement. Everything from here to the closing jne is SSE
;; arithmetic or mov, so the jne consumes the zero flag produced by this dec.
dec DWORD PTR _N$[esp+24]
subss xmm0, xmm1
;; out2[k].re stored.
movss DWORD PTR [ebx+esi-8], xmm0
; 336 : out2[k].im = out0[k].im - Zsumi;
;; eax = out0 - tw, so [eax+ecx-8] is out0[k].im.
movss xmm0, DWORD PTR [eax+ecx-8]
;; xmm2 = Zpki + Zki (Zsumi); xmm7 = Zki - Zpki (Zdifr).
movaps xmm2, xmm5
addss xmm2, xmm7
subss xmm7, xmm5
subss xmm0, xmm2
;; out2[k].im stored (edx still holds out2 - tw).
movss DWORD PTR [edx+ecx-8], xmm0
; 337 : out0[k].re += Zsumr;
;; out0[k].re and .im are read from memory again after the out2 stores
;; rather than reusing the values loaded earlier (presumably because the
;; compiler cannot rule out aliasing between the arrays).
movss xmm0, DWORD PTR [esi-8]
; 338 : out0[k].im += Zsumi;
addss xmm2, DWORD PTR [eax+ecx-8]
; 339 : out3[k].re = out1[k].re - Zdifr;
;; edx = out1 - out0, so [edx+esi-8] is out1[k].re.
mov edx, DWORD PTR _out1$1$[esp+24]
addss xmm0, xmm1
;; out0[k].im stored.
movss DWORD PTR [eax+ecx-8], xmm2
; 340 : out3[k].im = out1[k].im - Zdifi;
;; eax is repurposed as out1 - tw, so [eax+ecx-8] is now out1[k].im; the
;; out0 - tw value is reloaded into eax at the bottom of the loop.
mov eax, DWORD PTR tv834[esp+28]
;; out0[k].re stored.
movss DWORD PTR [esi-8], xmm0
movss xmm0, DWORD PTR [edx+esi-8]
subss xmm0, xmm7
;; out3[k].re stored.
movss DWORD PTR [esi+ebp-8], xmm0
movss xmm0, DWORD PTR [eax+ecx-8]
subss xmm0, xmm6
;; out3[k].im stored (edi still holds out3 - tw).
movss DWORD PTR [edi+ecx-8], xmm0
; 341 : out1[k].re += Zdifr;
; 342 : out1[k].im += Zdifi;
;; out1[k] is read from memory again after the out3 stores, updated, and
;; written back.
movss xmm0, DWORD PTR [eax+ecx-8]
addss xmm7, DWORD PTR [edx+esi-8]
addss xmm0, xmm6
movss DWORD PTR [edx+esi-8], xmm7
movss DWORD PTR [eax+ecx-8], xmm0
;; Restore eax = out0 - tw for the next iteration.
mov eax, DWORD PTR tv838[esp+24]
jne $LL3@ffts_rec
$LN1@ffts_rec:
;; Loop exit; register restores as shown in the listing.
pop edi
pop esi
pop ebp
pop ebx
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment