Last active
March 7, 2021 21:15
4-point FFT
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * In-place, unnormalised 4-point complex DFT of z[0..3].
 *
 * Input and output are both in natural order. The odd outputs use the
 * forward-transform sign: z[1] = (z0 - z2) - i*(z1 - z3) and
 * z[3] = (z0 - z2) + i*(z1 - z3).
 *
 * The local names (r1-r4, t1-t4, a1-a4, b1-b4) are the ones used by the lane
 * comments of the disabled FFT4 assembly macro that follows this function, so
 * this scalar version doubles as a reference for it. The instruction counts
 * quoted in the comments below refer to that SIMD version.
 *
 * @param s    not referenced by this implementation
 * @param z    four complex samples, transformed in place
 * @param temp not referenced by this implementation
 */
static void fft4(void *s, FFTComplex *z, FFTComplex *temp)
{
    /* Differences of the (z[0], z[2]) and (z[1], z[3]) pairs. */
    FFTSample r1 = z[0].re - z[2].re;
    FFTSample r2 = z[0].im - z[2].im;
    FFTSample r3 = z[1].re - z[3].re;
    FFTSample r4 = z[1].im - z[3].im;
    /* r5-r8 second transform */

    /* Sums of the same pairs. */
    FFTSample t1 = z[0].re + z[2].re;
    FFTSample t2 = z[0].im + z[2].im;
    FFTSample t3 = z[1].re + z[3].re;
    FFTSample t4 = z[1].im + z[3].im;
    /* t5-t8 second transform */

    /* 1sub + 1add = 2 instructions */

    /* 2 shufs */

    /*
     * Second stage. a1..a4 combine the sums and become the even outputs;
     * b1..b4 combine the differences and become the odd outputs. Pairing r1
     * with r4 and r2 with r3 is what applies the multiplication by -i / +i.
     */
    FFTSample a3 = t1 - t3;
    FFTSample a4 = t2 - t4;
    FFTSample b3 = r1 - r4;
    FFTSample b2 = r2 - r3;

    FFTSample a1 = t1 + t3;
    FFTSample a2 = t2 + t4;
    FFTSample b1 = r1 + r4;
    FFTSample b4 = r2 + r3;

    /* 1 add 1 sub 3 shufs */

    /* Every input was read into a local above, so storing in place is safe. */
    z[0].re = a1;
    z[0].im = a2;
    z[2].re = a3;
    z[2].im = a4;

    z[1].re = b1;
    z[1].im = b2;
    z[3].re = b3;
    z[3].im = b4;
}
#if 0
; Disabled reference: SIMD macro implementing the same butterfly network as the
; C fft4() above. The r/t/a/b names in the per-instruction comments are the
; locals of that C function, listed in lane order from lowest to highest.
; Bracketed names such as [r5678] are the matching values of the second
; transform that is processed alongside the first in AVX mode.
;
; Single 4-point in-place complex FFT (will do 2 transforms at once in [AVX] mode)
; %1 - even coefficients (r0.reim, r2.reim, r4.reim, r6.reim)
; %2 - odd coefficients  (r1.reim, r3.reim, r5.reim, r7.reim)
; %3 - temporary
;
; On exit %1 holds a1..a4 (written to z[0] and z[2] by the C version) and %2
; holds b1..b4 (written to z[1] and z[3]).
;
; NOTE(review): for the first subps to yield r1..r4 as the C code defines them,
; %1 has to hold z[0],z[1] and %2 has to hold z[2],z[3] on entry. That differs
; from the even/odd description above, which matches the output layout.
; Confirm the expected input layout against the caller.
;
; NOTE(review): the three-operand SSE forms and the qNNNN shuffle constants are
; presumably provided by x86inc-style helper macros - confirm.
%macro FFT4 3
    subps  %3, %1, %2         ; r1234, [r5678]   stage 1 differences
    addps  %1, %2             ; t1234, [t5678]   stage 1 sums
    shufps %2, %1, %3, q1010  ; t12, r12         low halves of sums and differences
    shufps %1, %3, q2332      ; t34, r43         high halves, r3 and r4 swapped
    subps  %3, %2, %1         ; a34, b32         stage 2 differences
    addps  %2, %1             ; a12, b14         stage 2 sums
    shufps %1, %2, %3, q1010  ; a1234 even
    shufps %2, %3, q2332      ; b1423
    shufps %2, %2, q1320      ; b1234 odd        reorder b1 b4 b2 b3 into b1 b2 b3 b4
%endmacro
#endif
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment