Skip to content

Instantly share code, notes, and snippets.

@cyanreg
Last active March 7, 2021 21:15
4-point FFT
/*
 * In-place 4-point complex FFT.
 *
 * Reads z[0..3] (call them z0..z3) and overwrites them, unscaled, with
 *   z[0] = z0 +   z1 + z2 +   z3
 *   z[1] = z0 - i*z1 - z2 + i*z3
 *   z[2] = z0 -   z1 + z2 -   z3
 *   z[3] = z0 + i*z1 - z2 - i*z3
 *
 * s    - not referenced by this function
 * z    - four complex samples, transformed in place
 * temp - not referenced by this function
 *
 * The local names (r*, t*, a*, b*) are the labels used in the trailing
 * comments of the FFT4 assembly macro; the stage notes below carry over
 * the instruction counts for that SIMD version.
 */
static void fft4(void *s, FFTComplex *z, FFTComplex *temp)
{
    /* Snapshot the inputs: every load happens before any store. */
    const FFTSample z0_re = z[0].re;
    const FFTSample z0_im = z[0].im;
    const FFTSample z1_re = z[1].re;
    const FFTSample z1_im = z[1].im;
    const FFTSample z2_re = z[2].re;
    const FFTSample z2_im = z[2].im;
    const FFTSample z3_re = z[3].re;
    const FFTSample z3_im = z[3].im;

    /*
     * Stage 1: sums (t) and differences (r) of z0/z2 and of z1/z3.
     * SIMD: 1 sub + 1 add = 2 instructions, then 2 shuffles
     * (r5-r8 / t5-t8 would belong to a second transform).
     */
    const FFTSample t1 = z0_re + z2_re;
    const FFTSample t2 = z0_im + z2_im;
    const FFTSample t3 = z1_re + z3_re;
    const FFTSample t4 = z1_im + z3_im;

    const FFTSample r1 = z0_re - z2_re;
    const FFTSample r2 = z0_im - z2_im;
    const FFTSample r3 = z1_re - z3_re;
    const FFTSample r4 = z1_im - z3_im;

    /*
     * Stage 2: combine the stage-1 terms into the four outputs.
     * SIMD: 1 add + 1 sub, then 3 shuffles.
     */
    const FFTSample a1 = t1 + t3;
    const FFTSample a2 = t2 + t4;
    const FFTSample a3 = t1 - t3;
    const FFTSample a4 = t2 - t4;

    const FFTSample b1 = r1 + r4;
    const FFTSample b2 = r2 - r3;
    const FFTSample b3 = r1 - r4;
    const FFTSample b4 = r2 + r3;

    /* a* fill the even-indexed outputs, b* the odd-indexed ones. */
    z[0].re = a1;
    z[0].im = a2;
    z[1].re = b1;
    z[1].im = b2;
    z[2].re = a3;
    z[2].im = a4;
    z[3].re = b3;
    z[3].im = b4;
}
#if 0
; Single 4-point in-place complex FFT (will do 2 transforms at once in [AVX] mode)
; %1 - even coefficients (r0.reim, r2.reim, r4.reim, r6.reim)
; %2 - odd coefficients (r1.reim, r3.reim, r5.reim, r7.reim)
; %3 - temporary
;
; The macro takes exactly three parameters and writes to all three of them.
; Judging by the trailing comments on the final shuffles, the results end up
; in %1 (a1234, even outputs) and %2 (b1234, odd outputs).
; The trailing comment on each instruction names the values its destination
; holds afterwards; names in [brackets] belong to the second transform.
; NOTE(review): the r/t/a/b labels appear to match the locals of the C fft4
; reference implementation - confirm.
; NOTE(review): the three-operand subps/shufps forms and the q#### shuffle
; constants are presumably provided by an x86inc-style macro layer - confirm.
%macro FFT4 3
; Stage 1: one subtract and one add, then two shuffles to regroup the lanes.
subps %3, %1, %2 ; r1234, [r5678]
addps %1, %2 ; t1234, [t5678]
shufps %2, %1, %3, q1010 ; t12, r12
shufps %1, %3, q2332 ; t34, r43
; Stage 2: one subtract and one add, then three shuffles to order the outputs.
subps %3, %2, %1 ; a34, b32
addps %2, %1 ; a12, b14
shufps %1, %2, %3, q1010 ; a1234 even
shufps %2, %3, q2332 ; b1423
shufps %2, %2, q1320 ; b1234 odd
%endmacro
#endif
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment